diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index ebc40e9279137..9f96f5beab3b2 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -149,7 +149,7 @@ jobs:
catalyst, hive-thriftserver
- >-
streaming, sql-kafka-0-10, streaming-kafka-0-10,
- mllib-local, mllib,
+ mllib-local, mllib-common, mllib,
yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl,
connect, protobuf
# Here, we split Hive and SQL tests into some of slow ones and the rest of them.
diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java
index a15d07cf59958..bf7c256fc94ff 100644
--- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java
+++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java
@@ -56,7 +56,7 @@ public KVTypeInfo(Class<?> type) {
KVIndex idx = m.getAnnotation(KVIndex.class);
if (idx != null) {
checkIndex(idx, indices);
- Preconditions.checkArgument(m.getParameterTypes().length == 0,
+ Preconditions.checkArgument(m.getParameterCount() == 0,
"Annotated method %s::%s should not have any parameters.", type.getName(), m.getName());
m.setAccessible(true);
indices.put(idx.value(), idx);
diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml
index 7606795f8203a..ac4b1655f5ea0 100644
--- a/connector/connect/client/jvm/pom.xml
+++ b/connector/connect/client/jvm/pom.xml
@@ -62,6 +62,18 @@
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-mllib-common_${scala.binary.version}</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/Estimator.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/Estimator.scala
new file mode 100644
index 0000000000000..144a10641c758
--- /dev/null
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/Estimator.scala
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml
+
+import scala.annotation.varargs
+
+import org.apache.spark.annotation.Since
+import org.apache.spark.ml.param.{ParamMap, ParamPair}
+import org.apache.spark.sql.Dataset
+
+/**
+ * Abstract class for estimators that fit models to data.
+ */
+abstract class Estimator[M <: Model[M]] extends PipelineStage {
+
+ /**
+ * Fits a single model to the input data with optional parameters.
+ *
+ * @param dataset
+ * input dataset
+ * @param firstParamPair
+ * the first param pair, overrides embedded params
+ * @param otherParamPairs
+ * other param pairs. These values override any specified in this Estimator's embedded
+ * ParamMap.
+ * @return
+ * fitted model
+ */
+ @Since("3.5.0")
+ @varargs
+ def fit(
+ dataset: Dataset[_],
+ firstParamPair: ParamPair[_],
+ otherParamPairs: ParamPair[_]*): M = {
+ val map = new ParamMap()
+ .put(firstParamPair)
+ .put(otherParamPairs: _*)
+ fit(dataset, map)
+ }
+
+ /**
+ * Fits a single model to the input data with provided parameter map.
+ *
+ * @param dataset
+ * input dataset
+ * @param paramMap
+ * Parameter map. These values override any specified in this Estimator's embedded ParamMap.
+ * @return
+ * fitted model
+ */
+ @Since("3.5.0")
+ def fit(dataset: Dataset[_], paramMap: ParamMap): M = {
+ copy(paramMap).fit(dataset)
+ }
+
+ /**
+ * Fits a model to the input data.
+ */
+ @Since("3.5.0")
+ def fit(dataset: Dataset[_]): M
+
+ /**
+ * Fits multiple models to the input data with multiple sets of parameters. The default
+ * implementation uses a for loop on each parameter map. Subclasses could override this to
+ * optimize multi-model training.
+ *
+ * @param dataset
+ * input dataset
+ * @param paramMaps
+ * An array of parameter maps. These values override any specified in this Estimator's
+ * embedded ParamMap.
+ * @return
+ * fitted models, matching the input parameter maps
+ */
+ @Since("3.5.0")
+ def fit(dataset: Dataset[_], paramMaps: Seq[ParamMap]): Seq[M] = {
+ paramMaps.map(fit(dataset, _))
+ }
+
+ @Since("3.5.0")
+ override def copy(extra: ParamMap): Estimator[M]
+}
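For context on how the overloads above compose: the ParamPair variants fold their arguments into a single ParamMap and delegate to fit(dataset, paramMap), which copies the estimator with the extra params before calling the abstract fit(dataset). A caller-side sketch, assuming a hypothetical MyEstimator with a maxIter param (none of these names are part of this change):

    // Hypothetical usage; MyEstimator, MyModel, maxIter and trainDf are illustrative only.
    val est = new MyEstimator()
    // A param pair overrides the estimator's embedded ParamMap for this fit only.
    val m1: MyModel = est.fit(trainDf, est.maxIter -> 10)
    // Equivalent explicit form: copy the estimator with the map, then fit.
    val m2: MyModel = est.fit(trainDf, ParamMap(est.maxIter -> 10))
    // The Seq[ParamMap] overload fits one model per map.
    val models: Seq[MyModel] =
      est.fit(trainDf, Seq(ParamMap(est.maxIter -> 5), ParamMap(est.maxIter -> 10)))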
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/Model.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/Model.scala
new file mode 100644
index 0000000000000..a5d6aa1a0795c
--- /dev/null
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/Model.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml
+
+import org.apache.spark.annotation.Since
+import org.apache.spark.ml.param.ParamMap
+
+/**
+ * A fitted model, i.e., a [[Transformer]] produced by an [[Estimator]].
+ *
+ * @tparam M
+ * model type
+ */
+abstract class Model[M <: Model[M]] extends Transformer {
+
+ /**
+ * The parent estimator that produced this model.
+ * @note
+ * For ensembles' component Models, this value can be null.
+ */
+ @transient var parent: Estimator[M] = _
+
+ /**
+ * Sets the parent of this model (Java API).
+ */
+ @Since("3.5.0")
+ def setParent(parent: Estimator[M]): M = {
+ this.parent = parent
+ this.asInstanceOf[M]
+ }
+
+ /** Indicates whether this [[Model]] has a corresponding parent. */
+ @Since("3.5.0")
+ def hasParent: Boolean = parent != null
+
+ @Since("3.5.0")
+ override def copy(extra: ParamMap): M
+}
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/Pipeline.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/Pipeline.scala
new file mode 100644
index 0000000000000..cebbcd167ce34
--- /dev/null
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/Pipeline.scala
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml
+
+import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.internal.Logging
+import org.apache.spark.ml.param.{ParamMap, Params}
+import org.apache.spark.sql.types.StructType
+
+/**
+ * A stage in a pipeline, either an [[Estimator]] or a [[Transformer]].
+ */
+abstract class PipelineStage extends Params with Logging {
+
+ /**
+ * Check transform validity and derive the output schema from the input schema.
+ *
+ * We check validity for interactions between parameters during `transformSchema` and raise an
+ * exception if any parameter value is invalid. Parameter value checks which do not depend on
+ * other parameters are handled by `Param.validate()`.
+ *
+ * A typical implementation should first verify the schema change and parameter validity,
+ * including complex parameter interaction checks.
+ */
+ def transformSchema(schema: StructType): StructType
+
+ /**
+ * :: DeveloperApi ::
+ *
+ * Derives the output schema from the input schema and parameters, optionally with logging.
+ *
+ * This should be optimistic. If it is unclear whether the schema will be valid, then it should
+ * be assumed valid until proven otherwise.
+ */
+ @DeveloperApi
+ protected def transformSchema(schema: StructType, logging: Boolean): StructType = {
+ if (logging) {
+ logDebug(s"Input schema: ${schema.json}")
+ }
+ val outputSchema = transformSchema(schema)
+ if (logging) {
+ logDebug(s"Expected output schema: ${outputSchema.json}")
+ }
+ outputSchema
+ }
+
+ override def copy(extra: ParamMap): PipelineStage
+}
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/Predictor.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/Predictor.scala
new file mode 100644
index 0000000000000..517d5e060f531
--- /dev/null
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/Predictor.scala
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml
+
+import org.apache.spark.annotation.Since
+import org.apache.spark.ml.linalg.VectorUDT
+import org.apache.spark.ml.param._
+import org.apache.spark.ml.util.SchemaUtils
+import org.apache.spark.sql.{DataFrame, Dataset}
+import org.apache.spark.sql.types.{DataType, StructType}
+
+/**
+ * Abstraction for prediction problems (regression and classification). It accepts all NumericType
+ * labels and will automatically cast them to DoubleType in `fit()`. If this predictor supports
+ * weights, it accepts all NumericType weights, which will be automatically cast to DoubleType
+ * in `fit()`.
+ *
+ * @tparam FeaturesType
+ * Type of features. E.g., `VectorUDT` for vector features.
+ * @tparam Learner
+ * Specialization of this class. If you subclass this type, use this type parameter to specify
+ * the concrete type.
+ * @tparam M
+ * Specialization of [[PredictionModel]]. If you subclass this type, use this type parameter to
+ * specify the concrete type for the corresponding model.
+ */
+abstract class Predictor[
+ FeaturesType,
+ Learner <: Predictor[FeaturesType, Learner, M],
+ M <: PredictionModel[FeaturesType, M]]
+ extends Estimator[M]
+ with PredictorParams {
+
+ /** @group setParam */
+ @Since("3.5.0")
+ def setLabelCol(value: String): Learner = set(labelCol, value).asInstanceOf[Learner]
+
+ /** @group setParam */
+ @Since("3.5.0")
+ def setFeaturesCol(value: String): Learner = set(featuresCol, value).asInstanceOf[Learner]
+
+ /** @group setParam */
+ @Since("3.5.0")
+ def setPredictionCol(value: String): Learner = set(predictionCol, value).asInstanceOf[Learner]
+
+ @Since("3.5.0")
+ override def fit(dataset: Dataset[_]): M = {
+ // TODO: should send the id of the input dataset and the latest params to the server,
+ // then invoke the 'fit' method of the remote predictor
+ throw new NotImplementedError
+ }
+
+ @Since("3.5.0")
+ override def copy(extra: ParamMap): Learner
+
+ /**
+ * Returns the SQL DataType corresponding to the FeaturesType type parameter.
+ *
+ * This is used by `validateAndTransformSchema()`. This workaround is needed since SQL has
+ * different APIs for Scala and Java.
+ *
+ * The default value is VectorUDT, but it may be overridden if FeaturesType is not Vector.
+ */
+ private[ml] def featuresDataType: DataType = new VectorUDT
+
+ override def transformSchema(schema: StructType): StructType = {
+ validateAndTransformSchema(schema, fitting = true, featuresDataType)
+ }
+}
+
+/**
+ * Abstraction for a model for prediction tasks (regression and classification).
+ *
+ * @tparam FeaturesType
+ * Type of features. E.g., `VectorUDT` for vector features.
+ * @tparam M
+ * Specialization of [[PredictionModel]]. If you subclass this type, use this type parameter to
+ * specify the concrete type for the corresponding model.
+ */
+abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType, M]]
+ extends Model[M]
+ with PredictorParams {
+
+ /** @group setParam */
+ @Since("3.5.0")
+ def setFeaturesCol(value: String): M = set(featuresCol, value).asInstanceOf[M]
+
+ /** @group setParam */
+ @Since("3.5.0")
+ def setPredictionCol(value: String): M = set(predictionCol, value).asInstanceOf[M]
+
+ /** Returns the number of features the model was trained on. If unknown, returns -1 */
+ @Since("3.5.0")
+ def numFeatures: Int = -1
+
+ /**
+ * Returns the SQL DataType corresponding to the FeaturesType type parameter.
+ *
+ * This is used by `validateAndTransformSchema()`. This workaround is needed since SQL has
+ * different APIs for Scala and Java.
+ *
+ * The default value is VectorUDT, but it may be overridden if FeaturesType is not Vector.
+ */
+ protected def featuresDataType: DataType = new VectorUDT
+
+ @Since("3.5.0")
+ override def transformSchema(schema: StructType): StructType = {
+ var outputSchema = validateAndTransformSchema(schema, fitting = false, featuresDataType)
+ if ($(predictionCol).nonEmpty) {
+ outputSchema = SchemaUtils.updateNumeric(outputSchema, $(predictionCol))
+ }
+ outputSchema
+ }
+
+ /**
+ * Transforms dataset by reading from [[featuresCol]], calling `predict`, and storing the
+ * predictions as a new column [[predictionCol]].
+ *
+ * @param dataset
+ * input dataset
+ * @return
+ * transformed dataset with [[predictionCol]] of type `Double`
+ */
+ @Since("3.5.0")
+ override def transform(dataset: Dataset[_]): DataFrame = {
+ transformSchema(dataset.schema, logging = true)
+ if ($(predictionCol).nonEmpty) {
+ transformImpl(dataset)
+ } else {
+ this.logWarning(
+ s"$uid: Predictor.transform() does nothing" +
+ " because no output columns were set.")
+ dataset.toDF
+ }
+ }
+
+ protected def transformImpl(dataset: Dataset[_]): DataFrame = {
+ // TODO: should send the id of the input dataset and the latest params to the server,
+ // then invoke the 'transform' method of the remote model
+ throw new NotImplementedError
+ }
+
+ /**
+ * Predict label for the given features. This method is used to implement `transform()` and
+ * output [[predictionCol]].
+ */
+ @Since("3.5.0")
+ def predict(features: FeaturesType): Double
+}
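A caller-side sketch of the column wiring described above (the Connect round-trip in transformImpl is still a TODO); `model` and the column names are hypothetical:

    // Hypothetical usage; `model` is some concrete PredictionModel subclass.
    val scored = model
      .setFeaturesCol("features")
      .setPredictionCol("prediction") // appended as a Double column by transform()
      .transform(testDf)
    // An empty prediction column turns transform() into a pass-through (it only logs a warning).
    val unchanged = model.setPredictionCol("").transform(testDf)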
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/Transformer.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/Transformer.scala
new file mode 100644
index 0000000000000..4eebf031b90de
--- /dev/null
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/Transformer.scala
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml
+
+import scala.annotation.varargs
+import scala.reflect.runtime.universe.TypeTag
+
+import org.apache.spark.annotation.Since
+import org.apache.spark.internal.Logging
+import org.apache.spark.ml.param._
+import org.apache.spark.ml.param.shared._
+import org.apache.spark.sql.{DataFrame, Dataset}
+import org.apache.spark.sql.types._
+
+/**
+ * Abstract class for transformers that transform one dataset into another.
+ */
+abstract class Transformer extends PipelineStage {
+
+ /**
+ * Transforms the dataset with optional parameters.
+ * @param dataset
+ * input dataset
+ * @param firstParamPair
+ * the first param pair, overrides embedded params
+ * @param otherParamPairs
+ * other param pairs, override embedded params
+ * @return
+ * transformed dataset
+ */
+ @Since("3.5.0")
+ @varargs
+ def transform(
+ dataset: Dataset[_],
+ firstParamPair: ParamPair[_],
+ otherParamPairs: ParamPair[_]*): DataFrame = {
+ val map = new ParamMap()
+ .put(firstParamPair)
+ .put(otherParamPairs: _*)
+ transform(dataset, map)
+ }
+
+ /**
+ * Transforms the dataset with provided parameter map as additional parameters.
+ * @param dataset
+ * input dataset
+ * @param paramMap
+ * additional parameters, overwrite embedded params
+ * @return
+ * transformed dataset
+ */
+ @Since("3.5.0")
+ def transform(dataset: Dataset[_], paramMap: ParamMap): DataFrame = {
+ this.copy(paramMap).transform(dataset)
+ }
+
+ /**
+ * Transforms the input dataset.
+ */
+ @Since("3.5.0")
+ def transform(dataset: Dataset[_]): DataFrame
+
+ @Since("3.5.0")
+ override def copy(extra: ParamMap): Transformer
+}
+
+/**
+ * Abstract class for transformers that take one input column, apply a transformation, and output
+ * the result as a new column.
+ */
+abstract class UnaryTransformer[IN: TypeTag, OUT: TypeTag, T <: UnaryTransformer[IN, OUT, T]]
+ extends Transformer
+ with HasInputCol
+ with HasOutputCol
+ with Logging {
+
+ /** @group setParam */
+ @Since("3.5.0")
+ def setInputCol(value: String): T = set(inputCol, value).asInstanceOf[T]
+
+ /** @group setParam */
+ @Since("3.5.0")
+ def setOutputCol(value: String): T = set(outputCol, value).asInstanceOf[T]
+
+ /**
+ * Creates the transform function using the given param map. The input param map already takes
+ * account of the embedded param map. So the param values should be determined solely by the
+ * input param map.
+ */
+ protected def createTransformFunc: IN => OUT
+
+ /**
+ * Returns the data type of the output column.
+ */
+ @Since("3.5.0")
+ protected def outputDataType: DataType
+
+ /**
+ * Validates the input type. Throw an exception if it is invalid.
+ */
+ protected def validateInputType(inputType: DataType): Unit = {}
+
+ @Since("3.5.0")
+ override def transformSchema(schema: StructType): StructType = {
+ val inputType = schema($(inputCol)).dataType
+ validateInputType(inputType)
+ if (schema.fieldNames.contains($(outputCol))) {
+ throw new IllegalArgumentException(s"Output column ${$(outputCol)} already exists.")
+ }
+ val outputFields = schema.fields :+
+ StructField($(outputCol), outputDataType, nullable = false)
+ StructType(outputFields)
+ }
+
+ override def transform(dataset: Dataset[_]): DataFrame = {
+ // TODO: should send the id of the input dataset and the latest params to the server,
+ // then invoke the 'transform' method of the remote model
+ throw new NotImplementedError
+ }
+
+ @Since("3.5.0")
+ override def copy(extra: ParamMap): T = defaultCopy(extra)
+}
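As an illustration of the UnaryTransformer contract above (createTransformFunc, outputDataType, and the schema check that appends a non-nullable output column), a hedged sketch of a hypothetical subclass; it is not part of this change and assumes the usual Identifiable.randomUID helper is available on the client:

    import org.apache.spark.ml.util.Identifiable
    import org.apache.spark.sql.types.{DataType, IntegerType, StringType}

    // Hypothetical transformer mapping a string column to its length.
    class StringLengthTransformer(override val uid: String)
        extends UnaryTransformer[String, Int, StringLengthTransformer] {

      def this() = this(Identifiable.randomUID("strLen"))

      // Pure function applied to every value of the input column.
      override protected def createTransformFunc: String => Int = _.length

      // The appended output column is a non-nullable IntegerType.
      override protected def outputDataType: DataType = IntegerType

      // Reject non-string inputs before anything is sent to the server.
      override protected def validateInputType(inputType: DataType): Unit =
        require(inputType == StringType, s"Input type must be StringType but got $inputType")
    }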
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/classification/Classifier.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/classification/Classifier.scala
new file mode 100644
index 0000000000000..9adf49866b47f
--- /dev/null
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/classification/Classifier.scala
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.classification
+
+import org.apache.spark.annotation.Since
+import org.apache.spark.ml.{PredictionModel, Predictor}
+import org.apache.spark.ml.linalg.Vector
+import org.apache.spark.ml.util._
+import org.apache.spark.sql.{DataFrame, Dataset}
+import org.apache.spark.sql.types.StructType
+
+/**
+ * Single-label binary or multiclass classification. Classes are indexed {0, 1, ..., numClasses -
+ * 1}.
+ *
+ * @tparam FeaturesType
+ * Type of input features. E.g., `Vector`
+ * @tparam E
+ * Concrete Estimator type
+ * @tparam M
+ * Concrete Model type
+ */
+abstract class Classifier[
+ FeaturesType,
+ E <: Classifier[FeaturesType, E, M],
+ M <: ClassificationModel[FeaturesType, M]]
+ extends Predictor[FeaturesType, E, M]
+ with ClassifierParams {
+
+ @Since("3.5.0")
+ def setRawPredictionCol(value: String): E = set(rawPredictionCol, value).asInstanceOf[E]
+
+ // TODO: defaultEvaluator (follow-up PR)
+}
+
+/**
+ * Model produced by a [[Classifier]]. Classes are indexed {0, 1, ..., numClasses - 1}.
+ *
+ * @tparam FeaturesType
+ * Type of input features. E.g., `Vector`
+ * @tparam M
+ * Concrete Model type
+ */
+abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[FeaturesType, M]]
+ extends PredictionModel[FeaturesType, M]
+ with ClassifierParams {
+
+ /** @group setParam */
+ @Since("3.5.0")
+ def setRawPredictionCol(value: String): M = set(rawPredictionCol, value).asInstanceOf[M]
+
+ /** Number of classes (values which the label can take). */
+ @Since("3.5.0")
+ def numClasses: Int
+
+ @Since("3.5.0")
+ override def transformSchema(schema: StructType): StructType = {
+ var outputSchema = super.transformSchema(schema)
+ if ($(predictionCol).nonEmpty) {
+ outputSchema = SchemaUtils.updateNumValues(schema, $(predictionCol), numClasses)
+ }
+ if ($(rawPredictionCol).nonEmpty) {
+ outputSchema =
+ SchemaUtils.updateAttributeGroupSize(outputSchema, $(rawPredictionCol), numClasses)
+ }
+ outputSchema
+ }
+
+ /**
+ * Transforms dataset by reading from [[featuresCol]], and appending new columns as specified by
+ * parameters:
+ * - predicted labels as [[predictionCol]] of type `Double`
+ * - raw predictions (confidences) as [[rawPredictionCol]] of type `Vector`.
+ *
+ * @param dataset
+ * input dataset
+ * @return
+ * transformed dataset
+ */
+ @Since("3.5.0")
+ override def transform(dataset: Dataset[_]): DataFrame = {
+ // TODO: should send the id of the input dataset and the latest params to the server,
+ // then invoke the 'transform' method of the remote model
+ throw new NotImplementedError
+ }
+
+ final override def transformImpl(dataset: Dataset[_]): DataFrame =
+ throw new UnsupportedOperationException(s"transformImpl is not supported in $getClass")
+
+ /**
+ * Predict label for the given features. This method is used to implement `transform()` and
+ * output [[predictionCol]].
+ *
+ * This default implementation for classification predicts the index of the maximum value from
+ * `predictRaw()`.
+ */
+ @Since("3.5.0")
+ override def predict(features: FeaturesType): Double = {
+ // TODO: should send the vector to the server,
+ // then invoke the 'predict' method of the remote model
+
+ // Note: Subclasses may need to override this, since the result
+ // may be adjusted by params like `thresholds`.
+ throw new NotImplementedError
+ }
+
+ /**
+ * Raw prediction for each possible label. The meaning of a "raw" prediction may vary between
+ * algorithms, but it intuitively gives a measure of confidence in each possible label (where
+ * larger = more confident). This internal method is used to implement `transform()` and output
+ * [[rawPredictionCol]].
+ *
+ * @return
+ * vector where element i is the raw prediction for label i. This raw prediction may be any
+ * real number, where a larger value indicates greater confidence for that label.
+ */
+ @Since("3.5.0")
+ def predictRaw(features: FeaturesType): Vector = {
+ // TODO: should send the vector to the server,
+ // then invoke the 'predictRaw' method of the remote model
+ throw new NotImplementedError
+ }
+}
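To make the "index of the maximum value from `predictRaw()`" remark concrete, a hedged sketch of that default rule (in the Connect client the actual computation is intended to happen on the server):

    // Sketch only: raw(i) is the confidence for label i; the prediction is the argmax.
    def rawToPrediction(raw: Vector): Double = raw.argmax.toDouble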
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala
new file mode 100644
index 0000000000000..e4db8a047fa27
--- /dev/null
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.classification
+
+import org.apache.spark.annotation.Since
+import org.apache.spark.ml.linalg.Vector
+import org.apache.spark.ml.util.SchemaUtils
+import org.apache.spark.sql.{DataFrame, Dataset}
+import org.apache.spark.sql.types.StructType
+
+/**
+ * Single-label binary or multiclass classifier which can output class conditional probabilities.
+ *
+ * @tparam FeaturesType
+ * Type of input features. E.g., `Vector`
+ * @tparam E
+ * Concrete Estimator type
+ * @tparam M
+ * Concrete Model type
+ */
+abstract class ProbabilisticClassifier[
+ FeaturesType,
+ E <: ProbabilisticClassifier[FeaturesType, E, M],
+ M <: ProbabilisticClassificationModel[FeaturesType, M]]
+ extends Classifier[FeaturesType, E, M]
+ with ProbabilisticClassifierParams {
+
+ /** @group setParam */
+ @Since("3.5.0")
+ def setProbabilityCol(value: String): E = set(probabilityCol, value).asInstanceOf[E]
+
+ /** @group setParam */
+ @Since("3.5.0")
+ def setThresholds(value: Array[Double]): E = set(thresholds, value).asInstanceOf[E]
+}
+
+/**
+ * Model produced by a [[ProbabilisticClassifier]]. Classes are indexed {0, 1, ..., numClasses -
+ * 1}.
+ *
+ * @tparam FeaturesType
+ * Type of input features. E.g., `Vector`
+ * @tparam M
+ * Concrete Model type
+ */
+abstract class ProbabilisticClassificationModel[
+ FeaturesType,
+ M <: ProbabilisticClassificationModel[FeaturesType, M]]
+ extends ClassificationModel[FeaturesType, M]
+ with ProbabilisticClassifierParams {
+
+ /** @group setParam */
+ @Since("3.5.0")
+ def setProbabilityCol(value: String): M = set(probabilityCol, value).asInstanceOf[M]
+
+ /** @group setParam */
+ @Since("3.5.0")
+ def setThresholds(value: Array[Double]): M = {
+ require(
+ value.length == numClasses,
+ this.getClass.getSimpleName +
+ ".setThresholds() called with non-matching numClasses and thresholds.length." +
+ s" numClasses=$numClasses, but thresholds has length ${value.length}")
+ set(thresholds, value).asInstanceOf[M]
+ }
+
+ @Since("3.5.0")
+ override def transformSchema(schema: StructType): StructType = {
+ var outputSchema = super.transformSchema(schema)
+ if ($(probabilityCol).nonEmpty) {
+ outputSchema =
+ SchemaUtils.updateAttributeGroupSize(outputSchema, $(probabilityCol), numClasses)
+ }
+ outputSchema
+ }
+
+ /**
+ * Transforms dataset by reading from [[featuresCol]], and appending new columns as specified by
+ * parameters:
+ * - predicted labels as [[predictionCol]] of type `Double`
+ * - raw predictions (confidences) as [[rawPredictionCol]] of type `Vector`
+ * - probability of each class as [[probabilityCol]] of type `Vector`.
+ *
+ * @param dataset
+ * input dataset
+ * @return
+ * transformed dataset
+ */
+ @Since("3.5.0")
+ override def transform(dataset: Dataset[_]): DataFrame = {
+ // TODO: should send the id of the input dataset and the latest params to the server,
+ // then invoke the 'transform' method of the remote model
+ throw new NotImplementedError
+ }
+
+ /**
+ * Predict the probability of each class given the features. These predictions are also called
+ * class conditional probabilities.
+ *
+ * This internal method is used to implement `transform()` and output [[probabilityCol]].
+ *
+ * @return
+ * Estimated class conditional probabilities
+ */
+ @Since("3.5.0")
+ def predictProbability(features: FeaturesType): Vector = {
+ // TODO: should send the vector to the server,
+ // then invoke the 'predictProbability' method of the remote model
+ throw new NotImplementedError
+ }
+}
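To make the `thresholds` interaction behind setThresholds concrete: in the reference (non-Connect) implementation, each class probability is divided by its threshold before taking the argmax, so lower thresholds favour their class. A hedged sketch of that rule, not part of this change:

    // Sketch only: scale probabilities by 1 / threshold, then take the argmax.
    // A threshold of 0.0 maps to positive infinity, so that class wins outright.
    def probabilityToPrediction(probability: Vector, thresholds: Array[Double]): Double = {
      val scaled = probability.toArray.zip(thresholds).map { case (p, t) =>
        if (t == 0.0) Double.PositiveInfinity else p / t
      }
      scaled.indexOf(scaled.max).toDouble
    }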
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index ad921bcc4e3f8..193eb4faaaba2 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -24,8 +24,10 @@ import scala.collection.JavaConverters._
import org.apache.spark.annotation.Stable
import org.apache.spark.connect.proto.Parse.ParseFormat
import org.apache.spark.internal.Logging
+import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.StringEncoder
import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils}
import org.apache.spark.sql.connect.common.DataTypeProtoConverter
+import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.types.StructType
/**
@@ -531,10 +533,8 @@ class DataFrameReader private[sql] (sparkSession: SparkSession) extends Logging
*/
@scala.annotation.varargs
def textFile(paths: String*): Dataset[String] = {
- // scalastyle:off throwerror
- // TODO: this method can be supported and should be included in the client API.
- throw new NotImplementedError()
- // scalastyle:on throwerror
+ assertNoSpecifiedSchema("textFile")
+ text(paths: _*).select("value").as(StringEncoder)
}
private def assertSourceFormatSpecified(): Unit = {
@@ -556,6 +556,15 @@ class DataFrameReader private[sql] (sparkSession: SparkSession) extends Logging
}
}
+ /**
+ * A convenient function for schema validation in APIs.
+ */
+ private def assertNoSpecifiedSchema(operation: String): Unit = {
+ if (userSpecifiedSchema.nonEmpty) {
+ throw QueryCompilationErrors.userSpecifiedSchemaUnsupportedError(operation)
+ }
+ }
+
///////////////////////////////////////////////////////////////////////////////////////
// Builder pattern config options
///////////////////////////////////////////////////////////////////////////////////////
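With the change above, `textFile` is now supported on the Connect client: it rejects a user-specified schema and returns the `value` column of `text()` as a `Dataset[String]`. A small usage sketch (the path is illustrative):

    // Each line of the input becomes one String element.
    val lines: Dataset[String] = spark.read.textFile("/tmp/people.txt")
    // Setting a schema first would now fail with a user-specified-schema error:
    // spark.read.schema("value STRING").textFile("/tmp/people.txt")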
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
index 29c2e89c53779..729ee9ed6a09f 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
@@ -4003,6 +4003,16 @@ object functions {
*/
def array_compact(column: Column): Column = Column.fn("array_compact", column)
+ /**
+ * Returns an array containing `element` as well as all elements from `column`. The new element
+ * is positioned at the beginning of the array.
+ *
+ * @group collection_funcs
+ * @since 3.5.0
+ */
+ def array_prepend(column: Column, element: Any): Column =
+ Column.fn("array_prepend", column, lit(element))
+
/**
* Removes duplicate values from the array.
* @group collection_funcs
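A quick usage sketch of the new function (the DataFrame and column name are illustrative):

    // [1, 2, 3] prepended with 0 becomes [0, 1, 2, 3].
    df.select(array_prepend(col("xs"), 0))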
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
index 5aa5500116d8a..605b15123c670 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala
@@ -25,6 +25,7 @@ import io.grpc.StatusRuntimeException
import java.util.Properties
import org.apache.commons.io.FileUtils
import org.apache.commons.io.output.TeeOutputStream
+import org.apache.commons.lang3.{JavaVersion, SystemUtils}
import org.scalactic.TolerantNumerics
import org.apache.spark.SPARK_VERSION
@@ -55,6 +56,7 @@ class ClientE2ETestSuite extends RemoteSparkSession with SQLHelper {
}
test("eager execution of sql") {
+ assume(IntegrationTestUtils.isSparkHiveJarAvailable)
withTable("test_martin") {
// Fails, because table does not exist.
assertThrows[StatusRuntimeException] {
@@ -161,6 +163,26 @@ class ClientE2ETestSuite extends RemoteSparkSession with SQLHelper {
}
}
+ test("textFile") {
+ val testDataPath = java.nio.file.Paths
+ .get(
+ IntegrationTestUtils.sparkHome,
+ "connector",
+ "connect",
+ "common",
+ "src",
+ "test",
+ "resources",
+ "query-tests",
+ "test-data",
+ "people.txt")
+ .toAbsolutePath
+ val result = spark.read.textFile(testDataPath.toString).collect()
+ val expected = Array("Michael, 29", "Andy, 30", "Justin, 19")
+ assert(result.length == 3)
+ assert(result === expected)
+ }
+
test("write table") {
withTable("myTable") {
val df = spark.range(10).limit(3)
@@ -182,16 +204,18 @@ class ClientE2ETestSuite extends RemoteSparkSession with SQLHelper {
}
test("write jdbc") {
- val url = "jdbc:derby:memory:1234"
- val table = "t1"
- try {
- spark.range(10).write.jdbc(url = s"$url;create=true", table, new Properties())
- val result = spark.read.jdbc(url = url, table, new Properties()).collect()
- assert(result.length == 10)
- } finally {
- // clean up
- assertThrows[StatusRuntimeException] {
- spark.read.jdbc(url = s"$url;drop=true", table, new Properties()).collect()
+ if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) {
+ val url = "jdbc:derby:memory:1234"
+ val table = "t1"
+ try {
+ spark.range(10).write.jdbc(url = s"$url;create=true", table, new Properties())
+ val result = spark.read.jdbc(url = url, table, new Properties()).collect()
+ assert(result.length == 10)
+ } finally {
+ // clean up
+ assertThrows[StatusRuntimeException] {
+ spark.read.jdbc(url = s"$url;drop=true", table, new Properties()).collect()
+ }
}
}
}
@@ -227,6 +251,7 @@ class ClientE2ETestSuite extends RemoteSparkSession with SQLHelper {
// TODO (SPARK-42519): Revisit this test after we can set configs.
// e.g. spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName)
test("writeTo with create") {
+ assume(IntegrationTestUtils.isSparkHiveJarAvailable)
withTable("myTableV2") {
// Failed to create as Hive support is required.
spark.range(3).writeTo("myTableV2").create()
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
index 3c7e1fdeee645..95d6fddc97caa 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
@@ -1714,6 +1714,10 @@ class PlanGenerationTestSuite
fn.array_distinct(fn.col("e"))
}
+ functionTest("array_prepend") {
+ fn.array_prepend(fn.col("e"), lit(1))
+ }
+
functionTest("array_intersect") {
fn.array_intersect(fn.col("e"), fn.array(lit(10), lit(4)))
}
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala
index 97d130421a242..a2b4762f0a96b 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala
@@ -174,7 +174,6 @@ object CheckConnectJvmClientCompatibility {
ProblemFilters.exclude[Problem]("org.apache.spark.sql.functions.callUDF"),
ProblemFilters.exclude[Problem]("org.apache.spark.sql.functions.unwrap_udt"),
ProblemFilters.exclude[Problem]("org.apache.spark.sql.functions.udaf"),
- ProblemFilters.exclude[Problem]("org.apache.spark.sql.functions.broadcast"),
ProblemFilters.exclude[Problem]("org.apache.spark.sql.functions.typedlit"),
ProblemFilters.exclude[Problem]("org.apache.spark.sql.functions.typedLit"),
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/IntegrationTestUtils.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/IntegrationTestUtils.scala
index f27ea614a7eb8..a98f7e9c13b37 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/IntegrationTestUtils.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/IntegrationTestUtils.scala
@@ -17,6 +17,7 @@
package org.apache.spark.sql.connect.client.util
import java.io.File
+import java.nio.file.{Files, Paths}
import scala.util.Properties.versionNumberString
@@ -27,14 +28,15 @@ object IntegrationTestUtils {
// System properties used for testing and debugging
private val DEBUG_SC_JVM_CLIENT = "spark.debug.sc.jvm.client"
- private[sql] lazy val scalaDir = {
- val version = versionNumberString.split('.') match {
+ private[sql] lazy val scalaVersion = {
+ versionNumberString.split('.') match {
case Array(major, minor, _*) => major + "." + minor
case _ => versionNumberString
}
- "scala-" + version
}
+ private[sql] lazy val scalaDir = s"scala-$scalaVersion"
+
private[sql] lazy val sparkHome: String = {
if (!(sys.props.contains("spark.test.home") || sys.env.contains("SPARK_HOME"))) {
fail("spark.test.home or SPARK_HOME is not set.")
@@ -49,6 +51,12 @@ object IntegrationTestUtils {
// scalastyle:on println
private[connect] def debug(error: Throwable): Unit = if (isDebug) error.printStackTrace()
+ private[sql] lazy val isSparkHiveJarAvailable: Boolean = {
+ val filePath = s"$sparkHome/assembly/target/$scalaDir/jars/" +
+ s"spark-hive_$scalaVersion-${org.apache.spark.SPARK_VERSION}.jar"
+ Files.exists(Paths.get(filePath))
+ }
+
/**
* Find a jar in the Spark project artifacts. It requires a build first (e.g. build/sbt package,
* build/mvn clean install -DskipTests) so that this method can find the jar in the target
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala
index beae5bfa27e2a..d1a34603f48cf 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/util/RemoteSparkSession.scala
@@ -62,6 +62,18 @@ object SparkConnectServerUtils {
"connector/connect/server",
"spark-connect-assembly",
"spark-connect").getCanonicalPath
+ val catalogImplementation = if (IntegrationTestUtils.isSparkHiveJarAvailable) {
+ "hive"
+ } else {
+ // scalastyle:off println
+ println(
+ "Will start Spark Connect server with `spark.sql.catalogImplementation=in-memory`, " +
+ "some tests that rely on Hive will be ignored. If you don't want to skip them:\n" +
+ "1. Test with maven: run `build/mvn install -DskipTests -Phive` before testing\n" +
+ "2. Test with sbt: run test with `-Phive` profile")
+ // scalastyle:on println
+ "in-memory"
+ }
val builder = Process(
Seq(
"bin/spark-submit",
@@ -72,7 +84,7 @@ object SparkConnectServerUtils {
"--conf",
"spark.sql.catalog.testcat=org.apache.spark.sql.connect.catalog.InMemoryTableCatalog",
"--conf",
- "spark.sql.catalogImplementation=hive",
+ s"spark.sql.catalogImplementation=$catalogImplementation",
"--class",
"org.apache.spark.sql.connect.SimpleSparkConnectService",
jar),
diff --git a/connector/connect/common/src/main/protobuf/spark/connect/base.proto b/connector/connect/common/src/main/protobuf/spark/connect/base.proto
index 2118f8e4823ee..da0f974a74906 100644
--- a/connector/connect/common/src/main/protobuf/spark/connect/base.proto
+++ b/connector/connect/common/src/main/protobuf/spark/connect/base.proto
@@ -272,6 +272,9 @@ message ExecutePlanResponse {
// The metrics observed during the execution of the query plan.
repeated ObservedMetrics observed_metrics = 6;
+ // (Optional) The Spark schema. This field is available when `collect` is called.
+ DataType schema = 7;
+
// A SQL command returns an opaque Relation that can be directly used as input for the next
// call.
message SqlCommandResult {
@@ -413,6 +416,11 @@ message AddArtifactsRequest {
// User context
UserContext user_context = 2;
+ // Provides optional information about the client sending the request. This field
+ // can be used for language or version specific information and is only intended for
+ // logging purposes and will not be interpreted by the server.
+ optional string client_type = 6;
+
// A chunk of an Artifact.
message ArtifactChunk {
// Data chunk.
diff --git a/connector/connect/common/src/main/protobuf/spark/connect/relations.proto b/connector/connect/common/src/main/protobuf/spark/connect/relations.proto
index 69451e7b76eef..aba965082ea2a 100644
--- a/connector/connect/common/src/main/protobuf/spark/connect/relations.proto
+++ b/connector/connect/common/src/main/protobuf/spark/connect/relations.proto
@@ -63,6 +63,7 @@ message Relation {
MapPartitions map_partitions = 28;
CollectMetrics collect_metrics = 29;
Parse parse = 30;
+ GroupMap group_map = 31;
// NA functions
NAFill fill_na = 90;
@@ -788,6 +789,17 @@ message MapPartitions {
CommonInlineUserDefinedFunction func = 2;
}
+message GroupMap {
+ // (Required) Input relation for Group Map API: apply, applyInPandas.
+ Relation input = 1;
+
+ // (Required) Expressions for grouping keys.
+ repeated Expression grouping_expressions = 2;
+
+ // (Required) Input user-defined function.
+ CommonInlineUserDefinedFunction func = 3;
+}
+
// Collect arbitrary (named) metrics from a dataset.
message CollectMetrics {
// (Required) The input relation.
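For illustration, the new GroupMap relation above could be assembled with the generated protobuf builders; a hedged Scala sketch in which `input`, `groupingExpr` and `udf` are pre-built messages (all names are placeholders):

    // Sketch only; builder method names follow the usual protobuf-java conventions.
    val groupMap = proto.GroupMap
      .newBuilder()
      .setInput(input)                      // (Required) input relation
      .addGroupingExpressions(groupingExpr) // (Required) repeated grouping expressions
      .setFunc(udf)                         // (Required) the user-defined function
      .build()
    val relation = proto.Relation.newBuilder().setGroupMap(groupMap).build()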
diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala
index c30ea8c830136..28ddbe844d445 100644
--- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala
+++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala
@@ -335,6 +335,7 @@ object DataTypeProtoConverter {
.setType("udt")
.setPythonClass(pyudt.pyUDT)
.setSqlType(toConnectProtoType(pyudt.sqlType))
+ .setSerializedPythonClass(pyudt.serializedPyClass)
.build())
.build()
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_array_prepend.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_array_prepend.explain
new file mode 100644
index 0000000000000..539e1eaf767cc
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_array_prepend.explain
@@ -0,0 +1,2 @@
+Project [array_prepend(e#0, 1) AS array_prepend(e, 1)#0]
++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_array_prepend.json b/connector/connect/common/src/test/resources/query-tests/queries/function_array_prepend.json
new file mode 100644
index 0000000000000..ededeb015a227
--- /dev/null
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_array_prepend.json
@@ -0,0 +1,29 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "array_prepend",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "e"
+ }
+ }, {
+ "literal": {
+ "integer": 1
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_array_prepend.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_array_prepend.proto.bin
new file mode 100644
index 0000000000000..837710597e7b6
Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_array_prepend.proto.bin differ
diff --git a/connector/connect/server/pom.xml b/connector/connect/server/pom.xml
index 079d07db362c1..4d8e082a2db57 100644
--- a/connector/connect/server/pom.xml
+++ b/connector/connect/server/pom.xml
@@ -93,6 +93,18 @@
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-mllib_${scala.binary.version}</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-catalyst_${scala.binary.version}</artifactId>
diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
index a057bd8d6c1e5..c8fdaa6641ab3 100644
--- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
+++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
@@ -30,6 +30,7 @@ import org.apache.spark.connect.proto
import org.apache.spark.connect.proto.{ExecutePlanResponse, SqlCommand}
import org.apache.spark.connect.proto.ExecutePlanResponse.SqlCommandResult
import org.apache.spark.connect.proto.Parse.ParseFormat
+import org.apache.spark.ml.{functions => MLFunctions}
import org.apache.spark.sql.{Column, Dataset, Encoders, SparkSession}
import org.apache.spark.sql.catalyst.{expressions, AliasIdentifier, FunctionIdentifier}
import org.apache.spark.sql.catalyst.analysis.{GlobalTempView, LocalTempView, MultiAlias, ParameterizedQuery, UnresolvedAlias, UnresolvedAttribute, UnresolvedExtractValue, UnresolvedFunction, UnresolvedRegex, UnresolvedRelation, UnresolvedStar}
@@ -38,7 +39,7 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException, ParserUtils}
import org.apache.spark.sql.catalyst.plans.{Cross, FullOuter, Inner, JoinType, LeftAnti, LeftOuter, LeftSemi, RightOuter, UsingJoin}
import org.apache.spark.sql.catalyst.plans.logical
-import org.apache.spark.sql.catalyst.plans.logical.{CollectMetrics, CommandResult, Deduplicate, Except, Intersect, LocalRelation, LogicalPlan, Sample, Sort, SubqueryAlias, Union, Unpivot, UnresolvedHint}
+import org.apache.spark.sql.catalyst.plans.logical.{CollectMetrics, CommandResult, Deduplicate, Except, Intersect, LocalRelation, LogicalPlan, Project, Sample, Sort, SubqueryAlias, Union, Unpivot, UnresolvedHint}
import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils}
import org.apache.spark.sql.connect.common.{DataTypeProtoConverter, InvalidPlanInput, UdfPacket}
import org.apache.spark.sql.connect.config.Connect.CONNECT_GRPC_ARROW_MAX_BATCH_SIZE
@@ -116,6 +117,8 @@ class SparkConnectPlanner(val session: SparkSession) {
transformRepartitionByExpression(rel.getRepartitionByExpression)
case proto.Relation.RelTypeCase.MAP_PARTITIONS =>
transformMapPartitions(rel.getMapPartitions)
+ case proto.Relation.RelTypeCase.GROUP_MAP =>
+ transformGroupMap(rel.getGroupMap)
case proto.Relation.RelTypeCase.COLLECT_METRICS =>
transformCollectMetrics(rel.getCollectMetrics)
case proto.Relation.RelTypeCase.PARSE => transformParse(rel.getParse)
@@ -494,6 +497,18 @@ class SparkConnectPlanner(val session: SparkSession) {
}
}
+ private def transformGroupMap(rel: proto.GroupMap): LogicalPlan = {
+ val pythonUdf = transformPythonUDF(rel.getFunc)
+ val cols =
+ rel.getGroupingExpressionsList.asScala.toSeq.map(expr => Column(transformExpression(expr)))
+
+ Dataset
+ .ofRows(session, transformRelation(rel.getInput))
+ .groupBy(cols: _*)
+ .flatMapGroupsInPandas(pythonUdf)
+ .logicalPlan
+ }
+
private def transformWithColumnsRenamed(rel: proto.WithColumnsRenamed): LogicalPlan = {
Dataset
.ofRows(session, transformRelation(rel.getInput))
@@ -675,16 +690,36 @@ class SparkConnectPlanner(val session: SparkSession) {
}
val attributes = structType.toAttributes
val proj = UnsafeProjection.create(attributes, attributes)
- val relation = logical.LocalRelation(attributes, rows.map(r => proj(r).copy()).toSeq)
+ val data = rows.map(proj)
if (schema == null) {
- relation
+ logical.LocalRelation(attributes, data.map(_.copy()).toSeq)
} else {
- Dataset
- .ofRows(session, logicalPlan = relation)
- .toDF(schema.names: _*)
- .to(schema)
+ def udtToSqlType(dt: DataType): DataType = dt match {
+ case udt: UserDefinedType[_] => udt.sqlType
+ case StructType(fields) =>
+ val newFields = fields.map { case StructField(name, dataType, nullable, metadata) =>
+ StructField(name, udtToSqlType(dataType), nullable, metadata)
+ }
+ StructType(newFields)
+ case ArrayType(elementType, containsNull) =>
+ ArrayType(udtToSqlType(elementType), containsNull)
+ case MapType(keyType, valueType, valueContainsNull) =>
+ MapType(udtToSqlType(keyType), udtToSqlType(valueType), valueContainsNull)
+ case _ => dt
+ }
+
+ val sqlTypeOnlySchema = udtToSqlType(schema).asInstanceOf[StructType]
+
+ val project = Dataset
+ .ofRows(session, logicalPlan = logical.LocalRelation(attributes))
+ .toDF(sqlTypeOnlySchema.names: _*)
+ .to(sqlTypeOnlySchema)
.logicalPlan
+ .asInstanceOf[Project]
+
+ val proj = UnsafeProjection.create(project.projectList, project.child.output)
+ logical.LocalRelation(schema.toAttributes, data.map(proj).map(_.copy()).toSeq)
}
} else {
if (schema == null) {
@@ -1187,10 +1222,51 @@ class SparkConnectPlanner(val session: SparkSession) {
None
}
+ // ML-specific functions
+ case "vector_to_array" if fun.getArgumentsCount == 2 =>
+ val expr = transformExpression(fun.getArguments(0))
+ val dtype = transformExpression(fun.getArguments(1)) match {
+ case Literal(s, StringType) if s != null => s.toString
+ case other =>
+ throw InvalidPlanInput(
+ s"dtype in vector_to_array should be a literal string, but got $other")
+ }
+ dtype match {
+ case "float64" =>
+ Some(transformUnregisteredUDF(MLFunctions.vectorToArrayUdf, Seq(expr)))
+ case "float32" =>
+ Some(transformUnregisteredUDF(MLFunctions.vectorToArrayFloatUdf, Seq(expr)))
+ case other =>
+ throw InvalidPlanInput(s"Unsupported dtype: $other. Valid values: float64, float32.")
+ }
+
+ case "array_to_vector" if fun.getArgumentsCount == 1 =>
+ val expr = transformExpression(fun.getArguments(0))
+ Some(transformUnregisteredUDF(MLFunctions.arrayToVectorUdf, Seq(expr)))
+
case _ => None
}
}
+ /**
+ * Some built-in UDFs, for example 'ml.function.array_to_vector', are not registered in the
+ * function registry. This method converts them to ScalaUDF expressions.
+ */
+ private def transformUnregisteredUDF(
+ fun: org.apache.spark.sql.expressions.UserDefinedFunction,
+ exprs: Seq[Expression]): ScalaUDF = {
+ val f = fun.asInstanceOf[org.apache.spark.sql.expressions.SparkUserDefinedFunction]
+ ScalaUDF(
+ function = f.f,
+ dataType = f.dataType,
+ children = exprs,
+ inputEncoders = f.inputEncoders,
+ outputEncoder = f.outputEncoder,
+ udfName = f.name,
+ nullable = f.nullable,
+ udfDeterministic = f.deterministic)
+ }
+
private def transformAlias(alias: proto.Expression.Alias): NamedExpression = {
if (alias.getNameCount == 1) {
val metadata = if (alias.hasMetadata() && alias.getMetadata.nonEmpty) {
diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamHandler.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamHandler.scala
index 104d840ed52bd..335b871d499be 100644
--- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamHandler.scala
+++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamHandler.scala
@@ -28,6 +28,7 @@ import org.apache.spark.connect.proto.{ExecutePlanRequest, ExecutePlanResponse}
import org.apache.spark.internal.Logging
import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}
import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.connect.common.DataTypeProtoConverter
import org.apache.spark.sql.connect.common.LiteralValueProtoConverter.toLiteralProto
import org.apache.spark.sql.connect.config.Connect.CONNECT_GRPC_ARROW_MAX_BATCH_SIZE
import org.apache.spark.sql.connect.planner.SparkConnectPlanner
@@ -60,6 +61,8 @@ class SparkConnectStreamHandler(responseObserver: StreamObserver[ExecutePlanResp
// Extract the plan from the request and convert it to a logical plan
val planner = new SparkConnectPlanner(session)
val dataframe = Dataset.ofRows(session, planner.transformRelation(request.getPlan.getRoot))
+ responseObserver.onNext(
+ SparkConnectStreamHandler.sendSchemaToResponse(request.getSessionId, dataframe.schema))
processAsArrowBatches(request.getSessionId, dataframe, responseObserver)
responseObserver.onNext(
SparkConnectStreamHandler.sendMetricsToResponse(request.getSessionId, dataframe))
@@ -203,6 +206,15 @@ object SparkConnectStreamHandler {
}
}
+ def sendSchemaToResponse(sessionId: String, schema: StructType): ExecutePlanResponse = {
+ // Send the Spark data type
+ ExecutePlanResponse
+ .newBuilder()
+ .setSessionId(sessionId)
+ .setSchema(DataTypeProtoConverter.toConnectProtoType(schema))
+ .build()
+ }
+
def sendMetricsToResponse(sessionId: String, rows: DataFrame): ExecutePlanResponse = {
// Send a last batch with the metrics
ExecutePlanResponse
diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala
index e2aecaaea8602..c36ba76f98451 100644
--- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala
+++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala
@@ -160,18 +160,22 @@ class SparkConnectServiceSuite extends SharedSparkSession {
assert(done)
// 4 Partitions + Metrics
- assert(responses.size == 5)
+ assert(responses.size == 6)
+
+ // Make sure the first response is schema only
+ val head = responses.head
+ assert(head.hasSchema && !head.hasArrowBatch && !head.hasMetrics)
// Make sure the last response is metrics only
val last = responses.last
- assert(last.hasMetrics && !last.hasArrowBatch)
+ assert(last.hasMetrics && !last.hasSchema && !last.hasArrowBatch)
val allocator = new RootAllocator()
// Check the 'data' batches
var expectedId = 0L
var previousEId = 0.0d
- responses.dropRight(1).foreach { response =>
+ responses.tail.dropRight(1).foreach { response =>
assert(response.hasArrowBatch)
val batch = response.getArrowBatch
assert(batch.getData != null)
@@ -347,11 +351,15 @@ class SparkConnectServiceSuite extends SharedSparkSession {
// The current implementation is expected to be blocking. This is here to make sure it is.
assert(done)
- assert(responses.size == 6)
+ assert(responses.size == 7)
+
+ // Make sure the first response is schema only
+ val head = responses.head
+ assert(head.hasSchema && !head.hasArrowBatch && !head.hasMetrics)
// Make sure the last response is observed metrics only
val last = responses.last
- assert(last.getObservedMetricsCount == 1 && !last.hasArrowBatch)
+ assert(last.getObservedMetricsCount == 1 && !last.hasSchema && !last.hasArrowBatch)
val observedMetricsList = last.getObservedMetricsList.asScala
val observedMetric = observedMetricsList.head
diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala
index 6a42158f5876a..291276c198144 100644
--- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala
+++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala
@@ -38,6 +38,17 @@ import org.apache.spark.tags.DockerTest
*/
@DockerTest
class DB2IntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest {
+
+ override def excluded: Seq[String] = Seq(
+ "scan with aggregate push-down: COVAR_POP with DISTINCT",
+ "scan with aggregate push-down: COVAR_SAMP with DISTINCT",
+ "scan with aggregate push-down: CORR with DISTINCT",
+ "scan with aggregate push-down: CORR without DISTINCT",
+ "scan with aggregate push-down: REGR_INTERCEPT with DISTINCT",
+ "scan with aggregate push-down: REGR_SLOPE with DISTINCT",
+ "scan with aggregate push-down: REGR_R2 with DISTINCT",
+ "scan with aggregate push-down: REGR_SXY with DISTINCT")
+
override val catalogName: String = "db2"
override val namespaceOpt: Option[String] = Some("DB2INST1")
override val db = new DatabaseOnDocker {
@@ -97,23 +108,4 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest {
}
override def caseConvert(tableName: String): String = tableName.toUpperCase(Locale.ROOT)
-
- testOffset()
- testLimitAndOffset()
- testPaging()
-
- testVarPop()
- testVarPop(true)
- testVarSamp()
- testVarSamp(true)
- testStddevPop()
- testStddevPop(true)
- testStddevSamp()
- testStddevSamp(true)
- testCovarPop()
- testCovarSamp()
- testRegrIntercept()
- testRegrSlope()
- testRegrR2()
- testRegrSXY()
}
diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2NamespaceSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2NamespaceSuite.scala
index f0e98fc2722b0..f53dc1d5f6da7 100644
--- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2NamespaceSuite.scala
+++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2NamespaceSuite.scala
@@ -68,7 +68,4 @@ class DB2NamespaceSuite extends DockerJDBCIntegrationSuite with V2JDBCNamespaceT
}
override val supportsDropSchemaCascade: Boolean = false
-
- testListNamespaces()
- testDropNamespaces()
}
diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala
index 6b8d62f8f7b1d..107e28d1b3828 100644
--- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala
+++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala
@@ -39,8 +39,27 @@ import org.apache.spark.tags.DockerTest
@DockerTest
class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest {
- override val catalogName: String = "mssql"
+ override def excluded: Seq[String] = Seq(
+ "simple scan with OFFSET",
+ "simple scan with LIMIT and OFFSET",
+ "simple scan with paging: top N and OFFSET",
+ "scan with aggregate push-down: VAR_POP with DISTINCT",
+ "scan with aggregate push-down: COVAR_POP with DISTINCT",
+ "scan with aggregate push-down: COVAR_POP without DISTINCT",
+ "scan with aggregate push-down: COVAR_SAMP with DISTINCT",
+ "scan with aggregate push-down: COVAR_SAMP without DISTINCT",
+ "scan with aggregate push-down: CORR with DISTINCT",
+ "scan with aggregate push-down: CORR without DISTINCT",
+ "scan with aggregate push-down: REGR_INTERCEPT with DISTINCT",
+ "scan with aggregate push-down: REGR_INTERCEPT without DISTINCT",
+ "scan with aggregate push-down: REGR_SLOPE with DISTINCT",
+ "scan with aggregate push-down: REGR_SLOPE without DISTINCT",
+ "scan with aggregate push-down: REGR_R2 with DISTINCT",
+ "scan with aggregate push-down: REGR_R2 without DISTINCT",
+ "scan with aggregate push-down: REGR_SXY with DISTINCT",
+ "scan with aggregate push-down: REGR_SXY without DISTINCT")
+ override val catalogName: String = "mssql"
override val db = new DatabaseOnDocker {
override val imageName = sys.env.getOrElse("MSSQLSERVER_DOCKER_IMAGE_NAME",
"mcr.microsoft.com/mssql/server:2019-CU13-ubuntu-20.04")
@@ -97,13 +116,4 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JD
assert(msg.contains("UpdateColumnNullability is not supported"))
}
-
- testVarPop()
- testVarPop(true)
- testVarSamp()
- testVarSamp(true)
- testStddevPop()
- testStddevPop(true)
- testStddevSamp()
- testStddevSamp(true)
}
diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerNamespaceSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerNamespaceSuite.scala
index aa8dac266380a..b0a2d37e465ac 100644
--- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerNamespaceSuite.scala
+++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerNamespaceSuite.scala
@@ -70,7 +70,4 @@ class MsSqlServerNamespaceSuite extends DockerJDBCIntegrationSuite with V2JDBCNa
override val supportsSchemaComment: Boolean = false
override val supportsDropSchemaCascade: Boolean = false
-
- testListNamespaces()
- testDropNamespaces()
}
diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala
index 41a42e21f44d5..789dfeddc214c 100644
--- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala
+++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala
@@ -37,6 +37,27 @@ import org.apache.spark.tags.DockerTest
*/
@DockerTest
class MySQLIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest {
+
+ override def excluded: Seq[String] = Seq(
+ "scan with aggregate push-down: VAR_POP with DISTINCT",
+ "scan with aggregate push-down: VAR_SAMP with DISTINCT",
+ "scan with aggregate push-down: STDDEV_POP with DISTINCT",
+ "scan with aggregate push-down: STDDEV_SAMP with DISTINCT",
+ "scan with aggregate push-down: COVAR_POP with DISTINCT",
+ "scan with aggregate push-down: COVAR_POP without DISTINCT",
+ "scan with aggregate push-down: COVAR_SAMP with DISTINCT",
+ "scan with aggregate push-down: COVAR_SAMP without DISTINCT",
+ "scan with aggregate push-down: CORR with DISTINCT",
+ "scan with aggregate push-down: CORR without DISTINCT",
+ "scan with aggregate push-down: REGR_INTERCEPT with DISTINCT",
+ "scan with aggregate push-down: REGR_INTERCEPT without DISTINCT",
+ "scan with aggregate push-down: REGR_SLOPE with DISTINCT",
+ "scan with aggregate push-down: REGR_SLOPE without DISTINCT",
+ "scan with aggregate push-down: REGR_R2 with DISTINCT",
+ "scan with aggregate push-down: REGR_R2 without DISTINCT",
+ "scan with aggregate push-down: REGR_SXY with DISTINCT",
+ "scan with aggregate push-down: REGR_SXY without DISTINCT")
+
override val catalogName: String = "mysql"
override val db = new DatabaseOnDocker {
override val imageName = sys.env.getOrElse("MYSQL_DOCKER_IMAGE_NAME", "mysql:8.0.31")
@@ -124,13 +145,4 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest
override def supportListIndexes: Boolean = true
override def indexOptions: String = "KEY_BLOCK_SIZE=10"
-
- testOffset()
- testLimitAndOffset()
- testPaging()
-
- testVarPop()
- testVarSamp()
- testStddevPop()
- testStddevSamp()
}
diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLNamespaceSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLNamespaceSuite.scala
index b73e2b8fd23ca..0974a86fe9b83 100644
--- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLNamespaceSuite.scala
+++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLNamespaceSuite.scala
@@ -68,9 +68,6 @@ class MySQLNamespaceSuite extends DockerJDBCIntegrationSuite with V2JDBCNamespac
override val supportsDropSchemaRestrict: Boolean = false
- testListNamespaces()
- testDropNamespaces()
-
test("Create or remove comment of namespace unsupported") {
val e1 = intercept[AnalysisException] {
catalog.createNamespace(Array("foo"), Map("comment" -> "test comment").asJava)
diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala
index a810602652766..f9923ef9e1c10 100644
--- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala
+++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala
@@ -56,6 +56,20 @@ import org.apache.spark.tags.DockerTest
*/
@DockerTest
class OracleIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest {
+
+ override def excluded: Seq[String] = Seq(
+ "scan with aggregate push-down: VAR_POP with DISTINCT",
+ "scan with aggregate push-down: VAR_SAMP with DISTINCT",
+ "scan with aggregate push-down: STDDEV_POP with DISTINCT",
+ "scan with aggregate push-down: STDDEV_SAMP with DISTINCT",
+ "scan with aggregate push-down: COVAR_POP with DISTINCT",
+ "scan with aggregate push-down: COVAR_SAMP with DISTINCT",
+ "scan with aggregate push-down: CORR with DISTINCT",
+ "scan with aggregate push-down: REGR_INTERCEPT with DISTINCT",
+ "scan with aggregate push-down: REGR_SLOPE with DISTINCT",
+ "scan with aggregate push-down: REGR_R2 with DISTINCT",
+ "scan with aggregate push-down: REGR_SXY with DISTINCT")
+
override val catalogName: String = "oracle"
override val namespaceOpt: Option[String] = Some("SYSTEM")
override val db = new DatabaseOnDocker {
@@ -105,20 +119,4 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTes
}
override def caseConvert(tableName: String): String = tableName.toUpperCase(Locale.ROOT)
-
- testOffset()
- testLimitAndOffset()
- testPaging()
-
- testVarPop()
- testVarSamp()
- testStddevPop()
- testStddevSamp()
- testCovarPop()
- testCovarSamp()
- testCorr()
- testRegrIntercept()
- testRegrSlope()
- testRegrR2()
- testRegrSXY()
}
diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleNamespaceSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleNamespaceSuite.scala
index b3e9d19a10f38..a365a1c4e82e4 100644
--- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleNamespaceSuite.scala
+++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleNamespaceSuite.scala
@@ -52,6 +52,9 @@ import org.apache.spark.tags.DockerTest
*/
@DockerTest
class OracleNamespaceSuite extends DockerJDBCIntegrationSuite with V2JDBCNamespaceTest {
+
+ override def excluded: Seq[String] = Seq("listNamespaces: basic behavior", "Drop namespace")
+
override val db = new DatabaseOnDocker {
lazy override val imageName =
sys.env.getOrElse("ORACLE_DOCKER_IMAGE_NAME", "gvenzl/oracle-xe:21.3.0")
diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala
index 4065dbcc036f6..4742764021bf5 100644
--- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala
+++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala
@@ -90,31 +90,4 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCT
override def supportsIndex: Boolean = true
override def indexOptions: String = "FILLFACTOR=70"
-
- testOffset()
- testLimitAndOffset()
- testPaging()
-
- testVarPop()
- testVarPop(true)
- testVarSamp()
- testVarSamp(true)
- testStddevPop()
- testStddevPop(true)
- testStddevSamp()
- testStddevSamp(true)
- testCovarPop()
- testCovarPop(true)
- testCovarSamp()
- testCovarSamp(true)
- testCorr()
- testCorr(true)
- testRegrIntercept()
- testRegrIntercept(true)
- testRegrSlope()
- testRegrSlope(true)
- testRegrR2()
- testRegrR2(true)
- testRegrSXY()
- testRegrSXY(true)
}
diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala
index 8c525717758c3..cf7266e67e325 100644
--- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala
+++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala
@@ -55,7 +55,4 @@ class PostgresNamespaceSuite extends DockerJDBCIntegrationSuite with V2JDBCNames
override def builtinNamespaces: Array[Array[String]] =
Array(Array("information_schema"), Array("pg_catalog"), Array("public"))
-
- testListNamespaces()
- testDropNamespaces()
}
diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala
index d3f17187a3754..b7c6e0aff20a7 100644
--- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala
+++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala
@@ -55,83 +55,79 @@ private[v2] trait V2JDBCNamespaceTest extends SharedSparkSession with DockerInte
def supportsDropSchemaRestrict: Boolean = true
- def testListNamespaces(): Unit = {
- test("listNamespaces: basic behavior") {
- val commentMap = if (supportsSchemaComment) {
- Map("comment" -> "test comment")
- } else {
- Map.empty[String, String]
- }
- catalog.createNamespace(Array("foo"), commentMap.asJava)
- assert(catalog.listNamespaces().map(_.toSet).toSet ===
- listNamespaces(Array("foo")).map(_.toSet).toSet)
- assert(catalog.listNamespaces(Array("foo")) === Array())
- assert(catalog.namespaceExists(Array("foo")) === true)
-
- if (supportsSchemaComment) {
- val logAppender = new LogAppender("catalog comment")
- withLogAppender(logAppender) {
- catalog.alterNamespace(Array("foo"), NamespaceChange
- .setProperty("comment", "comment for foo"))
- catalog.alterNamespace(Array("foo"), NamespaceChange.removeProperty("comment"))
- }
- val createCommentWarning = logAppender.loggingEvents
- .filter(_.getLevel == Level.WARN)
- .map(_.getMessage.getFormattedMessage)
- .exists(_.contains("catalog comment"))
- assert(createCommentWarning === false)
+ test("listNamespaces: basic behavior") {
+ val commentMap = if (supportsSchemaComment) {
+ Map("comment" -> "test comment")
+ } else {
+ Map.empty[String, String]
+ }
+ catalog.createNamespace(Array("foo"), commentMap.asJava)
+ assert(catalog.listNamespaces().map(_.toSet).toSet ===
+ listNamespaces(Array("foo")).map(_.toSet).toSet)
+ assert(catalog.listNamespaces(Array("foo")) === Array())
+ assert(catalog.namespaceExists(Array("foo")) === true)
+
+ if (supportsSchemaComment) {
+ val logAppender = new LogAppender("catalog comment")
+ withLogAppender(logAppender) {
+ catalog.alterNamespace(Array("foo"), NamespaceChange
+ .setProperty("comment", "comment for foo"))
+ catalog.alterNamespace(Array("foo"), NamespaceChange.removeProperty("comment"))
}
+ val createCommentWarning = logAppender.loggingEvents
+ .filter(_.getLevel == Level.WARN)
+ .map(_.getMessage.getFormattedMessage)
+ .exists(_.contains("catalog comment"))
+ assert(createCommentWarning === false)
+ }
- if (supportsDropSchemaRestrict) {
- catalog.dropNamespace(Array("foo"), cascade = false)
- } else {
- catalog.dropNamespace(Array("foo"), cascade = true)
- }
- assert(catalog.namespaceExists(Array("foo")) === false)
- assert(catalog.listNamespaces() === builtinNamespaces)
- val e = intercept[AnalysisException] {
- catalog.listNamespaces(Array("foo"))
- }
- checkError(e,
- errorClass = "SCHEMA_NOT_FOUND",
- parameters = Map("schemaName" -> "`foo`"))
+ if (supportsDropSchemaRestrict) {
+ catalog.dropNamespace(Array("foo"), cascade = false)
+ } else {
+ catalog.dropNamespace(Array("foo"), cascade = true)
+ }
+ assert(catalog.namespaceExists(Array("foo")) === false)
+ assert(catalog.listNamespaces() === builtinNamespaces)
+ val e = intercept[AnalysisException] {
+ catalog.listNamespaces(Array("foo"))
}
+ checkError(e,
+ errorClass = "SCHEMA_NOT_FOUND",
+ parameters = Map("schemaName" -> "`foo`"))
}
- def testDropNamespaces(): Unit = {
- test("Drop namespace") {
- val ident1 = Identifier.of(Array("foo"), "tab")
- // Drop empty namespace without cascade
- val commentMap = if (supportsSchemaComment) {
- Map("comment" -> "test comment")
- } else {
- Map.empty[String, String]
- }
- catalog.createNamespace(Array("foo"), commentMap.asJava)
- assert(catalog.namespaceExists(Array("foo")) === true)
- if (supportsDropSchemaRestrict) {
+ test("Drop namespace") {
+ val ident1 = Identifier.of(Array("foo"), "tab")
+ // Drop empty namespace without cascade
+ val commentMap = if (supportsSchemaComment) {
+ Map("comment" -> "test comment")
+ } else {
+ Map.empty[String, String]
+ }
+ catalog.createNamespace(Array("foo"), commentMap.asJava)
+ assert(catalog.namespaceExists(Array("foo")) === true)
+ if (supportsDropSchemaRestrict) {
+ catalog.dropNamespace(Array("foo"), cascade = false)
+ } else {
+ catalog.dropNamespace(Array("foo"), cascade = true)
+ }
+ assert(catalog.namespaceExists(Array("foo")) === false)
+
+ // Drop non empty namespace without cascade
+ catalog.createNamespace(Array("foo"), commentMap.asJava)
+ assert(catalog.namespaceExists(Array("foo")) === true)
+ catalog.createTable(ident1, schema, Array.empty[Transform], emptyProps)
+ if (supportsDropSchemaRestrict) {
+ intercept[NonEmptyNamespaceException] {
catalog.dropNamespace(Array("foo"), cascade = false)
- } else {
- catalog.dropNamespace(Array("foo"), cascade = true)
}
- assert(catalog.namespaceExists(Array("foo")) === false)
+ }
- // Drop non empty namespace without cascade
- catalog.createNamespace(Array("foo"), commentMap.asJava)
+ // Drop non empty namespace with cascade
+ if (supportsDropSchemaCascade) {
assert(catalog.namespaceExists(Array("foo")) === true)
- catalog.createTable(ident1, schema, Array.empty[Transform], emptyProps)
- if (supportsDropSchemaRestrict) {
- intercept[NonEmptyNamespaceException] {
- catalog.dropNamespace(Array("foo"), cascade = false)
- }
- }
-
- // Drop non empty namespace with cascade
- if (supportsDropSchemaCascade) {
- assert(catalog.namespaceExists(Array("foo")) === true)
- catalog.dropNamespace(Array("foo"), cascade = true)
- assert(catalog.namespaceExists(Array("foo")) === false)
- }
+ catalog.dropNamespace(Array("foo"), cascade = true)
+ assert(catalog.namespaceExists(Array("foo")) === false)
}
}
}
diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala
index 97ee338509031..85b0b807932aa 100644
--- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala
+++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala
@@ -314,14 +314,13 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
}
}
- private def limitPushed(df: DataFrame, limit: Int): Boolean = {
+ private def checkLimitPushed(df: DataFrame, limit: Option[Int]): Unit = {
df.queryExecution.optimizedPlan.collect {
case relation: DataSourceV2ScanRelation => relation.scan match {
case v1: V1ScanWrapper =>
- return v1.pushedDownOperators.limit == Some(limit)
+ assert(v1.pushedDownOperators.limit == limit)
}
}
- false
}
private def checkColumnPruned(df: DataFrame, col: String): Unit = {
@@ -354,7 +353,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
val df3 = sql(s"SELECT col1 FROM $catalogName.new_table TABLESAMPLE (BUCKET 6 OUT OF 10)" +
" LIMIT 2")
checkSamplePushed(df3)
- assert(limitPushed(df3, 2))
+ checkLimitPushed(df3, Some(2))
checkColumnPruned(df3, "col1")
assert(df3.collect().length <= 2)
@@ -362,7 +361,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
val df4 = sql(s"SELECT col1 FROM $catalogName.new_table" +
" TABLESAMPLE (50 PERCENT) REPEATABLE (12345) LIMIT 2")
checkSamplePushed(df4)
- assert(limitPushed(df4, 2))
+ checkLimitPushed(df4, Some(2))
checkColumnPruned(df4, "col1")
assert(df4.collect().length <= 2)
@@ -371,7 +370,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
" TABLESAMPLE (BUCKET 6 OUT OF 10) WHERE col1 > 0 LIMIT 2")
checkSamplePushed(df5)
checkFilterPushed(df5)
- assert(limitPushed(df5, 2))
+ checkLimitPushed(df5, Some(2))
assert(df5.collect().length <= 2)
// sample + filter + limit + column pruning
@@ -381,7 +380,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
" TABLESAMPLE (BUCKET 6 OUT OF 10) WHERE col1 > 0 LIMIT 2")
checkSamplePushed(df6)
checkFilterPushed(df6, false)
- assert(!limitPushed(df6, 2))
+ checkLimitPushed(df6, None)
checkColumnPruned(df6, "col1")
assert(df6.collect().length <= 2)
@@ -390,7 +389,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
// only limit is pushed down because in this test sample is after limit
val df7 = spark.read.table(s"$catalogName.new_table").limit(2).sample(0.5)
checkSamplePushed(df7, false)
- assert(limitPushed(df7, 2))
+ checkLimitPushed(df7, Some(2))
// sample + filter
// Push down order is sample -> filter -> limit
@@ -422,7 +421,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
test("simple scan with LIMIT") {
val df = sql(s"SELECT name, salary, bonus FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 LIMIT 1")
- assert(limitPushed(df, 1))
+ checkLimitPushed(df, Some(1))
val rows = df.collect()
assert(rows.length === 1)
assert(rows(0).getString(0) === "amy")
@@ -434,7 +433,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
Seq(NullOrdering.values()).flatten.foreach { nullOrdering =>
val df1 = sql(s"SELECT name, salary, bonus FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 ORDER BY salary $nullOrdering LIMIT 1")
- assert(limitPushed(df1, 1))
+ checkLimitPushed(df1, Some(1))
checkSortRemoved(df1)
val rows1 = df1.collect()
assert(rows1.length === 1)
@@ -444,7 +443,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
val df2 = sql(s"SELECT name, salary, bonus FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 ORDER BY bonus DESC $nullOrdering LIMIT 1")
- assert(limitPushed(df2, 1))
+ checkLimitPushed(df2, Some(1))
checkSortRemoved(df2)
val rows2 = df2.collect()
assert(rows2.length === 1)
@@ -454,60 +453,54 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
}
}
- protected def testOffset(): Unit = {
- test("simple scan with OFFSET") {
- val df = sql(s"SELECT name, salary, bonus FROM $catalogAndNamespace." +
- s"${caseConvert("employee")} WHERE dept > 0 OFFSET 4")
- checkOffsetPushed(df, Some(4))
- val rows = df.collect()
- assert(rows.length === 1)
- assert(rows(0).getString(0) === "jen")
- assert(rows(0).getDecimal(1) === new java.math.BigDecimal("12000.00"))
- assert(rows(0).getDouble(2) === 1200d)
- }
+ test("simple scan with OFFSET") {
+ val df = sql(s"SELECT name, salary, bonus FROM $catalogAndNamespace." +
+ s"${caseConvert("employee")} WHERE dept > 0 OFFSET 4")
+ checkOffsetPushed(df, Some(4))
+ val rows = df.collect()
+ assert(rows.length === 1)
+ assert(rows(0).getString(0) === "jen")
+ assert(rows(0).getDecimal(1) === new java.math.BigDecimal("12000.00"))
+ assert(rows(0).getDouble(2) === 1200d)
}
- protected def testLimitAndOffset(): Unit = {
- test("simple scan with LIMIT and OFFSET") {
- val df = sql(s"SELECT name, salary, bonus FROM $catalogAndNamespace." +
- s"${caseConvert("employee")} WHERE dept > 0 LIMIT 1 OFFSET 2")
- assert(limitPushed(df, 3))
- checkOffsetPushed(df, Some(2))
- val rows = df.collect()
- assert(rows.length === 1)
- assert(rows(0).getString(0) === "cathy")
- assert(rows(0).getDecimal(1) === new java.math.BigDecimal("9000.00"))
- assert(rows(0).getDouble(2) === 1200d)
- }
+ test("simple scan with LIMIT and OFFSET") {
+ val df = sql(s"SELECT name, salary, bonus FROM $catalogAndNamespace." +
+ s"${caseConvert("employee")} WHERE dept > 0 LIMIT 1 OFFSET 2")
+ checkLimitPushed(df, Some(3))
+ checkOffsetPushed(df, Some(2))
+ val rows = df.collect()
+ assert(rows.length === 1)
+ assert(rows(0).getString(0) === "cathy")
+ assert(rows(0).getDecimal(1) === new java.math.BigDecimal("9000.00"))
+ assert(rows(0).getDouble(2) === 1200d)
}
- protected def testPaging(): Unit = {
- test("simple scan with paging: top N and OFFSET") {
- Seq(NullOrdering.values()).flatten.foreach { nullOrdering =>
- val df1 = sql(s"SELECT name, salary, bonus FROM $catalogAndNamespace." +
- s"${caseConvert("employee")}" +
- s" WHERE dept > 0 ORDER BY salary $nullOrdering, bonus LIMIT 1 OFFSET 2")
- assert(limitPushed(df1, 3))
- checkOffsetPushed(df1, Some(2))
- checkSortRemoved(df1)
- val rows1 = df1.collect()
- assert(rows1.length === 1)
- assert(rows1(0).getString(0) === "david")
- assert(rows1(0).getDecimal(1) === new java.math.BigDecimal("10000.00"))
- assert(rows1(0).getDouble(2) === 1300d)
-
- val df2 = sql(s"SELECT name, salary, bonus FROM $catalogAndNamespace." +
- s"${caseConvert("employee")}" +
- s" WHERE dept > 0 ORDER BY salary DESC $nullOrdering, bonus LIMIT 1 OFFSET 2")
- assert(limitPushed(df2, 3))
- checkOffsetPushed(df2, Some(2))
- checkSortRemoved(df2)
- val rows2 = df2.collect()
- assert(rows2.length === 1)
- assert(rows2(0).getString(0) === "amy")
- assert(rows2(0).getDecimal(1) === new java.math.BigDecimal("10000.00"))
- assert(rows2(0).getDouble(2) === 1000d)
- }
+ test("simple scan with paging: top N and OFFSET") {
+ Seq(NullOrdering.values()).flatten.foreach { nullOrdering =>
+ val df1 = sql(s"SELECT name, salary, bonus FROM $catalogAndNamespace." +
+ s"${caseConvert("employee")}" +
+ s" WHERE dept > 0 ORDER BY salary $nullOrdering, bonus LIMIT 1 OFFSET 2")
+ checkLimitPushed(df1, Some(3))
+ checkOffsetPushed(df1, Some(2))
+ checkSortRemoved(df1)
+ val rows1 = df1.collect()
+ assert(rows1.length === 1)
+ assert(rows1(0).getString(0) === "david")
+ assert(rows1(0).getDecimal(1) === new java.math.BigDecimal("10000.00"))
+ assert(rows1(0).getDouble(2) === 1300d)
+
+ val df2 = sql(s"SELECT name, salary, bonus FROM $catalogAndNamespace." +
+ s"${caseConvert("employee")}" +
+ s" WHERE dept > 0 ORDER BY salary DESC $nullOrdering, bonus LIMIT 1 OFFSET 2")
+ checkLimitPushed(df2, Some(3))
+ checkOffsetPushed(df2, Some(2))
+ checkSortRemoved(df2)
+ val rows2 = df2.collect()
+ assert(rows2.length === 1)
+ assert(rows2(0).getString(0) === "amy")
+ assert(rows2(0).getDecimal(1) === new java.math.BigDecimal("10000.00"))
+ assert(rows2(0).getDouble(2) === 1000d)
}
}
@@ -536,9 +529,11 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
private def withOrWithout(isDistinct: Boolean): String = if (isDistinct) "with" else "without"
- protected def testVarPop(isDistinct: Boolean = false): Unit = {
+ Seq(true, false).foreach { isDistinct =>
val distinct = if (isDistinct) "DISTINCT " else ""
- test(s"scan with aggregate push-down: VAR_POP ${withOrWithout(isDistinct)} DISTINCT") {
+ val withOrWithout = if (isDistinct) "with" else "without"
+
+ test(s"scan with aggregate push-down: VAR_POP $withOrWithout DISTINCT") {
val df = sql(s"SELECT VAR_POP(${distinct}bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
checkFilterPushed(df)
@@ -550,14 +545,11 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
assert(row(1).getDouble(0) === 2500.0)
assert(row(2).getDouble(0) === 0.0)
}
- }
- protected def testVarSamp(isDistinct: Boolean = false): Unit = {
- val distinct = if (isDistinct) "DISTINCT " else ""
- test(s"scan with aggregate push-down: VAR_SAMP ${withOrWithout(isDistinct)} DISTINCT") {
+ test(s"scan with aggregate push-down: VAR_SAMP $withOrWithout DISTINCT") {
val df = sql(
s"SELECT VAR_SAMP(${distinct}bonus) FROM $catalogAndNamespace." +
- s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
+ s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
checkFilterPushed(df)
checkAggregateRemoved(df)
checkAggregatePushed(df, "VAR_SAMP")
@@ -567,14 +559,11 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
assert(row(1).getDouble(0) === 5000.0)
assert(row(2).isNullAt(0))
}
- }
- protected def testStddevPop(isDistinct: Boolean = false): Unit = {
- val distinct = if (isDistinct) "DISTINCT " else ""
- test(s"scan with aggregate push-down: STDDEV_POP ${withOrWithout(isDistinct)} DISTINCT") {
+ test(s"scan with aggregate push-down: STDDEV_POP $withOrWithout DISTINCT") {
val df = sql(
s"SELECT STDDEV_POP(${distinct}bonus) FROM $catalogAndNamespace." +
- s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
+ s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
checkFilterPushed(df)
checkAggregateRemoved(df)
checkAggregatePushed(df, "STDDEV_POP")
@@ -584,14 +573,11 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
assert(row(1).getDouble(0) === 50.0)
assert(row(2).getDouble(0) === 0.0)
}
- }
- protected def testStddevSamp(isDistinct: Boolean = false): Unit = {
- val distinct = if (isDistinct) "DISTINCT " else ""
- test(s"scan with aggregate push-down: STDDEV_SAMP ${withOrWithout(isDistinct)} DISTINCT") {
+ test(s"scan with aggregate push-down: STDDEV_SAMP $withOrWithout DISTINCT") {
val df = sql(
s"SELECT STDDEV_SAMP(${distinct}bonus) FROM $catalogAndNamespace." +
- s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
+ s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
checkFilterPushed(df)
checkAggregateRemoved(df)
checkAggregatePushed(df, "STDDEV_SAMP")
@@ -601,14 +587,11 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
assert(row(1).getDouble(0) === 70.71067811865476)
assert(row(2).isNullAt(0))
}
- }
- protected def testCovarPop(isDistinct: Boolean = false): Unit = {
- val distinct = if (isDistinct) "DISTINCT " else ""
- test(s"scan with aggregate push-down: COVAR_POP ${withOrWithout(isDistinct)} DISTINCT") {
+ test(s"scan with aggregate push-down: COVAR_POP $withOrWithout DISTINCT") {
val df = sql(
s"SELECT COVAR_POP(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
- s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
+ s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
checkFilterPushed(df)
checkAggregateRemoved(df)
checkAggregatePushed(df, "COVAR_POP")
@@ -618,14 +601,11 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
assert(row(1).getDouble(0) === 2500.0)
assert(row(2).getDouble(0) === 0.0)
}
- }
- protected def testCovarSamp(isDistinct: Boolean = false): Unit = {
- val distinct = if (isDistinct) "DISTINCT " else ""
- test(s"scan with aggregate push-down: COVAR_SAMP ${withOrWithout(isDistinct)} DISTINCT") {
+ test(s"scan with aggregate push-down: COVAR_SAMP $withOrWithout DISTINCT") {
val df = sql(
s"SELECT COVAR_SAMP(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
- s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
+ s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
checkFilterPushed(df)
checkAggregateRemoved(df)
checkAggregatePushed(df, "COVAR_SAMP")
@@ -635,14 +615,11 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
assert(row(1).getDouble(0) === 5000.0)
assert(row(2).isNullAt(0))
}
- }
- protected def testCorr(isDistinct: Boolean = false): Unit = {
- val distinct = if (isDistinct) "DISTINCT " else ""
- test(s"scan with aggregate push-down: CORR ${withOrWithout(isDistinct)} DISTINCT") {
+ test(s"scan with aggregate push-down: CORR $withOrWithout DISTINCT") {
val df = sql(
s"SELECT CORR(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
- s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
+ s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
checkFilterPushed(df)
checkAggregateRemoved(df)
checkAggregatePushed(df, "CORR")
@@ -652,11 +629,8 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
assert(row(1).getDouble(0) === 1.0)
assert(row(2).isNullAt(0))
}
- }
- protected def testRegrIntercept(isDistinct: Boolean = false): Unit = {
- val distinct = if (isDistinct) "DISTINCT " else ""
- test(s"scan with aggregate push-down: REGR_INTERCEPT ${withOrWithout(isDistinct)} DISTINCT") {
+ test(s"scan with aggregate push-down: REGR_INTERCEPT $withOrWithout DISTINCT") {
val df = sql(
s"SELECT REGR_INTERCEPT(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
@@ -669,11 +643,8 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
assert(row(1).getDouble(0) === 0.0)
assert(row(2).isNullAt(0))
}
- }
- protected def testRegrSlope(isDistinct: Boolean = false): Unit = {
- val distinct = if (isDistinct) "DISTINCT " else ""
- test(s"scan with aggregate push-down: REGR_SLOPE ${withOrWithout(isDistinct)} DISTINCT") {
+ test(s"scan with aggregate push-down: REGR_SLOPE $withOrWithout DISTINCT") {
val df = sql(
s"SELECT REGR_SLOPE(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
@@ -686,11 +657,8 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
assert(row(1).getDouble(0) === 1.0)
assert(row(2).isNullAt(0))
}
- }
- protected def testRegrR2(isDistinct: Boolean = false): Unit = {
- val distinct = if (isDistinct) "DISTINCT " else ""
- test(s"scan with aggregate push-down: REGR_R2 ${withOrWithout(isDistinct)} DISTINCT") {
+ test(s"scan with aggregate push-down: REGR_R2 $withOrWithout DISTINCT") {
val df = sql(
s"SELECT REGR_R2(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
@@ -703,11 +671,8 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu
assert(row(1).getDouble(0) === 1.0)
assert(row(2).isNullAt(0))
}
- }
- protected def testRegrSXY(isDistinct: Boolean = false): Unit = {
- val distinct = if (isDistinct) "DISTINCT " else ""
- test(s"scan with aggregate push-down: REGR_SXY ${withOrWithout(isDistinct)} DISTINCT") {
+ test(s"scan with aggregate push-down: REGR_SXY $withOrWithout DISTINCT") {
val df = sql(
s"SELECT REGR_SXY(${distinct}bonus, bonus) FROM $catalogAndNamespace." +
s"${caseConvert("employee")} WHERE dept > 0 GROUP BY dept ORDER BY dept")
diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 34026083bb9a6..0433747822391 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -1063,6 +1063,28 @@
],
"sqlState" : "42903"
},
+ "INVALID_WRITE_DISTRIBUTION" : {
+ "message" : [
+ "The requested write distribution is invalid."
+ ],
+ "subClass" : {
+ "PARTITION_NUM_AND_SIZE" : {
+ "message" : [
+ "The partition number and advisory partition size can't be specified at the same time."
+ ]
+ },
+ "PARTITION_NUM_WITH_UNSPECIFIED_DISTRIBUTION" : {
+ "message" : [
+ "The number of partitions can't be specified with unspecified distribution."
+ ]
+ },
+ "PARTITION_SIZE_WITH_UNSPECIFIED_DISTRIBUTION" : {
+ "message" : [
+ "The advisory partition size can't be specified with unspecified distribution."
+ ]
+ }
+ }
+ },
"LOCATION_ALREADY_EXISTS" : {
"message" : [
"Cannot name the managed table as , as its associated location already exists. Please pick a different table name, or remove the existing location first."
@@ -2931,11 +2953,6 @@
"Unsupported data type ."
]
},
- "_LEGACY_ERROR_TEMP_1178" : {
- "message" : [
- "The number of partitions can't be specified with unspecified distribution. Invalid writer requirements detected."
- ]
- },
"_LEGACY_ERROR_TEMP_1181" : {
"message" : [
"Stream-stream join without equality predicate is not supported."
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskInfo.scala
index 0ee0dc6ae6016..2d4624828a94d 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskInfo.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskInfo.scala
@@ -103,11 +103,8 @@ class TaskInfo(
// finishTime should be set larger than 0, otherwise "finished" below will return false.
assert(time > 0)
finishTime = time
- if (state == TaskState.FAILED) {
- failed = true
- } else if (state == TaskState.KILLED) {
- killed = true
- }
+ failed = state == TaskState.FAILED
+ killed = state == TaskState.KILLED
}
private[spark] def launchSucceeded(): Unit = {
diff --git a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala
index 27198039fdbaa..ff12f643497d0 100644
--- a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala
@@ -31,7 +31,8 @@ import org.apache.logging.log4j._
import org.apache.logging.log4j.core.{LogEvent, Logger, LoggerContext}
import org.apache.logging.log4j.core.appender.AbstractAppender
import org.apache.logging.log4j.core.config.Property
-import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, BeforeAndAfterEach, Failed, Outcome}
+import org.scalactic.source.Position
+import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, BeforeAndAfterEach, Failed, Outcome, Tag}
import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite
import org.apache.spark.deploy.LocalSparkCluster
@@ -137,6 +138,19 @@ abstract class SparkFunSuite
java.nio.file.Paths.get(sparkHome, first +: more: _*)
}
+ // Subclasses can override this to exclude tests by their exact names.
+ // This is useful when a suite inherits tests that it does not want to run.
+ protected def excluded: Seq[String] = Seq.empty
+
+ override protected def test(testName: String, testTags: Tag*)(testBody: => Any)
+ (implicit pos: Position): Unit = {
+ if (excluded.contains(testName)) {
+ ignore(s"$testName (excluded)")(testBody)
+ } else {
+ super.test(testName, testTags: _*)(testBody)
+ }
+ }
+
/**
* Note: this method doesn't support `BeforeAndAfter`. You must use `BeforeAndAfterEach` to
* set up and tear down resources.
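With the `excluded` hook in place, any suite extending SparkFunSuite can drop an inherited test without touching the parent trait, which is exactly how the JDBC integration suites above use it. A minimal, hypothetical subclass for illustration; the test names are made up:

    class FastOnlySuite extends SparkFunSuite {
      // Tests whose exact names appear here are registered as ignored instead of run.
      override def excluded: Seq[String] = Seq("slow path: full table scan")

      test("fast path: metadata only") { assert(1 + 1 == 2) }
      test("slow path: full table scan") { fail("never runs; reported as ignored") }
    }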
diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3
index d9edb110f48a2..e3d588d36cd7b 100644
--- a/dev/deps/spark-deps-hadoop-2-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-2-hive-2.3
@@ -223,9 +223,9 @@ objenesis/3.2//objenesis-3.2.jar
okhttp/3.12.12//okhttp-3.12.12.jar
okio/1.15.0//okio-1.15.0.jar
opencsv/2.3//opencsv-2.3.jar
-orc-core/1.8.2/shaded-protobuf/orc-core-1.8.2-shaded-protobuf.jar
-orc-mapreduce/1.8.2/shaded-protobuf/orc-mapreduce-1.8.2-shaded-protobuf.jar
-orc-shims/1.8.2//orc-shims-1.8.2.jar
+orc-core/1.8.3/shaded-protobuf/orc-core-1.8.3-shaded-protobuf.jar
+orc-mapreduce/1.8.3/shaded-protobuf/orc-mapreduce-1.8.3-shaded-protobuf.jar
+orc-shims/1.8.3//orc-shims-1.8.3.jar
oro/2.0.8//oro-2.0.8.jar
osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
paranamer/2.8//paranamer-2.8.jar
diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3
index 5c17fff4d3789..fd32245ec2865 100644
--- a/dev/deps/spark-deps-hadoop-3-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -210,9 +210,9 @@ opencsv/2.3//opencsv-2.3.jar
opentracing-api/0.33.0//opentracing-api-0.33.0.jar
opentracing-noop/0.33.0//opentracing-noop-0.33.0.jar
opentracing-util/0.33.0//opentracing-util-0.33.0.jar
-orc-core/1.8.2/shaded-protobuf/orc-core-1.8.2-shaded-protobuf.jar
-orc-mapreduce/1.8.2/shaded-protobuf/orc-mapreduce-1.8.2-shaded-protobuf.jar
-orc-shims/1.8.2//orc-shims-1.8.2.jar
+orc-core/1.8.3/shaded-protobuf/orc-core-1.8.3-shaded-protobuf.jar
+orc-mapreduce/1.8.3/shaded-protobuf/orc-mapreduce-1.8.3-shaded-protobuf.jar
+orc-shims/1.8.3//orc-shims-1.8.3.jar
oro/2.0.8//oro-2.0.8.jar
osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
paranamer/2.8//paranamer-2.8.jar
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 751f0687f2c8e..c31a9362cd7fb 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -271,12 +271,36 @@ def __hash__(self):
],
)
+mllib_local = Module(
+ name="mllib-local",
+ dependencies=[tags, core],
+ source_file_regexes=[
+ "mllib/local",
+ ],
+ sbt_test_goals=[
+ "mllib-local/test",
+ ],
+)
+
+
+mllib_common = Module(
+ name="mllib-common",
+ dependencies=[tags, mllib_local, sql],
+ source_file_regexes=[
+ "mllib/common",
+ ],
+ sbt_test_goals=[
+ "mllib-common/test",
+ ],
+)
+
connect = Module(
name="connect",
- dependencies=[hive],
+ dependencies=[hive, mllib_common],
source_file_regexes=[
"connector/connect",
],
+ build_profile_flags=["-Pconnect"],
sbt_test_goals=[
"connect/test",
"connect-client-jvm/test",
@@ -358,24 +382,12 @@ def __hash__(self):
)
-mllib_local = Module(
- name="mllib-local",
- dependencies=[tags, core],
- source_file_regexes=[
- "mllib-local",
- ],
- sbt_test_goals=[
- "mllib-local/test",
- ],
-)
-
-
mllib = Module(
name="mllib",
- dependencies=[mllib_local, streaming, sql],
+ dependencies=[mllib_local, mllib_common, streaming, sql],
source_file_regexes=[
"data/mllib/",
- "mllib/",
+ "mllib/core/",
],
sbt_test_goals=[
"mllib/test",
@@ -501,48 +513,6 @@ def __hash__(self):
],
)
-pyspark_connect = Module(
- name="pyspark-connect",
- dependencies=[pyspark_sql, connect],
- source_file_regexes=["python/pyspark/sql/connect"],
- python_test_goals=[
- # doctests
- "pyspark.sql.connect.catalog",
- "pyspark.sql.connect.conf",
- "pyspark.sql.connect.group",
- "pyspark.sql.connect.session",
- "pyspark.sql.connect.window",
- "pyspark.sql.connect.column",
- "pyspark.sql.connect.readwriter",
- "pyspark.sql.connect.dataframe",
- "pyspark.sql.connect.functions",
- # unittests
- "pyspark.sql.tests.connect.test_client",
- "pyspark.sql.tests.connect.test_connect_plan",
- "pyspark.sql.tests.connect.test_connect_basic",
- "pyspark.sql.tests.connect.test_connect_function",
- "pyspark.sql.tests.connect.test_connect_column",
- "pyspark.sql.tests.connect.test_parity_datasources",
- "pyspark.sql.tests.connect.test_parity_catalog",
- "pyspark.sql.tests.connect.test_parity_conf",
- "pyspark.sql.tests.connect.test_parity_serde",
- "pyspark.sql.tests.connect.test_parity_functions",
- "pyspark.sql.tests.connect.test_parity_group",
- "pyspark.sql.tests.connect.test_parity_dataframe",
- "pyspark.sql.tests.connect.test_parity_types",
- "pyspark.sql.tests.connect.test_parity_column",
- "pyspark.sql.tests.connect.test_parity_readwriter",
- "pyspark.sql.tests.connect.test_parity_udf",
- "pyspark.sql.tests.connect.test_parity_pandas_udf",
- "pyspark.sql.tests.connect.test_parity_pandas_map",
- "pyspark.sql.tests.connect.test_parity_arrow_map",
- ],
- excluded_python_implementations=[
- "PyPy" # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and
- # they aren't available there
- ],
-)
-
pyspark_resource = Module(
name="pyspark-resource",
dependencies=[pyspark_core],
@@ -769,6 +739,58 @@ def __hash__(self):
],
)
+
+pyspark_connect = Module(
+ name="pyspark-connect",
+ dependencies=[pyspark_sql, pyspark_ml, connect],
+ source_file_regexes=[
+ "python/pyspark/sql/connect",
+ "python/pyspark/ml/connect",
+ ],
+ python_test_goals=[
+ # sql doctests
+ "pyspark.sql.connect.catalog",
+ "pyspark.sql.connect.conf",
+ "pyspark.sql.connect.group",
+ "pyspark.sql.connect.session",
+ "pyspark.sql.connect.window",
+ "pyspark.sql.connect.column",
+ "pyspark.sql.connect.readwriter",
+ "pyspark.sql.connect.dataframe",
+ "pyspark.sql.connect.functions",
+ # sql unittests
+ "pyspark.sql.tests.connect.test_client",
+ "pyspark.sql.tests.connect.test_connect_plan",
+ "pyspark.sql.tests.connect.test_connect_basic",
+ "pyspark.sql.tests.connect.test_connect_function",
+ "pyspark.sql.tests.connect.test_connect_column",
+ "pyspark.sql.tests.connect.test_parity_datasources",
+ "pyspark.sql.tests.connect.test_parity_catalog",
+ "pyspark.sql.tests.connect.test_parity_conf",
+ "pyspark.sql.tests.connect.test_parity_serde",
+ "pyspark.sql.tests.connect.test_parity_functions",
+ "pyspark.sql.tests.connect.test_parity_group",
+ "pyspark.sql.tests.connect.test_parity_dataframe",
+ "pyspark.sql.tests.connect.test_parity_types",
+ "pyspark.sql.tests.connect.test_parity_column",
+ "pyspark.sql.tests.connect.test_parity_readwriter",
+ "pyspark.sql.tests.connect.test_parity_udf",
+ "pyspark.sql.tests.connect.test_parity_pandas_udf",
+ "pyspark.sql.tests.connect.test_parity_pandas_map",
+ "pyspark.sql.tests.connect.test_parity_arrow_map",
+ "pyspark.sql.tests.connect.test_parity_pandas_grouped_map",
+ # ml doctests
+ "pyspark.ml.connect.functions",
+ # ml unittests
+ "pyspark.ml.tests.connect.test_connect_function",
+ ],
+ excluded_python_implementations=[
+ "PyPy" # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and
+ # they aren't available there
+ ],
+)
+
+
pyspark_errors = Module(
name="pyspark-errors",
dependencies=[],
diff --git a/dev/sparktestsupport/utils.py b/dev/sparktestsupport/utils.py
index 6b190eb5ab27a..5c270d0948eca 100755
--- a/dev/sparktestsupport/utils.py
+++ b/dev/sparktestsupport/utils.py
@@ -112,22 +112,25 @@ def determine_modules_to_test(changed_modules, deduplicated=True):
>>> sorted([x.name for x in determine_modules_to_test([modules.sql])])
... # doctest: +NORMALIZE_WHITESPACE
['avro', 'connect', 'docker-integration-tests', 'examples', 'hive', 'hive-thriftserver',
- 'mllib', 'protobuf', 'pyspark-connect', 'pyspark-ml', 'pyspark-mllib', 'pyspark-pandas',
- 'pyspark-pandas-slow', 'pyspark-sql', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
+ 'mllib', 'mllib-common', 'protobuf', 'pyspark-connect', 'pyspark-ml', 'pyspark-mllib',
+ 'pyspark-pandas', 'pyspark-pandas-slow', 'pyspark-sql', 'repl', 'sparkr', 'sql',
+ 'sql-kafka-0-10']
>>> sorted([x.name for x in determine_modules_to_test(
... [modules.sparkr, modules.sql], deduplicated=False)])
... # doctest: +NORMALIZE_WHITESPACE
['avro', 'connect', 'docker-integration-tests', 'examples', 'hive', 'hive-thriftserver',
- 'mllib', 'protobuf', 'pyspark-connect', 'pyspark-ml', 'pyspark-mllib', 'pyspark-pandas',
- 'pyspark-pandas-slow', 'pyspark-sql', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
+ 'mllib', 'mllib-common', 'protobuf', 'pyspark-connect', 'pyspark-ml', 'pyspark-mllib',
+ 'pyspark-pandas', 'pyspark-pandas-slow', 'pyspark-sql', 'repl', 'sparkr', 'sql',
+ 'sql-kafka-0-10']
>>> sorted([x.name for x in determine_modules_to_test(
... [modules.sql, modules.core], deduplicated=False)])
... # doctest: +NORMALIZE_WHITESPACE
['avro', 'catalyst', 'connect', 'core', 'docker-integration-tests', 'examples', 'graphx',
- 'hive', 'hive-thriftserver', 'mllib', 'mllib-local', 'protobuf', 'pyspark-connect',
- 'pyspark-core', 'pyspark-ml', 'pyspark-mllib', 'pyspark-pandas', 'pyspark-pandas-slow',
- 'pyspark-resource', 'pyspark-sql', 'pyspark-streaming', 'repl', 'root', 'sparkr', 'sql',
- 'sql-kafka-0-10', 'streaming', 'streaming-kafka-0-10', 'streaming-kinesis-asl']
+ 'hive', 'hive-thriftserver', 'mllib', 'mllib-common', 'mllib-local', 'protobuf',
+ 'pyspark-connect', 'pyspark-core', 'pyspark-ml', 'pyspark-mllib', 'pyspark-pandas',
+ 'pyspark-pandas-slow', 'pyspark-resource', 'pyspark-sql', 'pyspark-streaming', 'repl',
+ 'root', 'sparkr', 'sql', 'sql-kafka-0-10', 'streaming', 'streaming-kafka-0-10',
+ 'streaming-kinesis-asl']
"""
modules_to_test = set()
for module in changed_modules:
diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html
index d44639227665d..9b7c469246165 100755
--- a/docs/_layouts/global.html
+++ b/docs/_layouts/global.html
@@ -7,6 +7,8 @@
+
+
{{ page.title }} - Spark {{site.SPARK_VERSION_SHORT}} Documentation
{% if page.description %}
@@ -17,16 +19,13 @@
{% endif %}
-
-
-
-
+
+
+
+
+
@@ -34,96 +33,118 @@
-
+
-