[SPARK-23158] [SQL] Move HadoopFsRelationTest test suites from sql/hive to sql/core #20331

Closed · wants to merge 3 commits
HadoopFsRelationTest.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/

package org.apache.spark.sql.sources
package org.apache.spark.sql.execution.datasources

import java.io.File

@@ -26,15 +26,14 @@ import org.apache.hadoop.fs.Path
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.sql._
import org.apache.spark.sql.execution.DataSourceScanExec
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.sources.SimpleTextSource
import org.apache.spark.sql.test.{SharedSQLContext, SQLTestUtils}
import org.apache.spark.sql.types._


abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with TestHiveSingleton {
import spark.implicits._
abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils {
import testImplicits._

val dataSourceName: String

JsonHadoopFsRelationSuite.scala
@@ -15,17 +15,20 @@
* limitations under the License.
*/

package org.apache.spark.sql.sources
package org.apache.spark.sql.execution.datasources.json

import java.math.BigDecimal

import org.apache.hadoop.fs.Path

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.catalog.CatalogUtils
import org.apache.spark.sql.execution.datasources.HadoopFsRelationTest
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types._

class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
class JsonHadoopFsRelationSuite extends HadoopFsRelationTest with SharedSQLContext {
override val dataSourceName: String = "json"

private val badJson = "\u0000\u0000\u0000A\u0001AAA"
@@ -110,14 +113,16 @@ class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {

test("invalid json with leading nulls - from file (multiLine=true)") {
import testImplicits._
withTempDir { tempDir =>
val path = tempDir.getAbsolutePath
Seq(badJson, """{"a":1}""").toDS().write.mode("overwrite").text(path)
val expected = s"""$badJson\n{"a":1}\n"""
val schema = new StructType().add("a", IntegerType).add("_corrupt_record", StringType)
val df =
spark.read.format(dataSourceName).option("multiLine", true).schema(schema).load(path)
checkAnswer(df, Row(null, expected))
withSQLConf(SQLConf.MAX_RECORDS_PER_FILE.key -> "2") {
Contributor: Just curious, why this change?

Member Author: The test will fail if SQLConf.MAX_RECORDS_PER_FILE is set to a value less than 2.

Contributor: I think the default value won't be less than 2; we don't need to be so careful...

withTempDir { tempDir =>
val path = tempDir.getAbsolutePath
Seq(badJson, """{"a":1}""").toDS().repartition(1).write.mode("overwrite").text(path)
val expected = s"""$badJson\n{"a":1}\n"""
val schema = new StructType().add("a", IntegerType).add("_corrupt_record", StringType)
val df =
spark.read.format(dataSourceName).option("multiLine", true).schema(schema).load(path)
checkAnswer(df, Row(null, expected))
}
}
}
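
The thread above about SQLConf.MAX_RECORDS_PER_FILE is the only place in the diff where behaviour, rather than packaging, is discussed. Below is a minimal sketch of the failure mode being guarded against; it is not code from this PR, and the app name and temp path are illustrative. Pinning the conf to 2 inside the test makes it independent of whatever value the shared session happens to carry.

// A minimal sketch, not part of this PR: illustrates why the author wraps the test in
// withSQLConf(SQLConf.MAX_RECORDS_PER_FILE.key -> "2"). With maxRecordsPerFile set to 1,
// the two rows are written to two text files even after repartition(1), so a multiLine
// JSON read would no longer see badJson and {"a":1} concatenated in a single file.
import org.apache.spark.sql.SparkSession

object MaxRecordsPerFileSketch extends App {
  val spark = SparkSession.builder()
    .master("local[1]")
    .appName("max-records-sketch")   // illustrative app name
    .getOrCreate()
  import spark.implicits._

  val badJson = "\u0000\u0000\u0000A\u0001AAA"
  val path = java.nio.file.Files.createTempDirectory("max-records").toString

  spark.conf.set("spark.sql.files.maxRecordsPerFile", "1")  // deliberately too small
  Seq(badJson, """{"a":1}""").toDS().repartition(1).write.mode("overwrite").text(path)

  // Expect two part files instead of one, which breaks the expected corrupt-record value.
  val partFiles = new java.io.File(path).listFiles().count(_.getName.startsWith("part-"))
  println(s"part files written: $partFiles")

  spark.stop()
}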

OrcHadoopFsRelationSuite.scala
@@ -15,19 +15,20 @@
* limitations under the License.
*/

package org.apache.spark.sql.hive.orc

import java.io.File
package org.apache.spark.sql.execution.datasources.orc

import org.apache.hadoop.fs.Path

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.catalog.CatalogUtils
import org.apache.spark.sql.execution.datasources.HadoopFsRelationTest
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.sources.HadoopFsRelationTest
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types._

class OrcHadoopFsRelationSuite extends HadoopFsRelationTest {
class OrcHadoopFsRelationSuite extends OrcHadoopFsRelationBase with SharedSQLContext

abstract class OrcHadoopFsRelationBase extends HadoopFsRelationTest {
import testImplicits._

override protected val enableAutoThreadAudit = false
@@ -82,44 +83,4 @@ class OrcHadoopFsRelationSuite extends HadoopFsRelationTest {
}
}
}

test("SPARK-13543: Support for specifying compression codec for ORC via option()") {
withTempPath { dir =>
val path = s"${dir.getCanonicalPath}/table1"
val df = (1 to 5).map(i => (i, (i % 2).toString)).toDF("a", "b")
df.write
.option("compression", "ZlIb")
.orc(path)

// Check if this is compressed as ZLIB.
val maybeOrcFile = new File(path).listFiles().find { f =>
!f.getName.startsWith("_") && f.getName.endsWith(".zlib.orc")
}
assert(maybeOrcFile.isDefined)
val orcFilePath = maybeOrcFile.get.toPath.toString
val expectedCompressionKind =
OrcFileOperator.getFileReader(orcFilePath).get.getCompression
Member Author: The same here.

assert("ZLIB" === expectedCompressionKind.name())

val copyDf = spark
.read
.orc(path)
checkAnswer(df, copyDf)
}
}

test("Default compression codec is snappy for ORC compression") {
withTempPath { file =>
spark.range(0, 10).write
.orc(file.getCanonicalPath)
val expectedCompressionKind =
OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression
Member Author: OrcFileOperator is defined in sql/hive.

Member: @gatorsmile This test case should be tested on the native implementation, too; HiveOrcHadoopFsRelationSuite only covers the Hive implementation.

assert("SNAPPY" === expectedCompressionKind.name())
}
}
}

class HiveOrcHadoopFsRelationSuite extends OrcHadoopFsRelationSuite {
Member Author (gatorsmile, Jan 19, 2018): This is Hive only. Thus, create a separate file for it.

Member: Thank you!

override val dataSourceName: String =
classOf[org.apache.spark.sql.hive.orc.OrcFileFormat].getCanonicalName
}
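
Two of the threads above note that OrcFileOperator lives in sql/hive and that the compression checks should also run against the native implementation. The sketch below shows one way such a check could be written without OrcFileOperator, using the Apache ORC reader API directly; it is not code from this PR, and the paths and app name are illustrative.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.orc.OrcFile

import org.apache.spark.sql.SparkSession

// Sketch only: reads the compression kind of an ORC file with the native ORC reader
// (org.apache.orc) instead of sql/hive's OrcFileOperator.
object OrcCompressionSketch extends App {
  val spark = SparkSession.builder()
    .master("local[1]")
    .appName("orc-compression-sketch")   // illustrative app name
    .getOrCreate()

  val dir = java.nio.file.Files.createTempDirectory("orc-compression").toFile
  val path = s"${dir.getCanonicalPath}/table1"
  spark.range(0, 10).write.option("compression", "zlib").orc(path)

  // Pick one data file and read its compression kind directly from the file footer.
  val orcFile = new java.io.File(path).listFiles()
    .find(f => !f.getName.startsWith("_") && f.getName.endsWith(".orc"))
    .get
  val reader = OrcFile.createReader(
    new Path(orcFile.getAbsolutePath), OrcFile.readerOptions(new Configuration()))
  println(s"compression kind: ${reader.getCompressionKind}")  // expected: ZLIB

  spark.stop()
}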
ParquetHadoopFsRelationSuite.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/

package org.apache.spark.sql.sources
package org.apache.spark.sql.execution.datasources.parquet

import java.io.File

@@ -25,12 +25,13 @@ import org.apache.parquet.hadoop.ParquetOutputFormat

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.catalog.CatalogUtils
import org.apache.spark.sql.execution.datasources.SQLHadoopMapReduceCommitProtocol
import org.apache.spark.sql.execution.datasources.{HadoopFsRelationTest, SQLHadoopMapReduceCommitProtocol}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types._


class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest {
class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest with SharedSQLContext {
import testImplicits._

override val dataSourceName: String = "parquet"
@@ -162,7 +163,6 @@ class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest {
}

test("SPARK-11500: Not deterministic order of columns when using merging schemas.") {
import testImplicits._
withSQLConf(SQLConf.PARQUET_SCHEMA_MERGING_ENABLED.key -> "true") {
withTempPath { dir =>
val pathOne = s"${dir.getCanonicalPath}/part=1"
SimpleTextHadoopFsRelationSuite.scala
@@ -21,9 +21,12 @@ import org.apache.hadoop.fs.Path

import org.apache.spark.sql.catalyst.catalog.CatalogUtils
import org.apache.spark.sql.catalyst.expressions.PredicateHelper
import org.apache.spark.sql.execution.datasources.HadoopFsRelationTest
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types._

class SimpleTextHadoopFsRelationSuite extends HadoopFsRelationTest with PredicateHelper {
class SimpleTextHadoopFsRelationSuite
extends HadoopFsRelationTest with PredicateHelper with SharedSQLContext {
override val dataSourceName: String = classOf[SimpleTextSource].getCanonicalName

// We have a very limited number of supported types at here since it is just for a
HiveOrcHadoopFsRelationSuite.scala (new file)
@@ -0,0 +1,65 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.hive.orc

import java.io.File

import org.apache.spark.sql.execution.datasources.orc.OrcHadoopFsRelationBase
import org.apache.spark.sql.hive.test.TestHiveSingleton

class HiveOrcHadoopFsRelationSuite extends OrcHadoopFsRelationBase with TestHiveSingleton {
import testImplicits._

override val dataSourceName: String =
classOf[org.apache.spark.sql.hive.orc.OrcFileFormat].getCanonicalName

test("SPARK-13543: Support for specifying compression codec for ORC via option()") {
withTempPath { dir =>
val path = s"${dir.getCanonicalPath}/table1"
val df = (1 to 5).map(i => (i, (i % 2).toString)).toDF("a", "b")
df.write
.option("compression", "ZlIb")
.orc(path)

// Check if this is compressed as ZLIB.
val maybeOrcFile = new File(path).listFiles().find { f =>
!f.getName.startsWith("_") && f.getName.endsWith(".zlib.orc")
}
assert(maybeOrcFile.isDefined)
val orcFilePath = maybeOrcFile.get.toPath.toString
val expectedCompressionKind =
OrcFileOperator.getFileReader(orcFilePath).get.getCompression
assert("ZLIB" === expectedCompressionKind.name())

val copyDf = spark
.read
.orc(path)
checkAnswer(df, copyDf)
}
}

test("Default compression codec is snappy for ORC compression") {
withTempPath { file =>
spark.range(0, 10).write
.orc(file.getCanonicalPath)
val expectedCompressionKind =
OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression
assert("SNAPPY" === expectedCompressionKind.name())
}
}
}
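
Taken together, the diff settles into one pattern: HadoopFsRelationTest becomes an abstract test body that only assumes SQLTestUtils, and each concrete suite chooses its session mix-in, SharedSQLContext in sql/core or TestHiveSingleton in sql/hive. Below is a self-contained, Spark-free analogue of that layering; every name in it is invented for illustration.

// Plain Scala sketch (no Spark): an abstract test body declares what it needs through an
// abstract trait, and each concrete suite supplies it via a mix-in, mirroring how
// SharedSQLContext / TestHiveSingleton supply the session to HadoopFsRelationTest subclasses.
trait SessionProvider { def sessionKind: String }                                 // ~ the abstract session requirement
trait SharedLocalSession extends SessionProvider { def sessionKind = "local" }    // ~ SharedSQLContext
trait HiveSingletonSession extends SessionProvider { def sessionKind = "hive" }   // ~ TestHiveSingleton

abstract class RelationTestBody extends SessionProvider {
  val dataSourceName: String
  def runAll(): Unit =
    println(s"running $dataSourceName relation tests on a $sessionKind session")
}

// sql/core-style suite: shared local session.
class JsonSuiteSketch extends RelationTestBody with SharedLocalSession {
  val dataSourceName: String = "json"
}

// sql/hive-style suite: same test body, Hive-backed session.
class HiveOrcSuiteSketch extends RelationTestBody with HiveSingletonSession {
  val dataSourceName: String = "hive-orc"
}

object SuiteLayeringDemo extends App {
  new JsonSuiteSketch().runAll()
  new HiveOrcSuiteSketch().runAll()
}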