diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationTest.scala
similarity index 99%
rename from sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala
rename to sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationTest.scala
index 80aff446bc24b..8a28f8a20bd55 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelationTest.scala
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.spark.sql.sources
+package org.apache.spark.sql.execution.datasources
 
 import java.io.File
 
@@ -26,15 +26,14 @@ import org.apache.hadoop.fs.Path
 
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.sql._
 import org.apache.spark.sql.execution.DataSourceScanExec
-import org.apache.spark.sql.execution.datasources._
-import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.test.SQLTestUtils
+import org.apache.spark.sql.sources.SimpleTextSource
+import org.apache.spark.sql.test.{SharedSQLContext, SQLTestUtils}
 import org.apache.spark.sql.types._
 
-abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with TestHiveSingleton {
-  import spark.implicits._
+abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils {
+  import testImplicits._
 
   val dataSourceName: String
 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonHadoopFsRelationSuite.scala
similarity index 83%
rename from sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala
rename to sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonHadoopFsRelationSuite.scala
index 27f398ebf301a..928726ed24eba 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonHadoopFsRelationSuite.scala
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.spark.sql.sources
+package org.apache.spark.sql.execution.datasources.json
 
 import java.math.BigDecimal
 
@@ -23,9 +23,12 @@ import org.apache.hadoop.fs.Path
 
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.catalog.CatalogUtils
+import org.apache.spark.sql.execution.datasources.HadoopFsRelationTest
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types._
 
-class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
+class JsonHadoopFsRelationSuite extends HadoopFsRelationTest with SharedSQLContext {
   override val dataSourceName: String = "json"
 
   private val badJson = "\u0000\u0000\u0000A\u0001AAA"
 
@@ -110,14 +113,16 @@ class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
 
   test("invalid json with leading nulls - from file (multiLine=true)") {
     import testImplicits._
-    withTempDir { tempDir =>
-      val path = tempDir.getAbsolutePath
-      Seq(badJson, """{"a":1}""").toDS().write.mode("overwrite").text(path)
-      val expected = s"""$badJson\n{"a":1}\n"""
-      val schema = new StructType().add("a", IntegerType).add("_corrupt_record", StringType)
-      val df =
-        spark.read.format(dataSourceName).option("multiLine", true).schema(schema).load(path)
-      checkAnswer(df, Row(null, expected))
+    withSQLConf(SQLConf.MAX_RECORDS_PER_FILE.key -> "2") {
+      withTempDir { tempDir =>
+        val path = tempDir.getAbsolutePath
+        Seq(badJson, """{"a":1}""").toDS().repartition(1).write.mode("overwrite").text(path)
+        val expected = s"""$badJson\n{"a":1}\n"""
+        val schema = new StructType().add("a", IntegerType).add("_corrupt_record", StringType)
+        val df =
+          spark.read.format(dataSourceName).option("multiLine", true).schema(schema).load(path)
+        checkAnswer(df, Row(null, expected))
+      }
     }
   }
 }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcHadoopFsRelationSuite.scala
similarity index 65%
rename from sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala
rename to sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcHadoopFsRelationSuite.scala
index a1f054b8e3f44..6ed1227d57ba2 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcHadoopFsRelationSuite.scala
@@ -15,19 +15,20 @@
  * limitations under the License.
  */
 
-package org.apache.spark.sql.hive.orc
-
-import java.io.File
+package org.apache.spark.sql.execution.datasources.orc
 
 import org.apache.hadoop.fs.Path
 
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.catalog.CatalogUtils
+import org.apache.spark.sql.execution.datasources.HadoopFsRelationTest
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.sources.HadoopFsRelationTest
+import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types._
 
-class OrcHadoopFsRelationSuite extends HadoopFsRelationTest {
+class OrcHadoopFsRelationSuite extends OrcHadoopFsRelationBase with SharedSQLContext
+
+abstract class OrcHadoopFsRelationBase extends HadoopFsRelationTest {
   import testImplicits._
 
   override protected val enableAutoThreadAudit = false
@@ -82,44 +83,4 @@ class OrcHadoopFsRelationSuite extends HadoopFsRelationTest {
       }
     }
   }
-
-  test("SPARK-13543: Support for specifying compression codec for ORC via option()") {
-    withTempPath { dir =>
-      val path = s"${dir.getCanonicalPath}/table1"
-      val df = (1 to 5).map(i => (i, (i % 2).toString)).toDF("a", "b")
-      df.write
-        .option("compression", "ZlIb")
-        .orc(path)
-
-      // Check if this is compressed as ZLIB.
-      val maybeOrcFile = new File(path).listFiles().find { f =>
-        !f.getName.startsWith("_") && f.getName.endsWith(".zlib.orc")
-      }
-      assert(maybeOrcFile.isDefined)
-      val orcFilePath = maybeOrcFile.get.toPath.toString
-      val expectedCompressionKind =
-        OrcFileOperator.getFileReader(orcFilePath).get.getCompression
-      assert("ZLIB" === expectedCompressionKind.name())
-
-      val copyDf = spark
-        .read
-        .orc(path)
-      checkAnswer(df, copyDf)
-    }
-  }
-
-  test("Default compression codec is snappy for ORC compression") {
-    withTempPath { file =>
-      spark.range(0, 10).write
-        .orc(file.getCanonicalPath)
-      val expectedCompressionKind =
-        OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression
-      assert("SNAPPY" === expectedCompressionKind.name())
-    }
-  }
-}
-
-class HiveOrcHadoopFsRelationSuite extends OrcHadoopFsRelationSuite {
-  override val dataSourceName: String =
-    classOf[org.apache.spark.sql.hive.orc.OrcFileFormat].getCanonicalName
 }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetHadoopFsRelationSuite.scala
similarity index 97%
rename from sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
rename to sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetHadoopFsRelationSuite.scala
index dce5bb7ddba66..92ddffb62a785 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetHadoopFsRelationSuite.scala
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.spark.sql.sources
+package org.apache.spark.sql.execution.datasources.parquet
 
 import java.io.File
 
@@ -25,12 +25,13 @@ import org.apache.parquet.hadoop.ParquetOutputFormat
 
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.catalog.CatalogUtils
-import org.apache.spark.sql.execution.datasources.SQLHadoopMapReduceCommitProtocol
+import org.apache.spark.sql.execution.datasources.{HadoopFsRelationTest, SQLHadoopMapReduceCommitProtocol}
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types._
 
-class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest {
+class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest with SharedSQLContext {
   import testImplicits._
 
   override val dataSourceName: String = "parquet"
 
@@ -162,7 +163,6 @@ class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest {
   }
 
   test("SPARK-11500: Not deterministic order of columns when using merging schemas.") {
-    import testImplicits._
     withSQLConf(SQLConf.PARQUET_SCHEMA_MERGING_ENABLED.key -> "true") {
       withTempPath { dir =>
         val pathOne = s"${dir.getCanonicalPath}/part=1"
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala
similarity index 93%
rename from sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala
rename to sql/core/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala
index 2ec593b95c9b6..57700fe9f87a7 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/SimpleTextHadoopFsRelationSuite.scala
@@ -21,9 +21,12 @@ import org.apache.hadoop.fs.Path
 
 import org.apache.spark.sql.catalyst.catalog.CatalogUtils
 import org.apache.spark.sql.catalyst.expressions.PredicateHelper
+import org.apache.spark.sql.execution.datasources.HadoopFsRelationTest
+import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types._
 
-class SimpleTextHadoopFsRelationSuite extends HadoopFsRelationTest with PredicateHelper {
+class SimpleTextHadoopFsRelationSuite
+  extends HadoopFsRelationTest with PredicateHelper with SharedSQLContext {
   override val dataSourceName: String = classOf[SimpleTextSource].getCanonicalName
 
   // We have a very limited number of supported types at here since it is just for a
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala
similarity index 100%
rename from sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala
rename to sql/core/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcHadoopFsRelationSuite.scala
new file mode 100644
index 0000000000000..55d909987d2d7
--- /dev/null
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcHadoopFsRelationSuite.scala
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.orc
+
+import java.io.File
+
+import org.apache.spark.sql.execution.datasources.orc.OrcHadoopFsRelationBase
+import org.apache.spark.sql.hive.test.TestHiveSingleton
+
+class HiveOrcHadoopFsRelationSuite extends OrcHadoopFsRelationBase with TestHiveSingleton {
+  import testImplicits._
+
+  override val dataSourceName: String =
+    classOf[org.apache.spark.sql.hive.orc.OrcFileFormat].getCanonicalName
+
+  test("SPARK-13543: Support for specifying compression codec for ORC via option()") {
+    withTempPath { dir =>
+      val path = s"${dir.getCanonicalPath}/table1"
+      val df = (1 to 5).map(i => (i, (i % 2).toString)).toDF("a", "b")
+      df.write
+        .option("compression", "ZlIb")
+        .orc(path)
+
+      // Check if this is compressed as ZLIB.
+      val maybeOrcFile = new File(path).listFiles().find { f =>
+        !f.getName.startsWith("_") && f.getName.endsWith(".zlib.orc")
+      }
+      assert(maybeOrcFile.isDefined)
+      val orcFilePath = maybeOrcFile.get.toPath.toString
+      val expectedCompressionKind =
+        OrcFileOperator.getFileReader(orcFilePath).get.getCompression
+      assert("ZLIB" === expectedCompressionKind.name())
+
+      val copyDf = spark
+        .read
+        .orc(path)
+      checkAnswer(df, copyDf)
+    }
+  }
+
+  test("Default compression codec is snappy for ORC compression") {
+    withTempPath { file =>
+      spark.range(0, 10).write
+        .orc(file.getCanonicalPath)
+      val expectedCompressionKind =
+        OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression
+      assert("SNAPPY" === expectedCompressionKind.name())
+    }
+  }
+}