From ca79a488929f7777b5c2262c12f85bfa0272ca5d Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Fri, 27 Nov 2020 11:16:25 +0800 Subject: [PATCH] refactor test --- .../unescaped-quotes-unescaped-delimiter.csv | 3 -- .../execution/datasources/csv/CSVSuite.scala | 33 ++++++++++++------- 2 files changed, 21 insertions(+), 15 deletions(-) delete mode 100644 sql/core/src/test/resources/test-data/unescaped-quotes-unescaped-delimiter.csv diff --git a/sql/core/src/test/resources/test-data/unescaped-quotes-unescaped-delimiter.csv b/sql/core/src/test/resources/test-data/unescaped-quotes-unescaped-delimiter.csv deleted file mode 100644 index a1d91b6d27a79..0000000000000 --- a/sql/core/src/test/resources/test-data/unescaped-quotes-unescaped-delimiter.csv +++ /dev/null @@ -1,3 +0,0 @@ -c1,c2 -"a,""b,c","xyz" -"a,b,c","x""yz" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 97c0fe11c17ad..30f0e45d04eab 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -75,8 +75,6 @@ abstract class CSVSuite private val valueMalformedFile = "test-data/value-malformed.csv" private val badAfterGoodFile = "test-data/bad_after_good.csv" private val malformedRowFile = "test-data/malformedRow.csv" - private val unescapedQuotesAndUnescapedDelimiterFile = - "test-data/unescaped-quotes-unescaped-delimiter.csv" /** Verifies data and schema. 
*/ private def verifyCars( @@ -2432,16 +2430,27 @@ abstract class CSVSuite } test("SPARK-33566: configure UnescapedQuoteHandling to parse " + - "unescapedQuotesAndUnescapedDelimiterFile correctly") { - // Without configure UnescapedQuoteHandling to STOP_AT_CLOSING_QUOTE, - // the result will be Row(""""a,""b""", """c""""), Row("""a,b,c""", """"x""yz"""") - val result = spark.read - .option("inferSchema", "true") - .option("header", "true") - .option("unescapedQuoteHandling", "STOP_AT_CLOSING_QUOTE") - .csv(testFile(unescapedQuotesAndUnescapedDelimiterFile)).collect() - val exceptResults = Array(Row("""a,""b,c""", "xyz"), Row("""a,b,c""", """x""yz""")) - assert(result.sameElements(exceptResults)) + "unescaped quotes and unescaped delimiter data correctly") { + withTempPath { path => + val dataPath = path.getCanonicalPath + val row1 = Row("""a,""b,c""", "xyz") + val row2 = Row("""a,b,c""", """x""yz""") + // Generate the test data, using `,` as the delimiter and `"` as the quote character, without escaping them. + Seq( + """c1,c2""", + s""""${row1.getString(0)}","${row1.getString(1)}"""", + s""""${row2.getString(0)}","${row2.getString(1)}"""") + .toDF().repartition(1).write.text(dataPath) + // Without configuring unescapedQuoteHandling to STOP_AT_CLOSING_QUOTE, + // the result will be Row(""""a,""b""", """c""""), Row("""a,b,c""", """"x""yz"""") + val result = spark.read + .option("inferSchema", "true") + .option("header", "true") + .option("unescapedQuoteHandling", "STOP_AT_CLOSING_QUOTE") + .csv(dataPath).collect() + val exceptResults = Array(row1, row2) + assert(result.sameElements(exceptResults)) + } } }