From ccc0250a08fa9519a6dc3c1153e51c1e110f1d7d Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sat, 29 Aug 2020 14:06:01 -0700 Subject: [PATCH] [SPARK-32718][SQL] Remove unnecessary keywords for interval units ### What changes were proposed in this pull request? Remove the YEAR, MONTH, DAY, HOUR, MINUTE, SECOND keywords. They are not useful in the parser, as we need to support plural like YEARS, so the parser has to accept the general identifier as interval unit anyway. ### Why are the changes needed? These keywords are reserved in ANSI. If Spark has these keywords, then they become reserved under ANSI mode. This makes Spark not able to run TPCDS queries as they use YEAR as alias name. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added `TPCDSQueryANSISuite`, to make sure Spark with ANSI mode can run TPCDS queries. Closes #29560 from cloud-fan/keyword. Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- docs/sql-ref-ansi-compliance.md | 6 -- .../spark/sql/catalyst/parser/SqlBase.g4 | 26 +------- .../sql/catalyst/parser/AstBuilder.scala | 2 +- .../resources/sql-tests/inputs/interval.sql | 4 ++ .../sql-tests/results/ansi/datetime.sql.out | 10 +-- .../sql-tests/results/ansi/interval.sql.out | 64 +++++++++++++------ .../sql-tests/results/interval.sql.out | 42 +++++++++++- .../apache/spark/sql/TPCDSQuerySuite.scala | 7 ++ 8 files changed, 101 insertions(+), 60 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index e786c0bc9aff8..d6e99312bb66e 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -181,7 +181,6 @@ Below is a list of all the keywords in Spark SQL. |DATA|non-reserved|non-reserved|non-reserved| |DATABASE|non-reserved|non-reserved|non-reserved| |DATABASES|non-reserved|non-reserved|non-reserved| -|DAY|reserved|non-reserved|reserved| |DBPROPERTIES|non-reserved|non-reserved|non-reserved| |DEFINED|non-reserved|non-reserved|non-reserved| |DELETE|non-reserved|non-reserved|reserved| @@ -227,7 +226,6 @@ Below is a list of all the keywords in Spark SQL. |GROUP|reserved|non-reserved|reserved| |GROUPING|non-reserved|non-reserved|reserved| |HAVING|reserved|non-reserved|reserved| -|HOUR|reserved|non-reserved|reserved| |IF|non-reserved|non-reserved|not a keyword| |IGNORE|non-reserved|non-reserved|non-reserved| |IMPORT|non-reserved|non-reserved|non-reserved| @@ -265,8 +263,6 @@ Below is a list of all the keywords in Spark SQL. |MATCHED|non-reserved|non-reserved|non-reserved| |MERGE|non-reserved|non-reserved|non-reserved| |MINUS|non-reserved|strict-non-reserved|non-reserved| -|MINUTE|reserved|non-reserved|reserved| -|MONTH|reserved|non-reserved|reserved| |MSCK|non-reserved|non-reserved|non-reserved| |NAMESPACE|non-reserved|non-reserved|non-reserved| |NAMESPACES|non-reserved|non-reserved|non-reserved| @@ -326,7 +322,6 @@ Below is a list of all the keywords in Spark SQL. |ROWS|non-reserved|non-reserved|reserved| |SCHEMA|non-reserved|non-reserved|non-reserved| |SCHEMAS|non-reserved|non-reserved|not a keyword| -|SECOND|reserved|non-reserved|reserved| |SELECT|reserved|non-reserved|reserved| |SEMI|non-reserved|strict-non-reserved|non-reserved| |SEPARATED|non-reserved|non-reserved|non-reserved| @@ -385,5 +380,4 @@ Below is a list of all the keywords in Spark SQL. |WHERE|reserved|non-reserved|reserved| |WINDOW|non-reserved|non-reserved|reserved| |WITH|reserved|non-reserved|reserved| -|YEAR|reserved|non-reserved|reserved| |ZONE|non-reserved|non-reserved|non-reserved| diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 6fce7819897a6..ad0de528708a4 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -850,7 +850,7 @@ errorCapturingMultiUnitsInterval ; multiUnitsInterval - : (intervalValue intervalUnit)+ + : (intervalValue unit+=identifier)+ ; errorCapturingUnitToUnitInterval @@ -858,7 +858,7 @@ errorCapturingUnitToUnitInterval ; unitToUnitInterval - : value=intervalValue from=intervalUnit TO to=intervalUnit + : value=intervalValue from=identifier TO to=identifier ; intervalValue @@ -866,16 +866,6 @@ intervalValue | STRING ; -intervalUnit - : DAY - | HOUR - | MINUTE - | MONTH - | SECOND - | YEAR - | identifier - ; - colPosition : position=FIRST | position=AFTER afterCol=errorCapturingIdentifier ; @@ -1285,7 +1275,6 @@ nonReserved | DATA | DATABASE | DATABASES - | DAY | DBPROPERTIES | DEFINED | DELETE @@ -1329,7 +1318,6 @@ nonReserved | GROUP | GROUPING | HAVING - | HOUR | IF | IGNORE | IMPORT @@ -1362,8 +1350,6 @@ nonReserved | MAP | MATCHED | MERGE - | MINUTE - | MONTH | MSCK | NAMESPACE | NAMESPACES @@ -1418,7 +1404,6 @@ nonReserved | ROW | ROWS | SCHEMA - | SECOND | SELECT | SEPARATED | SERDE @@ -1473,7 +1458,6 @@ nonReserved | WHERE | WINDOW | WITH - | YEAR | ZONE //--DEFAULT-NON-RESERVED-END ; @@ -1537,7 +1521,6 @@ CURRENT_USER: 'CURRENT_USER'; DATA: 'DATA'; DATABASE: 'DATABASE'; DATABASES: 'DATABASES' | 'SCHEMAS'; -DAY: 'DAY'; DBPROPERTIES: 'DBPROPERTIES'; DEFINED: 'DEFINED'; DELETE: 'DELETE'; @@ -1583,7 +1566,6 @@ GRANT: 'GRANT'; GROUP: 'GROUP'; GROUPING: 'GROUPING'; HAVING: 'HAVING'; -HOUR: 'HOUR'; IF: 'IF'; IGNORE: 'IGNORE'; IMPORT: 'IMPORT'; @@ -1620,8 +1602,6 @@ MACRO: 'MACRO'; MAP: 'MAP'; MATCHED: 'MATCHED'; MERGE: 'MERGE'; -MINUTE: 'MINUTE'; -MONTH: 'MONTH'; MSCK: 'MSCK'; NAMESPACE: 'NAMESPACE'; NAMESPACES: 'NAMESPACES'; @@ -1679,7 +1659,6 @@ ROLLUP: 'ROLLUP'; ROW: 'ROW'; ROWS: 'ROWS'; SCHEMA: 'SCHEMA'; -SECOND: 'SECOND'; SELECT: 'SELECT'; SEMI: 'SEMI'; SEPARATED: 'SEPARATED'; @@ -1738,7 +1717,6 @@ WHEN: 'WHEN'; WHERE: 'WHERE'; WINDOW: 'WINDOW'; WITH: 'WITH'; -YEAR: 'YEAR'; ZONE: 'ZONE'; //--SPARK-KEYWORD-LIST-END //============================ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 716ceb4141130..d90c9901f96ac 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2125,7 +2125,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging */ override def visitMultiUnitsInterval(ctx: MultiUnitsIntervalContext): CalendarInterval = { withOrigin(ctx) { - val units = ctx.intervalUnit().asScala + val units = ctx.unit.asScala val values = ctx.intervalValue().asScala try { assert(units.length == values.length) diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql index 7173863313b2f..a5bc43f6228b9 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql @@ -55,6 +55,7 @@ select interval '15:40:32.99899999' hour to second; select interval '40:32.99899999' minute to second; select interval '40:32' minute to second; select interval 30 day day; +select interval 30 days days; -- invalid day-time string intervals select interval '20 15:40:32.99899999' day to hour; @@ -90,6 +91,9 @@ select interval '12:11:10' hour to second '1' year; select interval (-30) day; select interval (a + 1) day; select interval 30 day day day; +select interval (-30) days; +select interval (a + 1) days; +select interval 30 days days days; -- Interval year-month arithmetic diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out index d2d66713780d8..5fe0bd56bf8af 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out @@ -199,15 +199,9 @@ struct +struct -- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -no viable alternative at input 'year'(line 1, pos 7) - -== SQL == -select year('1500-01-01'), month('1500-01-01'), dayOfYear('1500-01-01') --------^^^ +1500 1 1 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index f80bea1d32734..c1c8a70bdde34 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 94 +-- Number of queries: 98 -- !query @@ -355,15 +355,17 @@ struct -- !query select interval 30 day day -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.catalyst.parser.ParseException +30 days -no viable alternative at input 'day'(line 1, pos 23) -== SQL == -select interval 30 day day ------------------------^^^ +-- !query +select interval 30 days days +-- !query schema +struct +-- !query output +30 days -- !query @@ -655,41 +657,63 @@ select interval (-30) day -- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.parser.ParseException +org.apache.spark.sql.AnalysisException +Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 -no viable alternative at input 'day'(line 1, pos 22) -== SQL == -select interval (-30) day -----------------------^^^ +-- !query +select interval (a + 1) day +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 -- !query -select interval (a + 1) day +select interval 30 day day day -- !query schema struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -no viable alternative at input 'day'(line 1, pos 24) +extraneous input 'day' expecting {, ';'}(line 1, pos 27) == SQL == -select interval (a + 1) day -------------------------^^^ +select interval 30 day day day +---------------------------^^^ -- !query -select interval 30 day day day +select interval (-30) days +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 + + +-- !query +select interval (a + 1) days +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 + + +-- !query +select interval 30 days days days -- !query schema struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -no viable alternative at input 'day'(line 1, pos 23) +extraneous input 'days' expecting {, ';'}(line 1, pos 29) == SQL == -select interval 30 day day day ------------------------^^^ +select interval 30 days days days +-----------------------------^^^ -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index 297c4fcd0cb95..31294cca4ce2d 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 94 +-- Number of queries: 98 -- !query @@ -354,6 +354,14 @@ struct 30 days +-- !query +select interval 30 days days +-- !query schema +struct +-- !query output +30 days + + -- !query select interval '20 15:40:32.99899999' day to hour -- !query schema @@ -670,6 +678,38 @@ select interval 30 day day day ---------------------------^^^ +-- !query +select interval (-30) days +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 + + +-- !query +select interval (a + 1) days +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 + + +-- !query +select interval 30 days days days +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +extraneous input 'days' expecting {, ';'}(line 1, pos 29) + +== SQL == +select interval 30 days days days +-----------------------------^^^ + + -- !query create temporary view interval_arithmetic as select CAST(dateval AS date), CAST(tsval AS timestamp), dateval as strval from values diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala index 30751af61d10e..decd1d6d08d27 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala @@ -17,7 +17,9 @@ package org.apache.spark.sql +import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.util.resourceToString +import org.apache.spark.sql.internal.SQLConf /** * This test suite ensures all the TPC-DS queries can be successfully analyzed, optimized @@ -65,3 +67,8 @@ class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSBase { class TPCDSQueryWithStatsSuite extends TPCDSQuerySuite { override def injectStats: Boolean = true } + +class TPCDSQueryANSISuite extends TPCDSQuerySuite { + override protected def sparkConf: SparkConf = + super.sparkConf.set(SQLConf.ANSI_ENABLED, true) +}