Skip to content

Commit

Permalink
[SPARK-32718][SQL] Remove unnecessary keywords for interval units
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

Remove the YEAR, MONTH, DAY, HOUR, MINUTE, SECOND keywords. They are not useful in the parser, because we need to support plural forms like YEARS, so the parser has to accept a general identifier as the interval unit anyway.

### Why are the changes needed?

These keywords are reserved in ANSI. If Spark has these keywords, then they become reserved under ANSI mode. This makes Spark unable to run the TPCDS queries, as they use YEAR as an alias name.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Added `TPCDSQueryANSISuite`, to make sure Spark with ANSI mode can run TPCDS queries.

Closes apache#29560 from cloud-fan/keyword.

Authored-by: Wenchen Fan <wenchen@databricks.com>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
  • Loading branch information
cloud-fan authored and dongjoon-hyun committed Aug 29, 2020
1 parent a0bd273 commit ccc0250
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 60 deletions.
6 changes: 0 additions & 6 deletions docs/sql-ref-ansi-compliance.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,6 @@ Below is a list of all the keywords in Spark SQL.
|DATA|non-reserved|non-reserved|non-reserved|
|DATABASE|non-reserved|non-reserved|non-reserved|
|DATABASES|non-reserved|non-reserved|non-reserved|
|DAY|reserved|non-reserved|reserved|
|DBPROPERTIES|non-reserved|non-reserved|non-reserved|
|DEFINED|non-reserved|non-reserved|non-reserved|
|DELETE|non-reserved|non-reserved|reserved|
Expand Down Expand Up @@ -227,7 +226,6 @@ Below is a list of all the keywords in Spark SQL.
|GROUP|reserved|non-reserved|reserved|
|GROUPING|non-reserved|non-reserved|reserved|
|HAVING|reserved|non-reserved|reserved|
|HOUR|reserved|non-reserved|reserved|
|IF|non-reserved|non-reserved|not a keyword|
|IGNORE|non-reserved|non-reserved|non-reserved|
|IMPORT|non-reserved|non-reserved|non-reserved|
Expand Down Expand Up @@ -265,8 +263,6 @@ Below is a list of all the keywords in Spark SQL.
|MATCHED|non-reserved|non-reserved|non-reserved|
|MERGE|non-reserved|non-reserved|non-reserved|
|MINUS|non-reserved|strict-non-reserved|non-reserved|
|MINUTE|reserved|non-reserved|reserved|
|MONTH|reserved|non-reserved|reserved|
|MSCK|non-reserved|non-reserved|non-reserved|
|NAMESPACE|non-reserved|non-reserved|non-reserved|
|NAMESPACES|non-reserved|non-reserved|non-reserved|
Expand Down Expand Up @@ -326,7 +322,6 @@ Below is a list of all the keywords in Spark SQL.
|ROWS|non-reserved|non-reserved|reserved|
|SCHEMA|non-reserved|non-reserved|non-reserved|
|SCHEMAS|non-reserved|non-reserved|not a keyword|
|SECOND|reserved|non-reserved|reserved|
|SELECT|reserved|non-reserved|reserved|
|SEMI|non-reserved|strict-non-reserved|non-reserved|
|SEPARATED|non-reserved|non-reserved|non-reserved|
Expand Down Expand Up @@ -385,5 +380,4 @@ Below is a list of all the keywords in Spark SQL.
|WHERE|reserved|non-reserved|reserved|
|WINDOW|non-reserved|non-reserved|reserved|
|WITH|reserved|non-reserved|reserved|
|YEAR|reserved|non-reserved|reserved|
|ZONE|non-reserved|non-reserved|non-reserved|
Original file line number Diff line number Diff line change
Expand Up @@ -850,32 +850,22 @@ errorCapturingMultiUnitsInterval
;

multiUnitsInterval
: (intervalValue intervalUnit)+
: (intervalValue unit+=identifier)+
;

errorCapturingUnitToUnitInterval
: body=unitToUnitInterval (error1=multiUnitsInterval | error2=unitToUnitInterval)?
;

unitToUnitInterval
: value=intervalValue from=intervalUnit TO to=intervalUnit
: value=intervalValue from=identifier TO to=identifier
;

intervalValue
: (PLUS | MINUS)? (INTEGER_VALUE | DECIMAL_VALUE)
| STRING
;

intervalUnit
: DAY
| HOUR
| MINUTE
| MONTH
| SECOND
| YEAR
| identifier
;

colPosition
: position=FIRST | position=AFTER afterCol=errorCapturingIdentifier
;
Expand Down Expand Up @@ -1285,7 +1275,6 @@ nonReserved
| DATA
| DATABASE
| DATABASES
| DAY
| DBPROPERTIES
| DEFINED
| DELETE
Expand Down Expand Up @@ -1329,7 +1318,6 @@ nonReserved
| GROUP
| GROUPING
| HAVING
| HOUR
| IF
| IGNORE
| IMPORT
Expand Down Expand Up @@ -1362,8 +1350,6 @@ nonReserved
| MAP
| MATCHED
| MERGE
| MINUTE
| MONTH
| MSCK
| NAMESPACE
| NAMESPACES
Expand Down Expand Up @@ -1418,7 +1404,6 @@ nonReserved
| ROW
| ROWS
| SCHEMA
| SECOND
| SELECT
| SEPARATED
| SERDE
Expand Down Expand Up @@ -1473,7 +1458,6 @@ nonReserved
| WHERE
| WINDOW
| WITH
| YEAR
| ZONE
//--DEFAULT-NON-RESERVED-END
;
Expand Down Expand Up @@ -1537,7 +1521,6 @@ CURRENT_USER: 'CURRENT_USER';
DATA: 'DATA';
DATABASE: 'DATABASE';
DATABASES: 'DATABASES' | 'SCHEMAS';
DAY: 'DAY';
DBPROPERTIES: 'DBPROPERTIES';
DEFINED: 'DEFINED';
DELETE: 'DELETE';
Expand Down Expand Up @@ -1583,7 +1566,6 @@ GRANT: 'GRANT';
GROUP: 'GROUP';
GROUPING: 'GROUPING';
HAVING: 'HAVING';
HOUR: 'HOUR';
IF: 'IF';
IGNORE: 'IGNORE';
IMPORT: 'IMPORT';
Expand Down Expand Up @@ -1620,8 +1602,6 @@ MACRO: 'MACRO';
MAP: 'MAP';
MATCHED: 'MATCHED';
MERGE: 'MERGE';
MINUTE: 'MINUTE';
MONTH: 'MONTH';
MSCK: 'MSCK';
NAMESPACE: 'NAMESPACE';
NAMESPACES: 'NAMESPACES';
Expand Down Expand Up @@ -1679,7 +1659,6 @@ ROLLUP: 'ROLLUP';
ROW: 'ROW';
ROWS: 'ROWS';
SCHEMA: 'SCHEMA';
SECOND: 'SECOND';
SELECT: 'SELECT';
SEMI: 'SEMI';
SEPARATED: 'SEPARATED';
Expand Down Expand Up @@ -1738,7 +1717,6 @@ WHEN: 'WHEN';
WHERE: 'WHERE';
WINDOW: 'WINDOW';
WITH: 'WITH';
YEAR: 'YEAR';
ZONE: 'ZONE';
//--SPARK-KEYWORD-LIST-END
//============================
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2125,7 +2125,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
*/
override def visitMultiUnitsInterval(ctx: MultiUnitsIntervalContext): CalendarInterval = {
withOrigin(ctx) {
val units = ctx.intervalUnit().asScala
val units = ctx.unit.asScala
val values = ctx.intervalValue().asScala
try {
assert(units.length == values.length)
Expand Down
4 changes: 4 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/interval.sql
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ select interval '15:40:32.99899999' hour to second;
select interval '40:32.99899999' minute to second;
select interval '40:32' minute to second;
select interval 30 day day;
select interval 30 days days;

-- invalid day-time string intervals
select interval '20 15:40:32.99899999' day to hour;
Expand Down Expand Up @@ -90,6 +91,9 @@ select interval '12:11:10' hour to second '1' year;
select interval (-30) day;
select interval (a + 1) day;
select interval 30 day day day;
select interval (-30) days;
select interval (a + 1) days;
select interval 30 days days days;

-- Interval year-month arithmetic

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,15 +199,9 @@ struct<weekday(CAST(2007-02-03 AS DATE)):int,weekday(CAST(2009-07-30 AS DATE)):i
-- !query
select year('1500-01-01'), month('1500-01-01'), dayOfYear('1500-01-01')
-- !query schema
struct<>
struct<year(CAST(1500-01-01 AS DATE)):int,month(CAST(1500-01-01 AS DATE)):int,dayofyear(CAST(1500-01-01 AS DATE)):int>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

no viable alternative at input 'year'(line 1, pos 7)

== SQL ==
select year('1500-01-01'), month('1500-01-01'), dayOfYear('1500-01-01')
-------^^^
1500 1 1


-- !query
Expand Down
64 changes: 44 additions & 20 deletions sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 94
-- Number of queries: 98


-- !query
Expand Down Expand Up @@ -355,15 +355,17 @@ struct<INTERVAL '40 minutes 32 seconds':interval>
-- !query
select interval 30 day day
-- !query schema
struct<>
struct<day:interval>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException
30 days

no viable alternative at input 'day'(line 1, pos 23)

== SQL ==
select interval 30 day day
-----------------------^^^
-- !query
select interval 30 days days
-- !query schema
struct<days:interval>
-- !query output
30 days


-- !query
Expand Down Expand Up @@ -655,41 +657,63 @@ select interval (-30) day
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException
org.apache.spark.sql.AnalysisException
Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7

no viable alternative at input 'day'(line 1, pos 22)

== SQL ==
select interval (-30) day
----------------------^^^
-- !query
select interval (a + 1) day
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7


-- !query
select interval (a + 1) day
select interval 30 day day day
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

no viable alternative at input 'day'(line 1, pos 24)
extraneous input 'day' expecting {<EOF>, ';'}(line 1, pos 27)

== SQL ==
select interval (a + 1) day
------------------------^^^
select interval 30 day day day
---------------------------^^^


-- !query
select interval 30 day day day
select interval (-30) days
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7


-- !query
select interval (a + 1) days
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7


-- !query
select interval 30 days days days
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

no viable alternative at input 'day'(line 1, pos 23)
extraneous input 'days' expecting {<EOF>, ';'}(line 1, pos 29)

== SQL ==
select interval 30 day day day
-----------------------^^^
select interval 30 days days days
-----------------------------^^^


-- !query
Expand Down
42 changes: 41 additions & 1 deletion sql/core/src/test/resources/sql-tests/results/interval.sql.out
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 94
-- Number of queries: 98


-- !query
Expand Down Expand Up @@ -354,6 +354,14 @@ struct<day:interval>
30 days


-- !query
select interval 30 days days
-- !query schema
struct<days:interval>
-- !query output
30 days


-- !query
select interval '20 15:40:32.99899999' day to hour
-- !query schema
Expand Down Expand Up @@ -670,6 +678,38 @@ select interval 30 day day day
---------------------------^^^


-- !query
select interval (-30) days
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7


-- !query
select interval (a + 1) days
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7


-- !query
select interval 30 days days days
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

extraneous input 'days' expecting {<EOF>, ';'}(line 1, pos 29)

== SQL ==
select interval 30 days days days
-----------------------------^^^


-- !query
create temporary view interval_arithmetic as
select CAST(dateval AS date), CAST(tsval AS timestamp), dateval as strval from values
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@

package org.apache.spark.sql

import org.apache.spark.SparkConf
import org.apache.spark.sql.catalyst.util.resourceToString
import org.apache.spark.sql.internal.SQLConf

/**
* This test suite ensures all the TPC-DS queries can be successfully analyzed, optimized
Expand Down Expand Up @@ -65,3 +67,8 @@ class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSBase {
/**
 * Variant of [[TPCDSQuerySuite]] that runs the same TPC-DS queries with
 * table/column statistics injected, exercising the cost-based optimizer paths.
 */
class TPCDSQueryWithStatsSuite extends TPCDSQuerySuite {
override def injectStats: Boolean = true
}

/**
 * Variant of [[TPCDSQuerySuite]] with ANSI mode enabled, verifying that all
 * TPC-DS queries still parse and analyze when ANSI reserved-keyword rules apply
 * (possible now that YEAR/MONTH/DAY/HOUR/MINUTE/SECOND are no longer keywords).
 */
class TPCDSQueryANSISuite extends TPCDSQuerySuite {
override protected def sparkConf: SparkConf =
super.sparkConf.set(SQLConf.ANSI_ENABLED, true)
}

0 comments on commit ccc0250

Please sign in to comment.