From 179be76c295c5b22a9a4cf7ceac89186b90bd65e Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Wed, 28 Aug 2024 22:08:29 +0100 Subject: [PATCH 1/8] support new interval logic from sqlparse-rs --- Cargo.toml | 3 +- datafusion-cli/Cargo.lock | 6 +- datafusion/sql/src/expr/mod.rs | 4 +- datafusion/sql/src/expr/unary_op.rs | 2 +- datafusion/sql/src/expr/value.rs | 195 +++++++----------- datafusion/sql/tests/cases/plan_to_sql.rs | 44 +++- datafusion/sql/tests/sql_integration.rs | 53 ++++- datafusion/sqllogictest/test_files/dates.slt | 6 +- .../sqllogictest/test_files/interval.slt | 11 +- .../test_files/pg_compat/pg_interval.slt | 81 ++++++++ .../sqllogictest/test_files/timestamps.slt | 6 +- .../sqllogictest/test_files/type_coercion.slt | 4 +- 12 files changed, 254 insertions(+), 161 deletions(-) create mode 100644 datafusion/sqllogictest/test_files/pg_compat/pg_interval.slt diff --git a/Cargo.toml b/Cargo.toml index 124747999041..7d90eecb9239 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -134,7 +134,8 @@ rand = "0.8" regex = "1.8" rstest = "0.22.0" serde_json = "1" -sqlparser = { version = "0.50.0", features = ["visitor"] } +#sqlparser = { version = "0.50.0", features = ["visitor"] } +sqlparser = { git = "https://github.com/samuelcolvin/sqlparser-rs", rev = "6c2a971", features = ["visitor"] } tempfile = "3" thiserror = "1.0.44" tokio = { version = "1.36", features = ["macros", "rt", "sync"] } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index e35eb3906b9a..f21a7ba4ecfd 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -3545,8 +3545,7 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" version = "0.50.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2e5b515a2bd5168426033e9efbfd05500114833916f1d5c268f938b4ee130ac" +source = "git+https://github.com/samuelcolvin/sqlparser-rs?rev=6c2a971#6c2a9717897d7986c655c24bfcbcd7e814fd3d40" dependencies = [ "log", "sqlparser_derive", @@ -3555,8 +3554,7 @@ dependencies = [ [[package]] name = "sqlparser_derive" version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +source = "git+https://github.com/samuelcolvin/sqlparser-rs?rev=6c2a971#6c2a9717897d7986c655c24bfcbcd7e814fd3d40" dependencies = [ "proc-macro2", "quote", diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 035fd3816c6c..1b10fac0eb08 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -197,9 +197,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } SQLExpr::Array(arr) => self.sql_array_literal(arr.elem, schema), - SQLExpr::Interval(interval) => { - self.sql_interval_to_expr(false, interval, schema, planner_context) - } + SQLExpr::Interval(interval) => self.sql_interval_to_expr(false, interval), SQLExpr::Identifier(id) => { self.sql_identifier_to_expr(id, schema, planner_context) } diff --git a/datafusion/sql/src/expr/unary_op.rs b/datafusion/sql/src/expr/unary_op.rs index 9fcee7a06124..2a341fb7c446 100644 --- a/datafusion/sql/src/expr/unary_op.rs +++ b/datafusion/sql/src/expr/unary_op.rs @@ -43,7 +43,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { self.parse_sql_number(&n, true) } SQLExpr::Interval(interval) => { - self.sql_interval_to_expr(true, interval, schema, planner_context) + self.sql_interval_to_expr(true, interval) } // not a literal, apply negative operator on expression _ => Ok(Expr::Negative(Box::new(self.sql_expr_to_logical_expr( diff --git a/datafusion/sql/src/expr/value.rs b/datafusion/sql/src/expr/value.rs index afcd182fa343..64575645bc44 100644 --- a/datafusion/sql/src/expr/value.rs +++ b/datafusion/sql/src/expr/value.rs @@ -26,7 +26,7 @@ use datafusion_expr::expr::{BinaryExpr, Placeholder}; use datafusion_expr::planner::PlannerResult; use datafusion_expr::{lit, Expr, Operator}; use log::debug; -use sqlparser::ast::{BinaryOperator, Expr as SQLExpr, Interval, Value}; +use sqlparser::ast::{BinaryOperator, Expr as SQLExpr, Interval, UnaryOperator, Value}; use sqlparser::parser::ParserError::ParserError; use std::borrow::Cow; @@ -168,12 +168,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { /// Convert a SQL interval expression to a DataFusion logical plan /// expression + #[allow(clippy::only_used_in_recursion)] pub(super) fn sql_interval_to_expr( &self, negative: bool, interval: Interval, - schema: &DFSchema, - planner_context: &mut PlannerContext, ) -> Result { if interval.leading_precision.is_some() { return not_impl_err!( @@ -196,127 +195,42 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { ); } - // Only handle string exprs for now - let value = match *interval.value { - SQLExpr::Value( - Value::SingleQuotedString(s) | Value::DoubleQuotedString(s), - ) => { - if negative { - format!("-{s}") - } else { - s - } - } - // Support expressions like `interval '1 month' + date/timestamp`. - // Such expressions are parsed like this by sqlparser-rs - // - // Interval - // BinaryOp - // Value(StringLiteral) - // Cast - // Value(StringLiteral) - // - // This code rewrites them to the following: - // - // BinaryOp - // Interval - // Value(StringLiteral) - // Cast - // Value(StringLiteral) - SQLExpr::BinaryOp { left, op, right } => { - let df_op = match op { - BinaryOperator::Plus => Operator::Plus, - BinaryOperator::Minus => Operator::Minus, - BinaryOperator::Eq => Operator::Eq, - BinaryOperator::NotEq => Operator::NotEq, - BinaryOperator::Gt => Operator::Gt, - BinaryOperator::GtEq => Operator::GtEq, - BinaryOperator::Lt => Operator::Lt, - BinaryOperator::LtEq => Operator::LtEq, - _ => { - return not_impl_err!("Unsupported interval operator: {op:?}"); - } - }; - match ( - interval.leading_field.as_ref(), - left.as_ref(), - right.as_ref(), - ) { - (_, _, SQLExpr::Value(_)) => { - let left_expr = self.sql_interval_to_expr( - negative, - Interval { - value: left, - leading_field: interval.leading_field.clone(), - leading_precision: None, - last_field: None, - fractional_seconds_precision: None, - }, - schema, - planner_context, - )?; - let right_expr = self.sql_interval_to_expr( - false, - Interval { - value: right, - leading_field: interval.leading_field, - leading_precision: None, - last_field: None, - fractional_seconds_precision: None, - }, - schema, - planner_context, - )?; - return Ok(Expr::BinaryExpr(BinaryExpr::new( - Box::new(left_expr), - df_op, - Box::new(right_expr), - ))); - } - // In this case, the left node is part of the interval - // expr and the right node is an independent expr. - // - // Leading field is not supported when the right operand - // is not a value. - (None, _, _) => { - let left_expr = self.sql_interval_to_expr( - negative, - Interval { - value: left, - leading_field: None, - leading_precision: None, - last_field: None, - fractional_seconds_precision: None, - }, - schema, - planner_context, - )?; - let right_expr = self.sql_expr_to_logical_expr( - *right, - schema, - planner_context, - )?; - return Ok(Expr::BinaryExpr(BinaryExpr::new( - Box::new(left_expr), - df_op, - Box::new(right_expr), - ))); - } - _ => { - let value = SQLExpr::BinaryOp { left, op, right }; - return not_impl_err!( - "Unsupported interval argument. Expected string literal, got: {value:?}" - ); - } + if let SQLExpr::BinaryOp { left, op, right } = *interval.value { + let df_op = match op { + BinaryOperator::Plus => Operator::Plus, + BinaryOperator::Minus => Operator::Minus, + _ => { + return not_impl_err!("Unsupported interval operator: {op:?}"); } - } - _ => { - return not_impl_err!( - "Unsupported interval argument. Expected string literal, got: {:?}", - interval.value - ); - } - }; + }; + let left_expr = self.sql_interval_to_expr( + negative, + Interval { + value: left, + leading_field: interval.leading_field.clone(), + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + }, + )?; + let right_expr = self.sql_interval_to_expr( + false, + Interval { + value: right, + leading_field: interval.leading_field, + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + }, + )?; + return Ok(Expr::BinaryExpr(BinaryExpr::new( + Box::new(left_expr), + df_op, + Box::new(right_expr), + ))); + } + + let value = interval_literal(*interval.value, negative)?; let value = if has_units(&value) { // If the interval already contains a unit @@ -343,6 +257,41 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } } +fn interval_literal(interval_value: SQLExpr, negative: bool) -> Result { + let s = match interval_value { + SQLExpr::Value(Value::SingleQuotedString(s) | Value::DoubleQuotedString(s)) => s, + SQLExpr::Value(Value::Number(ref v, long)) => { + if long { + return not_impl_err!( + "Unsupported interval argument. Long number not supported: {interval_value:?}" + ); + } else { + v.to_string() + } + } + SQLExpr::UnaryOp { op, expr } => { + let negative = match op { + UnaryOperator::Minus => !negative, + UnaryOperator::Plus => negative, + _ => { + return not_impl_err!( + "Unsupported SQL unary operator in interval {op:?}" + ); + } + }; + interval_literal(*expr, negative)? + } + _ => { + return not_impl_err!("Unsupported interval argument. Expected string literal or number, got: {interval_value:?}"); + } + }; + if negative { + Ok(format!("-{s}")) + } else { + Ok(s) + } +} + // TODO make interval parsing better in arrow-rs / expose `IntervalType` fn has_units(val: &str) -> bool { let val = val.to_lowercase(); diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index ed23fada0cfb..3a0ad92f1a61 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -30,7 +30,7 @@ use datafusion_sql::unparser::dialect::{ use datafusion_sql::unparser::{expr_to_sql, plan_to_sql, Unparser}; use datafusion_functions::core::planner::CoreFunctionPlanner; -use sqlparser::dialect::{Dialect, GenericDialect, MySqlDialect}; +use sqlparser::dialect::{Dialect, GenericDialect, MySqlDialect, PostgreSqlDialect}; use sqlparser::parser::Parser; use crate::common::{MockContextProvider, MockSessionState}; @@ -481,11 +481,17 @@ fn test_table_references_in_plan_to_sql() { assert_eq!(format!("{}", sql), expected_sql) } - test("catalog.schema.table", "SELECT catalog.\"schema\".\"table\".id, catalog.\"schema\".\"table\".\"value\" FROM catalog.\"schema\".\"table\""); - test("schema.table", "SELECT \"schema\".\"table\".id, \"schema\".\"table\".\"value\" FROM \"schema\".\"table\""); + test( + "catalog.schema.table", + r#"SELECT "catalog"."schema"."table".id, "catalog"."schema"."table"."value" FROM "catalog"."schema"."table""#, + ); + test( + "schema.table", + r#"SELECT "schema"."table".id, "schema"."table"."value" FROM "schema"."table""#, + ); test( "table", - "SELECT \"table\".id, \"table\".\"value\" FROM \"table\"", + r#"SELECT "table".id, "table"."value" FROM "table""#, ); } @@ -507,10 +513,10 @@ fn test_table_scan_with_no_projection_in_plan_to_sql() { test( "catalog.schema.table", - "SELECT * FROM catalog.\"schema\".\"table\"", + r#"SELECT * FROM "catalog"."schema"."table""#, ); - test("schema.table", "SELECT * FROM \"schema\".\"table\""); - test("table", "SELECT * FROM \"table\""); + test("schema.table", r#"SELECT * FROM "schema"."table""#); + test("table", r#"SELECT * FROM "table""#); } #[test] @@ -590,8 +596,8 @@ fn test_pretty_roundtrip() -> Result<()> { Ok(()) } -fn sql_round_trip(query: &str, expect: &str) { - let statement = Parser::new(&GenericDialect {}) +fn sql_round_trip(query: &str, expect: &str, dialect: &dyn Dialect) { + let statement = Parser::new(dialect) .try_with_sql(query) .unwrap() .parse_statement() @@ -609,16 +615,36 @@ fn sql_round_trip(query: &str, expect: &str) { #[test] fn test_interval_lhs_eq() { + sql_round_trip( + "select interval 2 second = interval 2 second", + "SELECT (INTERVAL '0 YEARS 0 MONS 0 DAYS 0 HOURS 0 MINS 2.000000000 SECS' = INTERVAL '0 YEARS 0 MONS 0 DAYS 0 HOURS 0 MINS 2.000000000 SECS')", + &GenericDialect {} + ); +} + +#[test] +fn test_interval_lhs_eq_pg() { sql_round_trip( "select interval '2 seconds' = interval '2 seconds'", "SELECT (INTERVAL '0 YEARS 0 MONS 0 DAYS 0 HOURS 0 MINS 2.000000000 SECS' = INTERVAL '0 YEARS 0 MONS 0 DAYS 0 HOURS 0 MINS 2.000000000 SECS')", + &PostgreSqlDialect {} ); } #[test] fn test_interval_lhs_lt() { + sql_round_trip( + "select interval 2 second < interval 2 second", + "SELECT (INTERVAL '0 YEARS 0 MONS 0 DAYS 0 HOURS 0 MINS 2.000000000 SECS' < INTERVAL '0 YEARS 0 MONS 0 DAYS 0 HOURS 0 MINS 2.000000000 SECS')", + &GenericDialect {} + ); +} + +#[test] +fn test_interval_lhs_lt_pg() { sql_round_trip( "select interval '2 seconds' < interval '2 seconds'", "SELECT (INTERVAL '0 YEARS 0 MONS 0 DAYS 0 HOURS 0 MINS 2.000000000 SECS' < INTERVAL '0 YEARS 0 MONS 0 DAYS 0 HOURS 0 MINS 2.000000000 SECS')", + &PostgreSqlDialect {} ); } diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 5685e09c9c9f..e69a34086be1 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -49,7 +49,9 @@ use datafusion_functions_aggregate::{ }; use datafusion_functions_aggregate::{average::avg_udaf, grouping::grouping_udaf}; use rstest::rstest; -use sqlparser::dialect::{Dialect, GenericDialect, HiveDialect, MySqlDialect}; +use sqlparser::dialect::{ + Dialect, GenericDialect, HiveDialect, MySqlDialect, PostgreSqlDialect, +}; mod cases; mod common; @@ -2682,6 +2684,14 @@ fn quick_test_with_options(sql: &str, expected: &str, options: ParserOptions) { assert_eq!(format!("{plan}"), expected); } +fn pg_quick_test(sql: &str, expected: &str) { + let dialect = &PostgreSqlDialect {}; + let plan = + logical_plan_with_dialect_and_options(sql, dialect, ParserOptions::default()) + .unwrap(); + assert_eq!(format!("{plan}"), expected); +} + fn prepare_stmt_quick_test( sql: &str, expected_plan: &str, @@ -2750,36 +2760,69 @@ fn join_with_aliases() { #[test] fn negative_interval_plus_interval_in_projection() { - let sql = "select -interval '2 days' + interval '5 days';"; + let sql = "select -interval 2 day + interval 5 day;"; let expected = "Projection: IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: -2, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\")\n EmptyRelation"; quick_test(sql, expected); } +#[test] +fn pg_negative_interval_plus_interval_in_projection() { + let sql = "select -interval '2 days' + interval '5 days';"; + let expected = + "Projection: IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: -2, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\")\n EmptyRelation"; + pg_quick_test(sql, expected); +} + #[test] fn complex_interval_expression_in_projection() { - let sql = "select -interval '2 days' + interval '5 days'+ (-interval '3 days' + interval '5 days');"; + let sql = + "select -interval 2 day + interval 5 day + (-interval 3 day + interval 5 day);"; let expected = "Projection: IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: -2, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: -3, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\")\n EmptyRelation"; quick_test(sql, expected); } +#[test] +fn pg_complex_interval_expression_in_projection() { + let sql = "select -interval '2 days' + interval '5 days'+ (-interval '3 days' + interval '5 days');"; + let expected = + "Projection: IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: -2, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: -3, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\")\n EmptyRelation"; + pg_quick_test(sql, expected); +} + #[test] fn negative_sum_intervals_in_projection() { - let sql = "select -((interval '2 days' + interval '5 days') + -(interval '4 days' + interval '7 days'));"; + let sql = "select -((interval 2 day + interval 5 day) + -(interval 4 day + interval 7 day));"; let expected = "Projection: (- IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 2, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\") + (- IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 4, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 7, nanoseconds: 0 }\")))\n EmptyRelation"; quick_test(sql, expected); } +#[test] +fn pg_negative_sum_intervals_in_projection() { + let sql = "select -((interval '2 days' + interval '5 days') + -(interval '4 days' + interval '7 days'));"; + let expected = + "Projection: (- IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 2, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\") + (- IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 4, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 7, nanoseconds: 0 }\")))\n EmptyRelation"; + pg_quick_test(sql, expected); +} + #[test] fn date_plus_interval_in_projection() { - let sql = "select t_date32 + interval '5 days' FROM test"; + let sql = "select t_date32 + interval 5 day FROM test"; let expected = "Projection: test.t_date32 + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\")\n TableScan: test"; quick_test(sql, expected); } +#[test] +fn pg_date_plus_interval_in_projection() { + let sql = "select t_date32 + interval '5 days' FROM test"; + let expected = + "Projection: test.t_date32 + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\")\n TableScan: test"; + pg_quick_test(sql, expected); +} + #[test] fn date_plus_interval_in_filter() { let sql = "select t_date64 FROM test \ diff --git a/datafusion/sqllogictest/test_files/dates.slt b/datafusion/sqllogictest/test_files/dates.slt index e21637bd8913..b9583f3ce165 100644 --- a/datafusion/sqllogictest/test_files/dates.slt +++ b/datafusion/sqllogictest/test_files/dates.slt @@ -62,7 +62,7 @@ h query T rowsort select i_item_desc from test -where d3_date > d2_date + INTERVAL '5 days' AND i_item_desc != 'g'; +where d3_date > d2_date + INTERVAL 5 day AND i_item_desc != 'g'; ---- h @@ -77,8 +77,8 @@ h ## Use OR query T rowsort select i_item_desc from test -where d3_date > d2_date + INTERVAL '5 days' - OR d3_date = d2_date + INTERVAL '3 days'; +where d3_date > d2_date + INTERVAL 5 day + OR d3_date = d2_date + INTERVAL 3 day; ---- d g diff --git a/datafusion/sqllogictest/test_files/interval.slt b/datafusion/sqllogictest/test_files/interval.slt index afb262cf95a5..22304de6e059 100644 --- a/datafusion/sqllogictest/test_files/interval.slt +++ b/datafusion/sqllogictest/test_files/interval.slt @@ -292,7 +292,7 @@ select -interval '1 year' - '1 month' - '1 day' - '1 hour' - '1 minute' - '1 sec # Interval string literal + date query D -select interval '1 month' + '1 day' + '2012-01-01'::date; +select interval 1 month + interval 1 day + '2012-01-01'::date; ---- 2012-02-02 @@ -304,19 +304,16 @@ select ( interval '1 month' + '1 day' ) + '2012-01-01'::date; # Interval nested string literal + date query D -select interval '1 year' + '1 month' + '1 day' + '2012-01-01'::date +select interval 1 year + interval 1 month + interval 1 day + '2012-01-01'::date ---- 2013-02-02 # Interval nested string literal subtraction + date query D -select interval '1 year' - '1 month' + '1 day' + '2012-01-01'::date +select interval 1 year - interval 1 month + interval 1 day + '2012-01-01'::date ---- 2012-12-02 - - - # Use interval SQL type query TT select @@ -569,7 +566,7 @@ select '1 month'::interval - ts from t; # interval + date query D -select interval '1 month' + '2012-01-01'::date; +select interval 1 month + '2012-01-01'::date; ---- 2012-02-01 diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_interval.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_interval.slt new file mode 100644 index 000000000000..caf272a52ebd --- /dev/null +++ b/datafusion/sqllogictest/test_files/pg_compat/pg_interval.slt @@ -0,0 +1,81 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +# interval - date +onlyif postgres +query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Date32 to valid types +select interval '1 month' - '2023-05-01'::date; + +# interval - timestamp +onlyif postgres +query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types +SELECT interval '1 month' - '2023-05-01 12:30:00'::timestamp; + +# interval as LHS +onlyif postgres +query B +select interval '2 seconds' = interval '2 seconds'; +---- +true + +onlyif postgres +query B +select interval '1 seconds' < interval '2 seconds'; +---- +true + +onlyif postgres +query B +select interval '1 seconds' < arrow_cast(123, 'Duration(Nanosecond)') +---- +false + +# Interval string literal + date +onlyif postgres +query D +select interval '1 month' + '1 day' + '2012-01-01'::date; +---- +2012-02-02 + +# Interval nested string literal + date +onlyif postgres +query D +select interval '1 year' + '1 month' + '1 day' + '2012-01-01'::date +---- +2013-02-02 + +# Interval nested string literal subtraction + date +onlyif postgres +query D +select interval '1 year' - '1 month' + '1 day' + '2012-01-01'::date +---- +2012-12-02 + +# Interval nested string literal subtraction + date +onlyif postgres +query D +select interval '1 year' - '1 month' + '1 day' + '2012-01-01'::date +---- +2012-12-02 + +# interval + date +onlyif postgres +query D +select interval '1 month' + '2012-01-01'::date; +---- +2012-02-01 diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index fb0fd8397f2d..aa32788388f3 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -3156,18 +3156,18 @@ select arrow_cast(123, 'Duration(Nanosecond)') < interval '1 seconds'; true query B -select interval '1 seconds' < arrow_cast(123, 'Duration(Nanosecond)') +select interval 1 second < arrow_cast(123, 'Duration(Nanosecond)') ---- false # interval as LHS query B -select interval '2 seconds' = interval '2 seconds'; +select interval 2 second = interval 2 second; ---- true query B -select interval '1 seconds' < interval '2 seconds'; +select interval 1 second < interval 2 second; ---- true diff --git a/datafusion/sqllogictest/test_files/type_coercion.slt b/datafusion/sqllogictest/test_files/type_coercion.slt index e420c0cc7155..67b9295140b0 100644 --- a/datafusion/sqllogictest/test_files/type_coercion.slt +++ b/datafusion/sqllogictest/test_files/type_coercion.slt @@ -44,11 +44,11 @@ SELECT '2023-05-01 12:30:00'::timestamp - interval '1 month'; # interval - date query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Date32 to valid types -select interval '1 month' - '2023-05-01'::date; +select interval 1 month - '2023-05-01'::date; # interval - timestamp query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types -SELECT interval '1 month' - '2023-05-01 12:30:00'::timestamp; +SELECT interval 1 month - '2023-05-01 12:30:00'::timestamp; #################################### From edf318b151fd2f43504983f1af29cac19b92835a Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Thu, 29 Aug 2024 09:58:48 +0100 Subject: [PATCH 2/8] uprev sqlparser-rs branch --- Cargo.toml | 2 +- datafusion-cli/Cargo.lock | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7d90eecb9239..02dac3966397 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -135,7 +135,7 @@ regex = "1.8" rstest = "0.22.0" serde_json = "1" #sqlparser = { version = "0.50.0", features = ["visitor"] } -sqlparser = { git = "https://github.com/samuelcolvin/sqlparser-rs", rev = "6c2a971", features = ["visitor"] } +sqlparser = { git = "https://github.com/samuelcolvin/sqlparser-rs", rev = "31a19f7", features = ["visitor"] } tempfile = "3" thiserror = "1.0.44" tokio = { version = "1.36", features = ["macros", "rt", "sync"] } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index f21a7ba4ecfd..2ba747fd152c 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -3545,7 +3545,7 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" version = "0.50.0" -source = "git+https://github.com/samuelcolvin/sqlparser-rs?rev=6c2a971#6c2a9717897d7986c655c24bfcbcd7e814fd3d40" +source = "git+https://github.com/samuelcolvin/sqlparser-rs?rev=31a19f7#31a19f77a502d10fda487cffc0c65bfedcf4ffc9" dependencies = [ "log", "sqlparser_derive", @@ -3554,7 +3554,7 @@ dependencies = [ [[package]] name = "sqlparser_derive" version = "0.2.2" -source = "git+https://github.com/samuelcolvin/sqlparser-rs?rev=6c2a971#6c2a9717897d7986c655c24bfcbcd7e814fd3d40" +source = "git+https://github.com/samuelcolvin/sqlparser-rs?rev=31a19f7#31a19f77a502d10fda487cffc0c65bfedcf4ffc9" dependencies = [ "proc-macro2", "quote", From 6bba073caa47b2ba6f5ba7de8c680257c29456b5 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Thu, 12 Sep 2024 13:38:38 +0100 Subject: [PATCH 3/8] use sqlparser 51 --- Cargo.toml | 3 +-- datafusion-cli/Cargo.lock | 8 +++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1dbb11e9a372..a559b242aa05 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -137,8 +137,7 @@ rand = "0.8" regex = "1.8" rstest = "0.22.0" serde_json = "1" -#sqlparser = { version = "0.50.0", features = ["visitor"] } -sqlparser = { git = "https://github.com/samuelcolvin/sqlparser-rs", rev = "31a19f7", features = ["visitor"] } +sqlparser = { version = "0.51.0", features = ["visitor"] } tempfile = "3" thiserror = "1.0.44" tokio = { version = "1.36", features = ["macros", "rt", "sync"] } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 9d95a3845e26..87bd2eeb126b 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -3570,8 +3570,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.50.0" -source = "git+https://github.com/samuelcolvin/sqlparser-rs?rev=31a19f7#31a19f77a502d10fda487cffc0c65bfedcf4ffc9" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" dependencies = [ "log", "sqlparser_derive", @@ -3580,7 +3581,8 @@ dependencies = [ [[package]] name = "sqlparser_derive" version = "0.2.2" -source = "git+https://github.com/samuelcolvin/sqlparser-rs?rev=31a19f7#31a19f77a502d10fda487cffc0c65bfedcf4ffc9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", From 268d478f3e5b9dfe08e1010954c5ad64e6bd1f28 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Thu, 12 Sep 2024 16:27:41 +0100 Subject: [PATCH 4/8] better extract logic and interval testing --- .../functions/src/datetime/date_part.rs | 44 +++++++---- datafusion/sqllogictest/test_files/expr.slt | 15 ++++ .../sqllogictest/test_files/interval.slt | 73 +++---------------- .../test_files/interval_mysql.slt | 71 ++++++++++++++++++ 4 files changed, 123 insertions(+), 80 deletions(-) create mode 100644 datafusion/sqllogictest/test_files/interval_mysql.slt diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index e24b11aeb71f..8ee82d872651 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -16,9 +16,11 @@ // under the License. use std::any::Any; +use std::str::FromStr; use std::sync::Arc; use arrow::array::{Array, ArrayRef, Float64Array}; +use arrow::compute::kernels::cast_utils::IntervalUnit; use arrow::compute::{binary, cast, date_part, DatePart}; use arrow::datatypes::DataType::{ Date32, Date64, Float64, Time32, Time64, Timestamp, Utf8, Utf8View, @@ -161,22 +163,32 @@ impl ScalarUDFImpl for DatePartFunc { return exec_err!("Date part '{part}' not supported"); } - let arr = match part_trim.to_lowercase().as_str() { - "year" => date_part_f64(array.as_ref(), DatePart::Year)?, - "quarter" => date_part_f64(array.as_ref(), DatePart::Quarter)?, - "month" => date_part_f64(array.as_ref(), DatePart::Month)?, - "week" => date_part_f64(array.as_ref(), DatePart::Week)?, - "day" => date_part_f64(array.as_ref(), DatePart::Day)?, - "doy" => date_part_f64(array.as_ref(), DatePart::DayOfYear)?, - "dow" => date_part_f64(array.as_ref(), DatePart::DayOfWeekSunday0)?, - "hour" => date_part_f64(array.as_ref(), DatePart::Hour)?, - "minute" => date_part_f64(array.as_ref(), DatePart::Minute)?, - "second" => seconds(array.as_ref(), Second)?, - "millisecond" => seconds(array.as_ref(), Millisecond)?, - "microsecond" => seconds(array.as_ref(), Microsecond)?, - "nanosecond" => seconds(array.as_ref(), Nanosecond)?, - "epoch" => epoch(array.as_ref())?, - _ => return exec_err!("Date part '{part}' not supported"), + // using IntervalUnit here means we hand off all the work of supporting plurals (like "seconds") + // and synonyms ( like "ms,msec,msecond,millisecond") to Arrow + let arr = if let Ok(interval_unit) = IntervalUnit::from_str(part_trim) { + match interval_unit { + IntervalUnit::Year => date_part_f64(array.as_ref(), DatePart::Year)?, + IntervalUnit::Month => date_part_f64(array.as_ref(), DatePart::Month)?, + IntervalUnit::Week => date_part_f64(array.as_ref(), DatePart::Week)?, + IntervalUnit::Day => date_part_f64(array.as_ref(), DatePart::Day)?, + IntervalUnit::Hour => date_part_f64(array.as_ref(), DatePart::Hour)?, + IntervalUnit::Minute => date_part_f64(array.as_ref(), DatePart::Minute)?, + IntervalUnit::Second => seconds(array.as_ref(), Second)?, + IntervalUnit::Millisecond => seconds(array.as_ref(), Millisecond)?, + IntervalUnit::Microsecond => seconds(array.as_ref(), Microsecond)?, + IntervalUnit::Nanosecond => seconds(array.as_ref(), Nanosecond)?, + // century and decade are not supported by `DatePart`, although they are supported in postgres + _ => return exec_err!("Date part '{part}' not supported"), + } + } else { + // special cases that can be extracted (in postgres) but are not interval units + match part_trim.to_lowercase().as_str() { + "qtr" | "quarter" => date_part_f64(array.as_ref(), DatePart::Quarter)?, + "doy" => date_part_f64(array.as_ref(), DatePart::DayOfYear)?, + "dow" => date_part_f64(array.as_ref(), DatePart::DayOfWeekSunday0)?, + "epoch" => epoch(array.as_ref())?, + _ => return exec_err!("Date part '{part}' not supported"), + } }; Ok(if is_scalar { diff --git a/datafusion/sqllogictest/test_files/expr.slt b/datafusion/sqllogictest/test_files/expr.slt index 002e8db2132d..a478e3617261 100644 --- a/datafusion/sqllogictest/test_files/expr.slt +++ b/datafusion/sqllogictest/test_files/expr.slt @@ -1355,6 +1355,16 @@ SELECT date_part('second', arrow_cast('23:32:50.123456789'::time, 'Time64(Nanose ---- 50.123456789 +query R +select extract(second from '2024-08-09T12:13:14') +---- +14 + +query R +select extract(seconds from '2024-08-09T12:13:14') +---- +14 + query R SELECT extract(second from arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)')) ---- @@ -1381,6 +1391,11 @@ SELECT extract(microsecond from arrow_cast('23:32:50.123456789'::time, 'Time64(N ---- 50123456.789000005 +query R +SELECT extract(us from arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)')) +---- +50123456.789000005 + query R SELECT date_part('nanosecond', arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)')) ---- diff --git a/datafusion/sqllogictest/test_files/interval.slt b/datafusion/sqllogictest/test_files/interval.slt index 51bb02ebb1ef..09c190852ec3 100644 --- a/datafusion/sqllogictest/test_files/interval.slt +++ b/datafusion/sqllogictest/test_files/interval.slt @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. - # Use `interval` SQL literal syntax # the types should be the same: https://github.com/apache/datafusion/issues/5801 query TT @@ -206,87 +205,39 @@ select interval '5 YEAR 5 MONTH 5 DAY 5 HOUR 5 MINUTE 5 SECOND 5 MILLISECOND 5 M ---- 65 mons 5 days 5 hours 5 mins 5.005005005 secs -# Interval with string literal addition -query ? -select interval '1 month' + '1 month' ----- -2 mons - -# Interval with string literal addition and leading field +# Interval mega nested literal addition query ? -select interval '1' + '1' month ----- -2 mons - -# Interval with nested string literal addition -query ? -select interval '1 month' + '1 month' + '1 month' ----- -3 mons - -# Interval with nested string literal addition and leading field -query ? -select interval '1' + '1' + '1' month ----- -3 mons - -# Interval mega nested string literal addition -query ? -select interval '1 year' + '1 month' + '1 day' + '1 hour' + '1 minute' + '1 second' + '1 millisecond' + '1 microsecond' + '1 nanosecond' +select interval '1 year' + interval '1 month' + interval '1 day' + interval '1 hour' + interval '1 minute' + interval '1 second' + interval '1 millisecond' + interval '1 microsecond' + interval '1 nanosecond' ---- 13 mons 1 days 1 hours 1 mins 1.001001001 secs # Interval with string literal subtraction query ? -select interval '1 month' - '1 day'; +select interval '1 month' - interval '1 day'; ---- 1 mons -1 days -# Interval with string literal subtraction and leading field -query ? -select interval '5' - '1' - '2' year; ----- -24 mons - # Interval with nested string literal subtraction query ? -select interval '1 month' - '1 day' - '1 hour'; +select interval '1 month' - interval '1 day' - interval '1 hour'; ---- 1 mons -1 days -1 hours -# Interval with nested string literal subtraction and leading field -query ? -select interval '10' - '1' - '1' month; ----- -8 mons - # Interval mega nested string literal subtraction query ? -select interval '1 year' - '1 month' - '1 day' - '1 hour' - '1 minute' - '1 second' - '1 millisecond' - '1 microsecond' - '1 nanosecond' +select interval '1 year' - interval '1 month' - interval '1 day' - interval '1 hour' - interval '1 minute' - interval '1 second' - interval '1 millisecond' - interval '1 microsecond' - interval '1 nanosecond' ---- 11 mons -1 days -1 hours -1 mins -1.001001001 secs -# Interval with string literal negation and leading field -query ? -select -interval '5' - '1' - '2' year; ----- --96 mons - -# Interval with nested string literal negation +# Interval with nested literal negation query ? -select -interval '1 month' + '1 day' + '1 hour'; +select -interval '1 month' + interval '1 day' + interval '1 hour'; ---- -1 mons 1 days 1 hours -# Interval with nested string literal negation and leading field +# Interval mega nested literal negation query ? -select -interval '10' - '1' - '1' month; ----- --12 mons - -# Interval mega nested string literal negation -query ? -select -interval '1 year' - '1 month' - '1 day' - '1 hour' - '1 minute' - '1 second' - '1 millisecond' - '1 microsecond' - '1 nanosecond' +select -interval '1 year' - interval '1 month' - interval '1 day' - interval '1 hour' - interval '1 minute' - interval '1 second' - interval '1 millisecond' - interval '1 microsecond' - interval '1 nanosecond' ---- -13 mons -1 days -1 hours -1 mins -1.001001001 secs @@ -296,12 +247,6 @@ select interval 1 month + interval 1 day + '2012-01-01'::date; ---- 2012-02-02 -# Interval string literal parenthesized + date -query D -select ( interval '1 month' + '1 day' ) + '2012-01-01'::date; ----- -2012-02-02 - # Interval nested string literal + date query D select interval 1 year + interval 1 month + interval 1 day + '2012-01-01'::date diff --git a/datafusion/sqllogictest/test_files/interval_mysql.slt b/datafusion/sqllogictest/test_files/interval_mysql.slt new file mode 100644 index 000000000000..26d1e96cabd9 --- /dev/null +++ b/datafusion/sqllogictest/test_files/interval_mysql.slt @@ -0,0 +1,71 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Use `interval` SQL literal syntax with MySQL dialect + +# this should fail generic dialect +query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \+ Utf8 to valid types +select interval '1' + '1' month + +statement ok +set datafusion.sql_parser.dialect = 'Mysql'; + +# Interval with string literal addition and leading field +query ? +select interval '1' + '1' month +---- +2 mons + +# Interval with nested string literal addition +query ? +select interval 1 + 1 + 1 month +---- +3 mons + +# Interval with nested string literal addition and leading field +query ? +select interval '1' + '1' + '1' month +---- +3 mons + +# Interval with string literal subtraction and leading field +query ? +select interval '5' - '1' - '2' year; +---- +24 mons + +# Interval with nested string literal subtraction and leading field +query ? +select interval '10' - '1' - '1' month; +---- +8 mons + +# Interval with string literal negation and leading field +query ? +select -interval '5' - '1' - '2' year; +---- +-96 mons + +# Interval with nested string literal negation and leading field +query ? +select -interval '10' - '1' - '1' month; +---- +-12 mons + +# revert to standard dialect +statement ok +set datafusion.sql_parser.dialect = 'Generic'; From 8fe6a2fe69a15a599088ca697abdd85091472c42 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Thu, 12 Sep 2024 16:37:49 +0100 Subject: [PATCH 5/8] revert unnecessary changes --- .../core/src/datasource/physical_plan/csv.rs | 16 +++--- datafusion/sql/tests/sql_integration.rs | 50 ++----------------- datafusion/sqllogictest/test_files/dates.slt | 6 +-- .../sqllogictest/test_files/interval.slt | 2 +- .../test_files/interval_mysql.slt | 2 +- .../sqllogictest/test_files/timestamps.slt | 6 +-- .../sqllogictest/test_files/type_coercion.slt | 4 +- 7 files changed, 22 insertions(+), 64 deletions(-) diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index 6cd1864deb1d..0e51effdf41a 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -1329,15 +1329,15 @@ mod tests { let df = ctx.sql(r#"select * from t1"#).await?.collect().await?; let expected = [ - "+-------+-----------------------------+", - "| col1 | col2 |", - "+-------+-----------------------------+", - "| 1 | hello\rworld |", - "| 2 | something\relse |", + "+-------+------------------------+", + "| col1 | col2 |", + "+-------+------------------------+", + "| 1 | hello\rworld |", + "| 2 | something\relse |", "| 3 | \rmany\rlines\rmake\rgood test\r |", - "| 4 | unquoted |", - "| value | end |", - "+-------+-----------------------------+", + "| 4 | unquoted |", + "| value | end |", + "+-------+------------------------+", ]; crate::assert_batches_eq!(expected, &df); Ok(()) diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 7f9561e8d4f4..eb6ffd5b9443 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -49,9 +49,7 @@ use datafusion_functions_aggregate::{ }; use datafusion_functions_aggregate::{average::avg_udaf, grouping::grouping_udaf}; use rstest::rstest; -use sqlparser::dialect::{ - Dialect, GenericDialect, HiveDialect, MySqlDialect, PostgreSqlDialect, -}; +use sqlparser::dialect::{Dialect, GenericDialect, HiveDialect, MySqlDialect}; mod cases; mod common; @@ -2684,14 +2682,6 @@ fn quick_test_with_options(sql: &str, expected: &str, options: ParserOptions) { assert_eq!(format!("{plan}"), expected); } -fn pg_quick_test(sql: &str, expected: &str) { - let dialect = &PostgreSqlDialect {}; - let plan = - logical_plan_with_dialect_and_options(sql, dialect, ParserOptions::default()) - .unwrap(); - assert_eq!(format!("{plan}"), expected); -} - fn prepare_stmt_quick_test( sql: &str, expected_plan: &str, @@ -2760,18 +2750,10 @@ fn join_with_aliases() { #[test] fn negative_interval_plus_interval_in_projection() { - let sql = "select -interval 2 day + interval 5 day;"; - let expected = - "Projection: IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: -2, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\")\n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn pg_negative_interval_plus_interval_in_projection() { let sql = "select -interval '2 days' + interval '5 days';"; let expected = "Projection: IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: -2, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\")\n EmptyRelation"; - pg_quick_test(sql, expected); + quick_test(sql, expected); } #[test] @@ -2783,46 +2765,22 @@ fn complex_interval_expression_in_projection() { quick_test(sql, expected); } -#[test] -fn pg_complex_interval_expression_in_projection() { - let sql = "select -interval '2 days' + interval '5 days'+ (-interval '3 days' + interval '5 days');"; - let expected = - "Projection: IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: -2, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: -3, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\")\n EmptyRelation"; - pg_quick_test(sql, expected); -} - #[test] fn negative_sum_intervals_in_projection() { - let sql = "select -((interval 2 day + interval 5 day) + -(interval 4 day + interval 7 day));"; - let expected = - "Projection: (- IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 2, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\") + (- IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 4, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 7, nanoseconds: 0 }\")))\n EmptyRelation"; - quick_test(sql, expected); -} - -#[test] -fn pg_negative_sum_intervals_in_projection() { let sql = "select -((interval '2 days' + interval '5 days') + -(interval '4 days' + interval '7 days'));"; let expected = "Projection: (- IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 2, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\") + (- IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 4, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 7, nanoseconds: 0 }\")))\n EmptyRelation"; - pg_quick_test(sql, expected); + quick_test(sql, expected); } #[test] fn date_plus_interval_in_projection() { - let sql = "select t_date32 + interval 5 day FROM test"; + let sql = "select t_date32 + interval '5 day' FROM test"; let expected = "Projection: test.t_date32 + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\")\n TableScan: test"; quick_test(sql, expected); } -#[test] -fn pg_date_plus_interval_in_projection() { - let sql = "select t_date32 + interval '5 days' FROM test"; - let expected = - "Projection: test.t_date32 + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\")\n TableScan: test"; - pg_quick_test(sql, expected); -} - #[test] fn date_plus_interval_in_filter() { let sql = "select t_date64 FROM test \ diff --git a/datafusion/sqllogictest/test_files/dates.slt b/datafusion/sqllogictest/test_files/dates.slt index 59d83fa60033..2e56d5062e35 100644 --- a/datafusion/sqllogictest/test_files/dates.slt +++ b/datafusion/sqllogictest/test_files/dates.slt @@ -62,7 +62,7 @@ h query T rowsort select i_item_desc from test -where d3_date > d2_date + INTERVAL 5 day AND i_item_desc != 'g'; +where d3_date > d2_date + INTERVAL '5 day' AND i_item_desc != 'g'; ---- h @@ -77,8 +77,8 @@ h ## Use OR query T rowsort select i_item_desc from test -where d3_date > d2_date + INTERVAL 5 day - OR d3_date = d2_date + INTERVAL 3 day; +where d3_date > d2_date + INTERVAL '5 day' + OR d3_date = d2_date + INTERVAL '3 day'; ---- d g diff --git a/datafusion/sqllogictest/test_files/interval.slt b/datafusion/sqllogictest/test_files/interval.slt index 09c190852ec3..c73e340f9115 100644 --- a/datafusion/sqllogictest/test_files/interval.slt +++ b/datafusion/sqllogictest/test_files/interval.slt @@ -511,7 +511,7 @@ select '1 month'::interval - ts from t; # interval + date query D -select interval 1 month + '2012-01-01'::date; +select interval '1 month' + '2012-01-01'::date; ---- 2012-02-01 diff --git a/datafusion/sqllogictest/test_files/interval_mysql.slt b/datafusion/sqllogictest/test_files/interval_mysql.slt index 26d1e96cabd9..c05bb007e5f1 100644 --- a/datafusion/sqllogictest/test_files/interval_mysql.slt +++ b/datafusion/sqllogictest/test_files/interval_mysql.slt @@ -17,7 +17,7 @@ # Use `interval` SQL literal syntax with MySQL dialect -# this should fail generic dialect +# this should fail with the generic dialect query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \+ Utf8 to valid types select interval '1' + '1' month diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 1237ee9b5ebb..1e3b516641ff 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -3156,18 +3156,18 @@ select arrow_cast(123, 'Duration(Nanosecond)') < interval '1 seconds'; true query B -select interval 1 second < arrow_cast(123, 'Duration(Nanosecond)') +select interval '1 second' < arrow_cast(123, 'Duration(Nanosecond)') ---- false # interval as LHS query B -select interval 2 second = interval 2 second; +select interval '2 second' = interval '2 second'; ---- true query B -select interval 1 second < interval 2 second; +select interval '1 second' < interval '2 second'; ---- true diff --git a/datafusion/sqllogictest/test_files/type_coercion.slt b/datafusion/sqllogictest/test_files/type_coercion.slt index 97d1b84e2ef6..0f9399cede2e 100644 --- a/datafusion/sqllogictest/test_files/type_coercion.slt +++ b/datafusion/sqllogictest/test_files/type_coercion.slt @@ -44,11 +44,11 @@ SELECT '2023-05-01 12:30:00'::timestamp - interval '1 month'; # interval - date query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Date32 to valid types -select interval 1 month - '2023-05-01'::date; +select interval '1 month' - '2023-05-01'::date; # interval - timestamp query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types -SELECT interval 1 month - '2023-05-01 12:30:00'::timestamp; +SELECT interval '1 month' - '2023-05-01 12:30:00'::timestamp; # dictionary(int32, utf8) -> utf8 query T From 5ad18e0cf0b9ea6963866b308ac7777259dd7536 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Thu, 12 Sep 2024 16:45:38 +0100 Subject: [PATCH 6/8] revert unnecessary changes, more --- datafusion/sql/tests/cases/plan_to_sql.rs | 26 +----- datafusion/sql/tests/sql_integration.rs | 5 +- datafusion/sqllogictest/test_files/dates.slt | 6 +- .../test_files/pg_compat/pg_interval.slt | 81 ------------------- .../sqllogictest/test_files/timestamps.slt | 6 +- 5 files changed, 11 insertions(+), 113 deletions(-) delete mode 100644 datafusion/sqllogictest/test_files/pg_compat/pg_interval.slt diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index d9c747bec562..bd338e440e36 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -34,7 +34,7 @@ use datafusion_expr::builder::{ table_scan_with_filter_and_fetch, table_scan_with_filters, }; use datafusion_functions::core::planner::CoreFunctionPlanner; -use sqlparser::dialect::{Dialect, GenericDialect, MySqlDialect, PostgreSqlDialect}; +use sqlparser::dialect::{Dialect, GenericDialect, MySqlDialect}; use sqlparser::parser::Parser; #[test] @@ -610,8 +610,8 @@ fn test_pretty_roundtrip() -> Result<()> { Ok(()) } -fn sql_round_trip(query: &str, expect: &str, dialect: &dyn Dialect) { - let statement = Parser::new(dialect) +fn sql_round_trip(query: &str, expect: &str) { + let statement = Parser::new(&GenericDialect {}) .try_with_sql(query) .unwrap() .parse_statement() @@ -775,36 +775,16 @@ fn test_table_scan_pushdown() -> Result<()> { #[test] fn test_interval_lhs_eq() { - sql_round_trip( - "select interval 2 second = interval 2 second", - "SELECT (INTERVAL '2.000000000 SECS' = INTERVAL '2.000000000 SECS')", - &GenericDialect {}, - ); -} - -#[test] -fn test_interval_lhs_eq_pg() { sql_round_trip( "select interval '2 seconds' = interval '2 seconds'", "SELECT (INTERVAL '2.000000000 SECS' = INTERVAL '2.000000000 SECS')", - &PostgreSqlDialect {}, ); } #[test] fn test_interval_lhs_lt() { - sql_round_trip( - "select interval 2 second < interval 2 second", - "SELECT (INTERVAL '2.000000000 SECS' < INTERVAL '2.000000000 SECS')", - &GenericDialect {}, - ); -} - -#[test] -fn test_interval_lhs_lt_pg() { sql_round_trip( "select interval '2 seconds' < interval '2 seconds'", "SELECT (INTERVAL '2.000000000 SECS' < INTERVAL '2.000000000 SECS')", - &PostgreSqlDialect {}, ); } diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index eb6ffd5b9443..b64df963ac9e 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -2758,8 +2758,7 @@ fn negative_interval_plus_interval_in_projection() { #[test] fn complex_interval_expression_in_projection() { - let sql = - "select -interval 2 day + interval 5 day + (-interval 3 day + interval 5 day);"; + let sql ="select -interval '2 days' + interval '5 days'+ (-interval '3 days' + interval '5 days');"; let expected = "Projection: IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: -2, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: -3, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\")\n EmptyRelation"; quick_test(sql, expected); @@ -2775,7 +2774,7 @@ fn negative_sum_intervals_in_projection() { #[test] fn date_plus_interval_in_projection() { - let sql = "select t_date32 + interval '5 day' FROM test"; + let sql = "select t_date32 + interval '5 days' FROM test"; let expected = "Projection: test.t_date32 + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\")\n TableScan: test"; quick_test(sql, expected); diff --git a/datafusion/sqllogictest/test_files/dates.slt b/datafusion/sqllogictest/test_files/dates.slt index 2e56d5062e35..3950a165a004 100644 --- a/datafusion/sqllogictest/test_files/dates.slt +++ b/datafusion/sqllogictest/test_files/dates.slt @@ -62,7 +62,7 @@ h query T rowsort select i_item_desc from test -where d3_date > d2_date + INTERVAL '5 day' AND i_item_desc != 'g'; +where d3_date > d2_date + INTERVAL '5 days' AND i_item_desc != 'g'; ---- h @@ -77,8 +77,8 @@ h ## Use OR query T rowsort select i_item_desc from test -where d3_date > d2_date + INTERVAL '5 day' - OR d3_date = d2_date + INTERVAL '3 day'; +where d3_date > d2_date + INTERVAL '5 days' + OR d3_date = d2_date + INTERVAL '3 days'; ---- d g diff --git a/datafusion/sqllogictest/test_files/pg_compat/pg_interval.slt b/datafusion/sqllogictest/test_files/pg_compat/pg_interval.slt deleted file mode 100644 index caf272a52ebd..000000000000 --- a/datafusion/sqllogictest/test_files/pg_compat/pg_interval.slt +++ /dev/null @@ -1,81 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - - -# interval - date -onlyif postgres -query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Date32 to valid types -select interval '1 month' - '2023-05-01'::date; - -# interval - timestamp -onlyif postgres -query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types -SELECT interval '1 month' - '2023-05-01 12:30:00'::timestamp; - -# interval as LHS -onlyif postgres -query B -select interval '2 seconds' = interval '2 seconds'; ----- -true - -onlyif postgres -query B -select interval '1 seconds' < interval '2 seconds'; ----- -true - -onlyif postgres -query B -select interval '1 seconds' < arrow_cast(123, 'Duration(Nanosecond)') ----- -false - -# Interval string literal + date -onlyif postgres -query D -select interval '1 month' + '1 day' + '2012-01-01'::date; ----- -2012-02-02 - -# Interval nested string literal + date -onlyif postgres -query D -select interval '1 year' + '1 month' + '1 day' + '2012-01-01'::date ----- -2013-02-02 - -# Interval nested string literal subtraction + date -onlyif postgres -query D -select interval '1 year' - '1 month' + '1 day' + '2012-01-01'::date ----- -2012-12-02 - -# Interval nested string literal subtraction + date -onlyif postgres -query D -select interval '1 year' - '1 month' + '1 day' + '2012-01-01'::date ----- -2012-12-02 - -# interval + date -onlyif postgres -query D -select interval '1 month' + '2012-01-01'::date; ----- -2012-02-01 diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 1e3b516641ff..4b11e338da70 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -3156,18 +3156,18 @@ select arrow_cast(123, 'Duration(Nanosecond)') < interval '1 seconds'; true query B -select interval '1 second' < arrow_cast(123, 'Duration(Nanosecond)') +select interval '1 seconds' < arrow_cast(123, 'Duration(Nanosecond)') ---- false # interval as LHS query B -select interval '2 second' = interval '2 second'; +select interval '2 seconds' = interval '2 seconds'; ---- true query B -select interval '1 second' < interval '2 second'; +select interval '1 seconds' < interval '2 seconds'; ---- true From e7ed21e6611b7edc8fe26d3c8163aa2fc30185fd Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Thu, 12 Sep 2024 16:47:40 +0100 Subject: [PATCH 7/8] cleanup --- datafusion/sql/tests/sql_integration.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index b64df963ac9e..5a203703e967 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -2758,7 +2758,7 @@ fn negative_interval_plus_interval_in_projection() { #[test] fn complex_interval_expression_in_projection() { - let sql ="select -interval '2 days' + interval '5 days'+ (-interval '3 days' + interval '5 days');"; + let sql = "select -interval '2 days' + interval '5 days'+ (-interval '3 days' + interval '5 days');"; let expected = "Projection: IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: -2, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: -3, nanoseconds: 0 }\") + IntervalMonthDayNano(\"IntervalMonthDayNano { months: 0, days: 5, nanoseconds: 0 }\")\n EmptyRelation"; quick_test(sql, expected); From ce3425a5cd10fa44b01efd076448f42f2fad8441 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Thu, 12 Sep 2024 17:03:46 +0100 Subject: [PATCH 8/8] fix last failing test :fingerscrossed: --- .../core/src/datasource/physical_plan/csv.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index 0e51effdf41a..6cd1864deb1d 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -1329,15 +1329,15 @@ mod tests { let df = ctx.sql(r#"select * from t1"#).await?.collect().await?; let expected = [ - "+-------+------------------------+", - "| col1 | col2 |", - "+-------+------------------------+", - "| 1 | hello\rworld |", - "| 2 | something\relse |", + "+-------+-----------------------------+", + "| col1 | col2 |", + "+-------+-----------------------------+", + "| 1 | hello\rworld |", + "| 2 | something\relse |", "| 3 | \rmany\rlines\rmake\rgood test\r |", - "| 4 | unquoted |", - "| value | end |", - "+-------+------------------------+", + "| 4 | unquoted |", + "| value | end |", + "+-------+-----------------------------+", ]; crate::assert_batches_eq!(expected, &df); Ok(())