diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 2e53a682854ce..e95fcdd128ede 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -20,6 +20,7 @@ use std::any::Any; use std::cmp::Ordering; use std::collections::{HashMap, HashSet}; +use std::iter::zip; use std::sync::Arc; use crate::dml::CopyTo; @@ -35,7 +36,7 @@ use crate::logical_plan::{ Projection, Repartition, Sort, SubqueryAlias, TableScan, Union, Unnest, Values, Window, }; -use crate::type_coercion::binary::values_coercion; +use crate::type_coercion::binary::{comparison_coercion, values_coercion}; use crate::utils::{ can_hash, columnize_expr, compare_sort_expr, expr_to_columns, find_valid_equijoin_key_pair, group_window_expr_by_sort_keys, @@ -1337,14 +1338,96 @@ pub fn validate_unique_names<'a>( }) } +pub fn project_with_column_index( + expr: Vec, + input: Arc, + schema: DFSchemaRef, +) -> Result { + let alias_expr = expr + .into_iter() + .enumerate() + .map(|(i, e)| match e { + Expr::Alias(Alias { ref name, .. }) if name != schema.field(i).name() => { + e.unalias().alias(schema.field(i).name()) + } + Expr::Column(Column { + relation: _, + ref name, + }) if name != schema.field(i).name() => e.alias(schema.field(i).name()), + Expr::Alias { .. } | Expr::Column { .. } => e, + Expr::Wildcard { .. } => e, + _ => e.alias(schema.field(i).name()), + }) + .collect::>(); + + Projection::try_new_with_schema(alias_expr, input, schema) + .map(LogicalPlan::Projection) +} + /// Union two logical plans. pub fn union(left_plan: LogicalPlan, right_plan: LogicalPlan) -> Result { - // Temporarily use the schema from the left input and later rely on the analyzer to - // coerce the two schemas into a common one. - let schema = Arc::clone(left_plan.schema()); + let left_col_num = left_plan.schema().fields().len(); + + // check union plan length same. + let right_col_num = right_plan.schema().fields().len(); + if right_col_num != left_col_num { + return plan_err!( + "Union queries must have the same number of columns, (left is {left_col_num}, right is {right_col_num})"); + } + + // create union schema + let union_qualified_fields = + zip(left_plan.schema().iter(), right_plan.schema().iter()) + .map( + |((left_qualifier, left_field), (_right_qualifier, right_field))| { + let nullable = left_field.is_nullable() || right_field.is_nullable(); + let data_type = comparison_coercion( + left_field.data_type(), + right_field.data_type(), + ) + .ok_or_else(|| { + plan_datafusion_err!( + "UNION Column {} (type: {}) is not compatible with column {} (type: {})", + right_field.name(), + right_field.data_type(), + left_field.name(), + left_field.data_type() + ) + })?; + Ok(( + left_qualifier.cloned(), + Arc::new(Field::new(left_field.name(), data_type, nullable)), + )) + }, + ) + .collect::>>()?; + let union_schema = + DFSchema::new_with_metadata(union_qualified_fields, HashMap::new())?; + + let inputs = vec![left_plan, right_plan] + .into_iter() + .map(|p| { + let plan = coerce_plan_expr_for_schema(&p, &union_schema)?; + match plan { + LogicalPlan::Projection(Projection { expr, input, .. }) => { + Ok(Arc::new(project_with_column_index( + expr, + input, + Arc::new(union_schema.clone()), + )?)) + } + other_plan => Ok(Arc::new(other_plan)), + } + }) + .collect::>>()?; + + if inputs.is_empty() { + return plan_err!("Empty UNION"); + } + Ok(LogicalPlan::Union(Union { - inputs: vec![Arc::new(left_plan), Arc::new(right_plan)], - schema, + inputs, + schema: Arc::new(union_schema), })) } @@ -1767,6 +1850,23 @@ mod tests { Ok(()) } + #[test] + fn plan_builder_union_different_num_columns_error() -> Result<()> { + let plan1 = + table_scan(TableReference::none(), &employee_schema(), Some(vec![3]))?; + let plan2 = + table_scan(TableReference::none(), &employee_schema(), Some(vec![3, 4]))?; + + let expected = "Error during planning: Union queries must have the same number of columns, (left is 1, right is 2)"; + let err_msg1 = plan1.clone().union(plan2.clone().build()?).unwrap_err(); + let err_msg2 = plan1.union_distinct(plan2.build()?).unwrap_err(); + + assert_eq!(err_msg1.strip_backtrace(), expected); + assert_eq!(err_msg2.strip_backtrace(), expected); + + Ok(()) + } + #[test] fn plan_builder_simple_distinct() -> Result<()> { let plan = diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 40efbba6de7a5..7392028ba7aba 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -17,24 +17,22 @@ //! Optimizer rule for type validation and coercion -use std::collections::HashMap; use std::sync::Arc; -use itertools::izip; - -use arrow::datatypes::{DataType, Field, IntervalUnit}; +use arrow::datatypes::{DataType, IntervalUnit}; use crate::analyzer::AnalyzerRule; use crate::utils::NamePreserver; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; use datafusion_common::{ - exec_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, - DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, + exec_err, internal_err, not_impl_err, plan_datafusion_err, plan_err, DFSchema, + DataFusionError, Result, ScalarValue, }; +use datafusion_expr::builder::project_with_column_index; use datafusion_expr::expr::{ - self, Alias, Between, BinaryExpr, Case, Exists, InList, InSubquery, Like, - ScalarFunction, WindowFunction, + self, Between, BinaryExpr, Case, Exists, InList, InSubquery, Like, ScalarFunction, + WindowFunction, }; use datafusion_expr::expr_rewriter::coerce_plan_expr_for_schema; use datafusion_expr::expr_schema::cast_subquery; @@ -53,7 +51,7 @@ use datafusion_expr::type_coercion::{is_datetime, is_utf8_or_large_utf8}; use datafusion_expr::utils::merge_schema; use datafusion_expr::{ is_false, is_not_false, is_not_true, is_not_unknown, is_true, is_unknown, not, - AggregateUDF, Expr, ExprFunctionExt, ExprSchemable, Join, LogicalPlan, Operator, + AggregateUDF, Expr, ExprFunctionExt, ExprSchemable, LogicalPlan, Operator, Projection, ScalarUDF, Union, WindowFrame, WindowFrameBound, WindowFrameUnits, }; @@ -123,8 +121,9 @@ fn analyze_internal( expr.rewrite(&mut expr_rewrite)? .map_data(|expr| original_name.restore(expr)) })? - // some plans need extra coercion after their expressions are coerced - .map_data(|plan| expr_rewrite.coerce_plan(plan))? + // coerce join expressions specially + .map_data(|plan| expr_rewrite.coerce_joins(plan))? + .map_data(|plan| expr_rewrite.coerce_union(plan))? // recompute the schema after the expressions have been rewritten as the types may have changed .map_data(|plan| plan.recompute_schema()) } @@ -138,14 +137,6 @@ impl<'a> TypeCoercionRewriter<'a> { Self { schema } } - fn coerce_plan(&mut self, plan: LogicalPlan) -> Result { - match plan { - LogicalPlan::Join(join) => self.coerce_join(join), - LogicalPlan::Union(union) => Self::coerce_union(union), - _ => Ok(plan), - } - } - /// Coerce join equality expressions and join filter /// /// Joins must be treated specially as their equality expressions are stored @@ -154,7 +145,11 @@ impl<'a> TypeCoercionRewriter<'a> { /// /// For example, on_exprs like `t1.a = t2.b AND t1.x = t2.y` will be stored /// as a list of `(t1.a, t2.b), (t1.x, t2.y)` - fn coerce_join(&mut self, mut join: Join) -> Result { + fn coerce_joins(&mut self, plan: LogicalPlan) -> Result { + let LogicalPlan::Join(mut join) = plan else { + return Ok(plan); + }; + join.on = join .on .into_iter() @@ -175,30 +170,36 @@ impl<'a> TypeCoercionRewriter<'a> { Ok(LogicalPlan::Join(join)) } - /// Coerce the union’s inputs to a common schema compatible with all inputs. - /// This occurs after wildcard expansion and the coercion of the input expressions. - fn coerce_union(union_plan: Union) -> Result { - let union_schema = Arc::new(coerce_union_schema(&union_plan.inputs)?); - let new_inputs = union_plan + /// Corece the union inputs after expanding the wildcard expressions + /// + /// Union inputs must have the same schema, so we coerce the expressions to match the schema + /// after expanding the wildcard expressions + fn coerce_union(&self, plan: LogicalPlan) -> Result { + let LogicalPlan::Union(union) = plan else { + return Ok(plan); + }; + + let inputs = union .inputs - .iter() + .into_iter() .map(|p| { - let plan = coerce_plan_expr_for_schema(p, &union_schema)?; + let plan = coerce_plan_expr_for_schema(&p, &union.schema)?; match plan { LogicalPlan::Projection(Projection { expr, input, .. }) => { Ok(Arc::new(project_with_column_index( expr, input, - Arc::clone(&union_schema), + Arc::clone(&union.schema), )?)) } other_plan => Ok(Arc::new(other_plan)), } }) .collect::>>()?; + Ok(LogicalPlan::Union(Union { - inputs: new_inputs, - schema: union_schema, + inputs, + schema: Arc::clone(&union.schema), })) } @@ -808,92 +809,6 @@ fn coerce_case_expression(case: Case, schema: &DFSchema) -> Result { Ok(Case::new(case_expr, when_then, else_expr)) } -/// Get a common schema that is compatible with all inputs of UNION. -fn coerce_union_schema(inputs: &[Arc]) -> Result { - let base_schema = inputs[0].schema(); - let mut union_datatypes = base_schema - .fields() - .iter() - .map(|f| f.data_type().clone()) - .collect::>(); - let mut union_nullabilities = base_schema - .fields() - .iter() - .map(|f| f.is_nullable()) - .collect::>(); - - for (i, plan) in inputs.iter().enumerate().skip(1) { - let plan_schema = plan.schema(); - if plan_schema.fields().len() != base_schema.fields().len() { - return plan_err!( - "Union schemas have different number of fields: \ - query 1 has {} fields whereas query {} has {} fields", - base_schema.fields().len(), - i + 1, - plan_schema.fields().len() - ); - } - // coerce data type and nullablity for each field - for (union_datatype, union_nullable, plan_field) in izip!( - union_datatypes.iter_mut(), - union_nullabilities.iter_mut(), - plan_schema.fields() - ) { - let coerced_type = - comparison_coercion(union_datatype, plan_field.data_type()).ok_or_else( - || { - plan_datafusion_err!( - "Incompatible inputs for Union: Previous inputs were \ - of type {}, but got incompatible type {} on column '{}'", - union_datatype, - plan_field.data_type(), - plan_field.name() - ) - }, - )?; - *union_datatype = coerced_type; - *union_nullable = *union_nullable || plan_field.is_nullable(); - } - } - let union_qualified_fields = izip!( - base_schema.iter(), - union_datatypes.into_iter(), - union_nullabilities - ) - .map(|((qualifier, field), datatype, nullable)| { - let field = Arc::new(Field::new(field.name().clone(), datatype, nullable)); - (qualifier.cloned(), field) - }) - .collect::>(); - DFSchema::new_with_metadata(union_qualified_fields, HashMap::new()) -} - -/// See `` -fn project_with_column_index( - expr: Vec, - input: Arc, - schema: DFSchemaRef, -) -> Result { - let alias_expr = expr - .into_iter() - .enumerate() - .map(|(i, e)| match e { - Expr::Alias(Alias { ref name, .. }) if name != schema.field(i).name() => { - e.unalias().alias(schema.field(i).name()) - } - Expr::Column(Column { - relation: _, - ref name, - }) if name != schema.field(i).name() => e.alias(schema.field(i).name()), - Expr::Alias { .. } | Expr::Column { .. } => e, - _ => e.alias(schema.field(i).name()), - }) - .collect::>(); - - Projection::try_new_with_schema(alias_expr, input, schema) - .map(LogicalPlan::Projection) -} - #[cfg(test)] mod test { use std::any::Any; diff --git a/datafusion/optimizer/src/eliminate_nested_union.rs b/datafusion/optimizer/src/eliminate_nested_union.rs index 5f41e4f137b15..cc8cf1f56c184 100644 --- a/datafusion/optimizer/src/eliminate_nested_union.rs +++ b/datafusion/optimizer/src/eliminate_nested_union.rs @@ -114,11 +114,8 @@ fn extract_plan_from_distinct(plan: Arc) -> Arc { #[cfg(test)] mod tests { use super::*; - use crate::analyzer::type_coercion::TypeCoercion; - use crate::analyzer::Analyzer; use crate::test::*; use arrow::datatypes::{DataType, Field, Schema}; - use datafusion_common::config::ConfigOptions; use datafusion_expr::{col, logical_plan::table_scan}; fn schema() -> Schema { @@ -130,14 +127,7 @@ mod tests { } fn assert_optimized_plan_equal(plan: LogicalPlan, expected: &str) -> Result<()> { - let options = ConfigOptions::default(); - let analyzed_plan = Analyzer::with_rules(vec![Arc::new(TypeCoercion::new())]) - .execute_and_check(plan, &options, |_, _| {})?; - assert_optimized_plan_eq( - Arc::new(EliminateNestedUnion::new()), - analyzed_plan, - expected, - ) + assert_optimized_plan_eq(Arc::new(EliminateNestedUnion::new()), plan, expected) } #[test] diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 7ce3565fa29f6..15efe2d2f03ce 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -2160,6 +2160,148 @@ fn union_all() { quick_test(sql, expected); } +#[test] +fn union_with_different_column_names() { + let sql = "SELECT order_id from orders UNION ALL SELECT customer_id FROM orders"; + let expected = "Union\ + \n Projection: orders.order_id\ + \n TableScan: orders\ + \n Projection: orders.customer_id AS order_id\ + \n TableScan: orders"; + quick_test(sql, expected); +} + +#[test] +fn union_values_with_no_alias() { + let sql = "SELECT 1, 2 UNION ALL SELECT 3, 4"; + let expected = "Union\ + \n Projection: Int64(1) AS Int64(1), Int64(2) AS Int64(2)\ + \n EmptyRelation\ + \n Projection: Int64(3) AS Int64(1), Int64(4) AS Int64(2)\ + \n EmptyRelation"; + quick_test(sql, expected); +} + +#[test] +fn union_with_incompatible_data_type() { + let sql = "SELECT interval '1 year 1 day' UNION ALL SELECT 1"; + let err = logical_plan(sql) + .expect_err("query should have failed") + .strip_backtrace(); + assert_eq!( + "Error during planning: UNION Column Int64(1) (type: Int64) is not compatible with column IntervalMonthDayNano(\"IntervalMonthDayNano { months: 12, days: 1, nanoseconds: 0 }\") (type: Interval(MonthDayNano))", + err + ); +} + +#[test] +fn union_with_different_decimal_data_types() { + let sql = "SELECT 1 a UNION ALL SELECT 1.1 a"; + let expected = "Union\ + \n Projection: CAST(Int64(1) AS Float64) AS a\ + \n EmptyRelation\ + \n Projection: Float64(1.1) AS a\ + \n EmptyRelation"; + quick_test(sql, expected); +} + +#[test] +fn union_with_null() { + let sql = "SELECT NULL a UNION ALL SELECT 1.1 a"; + let expected = "Union\ + \n Projection: CAST(NULL AS Float64) AS a\ + \n EmptyRelation\ + \n Projection: Float64(1.1) AS a\ + \n EmptyRelation"; + quick_test(sql, expected); +} + +#[test] +fn union_with_float_and_string() { + let sql = "SELECT 'a' a UNION ALL SELECT 1.1 a"; + let expected = "Union\ + \n Projection: Utf8(\"a\") AS a\ + \n EmptyRelation\ + \n Projection: CAST(Float64(1.1) AS Utf8) AS a\ + \n EmptyRelation"; + quick_test(sql, expected); +} + +#[test] +fn union_with_multiply_cols() { + let sql = "SELECT 'a' a, 1 b UNION ALL SELECT 1.1 a, 1.1 b"; + let expected = "Union\ + \n Projection: Utf8(\"a\") AS a, CAST(Int64(1) AS Float64) AS b\ + \n EmptyRelation\ + \n Projection: CAST(Float64(1.1) AS Utf8) AS a, Float64(1.1) AS b\ + \n EmptyRelation"; + quick_test(sql, expected); +} + +#[test] +fn sorted_union_with_different_types_and_group_by() { + let sql = "SELECT a FROM (select 1 a) x GROUP BY 1 UNION ALL (SELECT a FROM (select 1.1 a) x GROUP BY 1) ORDER BY 1"; + let expected = "Sort: x.a ASC NULLS LAST\ + \n Union\ + \n Projection: CAST(x.a AS Float64) AS a\ + \n Aggregate: groupBy=[[x.a]], aggr=[[]]\ + \n SubqueryAlias: x\ + \n Projection: Int64(1) AS a\ + \n EmptyRelation\ + \n Projection: x.a\ + \n Aggregate: groupBy=[[x.a]], aggr=[[]]\ + \n SubqueryAlias: x\ + \n Projection: Float64(1.1) AS a\ + \n EmptyRelation"; + quick_test(sql, expected); +} + +#[test] +fn union_with_binary_expr_and_cast() { + let sql = "SELECT cast(0.0 + a as integer) FROM (select 1 a) x GROUP BY 1 UNION ALL (SELECT 2.1 + a FROM (select 1 a) x GROUP BY 1)"; + let expected = "Union\ + \n Projection: CAST(Float64(0) + x.a AS Float64) AS Float64(0) + x.a\ + \n Aggregate: groupBy=[[CAST(Float64(0) + x.a AS Int32)]], aggr=[[]]\ + \n SubqueryAlias: x\ + \n Projection: Int64(1) AS a\ + \n EmptyRelation\ + \n Projection: Float64(2.1) + x.a AS Float64(0) + x.a\ + \n Aggregate: groupBy=[[Float64(2.1) + x.a]], aggr=[[]]\ + \n SubqueryAlias: x\ + \n Projection: Int64(1) AS a\ + \n EmptyRelation"; + quick_test(sql, expected); +} + +#[test] +fn union_with_aliases() { + let sql = "SELECT a as a1 FROM (select 1 a) x GROUP BY 1 UNION ALL (SELECT a as a1 FROM (select 1.1 a) x GROUP BY 1)"; + let expected = "Union\ + \n Projection: CAST(x.a AS Float64) AS a1\ + \n Aggregate: groupBy=[[x.a]], aggr=[[]]\ + \n SubqueryAlias: x\ + \n Projection: Int64(1) AS a\ + \n EmptyRelation\ + \n Projection: x.a AS a1\ + \n Aggregate: groupBy=[[x.a]], aggr=[[]]\ + \n SubqueryAlias: x\ + \n Projection: Float64(1.1) AS a\ + \n EmptyRelation"; + quick_test(sql, expected); +} + +#[test] +fn union_with_incompatible_data_types() { + let sql = "SELECT 'a' a UNION ALL SELECT true a"; + let err = logical_plan(sql) + .expect_err("query should have failed") + .strip_backtrace(); + assert_eq!( + "Error during planning: UNION Column a (type: Boolean) is not compatible with column a (type: Utf8)", + err + ); +} + #[test] fn empty_over() { let sql = "SELECT order_id, MAX(order_id) OVER () from orders"; diff --git a/datafusion/sqllogictest/test_files/type_coercion.slt b/datafusion/sqllogictest/test_files/type_coercion.slt index e420c0cc71554..aa1e6826eca55 100644 --- a/datafusion/sqllogictest/test_files/type_coercion.slt +++ b/datafusion/sqllogictest/test_files/type_coercion.slt @@ -49,179 +49,3 @@ select interval '1 month' - '2023-05-01'::date; # interval - timestamp query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types SELECT interval '1 month' - '2023-05-01 12:30:00'::timestamp; - - -#################################### -## Test type coercion with UNIONs ## -#################################### - -# Disable optimizer to test only the analyzer with type coercion -statement ok -set datafusion.optimizer.max_passes = 0; - -statement ok -set datafusion.explain.logical_plan_only = true; - -# Create test table -statement ok -CREATE TABLE orders( - order_id INT UNSIGNED NOT NULL, - customer_id INT UNSIGNED NOT NULL, - o_item_id VARCHAR NOT NULL, - qty INT NOT NULL, - price DOUBLE NOT NULL, - delivered BOOLEAN NOT NULL -); - -# union_different_num_columns_error() / UNION -query error Error during planning: Union schemas have different number of fields: query 1 has 1 fields whereas query 2 has 2 fields -SELECT order_id FROM orders UNION SELECT customer_id, o_item_id FROM orders - -# union_different_num_columns_error() / UNION ALL -query error Error during planning: Union schemas have different number of fields: query 1 has 1 fields whereas query 2 has 2 fields -SELECT order_id FROM orders UNION ALL SELECT customer_id, o_item_id FROM orders - -# union_with_different_column_names() -query TT -EXPLAIN SELECT order_id from orders UNION ALL SELECT customer_id FROM orders ----- -logical_plan -01)Union -02)--Projection: orders.order_id -03)----TableScan: orders -04)--Projection: orders.customer_id AS order_id -05)----TableScan: orders - -# union_values_with_no_alias() -query TT -EXPLAIN SELECT 1, 2 UNION ALL SELECT 3, 4 ----- -logical_plan -01)Union -02)--Projection: Int64(1) AS Int64(1), Int64(2) AS Int64(2) -03)----EmptyRelation -04)--Projection: Int64(3) AS Int64(1), Int64(4) AS Int64(2) -05)----EmptyRelation - -# union_with_incompatible_data_type() -query error Incompatible inputs for Union: Previous inputs were of type Interval\(MonthDayNano\), but got incompatible type Int64 on column 'Int64\(1\)' -SELECT interval '1 year 1 day' UNION ALL SELECT 1 - -# union_with_different_decimal_data_types() -query TT -EXPLAIN SELECT 1 a UNION ALL SELECT 1.1 a ----- -logical_plan -01)Union -02)--Projection: CAST(Int64(1) AS Float64) AS a -03)----EmptyRelation -04)--Projection: Float64(1.1) AS a -05)----EmptyRelation - -# union_with_null() -query TT -EXPLAIN SELECT NULL a UNION ALL SELECT 1.1 a ----- -logical_plan -01)Union -02)--Projection: CAST(NULL AS Float64) AS a -03)----EmptyRelation -04)--Projection: Float64(1.1) AS a -05)----EmptyRelation - -# union_with_float_and_string() -query TT -EXPLAIN SELECT 'a' a UNION ALL SELECT 1.1 a ----- -logical_plan -01)Union -02)--Projection: Utf8("a") AS a -03)----EmptyRelation -04)--Projection: CAST(Float64(1.1) AS Utf8) AS a -05)----EmptyRelation - -# union_with_multiply_cols() -query TT -EXPLAIN SELECT 'a' a, 1 b UNION ALL SELECT 1.1 a, 1.1 b ----- -logical_plan -01)Union -02)--Projection: Utf8("a") AS a, CAST(Int64(1) AS Float64) AS b -03)----EmptyRelation -04)--Projection: CAST(Float64(1.1) AS Utf8) AS a, Float64(1.1) AS b -05)----EmptyRelation - -# sorted_union_with_different_types_and_group_by() -query TT -EXPLAIN SELECT a FROM (select 1 a) x GROUP BY 1 - UNION ALL -(SELECT a FROM (select 1.1 a) x GROUP BY 1) ORDER BY 1 ----- -logical_plan -01)Sort: x.a ASC NULLS LAST -02)--Union -03)----Projection: CAST(x.a AS Float64) AS a -04)------Aggregate: groupBy=[[x.a]], aggr=[[]] -05)--------SubqueryAlias: x -06)----------Projection: Int64(1) AS a -07)------------EmptyRelation -08)----Projection: x.a -09)------Aggregate: groupBy=[[x.a]], aggr=[[]] -10)--------SubqueryAlias: x -11)----------Projection: Float64(1.1) AS a -12)------------EmptyRelation - -# union_with_binary_expr_and_cast() -query TT -EXPLAIN SELECT cast(0.0 + a as integer) FROM (select 1 a) x GROUP BY 1 - UNION ALL -(SELECT 2.1 + a FROM (select 1 a) x GROUP BY 1) ----- -logical_plan -01)Union -02)--Projection: CAST(Float64(0) + x.a AS Float64) AS Float64(0) + x.a -03)----Aggregate: groupBy=[[CAST(Float64(0) + CAST(x.a AS Float64) AS Int32)]], aggr=[[]] -04)------SubqueryAlias: x -05)--------Projection: Int64(1) AS a -06)----------EmptyRelation -07)--Projection: Float64(2.1) + x.a AS Float64(0) + x.a -08)----Aggregate: groupBy=[[Float64(2.1) + CAST(x.a AS Float64)]], aggr=[[]] -09)------SubqueryAlias: x -10)--------Projection: Int64(1) AS a -11)----------EmptyRelation - -# union_with_aliases() -query TT -EXPLAIN SELECT a as a1 FROM (select 1 a) x GROUP BY 1 - UNION ALL -(SELECT a as a1 FROM (select 1.1 a) x GROUP BY 1) ----- -logical_plan -01)Union -02)--Projection: CAST(x.a AS Float64) AS a1 -03)----Aggregate: groupBy=[[x.a]], aggr=[[]] -04)------SubqueryAlias: x -05)--------Projection: Int64(1) AS a -06)----------EmptyRelation -07)--Projection: x.a AS a1 -08)----Aggregate: groupBy=[[x.a]], aggr=[[]] -09)------SubqueryAlias: x -10)--------Projection: Float64(1.1) AS a -11)----------EmptyRelation - -# union_with_incompatible_data_types() -query error Incompatible inputs for Union: Previous inputs were of type Utf8, but got incompatible type Boolean on column 'a' -SELECT 'a' a UNION ALL SELECT true a - -statement ok -SET datafusion.optimizer.max_passes = 3; - -statement ok -SET datafusion.explain.logical_plan_only = false; - -statement ok -DROP TABLE orders; - -######################################## -## Test type coercion with UNIONs end ## -######################################## diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index d28e6d6e237bd..cf248d3f6a73e 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -680,49 +680,3 @@ DROP TABLE t3; statement ok DROP TABLE t4; - -# Test issue: https://github.com/apache/datafusion/issues/11742 -query R rowsort -WITH - tt(v1) AS (VALUES (1::INT),(NULL::INT)) -SELECT NVL(v1, 0.5) FROM tt - UNION ALL -SELECT NULL WHERE FALSE; ----- -0.5 -1 - - -### -# Test for https://github.com/apache/datafusion/issues/11492 -### - -# Input data is -# a,b,c -# 1,2,3 - -statement ok -CREATE EXTERNAL TABLE t ( - a INT, - b INT, - c INT -) -STORED AS CSV -LOCATION '../core/tests/data/example.csv' -WITH ORDER (a ASC) -OPTIONS ('format.has_header' 'true'); - -query T -SELECT (SELECT a from t ORDER BY a) UNION ALL (SELECT 'bar' as a from t) ORDER BY a; ----- -1 -bar - -query I -SELECT (SELECT a from t ORDER BY a) UNION ALL (SELECT NULL as a from t) ORDER BY a; ----- -1 -NULL - -statement ok -drop table t