diff --git a/e2e_test/batch/basic/query.slt.part b/e2e_test/batch/basic/query.slt.part index 18fdab91f4f24..7f0eb1f6f755f 100644 --- a/e2e_test/batch/basic/query.slt.part +++ b/e2e_test/batch/basic/query.slt.part @@ -30,6 +30,21 @@ select count(*) from t3; ---- 1 +query III +select * except (v1) from t3; +---- +2 NULL + +query III +select * except (t3.v1) from t3; +---- +2 NULL + +query III +select * except (v1), * except (v2) from t3; +---- +2 NULL 1 NULL + statement error Division by zero select v1/0 from t3; diff --git a/e2e_test/batch/basic/subquery.slt.part b/e2e_test/batch/basic/subquery.slt.part index b5f8d1f7fa94e..dc27649a90b6d 100644 --- a/e2e_test/batch/basic/subquery.slt.part +++ b/e2e_test/batch/basic/subquery.slt.part @@ -71,6 +71,20 @@ NULL 1 NULL 2 NULL NULL +query II +select * except (b,d) from (select t1.x as a, t1.y as b, t2.x as c, t2.y as d from t1 join t2 on t1.x = t2.x where t1.x=1); +---- +1 1 +1 1 +1 1 +1 1 + +query II +select * except (t1.x, t2.y), * except (t1.y, t2.x) from t1 join t2 on t1.y = t2.y where exists(select * from t3 where t1.x = t3.x and t2.y = t3.y) order by t2.x; +---- +2 1 2 2 +2 2 2 2 +2 NULL 2 2 statement ok drop table t1; diff --git a/e2e_test/batch/top_n/group_top_n.slt b/e2e_test/batch/top_n/group_top_n.slt index 628e03d5b97fb..71904968200cf 100644 --- a/e2e_test/batch/top_n/group_top_n.slt +++ b/e2e_test/batch/top_n/group_top_n.slt @@ -54,6 +54,19 @@ where rank <= 3 AND rank > 1; 3 2 3 3 +query II rowsort +select * except (rank) from ( + select *, ROW_NUMBER() OVER (PARTITION BY x ORDER BY y) as rank from t +) +where rank <= 3 AND rank > 1; +---- +1 2 +1 3 +2 2 +2 3 +3 2 +3 3 + query II rowsort select x, y from ( select *, RANK() OVER (ORDER BY y) as rank from t diff --git a/src/frontend/planner_test/tests/testdata/input/basic_query.yaml b/src/frontend/planner_test/tests/testdata/input/basic_query.yaml index 6bafd10d22e93..97e06eefeff3d 100644 --- a/src/frontend/planner_test/tests/testdata/input/basic_query.yaml +++ 
b/src/frontend/planner_test/tests/testdata/input/basic_query.yaml @@ -28,6 +28,17 @@ expected_outputs: - stream_plan - batch_plan +- sql: | + create table t (v1 int, v2 int, v3 int); + select * except (v1, v2) from t; + expected_outputs: + - stream_plan + - batch_plan +- sql: | + create table t (v1 int, v2 int, v3 int); + select * except (v1, v2), v3 from t; + expected_outputs: + - batch_plan - name: test boolean expression common factor extraction sql: | create table t (v1 Boolean, v2 Boolean, v3 Boolean); diff --git a/src/frontend/planner_test/tests/testdata/output/basic_query.yaml b/src/frontend/planner_test/tests/testdata/output/basic_query.yaml index 2f5447ceabd59..81bf434ba9b09 100644 --- a/src/frontend/planner_test/tests/testdata/output/basic_query.yaml +++ b/src/frontend/planner_test/tests/testdata/output/basic_query.yaml @@ -41,6 +41,22 @@ StreamMaterialize { columns: [v1, t._row_id(hidden)], stream_key: [t._row_id], pk_columns: [t._row_id], pk_conflict: NoCheck } └─StreamFilter { predicate: (t.v1 < 1:Int32) } └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } +- sql: | + create table t (v1 int, v2 int, v3 int); + select * except (v1, v2) from t; + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchScan { table: t, columns: [t.v3], distribution: SomeShard } + stream_plan: |- + StreamMaterialize { columns: [v3, t._row_id(hidden)], stream_key: [t._row_id], pk_columns: [t._row_id], pk_conflict: NoCheck } + └─StreamTableScan { table: t, columns: [t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } +- sql: | + create table t (v1 int, v2 int, v3 int); + select * except (v1, v2), v3 from t; + batch_plan: |- + BatchExchange { order: [], dist: Single } + └─BatchProject { exprs: [t.v3, t.v3] } + └─BatchScan { table: t, columns: [t.v3], distribution: SomeShard } - name: test boolean expression common factor extraction sql: | create table t (v1 Boolean, v2 Boolean, v3 
Boolean); diff --git a/src/frontend/src/binder/expr/function.rs b/src/frontend/src/binder/expr/function.rs index 605d7814a1f85..08ba7c8c5f92a 100644 --- a/src/frontend/src/binder/expr/function.rs +++ b/src/frontend/src/binder/expr/function.rs @@ -1018,7 +1018,8 @@ impl Binder { FunctionArgExpr::Expr(expr) => Ok(vec![self.bind_expr_inner(expr)?]), FunctionArgExpr::QualifiedWildcard(_) => todo!(), FunctionArgExpr::ExprQualifiedWildcard(_, _) => todo!(), - FunctionArgExpr::Wildcard => Ok(vec![]), + FunctionArgExpr::WildcardOrWithExcept(None) => Ok(vec![]), + FunctionArgExpr::WildcardOrWithExcept(Some(_)) => unreachable!(), } } diff --git a/src/frontend/src/binder/select.rs b/src/frontend/src/binder/select.rs index 896cc117ad9b6..7dd639ba969d1 100644 --- a/src/frontend/src/binder/select.rs +++ b/src/frontend/src/binder/select.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::fmt::Debug; use itertools::Itertools; @@ -259,20 +259,33 @@ impl Binder { select_list.extend(exprs); aliases.extend(names); } - SelectItem::Wildcard => { + SelectItem::WildcardOrWithExcept(w) => { if self.context.range_of.is_empty() { return Err(ErrorCode::BindError( "SELECT * with no tables specified is not valid".into(), ) .into()); } + // Bind the column groups - // In psql, the USING and NATURAL columns come before the rest of the columns in - // a SELECT * statement + // In psql, the USING and NATURAL columns come before the rest of the + // columns in a SELECT * statement let (exprs, names) = self.iter_column_groups(); select_list.extend(exprs); aliases.extend(names); + let mut except_indices: HashSet = HashSet::new(); + if let Some(exprs) = w { + for expr in exprs { + let bound = self.bind_expr(expr)?; + if let ExprImpl::InputRef(inner) = bound { + except_indices.insert(inner.index); + } else { + unreachable!(); + } + } + } + // Bind 
columns that are not in groups let (exprs, names) = Self::iter_bound_columns(self.context.columns[..].iter().filter(|c| { @@ -282,17 +295,19 @@ impl Binder { .column_group_context .mapping .contains_key(&c.index) + && !except_indices.contains(&c.index) })); + select_list.extend(exprs); aliases.extend(names); - - // TODO: we will need to be able to handle wildcard expressions bound to aliases - // in the future. We'd then need a `NaturalGroupContext` - // bound to each alias to correctly disambiguate column + // TODO: we will need to be able to handle wildcard expressions bound to + // aliases in the future. We'd then need a + // `NaturalGroupContext` bound to each alias + // to correctly disambiguate column // references // - // We may need to refactor `NaturalGroupContext` to become span aware in that - // case. + // We may need to refactor `NaturalGroupContext` to become span aware in + // that case. } } } diff --git a/src/frontend/src/handler/create_sink.rs b/src/frontend/src/handler/create_sink.rs index 29bd1d893ac90..26a6675629510 100644 --- a/src/frontend/src/handler/create_sink.rs +++ b/src/frontend/src/handler/create_sink.rs @@ -50,7 +50,7 @@ pub fn gen_sink_query_from_name(from_name: ObjectName) -> Result { }]; let select = Select { from, - projection: vec![SelectItem::Wildcard], + projection: vec![SelectItem::WildcardOrWithExcept(None)], ..Default::default() }; let body = SetExpr::Select(Box::new(select)); diff --git a/src/meta/src/manager/catalog/utils.rs b/src/meta/src/manager/catalog/utils.rs index 1a07805b875d6..a9ce497eff6e4 100644 --- a/src/meta/src/manager/catalog/utils.rs +++ b/src/meta/src/manager/catalog/utils.rs @@ -244,7 +244,13 @@ impl QueryRewriter<'_> { FunctionArgExpr::Expr(expr) | FunctionArgExpr::ExprQualifiedWildcard(expr, _) => { self.visit_expr(expr) } - FunctionArgExpr::QualifiedWildcard(_) | FunctionArgExpr::Wildcard => {} + FunctionArgExpr::QualifiedWildcard(_) + | FunctionArgExpr::WildcardOrWithExcept(None) => {} + 
FunctionArgExpr::WildcardOrWithExcept(Some(exprs)) => { + for expr in exprs { + self.visit_expr(expr); + } + } }, } } @@ -346,7 +352,12 @@ impl QueryRewriter<'_> { SelectItem::UnnamedExpr(expr) | SelectItem::ExprQualifiedWildcard(expr, _) | SelectItem::ExprWithAlias { expr, .. } => self.visit_expr(expr), - SelectItem::QualifiedWildcard(_) | SelectItem::Wildcard => {} + SelectItem::QualifiedWildcard(_) | SelectItem::WildcardOrWithExcept(None) => {} + SelectItem::WildcardOrWithExcept(Some(exprs)) => { + for expr in exprs { + self.visit_expr(expr); + } + } } } } diff --git a/src/sqlparser/src/ast/mod.rs b/src/sqlparser/src/ast/mod.rs index 0ad6591eb9a34..123644d94d8f7 100644 --- a/src/sqlparser/src/ast/mod.rs +++ b/src/sqlparser/src/ast/mod.rs @@ -1917,8 +1917,8 @@ pub enum FunctionArgExpr { ExprQualifiedWildcard(Expr, Vec), /// Qualified wildcard, e.g. `alias.*` or `schema.table.*`. QualifiedWildcard(ObjectName), - /// An unqualified `*` - Wildcard, + /// An unqualified `*` or `* except (columns)` + WildcardOrWithExcept(Option>), } impl fmt::Display for FunctionArgExpr { @@ -1936,7 +1936,19 @@ impl fmt::Display for FunctionArgExpr { ) } FunctionArgExpr::QualifiedWildcard(prefix) => write!(f, "{}.*", prefix), - FunctionArgExpr::Wildcard => f.write_str("*"), + FunctionArgExpr::WildcardOrWithExcept(w) => match w { + Some(exprs) => write!( + f, + "EXCEPT ({})", + exprs + .iter() + .map(|v| v.to_string()) + .collect::>() + .as_slice() + .join(", ") + ), + None => f.write_str("*"), + }, } } } diff --git a/src/sqlparser/src/ast/query.rs b/src/sqlparser/src/ast/query.rs index 82bab6096105a..f7d222fbb1f35 100644 --- a/src/sqlparser/src/ast/query.rs +++ b/src/sqlparser/src/ast/query.rs @@ -309,8 +309,8 @@ pub enum SelectItem { ExprWithAlias { expr: Expr, alias: Ident }, /// `alias.*` or even `schema.table.*` QualifiedWildcard(ObjectName), - /// An unqualified `*` - Wildcard, + /// An unqualified `*`, or `* except (exprs)` + WildcardOrWithExcept(Option>), } impl fmt::Display for 
SelectItem { @@ -327,7 +327,19 @@ impl fmt::Display for SelectItem { .format_with("", |i, f| f(&format_args!(".{i}"))) ), SelectItem::QualifiedWildcard(prefix) => write!(f, "{}.*", prefix), - SelectItem::Wildcard => write!(f, "*"), + SelectItem::WildcardOrWithExcept(w) => match w { + Some(exprs) => write!( + f, + "* EXCEPT ({})", + exprs + .iter() + .map(|v| v.to_string()) + .collect::>() + .as_slice() + .join(", ") + ), + None => write!(f, "*"), + }, } } } diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index 9d937cfe5ab63..499ff2892ff90 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -82,7 +82,8 @@ pub enum WildcardOrExpr { /// See also [`Expr::FieldIdentifier`] for behaviors of parentheses. ExprQualifiedWildcard(Expr, Vec), QualifiedWildcard(ObjectName), - Wildcard, + // Either it's `*` or `* except (columns)` + WildcardOrWithExcept(Option>), } impl From for FunctionArgExpr { @@ -93,7 +94,7 @@ impl From for FunctionArgExpr { Self::ExprQualifiedWildcard(expr, prefix) } WildcardOrExpr::QualifiedWildcard(prefix) => Self::QualifiedWildcard(prefix), - WildcardOrExpr::Wildcard => Self::Wildcard, + WildcardOrExpr::WildcardOrWithExcept(w) => Self::WildcardOrWithExcept(w), } } } @@ -313,7 +314,14 @@ impl Parser { return self.word_concat_wildcard_expr(w.to_ident()?, wildcard_expr); } Token::Mul => { - return Ok(WildcardOrExpr::Wildcard); + if self.parse_keyword(Keyword::EXCEPT) && self.consume_token(&Token::LParen) { + let exprs = self.parse_comma_separated(Parser::parse_expr)?; + if self.consume_token(&Token::RParen) { + return Ok(WildcardOrExpr::WildcardOrWithExcept(Some(exprs))); + } + } else { + return Ok(WildcardOrExpr::WildcardOrWithExcept(None)); + } } // parses wildcard field selection expression. 
// Code is similar to `parse_struct_selection` @@ -346,9 +354,10 @@ impl Parser { let mut idents = vec![ident]; match simple_wildcard_expr { WildcardOrExpr::QualifiedWildcard(ids) => idents.extend(ids.0), - WildcardOrExpr::Wildcard => {} + WildcardOrExpr::WildcardOrWithExcept(None) => {} WildcardOrExpr::ExprQualifiedWildcard(_, _) => unreachable!(), WildcardOrExpr::Expr(e) => return Ok(WildcardOrExpr::Expr(e)), + WildcardOrExpr::WildcardOrWithExcept(Some(_)) => unreachable!(), } Ok(WildcardOrExpr::QualifiedWildcard(ObjectName(idents))) } @@ -387,9 +396,10 @@ impl Parser { match simple_wildcard_expr { WildcardOrExpr::QualifiedWildcard(ids) => idents.extend(ids.0), - WildcardOrExpr::Wildcard => {} + WildcardOrExpr::WildcardOrWithExcept(None) => {} WildcardOrExpr::ExprQualifiedWildcard(_, _) => unreachable!(), WildcardOrExpr::Expr(_) => unreachable!(), + WildcardOrExpr::WildcardOrWithExcept(Some(_)) => unreachable!(), } Ok(WildcardOrExpr::ExprQualifiedWildcard(expr, idents)) } @@ -408,7 +418,7 @@ impl Parser { Token::Word(w) => id_parts.push(w.to_ident()?), Token::Mul => { return if id_parts.is_empty() { - Ok(WildcardOrExpr::Wildcard) + Ok(WildcardOrExpr::WildcardOrWithExcept(None)) } else { Ok(WildcardOrExpr::QualifiedWildcard(ObjectName(id_parts))) } @@ -4253,7 +4263,7 @@ impl Parser { WildcardOrExpr::ExprQualifiedWildcard(expr, prefix) => { Ok(SelectItem::ExprQualifiedWildcard(expr, prefix)) } - WildcardOrExpr::Wildcard => Ok(SelectItem::Wildcard), + WildcardOrExpr::WildcardOrWithExcept(w) => Ok(SelectItem::WildcardOrWithExcept(w)), } } diff --git a/src/sqlparser/tests/sqlparser_common.rs b/src/sqlparser/tests/sqlparser_common.rs index 7088450220038..7e56ea649c8e5 100644 --- a/src/sqlparser/tests/sqlparser_common.rs +++ b/src/sqlparser/tests/sqlparser_common.rs @@ -260,7 +260,10 @@ fn parse_select_all_distinct() { fn parse_select_wildcard() { let sql = "SELECT * FROM foo"; let select = verified_only_select(sql); - assert_eq!(&SelectItem::Wildcard, 
only(&select.projection)); + assert_eq!( + &SelectItem::WildcardOrWithExcept(None), + only(&select.projection) + ); let sql = "SELECT foo.* FROM foo"; let select = verified_only_select(sql); @@ -284,6 +287,16 @@ fn parse_select_wildcard() { assert!(format!("{}", result.unwrap_err()).contains("Expected end of statement, found: +")); } +#[test] +fn parse_select_except() { + let sql = "SELECT * EXCEPT (v1) FROM foo"; + let select = verified_only_select(sql); + assert_eq!( + &SelectItem::WildcardOrWithExcept(Some(vec![Expr::Identifier(Ident::new_unchecked("v1"))])), + only(&select.projection) + ); +} + #[test] fn parse_count_wildcard() { verified_only_select("SELECT COUNT(*) FROM Orders WHERE id = 10"); @@ -331,7 +344,9 @@ fn parse_select_count_wildcard() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new_unchecked("COUNT")]), - args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)], + args: vec![FunctionArg::Unnamed(FunctionArgExpr::WildcardOrWithExcept( + None + ))], over: None, distinct: false, order_by: vec![], @@ -1071,7 +1086,9 @@ fn parse_select_having() { Some(Expr::BinaryOp { left: Box::new(Expr::Function(Function { name: ObjectName(vec![Ident::new_unchecked("COUNT")]), - args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)], + args: vec![FunctionArg::Unnamed(FunctionArgExpr::WildcardOrWithExcept( + None + ))], over: None, distinct: false, order_by: vec![], diff --git a/src/sqlparser/tests/testdata/select.yaml b/src/sqlparser/tests/testdata/select.yaml index c8aed6d1d8caf..b9e362986548d 100644 --- a/src/sqlparser/tests/testdata/select.yaml +++ b/src/sqlparser/tests/testdata/select.yaml @@ -29,14 +29,21 @@ formatted_sql: SELECT (CAST(ROW(1, 2, 3) AS foo)).v1.* - input: SELECT * FROM generate_series('2'::INT,'10'::INT,'2'::INT) formatted_sql: SELECT * FROM generate_series(CAST('2' AS INT), CAST('10' AS INT), CAST('2' AS INT)) - formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [Wildcard], 
from: [TableWithJoins { relation: TableFunction { name: ObjectName([Ident { value: "generate_series", quote_style: None }]), alias: None, args: [Unnamed(Expr(Cast { expr: Value(SingleQuotedString("2")), data_type: Int })), Unnamed(Expr(Cast { expr: Value(SingleQuotedString("10")), data_type: Int })), Unnamed(Expr(Cast { expr: Value(SingleQuotedString("2")), data_type: Int }))] }, joins: [] }], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' + formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [WildcardOrWithExcept(None)], from: [TableWithJoins { relation: TableFunction { name: ObjectName([Ident { value: "generate_series", quote_style: None }]), alias: None, args: [Unnamed(Expr(Cast { expr: Value(SingleQuotedString("2")), data_type: Int })), Unnamed(Expr(Cast { expr: Value(SingleQuotedString("10")), data_type: Int })), Unnamed(Expr(Cast { expr: Value(SingleQuotedString("2")), data_type: Int }))] }, joins: [] }], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' - input: SELECT * FROM unnest(Array[1,2,3]); formatted_sql: SELECT * FROM unnest(ARRAY[1, 2, 3]) - formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [Wildcard], from: [TableWithJoins { relation: TableFunction { name: ObjectName([Ident { value: "unnest", quote_style: None }]), alias: None, args: [Unnamed(Expr(Array(Array { elem: [Value(Number("1")), Value(Number("2")), Value(Number("3"))], named: true })))] }, joins: [] }], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' + formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [WildcardOrWithExcept(None)], from: [TableWithJoins { relation: TableFunction { name: ObjectName([Ident { value: "unnest", quote_style: None }]), 
alias: None, args: [Unnamed(Expr(Array(Array { elem: [Value(Number("1")), Value(Number("2")), Value(Number("3"))], named: true })))] }, joins: [] }], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' - input: SELECT id, fname, lname FROM customer WHERE salary <> 'Not Provided' AND salary <> '' formatted_sql: SELECT id, fname, lname FROM customer WHERE salary <> 'Not Provided' AND salary <> '' - input: SELECT id FROM customer WHERE NOT salary = '' formatted_sql: SELECT id FROM customer WHERE NOT salary = '' +- input: SELECT * EXCEPT (v1,v2) FROM foo + formatted_sql: SELECT * EXCEPT (v1, v2) FROM foo + formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [WildcardOrWithExcept(Some([Identifier(Ident { value: "v1", quote_style: None }), Identifier(Ident { value: "v2", quote_style: None })]))], from: [TableWithJoins { relation: Table { name: ObjectName([Ident { value: "foo", quote_style: None }]), alias: None, for_system_time_as_of_proctime: false }, joins: [] }], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' +- input: SELECT v3 EXCEPT (v1, v2) FROM foo + error_msg: |- + sql parser error: Expected SELECT, VALUES, or a subquery in the query body, found: v1 at line:1, column:21 + Near "SELECT v3 EXCEPT (" - input: SELECT * FROM t LIMIT 1 FETCH FIRST ROWS ONLY error_msg: 'sql parser error: Cannot specify both LIMIT and FETCH' - input: SELECT * FROM t FETCH FIRST ROWS WITH TIES diff --git a/src/tests/sqlsmith/src/sql_gen/agg.rs b/src/tests/sqlsmith/src/sql_gen/agg.rs index a665d250f3bc4..a05c6f75ee9de 100644 --- a/src/tests/sqlsmith/src/sql_gen/agg.rs +++ b/src/tests/sqlsmith/src/sql_gen/agg.rs @@ -129,7 +129,9 @@ fn make_agg_func( let args = if exprs.is_empty() { // The only agg without args is `count`. 
// `select proname from pg_proc where array_length(proargtypes, 1) = 0 and prokind = 'a';` - vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)] + vec![FunctionArg::Unnamed(FunctionArgExpr::WildcardOrWithExcept( + None, + ))] } else { exprs .iter()