From cdbd96434676c8c34e742a6cfea6bc7499e97cde Mon Sep 17 00:00:00 2001 From: Junhao Liu Date: Fri, 26 Apr 2024 09:18:16 -0600 Subject: [PATCH 1/6] support window function sql2expr (#10243) --- .../expr/src/built_in_window_function.rs | 2 +- datafusion/expr/src/expr.rs | 10 + datafusion/sql/src/unparser/expr.rs | 175 ++++++++++++++---- 3 files changed, 151 insertions(+), 36 deletions(-) diff --git a/datafusion/expr/src/built_in_window_function.rs b/datafusion/expr/src/built_in_window_function.rs index 1001bbb015ed..18a888ae8b2a 100644 --- a/datafusion/expr/src/built_in_window_function.rs +++ b/datafusion/expr/src/built_in_window_function.rs @@ -71,7 +71,7 @@ pub enum BuiltInWindowFunction { } impl BuiltInWindowFunction { - fn name(&self) -> &str { + pub fn name(&self) -> &str { use BuiltInWindowFunction::*; match self { RowNumber => "ROW_NUMBER", diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 0d8e8d816b33..e310eaa7e48f 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -669,6 +669,16 @@ impl WindowFunctionDefinition { WindowFunctionDefinition::WindowUDF(fun) => fun.signature().clone(), } } + + /// Function's name for display + pub fn name(&self) -> &str { + match self { + WindowFunctionDefinition::BuiltInWindowFunction(fun) => fun.name(), + WindowFunctionDefinition::WindowUDF(fun) => fun.name(), + WindowFunctionDefinition::AggregateFunction(fun) => fun.name(), + WindowFunctionDefinition::AggregateUDF(fun) => fun.name(), + } + } } impl fmt::Display for WindowFunctionDefinition { diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index d091fbe14dbd..7194b0a7d851 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -21,10 +21,7 @@ use datafusion_common::{ internal_datafusion_err, not_impl_err, plan_err, Column, Result, ScalarValue, }; use datafusion_expr::{ - expr::{ - AggregateFunctionDefinition, Alias, Exists, InList, ScalarFunction, Sort, - WindowFunction, - }, + expr::{Alias, Exists, InList, ScalarFunction, Sort, WindowFunction}, Between, BinaryExpr, Case, Cast, Expr, Like, Operator, }; use sqlparser::ast::{ @@ -170,14 +167,56 @@ impl Unparser<'_> { Expr::Literal(value) => Ok(self.scalar_to_sql(value)?), Expr::Alias(Alias { expr, name: _, .. 
}) => self.expr_to_sql(expr),
             Expr::WindowFunction(WindowFunction {
-                fun: _,
-                args: _,
-                partition_by: _,
+                fun,
+                args,
+                partition_by,
                 order_by: _,
-                window_frame: _,
+                window_frame,
                 null_treatment: _,
             }) => {
-                not_impl_err!("Unsupported expression: {expr:?}")
+                let func_name = fun.name();
+
+                let args = self.function_args_to_sql(args)?;
+
+                let units = match window_frame.units {
+                    datafusion_expr::window_frame::WindowFrameUnits::Rows => {
+                        ast::WindowFrameUnits::Rows
+                    }
+                    datafusion_expr::window_frame::WindowFrameUnits::Range => {
+                        ast::WindowFrameUnits::Range
+                    }
+                    datafusion_expr::window_frame::WindowFrameUnits::Groups => {
+                        ast::WindowFrameUnits::Groups
+                    }
+                };
+                let start_bound = self.convert_bound(&window_frame.start_bound);
+                let end_bound = self.convert_bound(&window_frame.end_bound);
+                let over = Some(ast::WindowType::WindowSpec(ast::WindowSpec {
+                    window_name: None,
+                    partition_by: partition_by
+                        .iter()
+                        .map(|e| self.expr_to_sql(e))
+                        .collect::<Result<Vec<_>>>()?,
+                    order_by: vec![],
+                    window_frame: Some(ast::WindowFrame {
+                        units,
+                        start_bound,
+                        end_bound: Option::from(end_bound),
+                    }),
+                }));
+                Ok(ast::Expr::Function(Function {
+                    name: ast::ObjectName(vec![Ident {
+                        value: func_name.to_string(),
+                        quote_style: None,
+                    }]),
+                    args,
+                    filter: None,
+                    null_treatment: None,
+                    over,
+                    distinct: false,
+                    special: false,
+                    order_by: vec![],
+                }))
             }
             Expr::SimilarTo(Like {
                 negated,
@@ -199,37 +238,20 @@ impl Unparser<'_> {
                 escape_char: *escape_char,
             }),
             Expr::AggregateFunction(agg) => {
-                let func_name = if let AggregateFunctionDefinition::BuiltIn(built_in) =
-                    &agg.func_def
-                {
-                    built_in.name()
-                } else {
-                    return not_impl_err!(
-                        "Only built in agg functions are supported, got {agg:?}"
-                    );
-                };
-
-                let args = agg
-                    .args
-                    .iter()
-                    .map(|e| {
-                        if matches!(e, Expr::Wildcard { qualifier: None }) {
-                            Ok(FunctionArg::Unnamed(ast::FunctionArgExpr::Wildcard))
-                        } else {
-                            self.expr_to_sql(e).map(|e| {
-                                FunctionArg::Unnamed(ast::FunctionArgExpr::Expr(e))
-                            })
-                        }
-                    })
-                    .collect::<Result<Vec<_>>>()?;
+                let func_name = agg.func_def.name();
+                let args = self.function_args_to_sql(&agg.args)?;
+                let filter = match &agg.filter {
+                    Some(filter) => Some(Box::new(self.expr_to_sql(filter)?)),
+                    None => None,
+                };
                 Ok(ast::Expr::Function(Function {
                     name: ast::ObjectName(vec![Ident {
                         value: func_name.to_string(),
                         quote_style: None,
                     }]),
                     args,
-                    filter: None,
+                    filter,
                     null_treatment: None,
                     over: None,
                     distinct: agg.distinct,
@@ -355,6 +377,40 @@ impl Unparser<'_> {
         Ok(ast::Expr::Identifier(self.new_ident(col.name.to_string())))
     }
 
+    fn convert_bound(
+        &self,
+        bound: &datafusion_expr::window_frame::WindowFrameBound,
+    ) -> ast::WindowFrameBound {
+        match bound {
+            datafusion_expr::window_frame::WindowFrameBound::Preceding(val) => {
+                ast::WindowFrameBound::Preceding(
+                    self.scalar_to_sql(val).map(Box::new).ok(),
+                )
+            }
+            datafusion_expr::window_frame::WindowFrameBound::Following(val) => {
+                ast::WindowFrameBound::Following(
+                    self.scalar_to_sql(val).map(Box::new).ok(),
+                )
+            }
+            datafusion_expr::window_frame::WindowFrameBound::CurrentRow => {
+                ast::WindowFrameBound::CurrentRow
+            }
+        }
+    }
+
+    fn function_args_to_sql(&self, args: &[Expr]) -> Result<Vec<ast::FunctionArg>> {
+        args.iter()
+            .map(|e| {
+                if matches!(e, Expr::Wildcard { qualifier: None }) {
+                    Ok(ast::FunctionArg::Unnamed(ast::FunctionArgExpr::Wildcard))
+                } else {
+                    self.expr_to_sql(e)
+                        .map(|e| ast::FunctionArg::Unnamed(ast::FunctionArgExpr::Expr(e)))
+                }
+            })
+            .collect::<Result<Vec<_>>>()
+    }
+
     pub(super) fn new_ident(&self, str: String) -> ast::Ident {
         ast::Ident {
             value: str,
@@ -735,8 +791,10 @@ mod tests { use arrow::datatypes::{Field, Schema}; use datafusion_common::TableReference; use datafusion_expr::{ - case, col, exists, expr::AggregateFunction, lit, not, not_exists, table_scan, - ColumnarValue, ScalarUDF, ScalarUDFImpl, Signature, Volatility, + case, col, exists, + expr::{AggregateFunction, AggregateFunctionDefinition}, + lit, not, not_exists, table_scan, wildcard, ColumnarValue, ScalarUDF, + ScalarUDFImpl, Signature, Volatility, WindowFrame, WindowFunctionDefinition, }; use crate::unparser::dialect::CustomDialect; @@ -901,6 +959,53 @@ mod tests { }), "COUNT(DISTINCT *)", ), + ( + Expr::AggregateFunction(AggregateFunction { + func_def: AggregateFunctionDefinition::BuiltIn( + datafusion_expr::AggregateFunction::Count, + ), + args: vec![Expr::Wildcard { qualifier: None }], + distinct: false, + filter: Some(Box::new(lit(true))), + order_by: None, + null_treatment: None, + }), + "COUNT(*) FILTER (WHERE true)", + ), + ( + Expr::WindowFunction(WindowFunction { + fun: WindowFunctionDefinition::BuiltInWindowFunction( + datafusion_expr::BuiltInWindowFunction::RowNumber, + ), + args: vec![col("col")], + partition_by: vec![], + order_by: vec![], + window_frame: WindowFrame::new(None), + null_treatment: None, + }), + r#"ROW_NUMBER("col") OVER (ROWS BETWEEN NULL PRECEDING AND NULL FOLLOWING)"#, + ), + ( + Expr::WindowFunction(WindowFunction { + fun: WindowFunctionDefinition::AggregateFunction( + datafusion_expr::AggregateFunction::Count, + ), + args: vec![wildcard()], + partition_by: vec![], + order_by: vec![], + window_frame: WindowFrame::new_bounds( + datafusion_expr::WindowFrameUnits::Range, + datafusion_expr::WindowFrameBound::Preceding( + ScalarValue::UInt32(Some(6)), + ), + datafusion_expr::WindowFrameBound::Following( + ScalarValue::UInt32(Some(2)), + ), + ), + null_treatment: None, + }), + r#"COUNT(*) OVER (RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING)"#, + ), (col("a").is_not_null(), r#""a" IS NOT NULL"#), ( (col("a") + col("b")).gt(lit(4)).is_true(), From 4ec3d51ebae0606edc6d99d1fcf5e672a89a373e Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Fri, 26 Apr 2024 12:00:07 -0400 Subject: [PATCH 2/6] Make modules public, add Default impl's. 
(#10239) --- datafusion/functions/src/string/ascii.rs | 7 +++ datafusion/functions/src/string/bit_length.rs | 6 +++ datafusion/functions/src/string/btrim.rs | 6 +++ datafusion/functions/src/string/chr.rs | 6 +++ .../functions/src/string/levenshtein.rs | 6 +++ datafusion/functions/src/string/lower.rs | 6 +++ datafusion/functions/src/string/ltrim.rs | 6 +++ datafusion/functions/src/string/mod.rs | 44 +++++++++---------- .../functions/src/string/octet_length.rs | 6 +++ datafusion/functions/src/string/overlay.rs | 6 +++ datafusion/functions/src/string/repeat.rs | 6 +++ datafusion/functions/src/string/replace.rs | 6 +++ datafusion/functions/src/string/rtrim.rs | 6 +++ datafusion/functions/src/string/split_part.rs | 6 +++ .../functions/src/string/starts_with.rs | 7 +++ datafusion/functions/src/string/to_hex.rs | 7 +++ datafusion/functions/src/string/upper.rs | 6 +++ datafusion/functions/src/string/uuid.rs | 6 +++ .../functions/src/unicode/character_length.rs | 6 +++ .../functions/src/unicode/find_in_set.rs | 6 +++ datafusion/functions/src/unicode/left.rs | 6 +++ datafusion/functions/src/unicode/lpad.rs | 6 +++ datafusion/functions/src/unicode/mod.rs | 22 +++++----- datafusion/functions/src/unicode/reverse.rs | 6 +++ datafusion/functions/src/unicode/right.rs | 6 +++ datafusion/functions/src/unicode/rpad.rs | 6 +++ datafusion/functions/src/unicode/strpos.rs | 6 +++ datafusion/functions/src/unicode/substr.rs | 6 +++ .../functions/src/unicode/substrindex.rs | 6 +++ datafusion/functions/src/unicode/translate.rs | 6 +++ 30 files changed, 204 insertions(+), 33 deletions(-) diff --git a/datafusion/functions/src/string/ascii.rs b/datafusion/functions/src/string/ascii.rs index 15a3c2391ac6..9e1e6b81b61d 100644 --- a/datafusion/functions/src/string/ascii.rs +++ b/datafusion/functions/src/string/ascii.rs @@ -47,6 +47,13 @@ pub fn ascii(args: &[ArrayRef]) -> Result { pub struct AsciiFunc { signature: Signature, } + +impl Default for AsciiFunc { + fn default() -> Self { + Self::new() + } +} + impl AsciiFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/string/bit_length.rs b/datafusion/functions/src/string/bit_length.rs index 17c49216553b..65ec1a4a7734 100644 --- a/datafusion/functions/src/string/bit_length.rs +++ b/datafusion/functions/src/string/bit_length.rs @@ -31,6 +31,12 @@ pub struct BitLengthFunc { signature: Signature, } +impl Default for BitLengthFunc { + fn default() -> Self { + Self::new() + } +} + impl BitLengthFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/string/btrim.rs b/datafusion/functions/src/string/btrim.rs index 971f7bbd4d92..97b54a194a27 100644 --- a/datafusion/functions/src/string/btrim.rs +++ b/datafusion/functions/src/string/btrim.rs @@ -41,6 +41,12 @@ pub struct BTrimFunc { aliases: Vec, } +impl Default for BTrimFunc { + fn default() -> Self { + Self::new() + } +} + impl BTrimFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/string/chr.rs b/datafusion/functions/src/string/chr.rs index 21d79cf6b0f1..4da7dc01594d 100644 --- a/datafusion/functions/src/string/chr.rs +++ b/datafusion/functions/src/string/chr.rs @@ -65,6 +65,12 @@ pub struct ChrFunc { signature: Signature, } +impl Default for ChrFunc { + fn default() -> Self { + Self::new() + } +} + impl ChrFunc { pub fn new() -> Self { Self { diff --git a/datafusion/functions/src/string/levenshtein.rs b/datafusion/functions/src/string/levenshtein.rs index ec22b0a4a480..3edf6de8c863 100644 --- 
a/datafusion/functions/src/string/levenshtein.rs +++ b/datafusion/functions/src/string/levenshtein.rs @@ -34,6 +34,12 @@ pub struct LevenshteinFunc { signature: Signature, } +impl Default for LevenshteinFunc { + fn default() -> Self { + Self::new() + } +} + impl LevenshteinFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/string/lower.rs b/datafusion/functions/src/string/lower.rs index b9b3840252c5..29ca682c380b 100644 --- a/datafusion/functions/src/string/lower.rs +++ b/datafusion/functions/src/string/lower.rs @@ -31,6 +31,12 @@ pub struct LowerFunc { signature: Signature, } +impl Default for LowerFunc { + fn default() -> Self { + Self::new() + } +} + impl LowerFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/string/ltrim.rs b/datafusion/functions/src/string/ltrim.rs index 1a6a9d497f66..ef05a2cb2a13 100644 --- a/datafusion/functions/src/string/ltrim.rs +++ b/datafusion/functions/src/string/ltrim.rs @@ -40,6 +40,12 @@ pub struct LtrimFunc { signature: Signature, } +impl Default for LtrimFunc { + fn default() -> Self { + Self::new() + } +} + impl LtrimFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/string/mod.rs b/datafusion/functions/src/string/mod.rs index 9eb2a7426fba..52411142cb8d 100644 --- a/datafusion/functions/src/string/mod.rs +++ b/datafusion/functions/src/string/mod.rs @@ -21,28 +21,28 @@ use std::sync::Arc; use datafusion_expr::ScalarUDF; -mod ascii; -mod bit_length; -mod btrim; -mod chr; -mod common; -mod concat; -mod concat_ws; -mod ends_with; -mod initcap; -mod levenshtein; -mod lower; -mod ltrim; -mod octet_length; -mod overlay; -mod repeat; -mod replace; -mod rtrim; -mod split_part; -mod starts_with; -mod to_hex; -mod upper; -mod uuid; +pub mod ascii; +pub mod bit_length; +pub mod btrim; +pub mod chr; +pub mod common; +pub mod concat; +pub mod concat_ws; +pub mod ends_with; +pub mod initcap; +pub mod levenshtein; +pub mod lower; +pub mod ltrim; +pub mod octet_length; +pub mod overlay; +pub mod repeat; +pub mod replace; +pub mod rtrim; +pub mod split_part; +pub mod starts_with; +pub mod to_hex; +pub mod upper; +pub mod uuid; // create UDFs make_udf_function!(ascii::AsciiFunc, ASCII, ascii); diff --git a/datafusion/functions/src/string/octet_length.rs b/datafusion/functions/src/string/octet_length.rs index bdd262b7e37e..12980fab1f11 100644 --- a/datafusion/functions/src/string/octet_length.rs +++ b/datafusion/functions/src/string/octet_length.rs @@ -31,6 +31,12 @@ pub struct OctetLengthFunc { signature: Signature, } +impl Default for OctetLengthFunc { + fn default() -> Self { + Self::new() + } +} + impl OctetLengthFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/string/overlay.rs b/datafusion/functions/src/string/overlay.rs index 3f92a73c1af9..772b04136129 100644 --- a/datafusion/functions/src/string/overlay.rs +++ b/datafusion/functions/src/string/overlay.rs @@ -34,6 +34,12 @@ pub struct OverlayFunc { signature: Signature, } +impl Default for OverlayFunc { + fn default() -> Self { + Self::new() + } +} + impl OverlayFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/string/repeat.rs b/datafusion/functions/src/string/repeat.rs index 77521120d9d8..a70d0a162562 100644 --- a/datafusion/functions/src/string/repeat.rs +++ b/datafusion/functions/src/string/repeat.rs @@ -34,6 +34,12 @@ pub struct RepeatFunc { signature: Signature, } +impl Default for RepeatFunc { + fn default() -> Self { + Self::new() + } +} 
+ impl RepeatFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/string/replace.rs b/datafusion/functions/src/string/replace.rs index 01a3762acaf4..4cebbba839fa 100644 --- a/datafusion/functions/src/string/replace.rs +++ b/datafusion/functions/src/string/replace.rs @@ -34,6 +34,12 @@ pub struct ReplaceFunc { signature: Signature, } +impl Default for ReplaceFunc { + fn default() -> Self { + Self::new() + } +} + impl ReplaceFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/string/rtrim.rs b/datafusion/functions/src/string/rtrim.rs index e6e93e38c966..2e39080e226b 100644 --- a/datafusion/functions/src/string/rtrim.rs +++ b/datafusion/functions/src/string/rtrim.rs @@ -40,6 +40,12 @@ pub struct RtrimFunc { signature: Signature, } +impl Default for RtrimFunc { + fn default() -> Self { + Self::new() + } +} + impl RtrimFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/string/split_part.rs b/datafusion/functions/src/string/split_part.rs index 4396386afff5..517fa93e5284 100644 --- a/datafusion/functions/src/string/split_part.rs +++ b/datafusion/functions/src/string/split_part.rs @@ -34,6 +34,12 @@ pub struct SplitPartFunc { signature: Signature, } +impl Default for SplitPartFunc { + fn default() -> Self { + Self::new() + } +} + impl SplitPartFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/string/starts_with.rs b/datafusion/functions/src/string/starts_with.rs index edbf5c9217a7..05bd960ff14b 100644 --- a/datafusion/functions/src/string/starts_with.rs +++ b/datafusion/functions/src/string/starts_with.rs @@ -43,6 +43,13 @@ pub fn starts_with(args: &[ArrayRef]) -> Result { pub struct StartsWithFunc { signature: Signature, } + +impl Default for StartsWithFunc { + fn default() -> Self { + Self::new() + } +} + impl StartsWithFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/string/to_hex.rs b/datafusion/functions/src/string/to_hex.rs index 5050d8bab3e9..79aa9254f9b1 100644 --- a/datafusion/functions/src/string/to_hex.rs +++ b/datafusion/functions/src/string/to_hex.rs @@ -63,6 +63,13 @@ where pub struct ToHexFunc { signature: Signature, } + +impl Default for ToHexFunc { + fn default() -> Self { + Self::new() + } +} + impl ToHexFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/string/upper.rs b/datafusion/functions/src/string/upper.rs index 8f03d7dc6bbc..da31948fbcfa 100644 --- a/datafusion/functions/src/string/upper.rs +++ b/datafusion/functions/src/string/upper.rs @@ -28,6 +28,12 @@ pub struct UpperFunc { signature: Signature, } +impl Default for UpperFunc { + fn default() -> Self { + Self::new() + } +} + impl UpperFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/string/uuid.rs b/datafusion/functions/src/string/uuid.rs index 9c97b4dd7413..3ddc320fcec1 100644 --- a/datafusion/functions/src/string/uuid.rs +++ b/datafusion/functions/src/string/uuid.rs @@ -32,6 +32,12 @@ pub struct UuidFunc { signature: Signature, } +impl Default for UuidFunc { + fn default() -> Self { + Self::new() + } +} + impl UuidFunc { pub fn new() -> Self { Self { diff --git a/datafusion/functions/src/unicode/character_length.rs b/datafusion/functions/src/unicode/character_length.rs index 7e2723771ff2..4f32f4c17776 100644 --- a/datafusion/functions/src/unicode/character_length.rs +++ b/datafusion/functions/src/unicode/character_length.rs @@ -33,6 +33,12 @@ pub struct CharacterLengthFunc { 
aliases: Vec, } +impl Default for CharacterLengthFunc { + fn default() -> Self { + Self::new() + } +} + impl CharacterLengthFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/unicode/find_in_set.rs b/datafusion/functions/src/unicode/find_in_set.rs index fc45f897c5f4..7c864bc191d7 100644 --- a/datafusion/functions/src/unicode/find_in_set.rs +++ b/datafusion/functions/src/unicode/find_in_set.rs @@ -35,6 +35,12 @@ pub struct FindInSetFunc { signature: Signature, } +impl Default for FindInSetFunc { + fn default() -> Self { + Self::new() + } +} + impl FindInSetFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/unicode/left.rs b/datafusion/functions/src/unicode/left.rs index 24ea2d5a8f25..7d456f5f1e94 100644 --- a/datafusion/functions/src/unicode/left.rs +++ b/datafusion/functions/src/unicode/left.rs @@ -35,6 +35,12 @@ pub struct LeftFunc { signature: Signature, } +impl Default for LeftFunc { + fn default() -> Self { + Self::new() + } +} + impl LeftFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/unicode/lpad.rs b/datafusion/functions/src/unicode/lpad.rs index 47208903bcef..ce5e0064362b 100644 --- a/datafusion/functions/src/unicode/lpad.rs +++ b/datafusion/functions/src/unicode/lpad.rs @@ -33,6 +33,12 @@ pub struct LPadFunc { signature: Signature, } +impl Default for LPadFunc { + fn default() -> Self { + Self::new() + } +} + impl LPadFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/unicode/mod.rs b/datafusion/functions/src/unicode/mod.rs index eba4cd5048eb..5a8e953bc161 100644 --- a/datafusion/functions/src/unicode/mod.rs +++ b/datafusion/functions/src/unicode/mod.rs @@ -21,17 +21,17 @@ use std::sync::Arc; use datafusion_expr::ScalarUDF; -mod character_length; -mod find_in_set; -mod left; -mod lpad; -mod reverse; -mod right; -mod rpad; -mod strpos; -mod substr; -mod substrindex; -mod translate; +pub mod character_length; +pub mod find_in_set; +pub mod left; +pub mod lpad; +pub mod reverse; +pub mod right; +pub mod rpad; +pub mod strpos; +pub mod substr; +pub mod substrindex; +pub mod translate; // create UDFs make_udf_function!( diff --git a/datafusion/functions/src/unicode/reverse.rs b/datafusion/functions/src/unicode/reverse.rs index 6b24c2336810..52666cc57059 100644 --- a/datafusion/functions/src/unicode/reverse.rs +++ b/datafusion/functions/src/unicode/reverse.rs @@ -32,6 +32,12 @@ pub struct ReverseFunc { signature: Signature, } +impl Default for ReverseFunc { + fn default() -> Self { + Self::new() + } +} + impl ReverseFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/unicode/right.rs b/datafusion/functions/src/unicode/right.rs index dddbf31e721b..20cbbe020ff1 100644 --- a/datafusion/functions/src/unicode/right.rs +++ b/datafusion/functions/src/unicode/right.rs @@ -35,6 +35,12 @@ pub struct RightFunc { signature: Signature, } +impl Default for RightFunc { + fn default() -> Self { + Self::new() + } +} + impl RightFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/unicode/rpad.rs b/datafusion/functions/src/unicode/rpad.rs index 8946f07006b7..fc6bf1ffe748 100644 --- a/datafusion/functions/src/unicode/rpad.rs +++ b/datafusion/functions/src/unicode/rpad.rs @@ -33,6 +33,12 @@ pub struct RPadFunc { signature: Signature, } +impl Default for RPadFunc { + fn default() -> Self { + Self::new() + } +} + impl RPadFunc { pub fn new() -> Self { use DataType::*; diff --git 
a/datafusion/functions/src/unicode/strpos.rs b/datafusion/functions/src/unicode/strpos.rs index 4ebdd9d58623..395fd0b77d12 100644 --- a/datafusion/functions/src/unicode/strpos.rs +++ b/datafusion/functions/src/unicode/strpos.rs @@ -36,6 +36,12 @@ pub struct StrposFunc { aliases: Vec, } +impl Default for StrposFunc { + fn default() -> Self { + Self::new() + } +} + impl StrposFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs index 260937a01a74..c297182057fe 100644 --- a/datafusion/functions/src/unicode/substr.rs +++ b/datafusion/functions/src/unicode/substr.rs @@ -34,6 +34,12 @@ pub struct SubstrFunc { signature: Signature, } +impl Default for SubstrFunc { + fn default() -> Self { + Self::new() + } +} + impl SubstrFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/unicode/substrindex.rs b/datafusion/functions/src/unicode/substrindex.rs index da4ff55828e9..a057e4298546 100644 --- a/datafusion/functions/src/unicode/substrindex.rs +++ b/datafusion/functions/src/unicode/substrindex.rs @@ -34,6 +34,12 @@ pub struct SubstrIndexFunc { aliases: Vec, } +impl Default for SubstrIndexFunc { + fn default() -> Self { + Self::new() + } +} + impl SubstrIndexFunc { pub fn new() -> Self { use DataType::*; diff --git a/datafusion/functions/src/unicode/translate.rs b/datafusion/functions/src/unicode/translate.rs index 25daf8738b21..5f64d8875bf5 100644 --- a/datafusion/functions/src/unicode/translate.rs +++ b/datafusion/functions/src/unicode/translate.rs @@ -35,6 +35,12 @@ pub struct TranslateFunc { signature: Signature, } +impl Default for TranslateFunc { + fn default() -> Self { + Self::new() + } +} + impl TranslateFunc { pub fn new() -> Self { use DataType::*; From 44f11471ae6eb11e340bf2c7e55e8ecefb589549 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 26 Apr 2024 10:01:29 -0600 Subject: [PATCH 3/6] Update release scripts now that DataFusion is TLP (#10235) --- dev/release/README.md | 81 +++++++++++-------------- dev/release/create-tarball.sh | 20 +++--- dev/release/publish_homebrew.sh | 6 +- dev/release/rat_exclude_files.txt | 78 ------------------------ dev/release/release-crates.sh | 2 +- dev/release/release-tarball.sh | 14 ++--- dev/release/verify-release-candidate.sh | 8 +-- dev/update_arrow_deps.py | 2 +- 8 files changed, 61 insertions(+), 150 deletions(-) diff --git a/dev/release/README.md b/dev/release/README.md index 32735588ed8f..f772f1e42c1e 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -48,8 +48,8 @@ patch release: - Created a personal access token in GitHub for changelog automation script. - Github PAT should be created with `repo` access - Make sure your signing key is added to the following files in SVN: - - https://dist.apache.org/repos/dist/dev/arrow/KEYS - - https://dist.apache.org/repos/dist/release/arrow/KEYS + - https://dist.apache.org/repos/dist/dev/datafusion/KEYS + - https://dist.apache.org/repos/dist/release/datafusion/KEYS ### How to add signing key @@ -58,8 +58,8 @@ See instructions at https://infra.apache.org/release-signing.html#generate for g Committers can add signing keys in Subversion client with their ASF account. e.g.: ```bash -$ svn co https://dist.apache.org/repos/dist/dev/arrow -$ cd arrow +$ svn co https://dist.apache.org/repos/dist/dev/datafusion +$ cd datafusion $ editor KEYS $ svn ci KEYS ``` @@ -128,7 +128,7 @@ release. See [#9697](https://github.com/apache/datafusion/pull/9697) for an example. 
-Here are the commands that could be used to prepare the `5.1.0` release: +Here are the commands that could be used to prepare the `38.0.0` release: ### Update Version @@ -139,10 +139,10 @@ git fetch apache git checkout apache/main ``` -Update datafusion version in `datafusion/Cargo.toml` to `5.1.0`: +Update datafusion version in `datafusion/Cargo.toml` to `38.0.0`: ``` -./dev/update_datafusion_versions.py 5.1.0 +./dev/update_datafusion_versions.py 38.0.0 ``` Lastly commit the version change: @@ -167,7 +167,7 @@ Pick numbers in sequential order, with `0` for `rc0`, `1` for `rc1`, etc. While the official release artifacts are signed tarballs and zip files, we also tag the commit it was created for convenience and code archaeology. -Using a string such as `5.1.0` as the ``, create and push the tag by running these commands: +Using a string such as `38.0.0` as the ``, create and push the tag by running these commands: ```shell git fetch apache @@ -181,29 +181,29 @@ git push apache Run `create-tarball.sh` with the `` tag and `` and you found in previous steps: ```shell -GH_TOKEN= ./dev/release/create-tarball.sh 5.1.0 0 +GH_TOKEN= ./dev/release/create-tarball.sh 38.0.0 0 ``` The `create-tarball.sh` script -1. creates and uploads all release candidate artifacts to the [arrow - dev](https://dist.apache.org/repos/dist/dev/arrow) location on the +1. creates and uploads all release candidate artifacts to the [datafusion + dev](https://dist.apache.org/repos/dist/dev/datafusion) location on the apache distribution svn server 2. provide you an email template to - send to dev@arrow.apache.org for release voting. + send to dev@datafusion.apache.org for release voting. ### Vote on Release Candidate artifacts -Send the email output from the script to dev@arrow.apache.org. The email should look like +Send the email output from the script to dev@datafusion.apache.org. The email should look like ``` -To: dev@arrow.apache.org -Subject: [VOTE][DataFusion] Release Apache DataFusion 5.1.0 RC0 +To: dev@datafusion.apache.org +Subject: [VOTE] Release Apache DataFusion 38.0.0 RC1 Hi, -I would like to propose a release of Apache DataFusion version 5.1.0. +I would like to propose a release of Apache DataFusion version 38.0.0. This release candidate is based on commit: a5dd428f57e62db20a945e8b1895de91405958c4 [1] The proposed release artifacts and signatures are hosted at [2]. @@ -214,16 +214,16 @@ and vote on the release. The vote will be open for at least 72 hours. -[ ] +1 Release this as Apache DataFusion 5.1.0 +[ ] +1 Release this as Apache DataFusion 38.0.0 [ ] +0 -[ ] -1 Do not release this as Apache DataFusion 5.1.0 because... +[ ] -1 Do not release this as Apache DataFusion 38.0.0 because... Here is my vote: +1 [1]: https://github.com/apache/datafusion/tree/a5dd428f57e62db20a945e8b1895de91405958c4 -[2]: https://dist.apache.org/repos/dist/dev/arrow/apache-datafusion-5.1.0 +[2]: https://dist.apache.org/repos/dist/dev/datafusion/apache-datafusion-38.0.0 [3]: https://github.com/apache/datafusion/blob/a5dd428f57e62db20a945e8b1895de91405958c4/CHANGELOG.md ``` @@ -234,7 +234,7 @@ For the release to become "official" it needs at least three PMC members to vote The `dev/release/verify-release-candidate.sh` is a script in this repository that can assist in the verification process. Run it like: ``` -./dev/release/verify-release-candidate.sh 5.1.0 0 +./dev/release/verify-release-candidate.sh 38.0.0 0 ``` #### If the release is not approved @@ -249,11 +249,11 @@ NOTE: steps in this section can only be done by PMC members. 
### After the release is approved Move artifacts to the release location in SVN, e.g. -https://dist.apache.org/repos/dist/release/datafusion/datafusion-5.1.0/, using +https://dist.apache.org/repos/dist/release/datafusion/datafusion-38.0.0/, using the `release-tarball.sh` script: ```shell -./dev/release/release-tarball.sh 5.1.0 0 +./dev/release/release-tarball.sh 38.0.0 0 ``` Congratulations! The release is now official! @@ -263,9 +263,9 @@ Congratulations! The release is now official! Tag the same release candidate commit with the final release tag ``` -git co apache/5.1.0-rc0 -git tag 5.1.0 -git push apache 5.1.0 +git co apache/38.0.0-rc0 +git tag 38.0.0 +git push apache 38.0.0 ``` ### Publish on Crates.io @@ -300,7 +300,7 @@ of the following crates: Download and unpack the official release tarball Verify that the Cargo.toml in the tarball contains the correct version -(e.g. `version = "5.1.0"`) and then publish the crates by running the script `release-crates.sh` +(e.g. `version = "38.0.0"`) and then publish the crates by running the script `release-crates.sh` in a directory extracted from the source tarball that was voted on. Note that this script doesn't work if run in a Git repo. @@ -413,10 +413,9 @@ https://crates.io/crates/datafusion-substrait/28.0.0 ### Add the release to Apache Reporter -Add the release to https://reporter.apache.org/addrelease.html?arrow with a version name prefixed with `RS-DATAFUSION-`, -for example `RS-DATAFUSION-14.0.0`. +Add the release to https://reporter.apache.org/addrelease.html?datafusion using the version number e.g. 38.0.0. -The release information is used to generate a template for a board report (see example +The release information is used to generate a template for a board report (see example from Apache Arrow project [here](https://github.com/apache/arrow/pull/14357)). ### Delete old RCs and Releases @@ -431,13 +430,13 @@ Release candidates should be deleted once the release is published. Get a list of DataFusion release candidates: ```bash -svn ls https://dist.apache.org/repos/dist/dev/arrow | grep datafusion +svn ls https://dist.apache.org/repos/dist/dev/datafusion ``` Delete a release candidate: ```bash -svn delete -m "delete old DataFusion RC" https://dist.apache.org/repos/dist/dev/datafusion/apache-datafusion-7.1.0-rc1/ +svn delete -m "delete old DataFusion RC" https://dist.apache.org/repos/dist/dev/datafusion/apache-datafusion-38.0.0-rc1/ ``` #### Deleting old releases from `release` svn @@ -447,35 +446,25 @@ Only the latest release should be available. Delete old releases after publishin Get a list of DataFusion releases: ```bash -svn ls https://dist.apache.org/repos/dist/release/arrow | grep datafusion +svn ls https://dist.apache.org/repos/dist/release/datafusion ``` Delete a release: ```bash -svn delete -m "delete old DataFusion release" https://dist.apache.org/repos/dist/release/datafusion/datafusion-7.0.0 +svn delete -m "delete old DataFusion release" https://dist.apache.org/repos/dist/release/datafusion/datafusion-37.0.0 ``` -### Publish the User Guide to the Arrow Site - -- Run the `build.sh` in the `docs` directory from the release tarball. 
-- Clone the [arrow-site](https://github.com/apache/arrow-site) repository -- Checkout the `asf-site` branch -- Copy content from `docs/build/html/*` to the `datafusion` directory in arrow-site -- Create a PR against the `asf-site` branch ([example](https://github.com/apache/arrow-site/pull/237)) -- Once the PR is merged, the content will be published to https://datafusion.apache.org/ by GitHub Pages (this - can take some time). - ### Optional: Write a blog post announcing the release -We typically crowdsource release announcements by collaborating on a Google document, usually starting +We typically crowd source release announcements by collaborating on a Google document, usually starting with a copy of the previous release announcement. Run the following commands to get the number of commits and number of unique contributors for inclusion in the blog post. ```bash -git log --pretty=oneline 10.0.0..11.0.0 datafusion datafusion-cli datafusion-examples | wc -l -git shortlog -sn 10.0.0..11.0.0 datafusion datafusion-cli datafusion-examples | wc -l +git log --pretty=oneline 37.0.0..38.0.0 datafusion datafusion-cli datafusion-examples | wc -l +git shortlog -sn 37.0.0..38.0.0 datafusion datafusion-cli datafusion-examples | wc -l ``` Once there is consensus on the contents of the post, create a PR to add a blog post to the diff --git a/dev/release/create-tarball.sh b/dev/release/create-tarball.sh index e345773287cf..693d069a9323 100755 --- a/dev/release/create-tarball.sh +++ b/dev/release/create-tarball.sh @@ -21,9 +21,9 @@ # Adapted from https://github.com/apache/arrow-rs/tree/master/dev/release/create-tarball.sh # This script creates a signed tarball in -# dev/dist/apache-arrow-datafusion--.tar.gz and uploads it to -# the "dev" area of the dist.apache.arrow repository and prepares an -# email for sending to the dev@arrow.apache.org list for a formal +# dev/dist/apache-datafusion--.tar.gz and uploads it to +# the "dev" area of the dist.apache.datafusion repository and prepares an +# email for sending to the dev@datafusion.apache.org list for a formal # vote. 
# # See release/README.md for full release instructions @@ -65,21 +65,21 @@ tag="${version}-rc${rc}" echo "Attempting to create ${tarball} from tag ${tag}" release_hash=$(cd "${SOURCE_TOP_DIR}" && git rev-list --max-count=1 ${tag}) -release=apache-arrow-datafusion-${version} +release=apache-datafusion-${version} distdir=${SOURCE_TOP_DIR}/dev/dist/${release}-rc${rc} tarname=${release}.tar.gz tarball=${distdir}/${tarname} -url="https://dist.apache.org/repos/dist/dev/arrow/${release}-rc${rc}" +url="https://dist.apache.org/repos/dist/dev/datafusion/${release}-rc${rc}" if [ -z "$release_hash" ]; then echo "Cannot continue: unknown git tag: ${tag}" fi -echo "Draft email for dev@arrow.apache.org mailing list" +echo "Draft email for dev@datafusion.apache.org mailing list" echo "" echo "---------------------------------------------------------" cat < ${tarball}.sha256 (cd ${distdir} && shasum -a 512 ${tarname}) > ${tarball}.sha512 -echo "Uploading to apache dist/dev to ${url}" -svn co --depth=empty https://dist.apache.org/repos/dist/dev/arrow ${SOURCE_TOP_DIR}/dev/dist +echo "Uploading to datafusion dist/dev to ${url}" +svn co --depth=empty https://dist.apache.org/repos/dist/dev/datafusion ${SOURCE_TOP_DIR}/dev/dist svn add ${distdir} svn ci -m "Apache DataFusion ${version} ${rc}" ${distdir} diff --git a/dev/release/publish_homebrew.sh b/dev/release/publish_homebrew.sh index 1cf7160d4284..20955953e85a 100644 --- a/dev/release/publish_homebrew.sh +++ b/dev/release/publish_homebrew.sh @@ -39,8 +39,8 @@ else # Fallback num_processing_units=1 fi -url="https://www.apache.org/dyn/closer.lua?path=arrow/arrow-datafusion-${version}/apache-arrow-datafusion-${version}.tar.gz" -sha256="$(curl https://dist.apache.org/repos/dist/release/arrow/arrow-datafusion-${version}/apache-arrow-datafusion-${version}.tar.gz.sha256 | cut -d' ' -f1)" +url="https://www.apache.org/dyn/closer.lua?path=datafusion/datafusion-${version}/apache-datafusion-${version}.tar.gz" +sha256="$(curl https://dist.apache.org/repos/dist/release/datafusion/datafusion-${version}/apache-datafusion-${version}.tar.gz.sha256 | cut -d' ' -f1)" pushd "$(brew --repository homebrew/core)" @@ -52,7 +52,7 @@ fi echo "Updating working copy" git fetch --all --prune --tags --force -j$num_processing_units -branch=apache-arrow-datafusion-${version} +branch=apache-datafusion-${version} echo "Creating branch: ${branch}" git branch -D ${branch} || : git checkout -b ${branch} origin/master diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index ce5635b6daf4..897a35172c9d 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -15,84 +15,8 @@ ci/etc/*.patch ci/vcpkg/*.patch CHANGELOG.md datafusion/CHANGELOG.md -python/CHANGELOG.md -conbench/benchmarks.json -conbench/requirements.txt -conbench/requirements-test.txt -conbench/.flake8 -conbench/.isort.cfg dev/requirements*.txt -dev/archery/MANIFEST.in -dev/archery/requirements*.txt -dev/archery/archery/tests/fixtures/* -dev/archery/archery/crossbow/tests/fixtures/* dev/release/rat_exclude_files.txt -dev/tasks/homebrew-formulae/apache-arrow.rb -dev/tasks/linux-packages/apache-arrow-apt-source/debian/apache-arrow-apt-source.install -dev/tasks/linux-packages/apache-arrow-apt-source/debian/compat -dev/tasks/linux-packages/apache-arrow-apt-source/debian/control -dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules -dev/tasks/linux-packages/apache-arrow-apt-source/debian/source/format -dev/tasks/linux-packages/apache-arrow/debian/compat 
-dev/tasks/linux-packages/apache-arrow/debian/control.in -dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-1.0.install -dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-cuda-1.0.install -dev/tasks/linux-packages/apache-arrow/debian/gir1.2-arrow-dataset-1.0.install -dev/tasks/linux-packages/apache-arrow/debian/gir1.2-gandiva-1.0.install -dev/tasks/linux-packages/apache-arrow/debian/gir1.2-parquet-1.0.install -dev/tasks/linux-packages/apache-arrow/debian/gir1.2-plasma-1.0.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.doc-base -dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.links -dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib400.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda-glib400.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-cuda400.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.doc-base -dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib-doc.links -dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset-glib400.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-dataset400.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-flight400.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight400.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-python400.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow400.install -dev/tasks/linux-packages/apache-arrow/debian/libgandiva-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.doc-base -dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.install -dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-doc.links -dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib400.install -dev/tasks/linux-packages/apache-arrow/debian/libgandiva400.install -dev/tasks/linux-packages/apache-arrow/debian/libparquet-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.doc-base -dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.install -dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib-doc.links -dev/tasks/linux-packages/apache-arrow/debian/libparquet-glib400.install -dev/tasks/linux-packages/apache-arrow/debian/libparquet400.install -dev/tasks/linux-packages/apache-arrow/debian/libplasma-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.doc-base 
-dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.install -dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib-doc.links -dev/tasks/linux-packages/apache-arrow/debian/libplasma-glib400.install -dev/tasks/linux-packages/apache-arrow/debian/libplasma400.install -dev/tasks/linux-packages/apache-arrow/debian/patches/series -dev/tasks/linux-packages/apache-arrow/debian/plasma-store-server.install -dev/tasks/linux-packages/apache-arrow/debian/rules -dev/tasks/linux-packages/apache-arrow/debian/source/format -dev/tasks/linux-packages/apache-arrow/debian/watch -dev/tasks/requirements*.txt -dev/tasks/conda-recipes/* pax_global_header MANIFEST.in __init__.pxd @@ -109,8 +33,6 @@ requirements.txt .gitattributes rust-toolchain benchmarks/queries/q*.sql -python/rust-toolchain -python/requirements*.txt **/testdata/* benchmarks/queries/* benchmarks/expected-plans/* diff --git a/dev/release/release-crates.sh b/dev/release/release-crates.sh index 00ce77a86749..b9bda68b780b 100644 --- a/dev/release/release-crates.sh +++ b/dev/release/release-crates.sh @@ -21,7 +21,7 @@ # This script publishes datafusion crates to crates.io. # # This script should only be run after the release has been approved -# by the arrow PMC committee. +# by the Apache DataFusion PMC committee. # # See release/README.md for full release instructions diff --git a/dev/release/release-tarball.sh b/dev/release/release-tarball.sh index 74a4bab3aecd..bd858d23a767 100755 --- a/dev/release/release-tarball.sh +++ b/dev/release/release-tarball.sh @@ -21,10 +21,10 @@ # Adapted from https://github.com/apache/arrow-rs/tree/master/dev/release/release-tarball.sh # This script copies a tarball from the "dev" area of the -# dist.apache.arrow repository to the "release" area +# dist.apache.datafusion repository to the "release" area # # This script should only be run after the release has been approved -# by the arrow PMC committee. +# by the Apache DataFusion PMC committee. # # See release/README.md for full release instructions # @@ -43,7 +43,7 @@ fi version=$1 rc=$2 -tmp_dir=tmp-apache-arrow-datafusion-dist +tmp_dir=tmp-apache-datafusion-dist echo "Recreate temporary directory: ${tmp_dir}" rm -rf ${tmp_dir} @@ -52,14 +52,14 @@ mkdir -p ${tmp_dir} echo "Clone dev dist repository" svn \ co \ - https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-datafusion-${version}-rc${rc} \ + https://dist.apache.org/repos/dist/dev/datafusion/apache-datafusion-${version}-rc${rc} \ ${tmp_dir}/dev echo "Clone release dist repository" -svn co https://dist.apache.org/repos/dist/release/arrow ${tmp_dir}/release +svn co https://dist.apache.org/repos/dist/release/datafusion ${tmp_dir}/release echo "Copy ${version}-rc${rc} to release working copy" -release_version=arrow-datafusion-${version} +release_version=datafusion-${version} mkdir -p ${tmp_dir}/release/${release_version} cp -r ${tmp_dir}/dev/* ${tmp_dir}/release/${release_version}/ svn add ${tmp_dir}/release/${release_version} @@ -71,4 +71,4 @@ echo "Clean up" rm -rf ${tmp_dir} echo "Success! 
The release is available here:" -echo " https://dist.apache.org/repos/dist/release/arrow/${release_version}" +echo " https://dist.apache.org/repos/dist/release/datafusion/${release_version}" diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 45e984dec3a0..2c0bd216b3ac 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -33,7 +33,7 @@ set -o pipefail SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" ARROW_DIR="$(dirname $(dirname ${SOURCE_DIR}))" -ARROW_DIST_URL='https://dist.apache.org/repos/dist/dev/arrow' +ARROW_DIST_URL='https://dist.apache.org/repos/dist/dev/datafusion' download_dist_file() { curl \ @@ -45,7 +45,7 @@ download_dist_file() { } download_rc_file() { - download_dist_file apache-arrow-datafusion-${VERSION}-rc${RC_NUMBER}/$1 + download_dist_file apache-datafusion-${VERSION}-rc${RC_NUMBER}/$1 } import_gpg_keys() { @@ -143,11 +143,11 @@ test_source_distribution() { TEST_SUCCESS=no -setup_tempdir "arrow-${VERSION}" +setup_tempdir "datafusion-${VERSION}" echo "Working in sandbox ${ARROW_TMPDIR}" cd ${ARROW_TMPDIR} -dist_name="apache-arrow-datafusion-${VERSION}" +dist_name="apache-datafusion-${VERSION}" import_gpg_keys fetch_archive ${dist_name} tar xf ${dist_name}.tar.gz diff --git a/dev/update_arrow_deps.py b/dev/update_arrow_deps.py index b685ad2738b1..268ded38f6e8 100755 --- a/dev/update_arrow_deps.py +++ b/dev/update_arrow_deps.py @@ -17,7 +17,7 @@ # limitations under the License. # -# Script that updates the arrow dependencies in datafusion and ballista, locally +# Script that updates the arrow dependencies in datafusion locally # # installation: # pip install tomlkit requests From a5ce56831a9ec61e634d1c285c1b28d8c3891503 Mon Sep 17 00:00:00 2001 From: Kevin Mingtarja <69668484+kevinmingtarja@users.noreply.github.com> Date: Sat, 27 Apr 2024 01:08:26 +0700 Subject: [PATCH 4/6] implement rewrite for EliminateLimit (#10253) --- datafusion/optimizer/src/eliminate_limit.rs | 64 ++++++++++++--------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/datafusion/optimizer/src/eliminate_limit.rs b/datafusion/optimizer/src/eliminate_limit.rs index 39231d784e00..1b0907d9736d 100644 --- a/datafusion/optimizer/src/eliminate_limit.rs +++ b/datafusion/optimizer/src/eliminate_limit.rs @@ -18,8 +18,9 @@ //! [`EliminateLimit`] eliminates `LIMIT` when possible use crate::optimizer::ApplyOrder; use crate::{OptimizerConfig, OptimizerRule}; -use datafusion_common::Result; -use datafusion_expr::logical_plan::{EmptyRelation, LogicalPlan}; +use datafusion_common::tree_node::Transformed; +use datafusion_common::{internal_err, Result}; +use datafusion_expr::logical_plan::{tree_node::unwrap_arc, EmptyRelation, LogicalPlan}; /// Optimizer rule to replace `LIMIT 0` or `LIMIT` whose ancestor LIMIT's skip is /// greater than or equal to current's fetch @@ -41,32 +42,10 @@ impl EliminateLimit { impl OptimizerRule for EliminateLimit { fn try_optimize( &self, - plan: &LogicalPlan, + _plan: &LogicalPlan, _config: &dyn OptimizerConfig, ) -> Result> { - if let LogicalPlan::Limit(limit) = plan { - match limit.fetch { - Some(fetch) => { - if fetch == 0 { - return Ok(Some(LogicalPlan::EmptyRelation(EmptyRelation { - produce_one_row: false, - schema: limit.input.schema().clone(), - }))); - } - } - None => { - if limit.skip == 0 { - let input = limit.input.as_ref(); - // input also can be Limit, so we should apply again. - return Ok(Some( - self.try_optimize(input, _config)? 
-                                .unwrap_or_else(|| input.clone()),
-                        ));
-                    }
-                }
-            }
-        }
-        Ok(None)
+        internal_err!("Should have called EliminateLimit::rewrite")
     }
 
     fn name(&self) -> &str {
@@ -76,6 +55,39 @@ impl OptimizerRule for EliminateLimit {
     fn apply_order(&self) -> Option<ApplyOrder> {
         Some(ApplyOrder::BottomUp)
     }
+
+    fn supports_rewrite(&self) -> bool {
+        true
+    }
+
+    fn rewrite(
+        &self,
+        plan: LogicalPlan,
+        _config: &dyn OptimizerConfig,
+    ) -> Result<
+        datafusion_common::tree_node::Transformed<LogicalPlan>,
+        datafusion_common::DataFusionError,
+    > {
+        match plan {
+            LogicalPlan::Limit(limit) => {
+                if let Some(fetch) = limit.fetch {
+                    if fetch == 0 {
+                        return Ok(Transformed::yes(LogicalPlan::EmptyRelation(
+                            EmptyRelation {
+                                produce_one_row: false,
+                                schema: limit.input.schema().clone(),
+                            },
+                        )));
+                    }
+                } else if limit.skip == 0 {
+                    // input also can be Limit, so we should apply again.
+                    return Ok(self.rewrite(unwrap_arc(limit.input), _config).unwrap());
+                }
+                Ok(Transformed::no(LogicalPlan::Limit(limit)))
+            }
+            _ => Ok(Transformed::no(plan)),
+        }
+    }
 }
 
 #[cfg(test)]

From fe103172e74f42f5a8d3323d265c1cdc1147b7e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Berkay=20=C5=9Eahin?= <124376117+berkaysynnada@users.noreply.github.com>
Date: Fri, 26 Apr 2024 21:25:26 +0300
Subject: [PATCH 5/6] Cleanup inactive tests (#10249)

---
 .../src/physical_optimizer/join_selection.rs | 33 -------------------
 1 file changed, 33 deletions(-)

diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs
index 4fefcdf7aad6..b670e8297147 100644
--- a/datafusion/core/src/physical_optimizer/join_selection.rs
+++ b/datafusion/core/src/physical_optimizer/join_selection.rs
@@ -882,28 +882,6 @@ mod tests_statistical {
         (big, medium, small)
     }
 
-    pub(crate) fn crosscheck_plans(plan: Arc<dyn ExecutionPlan>) -> Result<()> {
-        let subrules: Vec> = vec![
-            Box::new(hash_join_convert_symmetric_subrule),
-            Box::new(hash_join_swap_subrule),
-        ];
-        let new_plan = plan
-            .transform_up(|p| apply_subrules(p, &subrules, &ConfigOptions::new()))
-            .data()?;
-        // TODO: End state payloads will be checked here.
- let config = ConfigOptions::new().optimizer; - let collect_left_threshold = config.hash_join_single_partition_threshold; - let collect_threshold_num_rows = config.hash_join_single_partition_threshold_rows; - let _ = new_plan.transform_up(|plan| { - statistical_join_selection_subrule( - plan, - collect_left_threshold, - collect_threshold_num_rows, - ) - })?; - Ok(()) - } - #[tokio::test] async fn test_join_with_swap() { let (big, small) = create_big_and_small(); @@ -958,7 +936,6 @@ mod tests_statistical { swapped_join.right().statistics().unwrap().total_byte_size, Precision::Inexact(2097152) ); - crosscheck_plans(join.clone()).unwrap(); } #[tokio::test] @@ -1001,7 +978,6 @@ mod tests_statistical { swapped_join.right().statistics().unwrap().total_byte_size, Precision::Inexact(2097152) ); - crosscheck_plans(join.clone()).unwrap(); } #[tokio::test] @@ -1055,7 +1031,6 @@ mod tests_statistical { Precision::Inexact(2097152) ); assert_eq!(original_schema, swapped_join.schema()); - crosscheck_plans(join).unwrap(); } } @@ -1078,7 +1053,6 @@ mod tests_statistical { "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", expected_lines, actual_lines ); - crosscheck_plans(plan).unwrap(); }; } @@ -1180,7 +1154,6 @@ mod tests_statistical { swapped_join.right().statistics().unwrap().total_byte_size, Precision::Inexact(2097152) ); - crosscheck_plans(join).unwrap(); } #[rstest( @@ -1249,7 +1222,6 @@ mod tests_statistical { swapped_join.right().statistics().unwrap().total_byte_size, Precision::Inexact(2097152) ); - crosscheck_plans(join.clone()).unwrap(); } #[rstest( @@ -1311,7 +1283,6 @@ mod tests_statistical { swapped_join.right().statistics().unwrap().total_byte_size, Precision::Inexact(2097152) ); - crosscheck_plans(join.clone()).unwrap(); } #[tokio::test] @@ -1523,7 +1494,6 @@ mod tests_statistical { assert_eq!(*swapped_join.partition_mode(), expected_mode); } - crosscheck_plans(join).unwrap(); } } @@ -1568,8 +1538,6 @@ mod util_tests { #[cfg(test)] mod hash_join_tests { - - use self::tests_statistical::crosscheck_plans; use super::*; use crate::physical_optimizer::test_utils::SourceType; use crate::test_util::UnboundedExec; @@ -2000,7 +1968,6 @@ mod hash_join_tests { ) ); }; - crosscheck_plans(plan).unwrap(); Ok(()) } } From f8c623fe045d70a87eac8dc8620b74ff73be56d5 Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Sat, 27 Apr 2024 02:30:09 +0800 Subject: [PATCH 6/6] fix: no longer support the `substring` function (#10242) * fix: no longer support the `substring` function * enable from-for format * update test comment * review feedback * review feedback Co-authored-by: Jeffrey Vo --------- Co-authored-by: Jeffrey Vo --- datafusion/sql/src/expr/mod.rs | 2 +- datafusion/sqllogictest/test_files/expr.slt | 29 +++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 0d1db8a29cce..13f559a0ebc7 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -467,7 +467,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { expr, substring_from, substring_for, - special: false, + special: _, } => self.sql_substring_to_expr( expr, substring_from, diff --git a/datafusion/sqllogictest/test_files/expr.slt b/datafusion/sqllogictest/test_files/expr.slt index adc577f12f91..ff63416b3a10 100644 --- a/datafusion/sqllogictest/test_files/expr.slt +++ b/datafusion/sqllogictest/test_files/expr.slt @@ -1871,6 +1871,17 @@ SELECT digest('','blake3'); ---- af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262 + 
+query T +SELECT substring('alphabet', 1) +---- +alphabet + +query T +SELECT substring('alphabet', 3, 2) +---- +ph + query T SELECT substring('alphabet' from 2 for 1); ---- @@ -1886,6 +1897,24 @@ SELECT substring('alphabet' for 1); ---- a +# The 'from' and 'for' parameters don't support string types, because they should be treated as +# regular expressions, which we have not implemented yet. +query error DataFusion error: Error during planning: No function matches the given name and argument types +SELECT substring('alphabet' FROM '3') + +query error DataFusion error: Error during planning: No function matches the given name and argument types +SELECT substring('alphabet' FROM '3' FOR '2') + +query error DataFusion error: Error during planning: No function matches the given name and argument types +SELECT substring('alphabet' FROM '3' FOR 2) + +query error DataFusion error: Error during planning: No function matches the given name and argument types +SELECT substring('alphabet' FROM 3 FOR '2') + +query error DataFusion error: Error during planning: No function matches the given name and argument types +SELECT substring('alphabet' FOR '2') + + ##### csv_query_nullif_divide_by_0
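Looking back at PATCH 1/6, the sketch below is a minimal, hypothetical driver showing how the newly supported window-function unparsing can be exercised. The expression literal is copied from the new `ROW_NUMBER` unit test in that patch; the `Unparser::default()` constructor, the public `expr_to_sql` method, and the import paths are assumptions based on the surrounding `datafusion-sql` unparser module rather than something shown in these patches.

```rust
use datafusion_common::Result;
use datafusion_expr::expr::WindowFunction;
use datafusion_expr::{
    col, BuiltInWindowFunction, Expr, WindowFrame, WindowFunctionDefinition,
};
use datafusion_sql::unparser::Unparser;

fn main() -> Result<()> {
    // Same ROW_NUMBER expression as the new unparser unit test in PATCH 1/6.
    let expr = Expr::WindowFunction(WindowFunction {
        fun: WindowFunctionDefinition::BuiltInWindowFunction(
            BuiltInWindowFunction::RowNumber,
        ),
        args: vec![col("col")],
        partition_by: vec![],
        order_by: vec![],
        window_frame: WindowFrame::new(None),
        null_treatment: None,
    });

    // Assumption: `Unparser::default()` uses the default dialect; a specific
    // dialect could be passed via `Unparser::new(&dialect)` instead.
    let unparser = Unparser::default();
    let sql = unparser.expr_to_sql(&expr)?;

    // Per the new test, this is expected to print:
    // ROW_NUMBER("col") OVER (ROWS BETWEEN NULL PRECEDING AND NULL FOLLOWING)
    println!("{sql}");
    Ok(())
}
```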