fix: make is_nullable ANSI-aware for sum_decimal and avg_decimal #981

Open · wants to merge 4 commits into main
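Summary of the change: the native SumDecimal and AvgDecimal aggregates now report nullability based on ANSI mode. With ANSI off, a decimal overflow degrades to NULL, so the result must be nullable (as the removed comment in sum_decimal.rs notes); with ANSI on, overflow raises an error instead, so the result need not be declared nullable. A minimal sketch of the rule, using illustrative stand-in types rather than the Comet source:

// Sketch: nullability tied to ANSI mode, mirroring the diffs below.
struct DecimalAggSketch {
    // Mirrors the `ansi_mode` field added to SumDecimal and AvgDecimal;
    // populated from `expr.fail_on_error` in the planner.
    ansi_mode: bool,
}

impl DecimalAggSketch {
    fn is_nullable(&self) -> bool {
        // Nullable only when overflow is allowed to produce NULL.
        !self.ansi_mode
    }
}

fn main() {
    assert!(DecimalAggSketch { ansi_mode: false }.is_nullable());
    assert!(!DecimalAggSketch { ansi_mode: true }.is_nullable());
}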
native/core/benches/aggregate.rs (2 additions & 1 deletion)
@@ -69,6 +69,7 @@ fn criterion_benchmark(c: &mut Criterion) {
         Arc::clone(&c1),
         DataType::Decimal128(38, 10),
         DataType::Decimal128(38, 10),
+        false,
     )));
     b.to_async(&rt).iter(|| {
         black_box(agg_test(
@@ -96,7 +97,7 @@ fn criterion_benchmark(c: &mut Criterion) {
 
     group.bench_function("sum_decimal_comet", |b| {
         let comet_sum_decimal = Arc::new(AggregateUDF::new_from_impl(
-            SumDecimal::try_new(Arc::clone(&c1), DataType::Decimal128(38, 10)).unwrap(),
+            SumDecimal::try_new(Arc::clone(&c1), DataType::Decimal128(38, 10), false).unwrap(),
         ));
         b.to_async(&rt).iter(|| {
             black_box(agg_test(
native/core/src/execution/datafusion/expressions/avg_decimal.rs (14 additions & 3 deletions)
@@ -46,16 +46,23 @@ pub struct AvgDecimal {
     expr: Arc<dyn PhysicalExpr>,
     sum_data_type: DataType,
     result_data_type: DataType,
+    ansi_mode: bool,
 }
 
 impl AvgDecimal {
     /// Create a new AVG aggregate function
-    pub fn new(expr: Arc<dyn PhysicalExpr>, result_type: DataType, sum_type: DataType) -> Self {
+    pub fn new(
+        expr: Arc<dyn PhysicalExpr>,
+        result_type: DataType,
+        sum_type: DataType,
+        ansi_mode: bool,
+    ) -> Self {
         Self {
             signature: Signature::user_defined(Immutable),
             expr,
             result_data_type: result_type,
             sum_data_type: sum_type,
+            ansi_mode,
         }
     }
 }
@@ -89,12 +96,12 @@ impl AggregateUDFImpl for AvgDecimal {
             Field::new(
                 format_state_name(self.name(), "sum"),
                 self.sum_data_type.clone(),
-                true,
+                self.is_nullable(),
             ),
             Field::new(
                 format_state_name(self.name(), "count"),
                 DataType::Int64,
-                true,
+                self.is_nullable(),
             ),
         ])
     }
@@ -107,6 +114,10 @@ impl AggregateUDFImpl for AvgDecimal {
         ReversedUDAF::Identical
     }
 
+    fn is_nullable(&self) -> bool {
+        !self.ansi_mode
+    }
+
     fn groups_accumulator_supported(&self, _args: AccumulatorArgs) -> bool {
         true
     }
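Note that state_fields now derives the nullability of the intermediate sum and count from is_nullable() instead of hard-coding true. A hedged illustration of the resulting state schema, assuming the arrow-schema crate and DataFusion's name[state] convention for format_state_name; the Decimal128(38, 10) precision is only an example:

use arrow_schema::{DataType, Field};

// Mirrors the state_fields change above: state nullability follows ANSI mode.
fn avg_state_fields(ansi_mode: bool) -> Vec<Field> {
    let nullable = !ansi_mode; // same rule as is_nullable()
    vec![
        Field::new("avg[sum]", DataType::Decimal128(38, 10), nullable),
        Field::new("avg[count]", DataType::Int64, nullable),
    ]
}

fn main() {
    for f in avg_state_fields(true) {
        println!("{} nullable={}", f.name(), f.is_nullable());
    }
}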
native/core/src/execution/datafusion/expressions/sum_decimal.rs (11 additions & 4 deletions)
@@ -47,10 +47,16 @@ pub struct SumDecimal {
     precision: u8,
     /// Decimal scale
     scale: i8,
+    /// ANSI Mode
+    ansi_mode: bool,
 }
 
 impl SumDecimal {
-    pub fn try_new(expr: Arc<dyn PhysicalExpr>, data_type: DataType) -> DFResult<Self> {
+    pub fn try_new(
+        expr: Arc<dyn PhysicalExpr>,
+        data_type: DataType,
+        ansi_mode: bool,
+    ) -> DFResult<Self> {
         // The `data_type` is the SUM result type passed from Spark side
         let (precision, scale) = match data_type {
             DataType::Decimal128(p, s) => (p, s),
@@ -66,6 +72,7 @@ impl SumDecimal {
             result_type: data_type,
             precision,
             scale,
+            ansi_mode,
         })
     }
 }
@@ -129,8 +136,7 @@ impl AggregateUDFImpl for SumDecimal {
     }
 
     fn is_nullable(&self) -> bool {
-        // SumDecimal is always nullable because overflows can cause null values
-        true
+        !self.ansi_mode
     }
 }
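For context, the behavior that drives this rule, as a simplified sketch rather than Comet's actual accumulator; i128 stands in for an already-scaled Decimal128 value, and overflow is checked against i128 rather than the decimal's precision:

// Non-ANSI: overflow degrades to NULL (None). ANSI: overflow is an error.
fn sum_decimal(values: &[i128], ansi_mode: bool) -> Result<Option<i128>, String> {
    let mut acc: Option<i128> = Some(0);
    for &v in values {
        acc = match acc.and_then(|a| a.checked_add(v)) {
            Some(s) => Some(s),
            None if ansi_mode => return Err("decimal overflow".to_string()),
            None => None, // stays NULL for the rest of the group
        };
    }
    Ok(acc)
}

fn main() {
    assert_eq!(sum_decimal(&[1, 2, 3], false), Ok(Some(6)));
    assert_eq!(sum_decimal(&[i128::MAX, 1], false), Ok(None)); // NULL result
    assert!(sum_decimal(&[i128::MAX, 1], true).is_err()); // ANSI: fail fast
}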

@@ -501,7 +507,7 @@ mod tests {
     #[test]
     fn invalid_data_type() {
         let expr = Arc::new(Literal::new(ScalarValue::Int32(Some(1))));
-        assert!(SumDecimal::try_new(expr, DataType::Int32).is_err());
+        assert!(SumDecimal::try_new(expr, DataType::Int32, false).is_err());
     }

#[tokio::test]
@@ -524,6 +530,7 @@ mod tests {
         let aggregate_udf = Arc::new(AggregateUDF::new_from_impl(SumDecimal::try_new(
             Arc::clone(&c1),
             data_type.clone(),
+            false,
         )?));
 
         let aggr_expr = AggregateExprBuilder::new(aggregate_udf, vec![c1])
native/core/src/execution/datafusion/planner.rs (2 additions & 0 deletions)
@@ -1374,6 +1374,7 @@ impl PhysicalPlanner {
                 let func = AggregateUDF::new_from_impl(SumDecimal::try_new(
                     Arc::clone(&child),
                     datatype,
+                    expr.fail_on_error,
                 )?);
                 AggregateExprBuilder::new(Arc::new(func), vec![child])
             }
@@ -1403,6 +1404,7 @@ impl PhysicalPlanner {
                     Arc::clone(&child),
                     datatype,
                     input_datatype,
+                    expr.fail_on_error,
                 ));
                 AggregateExprBuilder::new(Arc::new(func), vec![child])
             }
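The planner forwards the same flag to both constructors: expr.fail_on_error carries the Spark-side ANSI setting into the native plan. A hypothetical mirror of the wiring — only fail_on_error, SumDecimal::try_new, and AvgDecimal::new come from the diff; the surrounding types are stand-ins:

// Stand-in for the protobuf aggregate expression the planner receives.
struct AggExprProto {
    // Assumed to be set from spark.sql.ansi.enabled on the Spark side.
    fail_on_error: bool,
}

// The planner forwards the flag as the new `ansi_mode` argument of
// SumDecimal::try_new / AvgDecimal::new.
fn ansi_mode_for(expr: &AggExprProto) -> bool {
    expr.fail_on_error
}

fn main() {
    let expr = AggExprProto { fail_on_error: true };
    assert!(ansi_mode_for(&expr));
}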
@@ -1202,6 +1202,28 @@ class CometAggregateSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     }
   }
 
+  test("sum decimal and average decimal overflow with ansi true") {
+    withSQLConf(
+      SQLConf.ANSI_ENABLED.key -> "true",
+      CometConf.COMET_ANSI_MODE_ENABLED.key -> "true",
+      CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.key -> "true") {
+      Seq(true, false).foreach { dictionary =>
+        withSQLConf("parquet.enable.dictionary" -> dictionary.toString) {
+          val table = "test"
+          withTable(table) {
+            sql(s"create table $table(col1 decimal(5, 2), col2 decimal(5, 2)) using parquet")
+            sql(s"""
+              insert into $table values
+              (-999.99, 999.99),
+              (-999.99, 999.99)
+            """)
+            checkSparkAnswer("SELECT SUM(col1), AVG(col1), SUM(col2), AVG(col2) FROM test")
Review comment (Member):

> We should use checkSparkAnswerAndOperator here to make sure that the Comet query really is running with Comet expressions.

Suggested change:
-            checkSparkAnswer("SELECT SUM(col1), AVG(col1), SUM(col2), AVG(col2) FROM test")
+            checkSparkAnswerAndOperator("SELECT SUM(col1), AVG(col1), SUM(col2), AVG(col2) FROM test")
+          }
+        }
+      }
+    }
+  }
+
   test("var_pop and var_samp") {
     withSQLConf(CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true") {
       Seq("native", "jvm").foreach { cometShuffleMode =>