Skip to content

Commit

Permalink
Merge remote-tracking branch 'apache/main' into alamb/map_in_place2
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Apr 9, 2024
2 parents b29ebd2 + eb05741 commit a51c6c6
Show file tree
Hide file tree
Showing 42 changed files with 2,454 additions and 1,540 deletions.
27 changes: 16 additions & 11 deletions datafusion/core/src/physical_optimizer/pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,19 +105,23 @@ pub trait PruningStatistics {
fn num_containers(&self) -> usize;

/// Return the number of null values for the named column as an
/// `Option<UInt64Array>`.
/// [`UInt64Array`]
///
/// See [`Self::min_values`] for when to return `None` and null values.
///
/// Note: the returned array must contain [`Self::num_containers`] rows
///
/// [`UInt64Array`]: arrow::array::UInt64Array
fn null_counts(&self, column: &Column) -> Option<ArrayRef>;

/// Return the number of rows for the named column in each container
/// as an `Option<UInt64Array>`.
/// as an [`UInt64Array`].
///
/// See [`Self::min_values`] for when to return `None` and null values.
///
/// Note: the returned array must contain [`Self::num_containers`] rows
///
/// [`UInt64Array`]: arrow::array::UInt64Array
fn row_counts(&self, column: &Column) -> Option<ArrayRef>;

/// Returns [`BooleanArray`] where each row represents information known
Expand Down Expand Up @@ -1519,6 +1523,7 @@ mod tests {
array::{BinaryArray, Int32Array, Int64Array, StringArray},
datatypes::{DataType, TimeUnit},
};
use arrow_array::UInt64Array;
use datafusion_common::{ScalarValue, ToDFSchema};
use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::expr::InList;
Expand Down Expand Up @@ -1684,10 +1689,10 @@ mod tests {
/// there are containers
fn with_null_counts(
mut self,
counts: impl IntoIterator<Item = Option<i64>>,
counts: impl IntoIterator<Item = Option<u64>>,
) -> Self {
let null_counts: ArrayRef =
Arc::new(counts.into_iter().collect::<Int64Array>());
Arc::new(counts.into_iter().collect::<UInt64Array>());

self.assert_invariants();
self.null_counts = Some(null_counts);
Expand All @@ -1698,10 +1703,10 @@ mod tests {
/// there are containers
fn with_row_counts(
mut self,
counts: impl IntoIterator<Item = Option<i64>>,
counts: impl IntoIterator<Item = Option<u64>>,
) -> Self {
let row_counts: ArrayRef =
Arc::new(counts.into_iter().collect::<Int64Array>());
Arc::new(counts.into_iter().collect::<UInt64Array>());

self.assert_invariants();
self.row_counts = Some(row_counts);
Expand Down Expand Up @@ -1753,13 +1758,13 @@ mod tests {
self
}

/// Add null counts for the specified columm.
/// Add null counts for the specified column.
/// There must be the same number of null counts as
/// there are containers
fn with_null_counts(
mut self,
name: impl Into<String>,
counts: impl IntoIterator<Item = Option<i64>>,
counts: impl IntoIterator<Item = Option<u64>>,
) -> Self {
let col = Column::from_name(name.into());

Expand All @@ -1775,13 +1780,13 @@ mod tests {
self
}

/// Add row counts for the specified columm.
/// Add row counts for the specified column.
/// There must be the same number of row counts as
/// there are containers
fn with_row_counts(
mut self,
name: impl Into<String>,
counts: impl IntoIterator<Item = Option<i64>>,
counts: impl IntoIterator<Item = Option<u64>>,
) -> Self {
let col = Column::from_name(name.into());

Expand All @@ -1797,7 +1802,7 @@ mod tests {
self
}

/// Add contained information for the specified columm.
/// Add contained information for the specified column.
fn with_contained(
mut self,
name: impl Into<String>,
Expand Down
136 changes: 136 additions & 0 deletions datafusion/core/tests/simplification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use datafusion_common::cast::as_int32_array;
use datafusion_common::ScalarValue;
use datafusion_common::{DFSchemaRef, ToDFSchema};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::logical_plan::builder::table_scan_with_filters;
use datafusion_expr::simplify::SimplifyInfo;
use datafusion_expr::{
expr, table_scan, BuiltinScalarFunction, Cast, ColumnarValue, Expr, ExprSchemable,
Expand Down Expand Up @@ -294,6 +295,45 @@ fn select_date_plus_interval() -> Result<()> {
Ok(())
}

#[test]
fn simplify_project_scalar_fn() -> Result<()> {
// Issue https://github.com/apache/arrow-datafusion/issues/5996
let schema = Schema::new(vec![Field::new("f", DataType::Float64, false)]);
let plan = table_scan(Some("test"), &schema, None)?
.project(vec![power(col("f"), lit(1.0))])?
.build()?;

// before simplify: power(t.f, 1.0)
// after simplify: t.f as "power(t.f, 1.0)"
let expected = "Projection: test.f AS power(test.f,Float64(1))\
\n TableScan: test";
let actual = get_optimized_plan_formatted(&plan, &Utc::now());
assert_eq!(expected, actual);
Ok(())
}

#[test]
fn simplify_scan_predicate() -> Result<()> {
let schema = Schema::new(vec![
Field::new("f", DataType::Float64, false),
Field::new("g", DataType::Float64, false),
]);
let plan = table_scan_with_filters(
Some("test"),
&schema,
None,
vec![col("g").eq(power(col("f"), lit(1.0)))],
)?
.build()?;

// before simplify: t.g = power(t.f, 1.0)
// after simplify: (t.g = t.f) as "t.g = power(t.f, 1.0)"
let expected = "TableScan: test, full_filters=[g = f AS g = power(f,Float64(1))]";
let actual = get_optimized_plan_formatted(&plan, &Utc::now());
assert_eq!(expected, actual);
Ok(())
}

#[test]
fn test_const_evaluator() {
// true --> true
Expand Down Expand Up @@ -431,3 +471,99 @@ fn multiple_now() -> Result<()> {
assert_eq!(expected, actual);
Ok(())
}

// ------------------------------
// --- Simplifier tests -----
// ------------------------------

fn expr_test_schema() -> DFSchemaRef {
Schema::new(vec![
Field::new("c1", DataType::Utf8, true),
Field::new("c2", DataType::Boolean, true),
Field::new("c3", DataType::Int64, true),
Field::new("c4", DataType::UInt32, true),
Field::new("c1_non_null", DataType::Utf8, false),
Field::new("c2_non_null", DataType::Boolean, false),
Field::new("c3_non_null", DataType::Int64, false),
Field::new("c4_non_null", DataType::UInt32, false),
])
.to_dfschema_ref()
.unwrap()
}

fn test_simplify(input_expr: Expr, expected_expr: Expr) {
let info: MyInfo = MyInfo {
schema: expr_test_schema(),
execution_props: ExecutionProps::new(),
};
let simplifier = ExprSimplifier::new(info);
let simplified_expr = simplifier
.simplify(input_expr.clone())
.expect("successfully evaluated");

assert_eq!(
simplified_expr, expected_expr,
"Mismatch evaluating {input_expr}\n Expected:{expected_expr}\n Got:{simplified_expr}"
);
}

#[test]
fn test_simplify_log() {
// Log(c3, 1) ===> 0
{
let expr = log(col("c3_non_null"), lit(1));
test_simplify(expr, lit(0i64));
}
// Log(c3, c3) ===> 1
{
let expr = log(col("c3_non_null"), col("c3_non_null"));
let expected = lit(1i64);
test_simplify(expr, expected);
}
// Log(c3, Power(c3, c4)) ===> c4
{
let expr = log(
col("c3_non_null"),
power(col("c3_non_null"), col("c4_non_null")),
);
let expected = col("c4_non_null");
test_simplify(expr, expected);
}
// Log(c3, c4) ===> Log(c3, c4)
{
let expr = log(col("c3_non_null"), col("c4_non_null"));
let expected = log(col("c3_non_null"), col("c4_non_null"));
test_simplify(expr, expected);
}
}

#[test]
fn test_simplify_power() {
// Power(c3, 0) ===> 1
{
let expr = power(col("c3_non_null"), lit(0));
let expected = lit(1i64);
test_simplify(expr, expected)
}
// Power(c3, 1) ===> c3
{
let expr = power(col("c3_non_null"), lit(1));
let expected = col("c3_non_null");
test_simplify(expr, expected)
}
// Power(c3, Log(c3, c4)) ===> c4
{
let expr = power(
col("c3_non_null"),
log(col("c3_non_null"), col("c4_non_null")),
);
let expected = col("c4_non_null");
test_simplify(expr, expected)
}
// Power(c3, c4) ===> Power(c3, c4)
{
let expr = power(col("c3_non_null"), col("c4_non_null"));
let expected = power(col("c3_non_null"), col("c4_non_null"));
test_simplify(expr, expected)
}
}
Loading

0 comments on commit a51c6c6

Please sign in to comment.