Skip to content

Commit

Permalink
Remove Expr::GetIndexedField, replace Expr::{field,index,range} w…
Browse files Browse the repository at this point in the history
…ith `FieldAccessor`, `IndexAccessor`, and `SliceAccessor` (#10568)

* remove expr

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* add expr extension

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* doc

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* move test that has struct

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* fmt

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* add foc and fix displayed name

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* rm test

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* rebase

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* move doc

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

---------

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>
  • Loading branch information
jayzhan211 authored May 21, 2024
1 parent 94b5511 commit 3ebc31d
Show file tree
Hide file tree
Showing 27 changed files with 365 additions and 498 deletions.
1 change: 0 additions & 1 deletion datafusion/core/src/datasource/listing/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ pub fn expr_applicable_for_cols(col_names: &[String], expr: &Expr) -> bool {
| Expr::Exists { .. }
| Expr::InSubquery(_)
| Expr::ScalarSubquery(_)
| Expr::GetIndexedField { .. }
| Expr::GroupingSet(_)
| Expr::Case { .. } => Ok(TreeNodeRecursion::Continue),

Expand Down
26 changes: 1 addition & 25 deletions datafusion/core/src/physical_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,7 @@ use datafusion_common::{
use datafusion_expr::dml::CopyTo;
use datafusion_expr::expr::{
self, AggregateFunction, AggregateFunctionDefinition, Alias, Between, BinaryExpr,
Cast, GetFieldAccess, GetIndexedField, GroupingSet, InList, Like, TryCast,
WindowFunction,
Cast, GroupingSet, InList, Like, TryCast, WindowFunction,
};
use datafusion_expr::expr_rewriter::unnormalize_cols;
use datafusion_expr::expr_vec_fmt;
Expand Down Expand Up @@ -216,29 +215,6 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result<String> {
let expr = create_physical_name(expr, false)?;
Ok(format!("{expr} IS NOT UNKNOWN"))
}
Expr::GetIndexedField(GetIndexedField { expr: _, field }) => {
match field {
GetFieldAccess::NamedStructField { name: _ } => {
unreachable!(
"NamedStructField should have been rewritten in OperatorToFunction"
)
}
GetFieldAccess::ListIndex { key: _ } => {
unreachable!(
"ListIndex should have been rewritten in OperatorToFunction"
)
}
GetFieldAccess::ListRange {
start: _,
stop: _,
stride: _,
} => {
unreachable!(
"ListRange should have been rewritten in OperatorToFunction"
)
}
};
}
Expr::ScalarFunction(fun) => fun.func.display_name(&fun.args),
Expr::WindowFunction(WindowFunction {
fun,
Expand Down
11 changes: 7 additions & 4 deletions datafusion/core/tests/expr_api/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ use arrow_array::{ArrayRef, RecordBatch, StringArray, StructArray};
use arrow_schema::{DataType, Field};
use datafusion::prelude::*;
use datafusion_common::DFSchema;
use datafusion_functions::core::expr_ext::FieldAccessor;
use datafusion_functions_array::expr_ext::{IndexAccessor, SliceAccessor};
/// Tests of using and evaluating `Expr`s outside the context of a LogicalPlan
use std::sync::{Arc, OnceLock};

Expand Down Expand Up @@ -61,7 +63,7 @@ fn test_eq_with_coercion() {
#[test]
fn test_get_field() {
evaluate_expr_test(
get_field(col("props"), "a"),
col("props").field("a"),
vec![
"+------------+",
"| expr |",
Expand All @@ -77,7 +79,8 @@ fn test_get_field() {
#[test]
fn test_nested_get_field() {
evaluate_expr_test(
get_field(col("props"), "a")
col("props")
.field("a")
.eq(lit("2021-02-02"))
.or(col("id").eq(lit(1))),
vec![
Expand All @@ -95,7 +98,7 @@ fn test_nested_get_field() {
#[test]
fn test_list() {
evaluate_expr_test(
array_element(col("list"), lit(1i64)),
col("list").index(lit(1i64)),
vec![
"+------+", "| expr |", "+------+", "| one |", "| two |", "| five |",
"+------+",
Expand All @@ -106,7 +109,7 @@ fn test_list() {
#[test]
fn test_list_range() {
evaluate_expr_test(
array_slice(col("list"), lit(1i64), lit(2i64), None),
col("list").range(lit(1i64), lit(2i64)),
vec![
"+--------------+",
"| expr |",
Expand Down
129 changes: 127 additions & 2 deletions datafusion/core/tests/optimizer_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,19 @@ use std::collections::HashMap;
use std::sync::Arc;

use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit};
use arrow_schema::{Fields, SchemaBuilder};
use datafusion_common::config::ConfigOptions;
use datafusion_common::{plan_err, Result};
use datafusion_expr::{AggregateUDF, LogicalPlan, ScalarUDF, TableSource, WindowUDF};
use datafusion_common::tree_node::{TransformedResult, TreeNode};
use datafusion_common::{plan_err, DFSchema, Result, ScalarValue};
use datafusion_expr::interval_arithmetic::{Interval, NullableInterval};
use datafusion_expr::{
col, lit, AggregateUDF, BinaryExpr, Expr, ExprSchemable, LogicalPlan, Operator,
ScalarUDF, TableSource, WindowUDF,
};
use datafusion_functions::core::expr_ext::FieldAccessor;
use datafusion_optimizer::analyzer::Analyzer;
use datafusion_optimizer::optimizer::Optimizer;
use datafusion_optimizer::simplify_expressions::GuaranteeRewriter;
use datafusion_optimizer::{OptimizerConfig, OptimizerContext};
use datafusion_sql::planner::{ContextProvider, SqlToRel};
use datafusion_sql::sqlparser::ast::Statement;
Expand Down Expand Up @@ -233,3 +241,120 @@ impl TableSource for MyTableSource {
self.schema.clone()
}
}

#[test]
fn test_nested_schema_nullability() {
let mut builder = SchemaBuilder::new();
builder.push(Field::new("foo", DataType::Int32, true));
builder.push(Field::new(
"parent",
DataType::Struct(Fields::from(vec![Field::new(
"child",
DataType::Int64,
false,
)])),
true,
));
let schema = builder.finish();

let dfschema = DFSchema::from_field_specific_qualified_schema(
vec![Some("table_name".into()), None],
&Arc::new(schema),
)
.unwrap();

let expr = col("parent").field("child");
assert!(expr.nullable(&dfschema).unwrap());
}

#[test]
fn test_inequalities_non_null_bounded() {
let guarantees = vec![
// x ∈ [1, 3] (not null)
(
col("x"),
NullableInterval::NotNull {
values: Interval::make(Some(1_i32), Some(3_i32)).unwrap(),
},
),
// s.y ∈ [1, 3] (not null)
(
col("s").field("y"),
NullableInterval::NotNull {
values: Interval::make(Some(1_i32), Some(3_i32)).unwrap(),
},
),
];

let mut rewriter = GuaranteeRewriter::new(guarantees.iter());

// (original_expr, expected_simplification)
let simplified_cases = &[
(col("x").lt(lit(0)), false),
(col("s").field("y").lt(lit(0)), false),
(col("x").lt_eq(lit(3)), true),
(col("x").gt(lit(3)), false),
(col("x").gt(lit(0)), true),
(col("x").eq(lit(0)), false),
(col("x").not_eq(lit(0)), true),
(col("x").between(lit(0), lit(5)), true),
(col("x").between(lit(5), lit(10)), false),
(col("x").not_between(lit(0), lit(5)), false),
(col("x").not_between(lit(5), lit(10)), true),
(
Expr::BinaryExpr(BinaryExpr {
left: Box::new(col("x")),
op: Operator::IsDistinctFrom,
right: Box::new(lit(ScalarValue::Null)),
}),
true,
),
(
Expr::BinaryExpr(BinaryExpr {
left: Box::new(col("x")),
op: Operator::IsDistinctFrom,
right: Box::new(lit(5)),
}),
true,
),
];

validate_simplified_cases(&mut rewriter, simplified_cases);

let unchanged_cases = &[
col("x").gt(lit(2)),
col("x").lt_eq(lit(2)),
col("x").eq(lit(2)),
col("x").not_eq(lit(2)),
col("x").between(lit(3), lit(5)),
col("x").not_between(lit(3), lit(10)),
];

validate_unchanged_cases(&mut rewriter, unchanged_cases);
}

fn validate_simplified_cases<T>(rewriter: &mut GuaranteeRewriter, cases: &[(Expr, T)])
where
ScalarValue: From<T>,
T: Clone,
{
for (expr, expected_value) in cases {
let output = expr.clone().rewrite(rewriter).data().unwrap();
let expected = lit(ScalarValue::from(expected_value.clone()));
assert_eq!(
output, expected,
"{} simplified to {}, but expected {}",
expr, output, expected
);
}
}
fn validate_unchanged_cases(rewriter: &mut GuaranteeRewriter, cases: &[Expr]) {
for expr in cases {
let output = expr.clone().rewrite(rewriter).data().unwrap();
assert_eq!(
&output, expr,
"{} was simplified to {}, but expected it to be unchanged",
expr, output
);
}
}
Loading

0 comments on commit 3ebc31d

Please sign in to comment.