Skip to content

Commit

Permalink
use __common_expr prefix
Browse files Browse the repository at this point in the history
  • Loading branch information
peter-toth committed Jun 18, 2024
1 parent be2e560 commit a592e69
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 122 deletions.
84 changes: 44 additions & 40 deletions datafusion/optimizer/src/common_subexpr_eliminate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ use datafusion_expr::logical_plan::{Aggregate, LogicalPlan, Projection, Window};
use datafusion_expr::{col, Expr, ExprSchemable};
use indexmap::IndexMap;

const CSE_PREFIX: &str = "__cse";
const CSE_PREFIX: &str = "__common_expr";

/// Identifier that represents a subexpression tree.
///
Expand Down Expand Up @@ -929,8 +929,8 @@ mod test {
)?
.build()?;

let expected = "Aggregate: groupBy=[[]], aggr=[[sum(__cse_1 AS test.a * Int32(1) - test.b), sum(__cse_1 AS test.a * Int32(1) - test.b * (Int32(1) + test.c))]]\
\n Projection: test.a * (Int32(1) - test.b) AS __cse_1, test.a, test.b, test.c\
let expected = "Aggregate: groupBy=[[]], aggr=[[sum(__common_expr_1 AS test.a * Int32(1) - test.b), sum(__common_expr_1 AS test.a * Int32(1) - test.b * (Int32(1) + test.c))]]\
\n Projection: test.a * (Int32(1) - test.b) AS __common_expr_1, test.a, test.b, test.c\
\n TableScan: test";

assert_optimized_plan_eq(expected, &plan, None);
Expand All @@ -949,8 +949,8 @@ mod test {
])?
.build()?;

let expected = "Projection: __cse_1 - test.c AS alias1 * __cse_1 AS test.a + test.b, __cse_1 AS test.a + test.b\
\n Projection: test.a + test.b AS __cse_1, test.a, test.b, test.c\
let expected = "Projection: __common_expr_1 - test.c AS alias1 * __common_expr_1 AS test.a + test.b, __common_expr_1 AS test.a + test.b\
\n Projection: test.a + test.b AS __common_expr_1, test.a, test.b, test.c\
\n TableScan: test";

assert_optimized_plan_eq(expected, &plan, None);
Expand Down Expand Up @@ -1002,8 +1002,8 @@ mod test {
)?
.build()?;

let expected = "Projection: __cse_1 AS col1, __cse_1 AS col2, col3, __cse_3 AS AVG(test.c), __cse_2 AS col4, __cse_2 AS col5, col6, __cse_4 AS my_agg(test.c)\
\n Aggregate: groupBy=[[]], aggr=[[AVG(test.a) AS __cse_1, my_agg(test.a) AS __cse_2, AVG(test.b) AS col3, AVG(test.c) AS __cse_3, my_agg(test.b) AS col6, my_agg(test.c) AS __cse_4]]\
let expected = "Projection: __common_expr_1 AS col1, __common_expr_1 AS col2, col3, __common_expr_3 AS AVG(test.c), __common_expr_2 AS col4, __common_expr_2 AS col5, col6, __common_expr_4 AS my_agg(test.c)\
\n Aggregate: groupBy=[[]], aggr=[[AVG(test.a) AS __common_expr_1, my_agg(test.a) AS __common_expr_2, AVG(test.b) AS col3, AVG(test.c) AS __common_expr_3, my_agg(test.b) AS col6, my_agg(test.c) AS __common_expr_4]]\
\n TableScan: test";

assert_optimized_plan_eq(expected, &plan, None);
Expand All @@ -1021,8 +1021,8 @@ mod test {
)?
.build()?;

let expected = "Projection: Int32(1) + __cse_1 AS AVG(test.a), Int32(1) - __cse_1 AS AVG(test.a), Int32(1) + __cse_2 AS my_agg(test.a), Int32(1) - __cse_2 AS my_agg(test.a)\
\n Aggregate: groupBy=[[]], aggr=[[AVG(test.a) AS __cse_1, my_agg(test.a) AS __cse_2]]\
let expected = "Projection: Int32(1) + __common_expr_1 AS AVG(test.a), Int32(1) - __common_expr_1 AS AVG(test.a), Int32(1) + __common_expr_2 AS my_agg(test.a), Int32(1) - __common_expr_2 AS my_agg(test.a)\
\n Aggregate: groupBy=[[]], aggr=[[AVG(test.a) AS __common_expr_1, my_agg(test.a) AS __common_expr_2]]\
\n TableScan: test";

assert_optimized_plan_eq(expected, &plan, None);
Expand All @@ -1038,8 +1038,8 @@ mod test {
)?
.build()?;

let expected ="Aggregate: groupBy=[[]], aggr=[[AVG(__cse_1) AS col1, my_agg(__cse_1) AS col2]]\
\n Projection: UInt32(1) + test.a AS __cse_1, test.a, test.b, test.c\
let expected ="Aggregate: groupBy=[[]], aggr=[[AVG(__common_expr_1) AS col1, my_agg(__common_expr_1) AS col2]]\
\n Projection: UInt32(1) + test.a AS __common_expr_1, test.a, test.b, test.c\
\n TableScan: test";

assert_optimized_plan_eq(expected, &plan, None);
Expand All @@ -1055,8 +1055,8 @@ mod test {
)?
.build()?;

let expected = "Aggregate: groupBy=[[__cse_1 AS UInt32(1) + test.a]], aggr=[[AVG(__cse_1) AS col1, my_agg(__cse_1) AS col2]]\
\n Projection: UInt32(1) + test.a AS __cse_1, test.a, test.b, test.c\
let expected = "Aggregate: groupBy=[[__common_expr_1 AS UInt32(1) + test.a]], aggr=[[AVG(__common_expr_1) AS col1, my_agg(__common_expr_1) AS col2]]\
\n Projection: UInt32(1) + test.a AS __common_expr_1, test.a, test.b, test.c\
\n TableScan: test";

assert_optimized_plan_eq(expected, &plan, None);
Expand All @@ -1076,9 +1076,9 @@ mod test {
)?
.build()?;

let expected = "Projection: UInt32(1) + test.a, UInt32(1) + __cse_2 AS col1, UInt32(1) - __cse_2 AS col2, __cse_4 AS AVG(UInt32(1) + test.a), UInt32(1) + __cse_3 AS col3, UInt32(1) - __cse_3 AS col4, __cse_5 AS my_agg(UInt32(1) + test.a)\
\n Aggregate: groupBy=[[__cse_1 AS UInt32(1) + test.a]], aggr=[[AVG(__cse_1) AS __cse_2, my_agg(__cse_1) AS __cse_3, AVG(__cse_1 AS UInt32(1) + test.a) AS __cse_4, my_agg(__cse_1 AS UInt32(1) + test.a) AS __cse_5]]\
\n Projection: UInt32(1) + test.a AS __cse_1, test.a, test.b, test.c\
let expected = "Projection: UInt32(1) + test.a, UInt32(1) + __common_expr_2 AS col1, UInt32(1) - __common_expr_2 AS col2, __common_expr_4 AS AVG(UInt32(1) + test.a), UInt32(1) + __common_expr_3 AS col3, UInt32(1) - __common_expr_3 AS col4, __common_expr_5 AS my_agg(UInt32(1) + test.a)\
\n Aggregate: groupBy=[[__common_expr_1 AS UInt32(1) + test.a]], aggr=[[AVG(__common_expr_1) AS __common_expr_2, my_agg(__common_expr_1) AS __common_expr_3, AVG(__common_expr_1 AS UInt32(1) + test.a) AS __common_expr_4, my_agg(__common_expr_1 AS UInt32(1) + test.a) AS __common_expr_5]]\
\n Projection: UInt32(1) + test.a AS __common_expr_1, test.a, test.b, test.c\
\n TableScan: test";

assert_optimized_plan_eq(expected, &plan, None);
Expand All @@ -1103,9 +1103,9 @@ mod test {
)?
.build()?;

let expected = "Projection: table.test.col.a, UInt32(1) + __cse_2 AS AVG(UInt32(1) + table.test.col.a), __cse_2 AS AVG(UInt32(1) + table.test.col.a)\
\n Aggregate: groupBy=[[table.test.col.a]], aggr=[[AVG(__cse_1 AS UInt32(1) + table.test.col.a) AS __cse_2]]\
\n Projection: UInt32(1) + table.test.col.a AS __cse_1, table.test.col.a\
let expected = "Projection: table.test.col.a, UInt32(1) + __common_expr_2 AS AVG(UInt32(1) + table.test.col.a), __common_expr_2 AS AVG(UInt32(1) + table.test.col.a)\
\n Aggregate: groupBy=[[table.test.col.a]], aggr=[[AVG(__common_expr_1 AS UInt32(1) + table.test.col.a) AS __common_expr_2]]\
\n Projection: UInt32(1) + table.test.col.a AS __common_expr_1, table.test.col.a\
\n TableScan: table.test";

assert_optimized_plan_eq(expected, &plan, None);
Expand All @@ -1124,8 +1124,8 @@ mod test {
])?
.build()?;

let expected = "Projection: __cse_1 AS first, __cse_1 AS second\
\n Projection: Int32(1) + test.a AS __cse_1, test.a, test.b, test.c\
let expected = "Projection: __common_expr_1 AS first, __common_expr_1 AS second\
\n Projection: Int32(1) + test.a AS __common_expr_1, test.a, test.b, test.c\
\n TableScan: test";

assert_optimized_plan_eq(expected, &plan, None);
Expand Down Expand Up @@ -1295,8 +1295,8 @@ mod test {
.build()?;

let expected = "Projection: test.a, test.b, test.c\
\n Filter: __cse_1 - Int32(10) > __cse_1\
\n Projection: Int32(1) + test.a AS __cse_1, test.a, test.b, test.c\
\n Filter: __common_expr_1 - Int32(10) > __common_expr_1\
\n Projection: Int32(1) + test.a AS __common_expr_1, test.a, test.b, test.c\
\n TableScan: test";

assert_optimized_plan_eq(expected, &plan, None);
Expand Down Expand Up @@ -1346,42 +1346,46 @@ mod test {
let table_scan = test_table_scan()?;

let config = &OptimizerContext::new();
let __cse_1 = config.alias_generator().next(CSE_PREFIX);
let common_expr_1 = config.alias_generator().next(CSE_PREFIX);
let plan = LogicalPlanBuilder::from(table_scan.clone())
.project(vec![(col("a") + col("b")).alias(__cse_1.clone()), col("c")])?
.project(vec![
col(__cse_1.clone()).alias("c1"),
col(__cse_1).alias("c2"),
(col("a") + col("b")).alias(common_expr_1.clone()),
col("c"),
])?
.project(vec![
col(common_expr_1.clone()).alias("c1"),
col(common_expr_1).alias("c2"),
(col("c") + lit(2)).alias("c3"),
(col("c") + lit(2)).alias("c4"),
])?
.build()?;

let expected =
"Projection: __cse_1 AS c1, __cse_1 AS c2, __cse_2 AS c3, __cse_2 AS c4\
\n Projection: test.c + Int32(2) AS __cse_2, __cse_1, test.c\
\n Projection: test.a + test.b AS __cse_1, test.c\
let expected = "Projection: __common_expr_1 AS c1, __common_expr_1 AS c2, __common_expr_2 AS c3, __common_expr_2 AS c4\
\n Projection: test.c + Int32(2) AS __common_expr_2, __common_expr_1, test.c\
\n Projection: test.a + test.b AS __common_expr_1, test.c\
\n TableScan: test";

assert_optimized_plan_eq(expected, &plan, Some(config));

let config = &OptimizerContext::new();
let ___cse_1 = config.alias_generator().next(CSE_PREFIX);
let __cse_2 = config.alias_generator().next(CSE_PREFIX);
let _common_expr_1 = config.alias_generator().next(CSE_PREFIX);
let common_expr_2 = config.alias_generator().next(CSE_PREFIX);
let plan = LogicalPlanBuilder::from(table_scan.clone())
.project(vec![(col("a") + col("b")).alias(__cse_2.clone()), col("c")])?
.project(vec![
col(__cse_2.clone()).alias("c1"),
col(__cse_2).alias("c2"),
(col("a") + col("b")).alias(common_expr_2.clone()),
col("c"),
])?
.project(vec![
col(common_expr_2.clone()).alias("c1"),
col(common_expr_2).alias("c2"),
(col("c") + lit(2)).alias("c3"),
(col("c") + lit(2)).alias("c4"),
])?
.build()?;

let expected =
"Projection: __cse_2 AS c1, __cse_2 AS c2, __cse_3 AS c3, __cse_3 AS c4\
\n Projection: test.c + Int32(2) AS __cse_3, __cse_2, test.c\
\n Projection: test.a + test.b AS __cse_2, test.c\
let expected = "Projection: __common_expr_2 AS c1, __common_expr_2 AS c2, __common_expr_3 AS c3, __common_expr_3 AS c4\
\n Projection: test.c + Int32(2) AS __common_expr_3, __common_expr_2, test.c\
\n Projection: test.a + test.b AS __common_expr_2, test.c\
\n TableScan: test";

assert_optimized_plan_eq(expected, &plan, Some(config));
Expand Down
8 changes: 4 additions & 4 deletions datafusion/sqllogictest/test_files/group_by.slt
Original file line number Diff line number Diff line change
Expand Up @@ -4187,8 +4187,8 @@ EXPLAIN SELECT SUM(DISTINCT CAST(x AS DOUBLE)), MAX(DISTINCT CAST(x AS DOUBLE))
logical_plan
01)Projection: sum(alias1) AS sum(DISTINCT t1.x), MAX(alias1) AS MAX(DISTINCT t1.x)
02)--Aggregate: groupBy=[[t1.y]], aggr=[[sum(alias1), MAX(alias1)]]
03)----Aggregate: groupBy=[[t1.y, __cse_1 AS t1.x AS alias1]], aggr=[[]]
04)------Projection: CAST(t1.x AS Float64) AS __cse_1, t1.y
03)----Aggregate: groupBy=[[t1.y, __common_expr_1 AS t1.x AS alias1]], aggr=[[]]
04)------Projection: CAST(t1.x AS Float64) AS __common_expr_1, t1.y
05)--------TableScan: t1 projection=[x, y]
physical_plan
01)ProjectionExec: expr=[sum(alias1)@1 as sum(DISTINCT t1.x), MAX(alias1)@2 as MAX(DISTINCT t1.x)]
Expand All @@ -4200,8 +4200,8 @@ physical_plan
07)------------CoalesceBatchesExec: target_batch_size=2
08)--------------RepartitionExec: partitioning=Hash([y@0, alias1@1], 8), input_partitions=8
09)----------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1
10)------------------AggregateExec: mode=Partial, gby=[y@1 as y, __cse_1@0 as alias1], aggr=[]
11)--------------------ProjectionExec: expr=[CAST(x@0 AS Float64) as __cse_1, y@1 as y]
10)------------------AggregateExec: mode=Partial, gby=[y@1 as y, __common_expr_1@0 as alias1], aggr=[]
11)--------------------ProjectionExec: expr=[CAST(x@0 AS Float64) as __common_expr_1, y@1 as y]
12)----------------------MemoryExec: partitions=1, partition_sizes=[1]

# create an unbounded table that contains ordered timestamp.
Expand Down
16 changes: 8 additions & 8 deletions datafusion/sqllogictest/test_files/select.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1436,12 +1436,12 @@ query TT
EXPLAIN SELECT x/2, x/2+1 FROM t;
----
logical_plan
01)Projection: __cse_1 AS t.x / Int64(2), __cse_1 AS t.x / Int64(2) + Int64(1)
02)--Projection: t.x / Int64(2) AS __cse_1
01)Projection: __common_expr_1 AS t.x / Int64(2), __common_expr_1 AS t.x / Int64(2) + Int64(1)
02)--Projection: t.x / Int64(2) AS __common_expr_1
03)----TableScan: t projection=[x]
physical_plan
01)ProjectionExec: expr=[__cse_1@0 as t.x / Int64(2), __cse_1@0 + 1 as t.x / Int64(2) + Int64(1)]
02)--ProjectionExec: expr=[x@0 / 2 as __cse_1]
01)ProjectionExec: expr=[__common_expr_1@0 as t.x / Int64(2), __common_expr_1@0 + 1 as t.x / Int64(2) + Int64(1)]
02)--ProjectionExec: expr=[x@0 / 2 as __common_expr_1]
03)----MemoryExec: partitions=1, partition_sizes=[1]

query II
Expand All @@ -1454,12 +1454,12 @@ query TT
EXPLAIN SELECT abs(x), abs(x) + abs(y) FROM t;
----
logical_plan
01)Projection: __cse_1 AS abs(t.x), __cse_1 AS abs(t.x) + abs(t.y)
02)--Projection: abs(t.x) AS __cse_1, t.y
01)Projection: __common_expr_1 AS abs(t.x), __common_expr_1 AS abs(t.x) + abs(t.y)
02)--Projection: abs(t.x) AS __common_expr_1, t.y
03)----TableScan: t projection=[x, y]
physical_plan
01)ProjectionExec: expr=[__cse_1@0 as abs(t.x), __cse_1@0 + abs(y@1) as abs(t.x) + abs(t.y)]
02)--ProjectionExec: expr=[abs(x@0) as __cse_1, y@1 as y]
01)ProjectionExec: expr=[__common_expr_1@0 as abs(t.x), __common_expr_1@0 + abs(y@1) as abs(t.x) + abs(t.y)]
02)--ProjectionExec: expr=[abs(x@0) as __common_expr_1, y@1 as y]
03)----MemoryExec: partitions=1, partition_sizes=[1]

query II
Expand Down
8 changes: 4 additions & 4 deletions datafusion/sqllogictest/test_files/subquery.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1069,8 +1069,8 @@ query TT
explain select a/2, a/2 + 1 from t
----
logical_plan
01)Projection: __cse_1 AS t.a / Int64(2), __cse_1 AS t.a / Int64(2) + Int64(1)
02)--Projection: t.a / Int64(2) AS __cse_1
01)Projection: __common_expr_1 AS t.a / Int64(2), __common_expr_1 AS t.a / Int64(2) + Int64(1)
02)--Projection: t.a / Int64(2) AS __common_expr_1
03)----TableScan: t projection=[a]

statement ok
Expand All @@ -1080,8 +1080,8 @@ query TT
explain select a/2, a/2 + 1 from t
----
logical_plan
01)Projection: __cse_1 AS t.a / Int64(2), __cse_1 AS t.a / Int64(2) + Int64(1)
02)--Projection: t.a / Int64(2) AS __cse_1
01)Projection: __common_expr_1 AS t.a / Int64(2), __common_expr_1 AS t.a / Int64(2) + Int64(1)
02)--Projection: t.a / Int64(2) AS __common_expr_1
03)----TableScan: t projection=[a]

###
Expand Down
6 changes: 3 additions & 3 deletions datafusion/sqllogictest/test_files/tpch/q1.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ explain select
logical_plan
01)Sort: lineitem.l_returnflag ASC NULLS LAST, lineitem.l_linestatus ASC NULLS LAST
02)--Projection: lineitem.l_returnflag, lineitem.l_linestatus, sum(lineitem.l_quantity) AS sum_qty, sum(lineitem.l_extendedprice) AS sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax) AS sum_charge, AVG(lineitem.l_quantity) AS avg_qty, AVG(lineitem.l_extendedprice) AS avg_price, AVG(lineitem.l_discount) AS avg_disc, COUNT(*) AS count_order
03)----Aggregate: groupBy=[[lineitem.l_returnflag, lineitem.l_linestatus]], aggr=[[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(__cse_1) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(__cse_1 * (Decimal128(Some(1),20,0) + lineitem.l_tax)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), AVG(lineitem.l_quantity), AVG(lineitem.l_extendedprice), AVG(lineitem.l_discount), COUNT(Int64(1)) AS COUNT(*)]]
04)------Projection: lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS __cse_1, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus
03)----Aggregate: groupBy=[[lineitem.l_returnflag, lineitem.l_linestatus]], aggr=[[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(__common_expr_1) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(__common_expr_1 * (Decimal128(Some(1),20,0) + lineitem.l_tax)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), AVG(lineitem.l_quantity), AVG(lineitem.l_extendedprice), AVG(lineitem.l_discount), COUNT(Int64(1)) AS COUNT(*)]]
04)------Projection: lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS __common_expr_1, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus
05)--------Filter: lineitem.l_shipdate <= Date32("1998-09-02")
06)----------TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], partial_filters=[lineitem.l_shipdate <= Date32("1998-09-02")]
physical_plan
Expand All @@ -54,7 +54,7 @@ physical_plan
05)--------CoalesceBatchesExec: target_batch_size=8192
06)----------RepartitionExec: partitioning=Hash([l_returnflag@0, l_linestatus@1], 4), input_partitions=4
07)------------AggregateExec: mode=Partial, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), AVG(lineitem.l_quantity), AVG(lineitem.l_extendedprice), AVG(lineitem.l_discount), COUNT(*)]
08)--------------ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __cse_1, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus]
08)--------------ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_1, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus]
09)----------------CoalesceBatchesExec: target_batch_size=8192
10)------------------FilterExec: l_shipdate@6 <= 1998-09-02
11)--------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], has_header=false
Expand Down
Loading

0 comments on commit a592e69

Please sign in to comment.