Skip to content

Commit 94acb50

Browse files
zhongjian.xzjzhongjian.xzj
zhongjian.xzj
authored and
zhongjian.xzj
committed
[nereids] pull up join from union all rule
1 parent 9e0a2e8 commit 94acb50

File tree

23 files changed

+1411
-756
lines changed

23 files changed

+1411
-756
lines changed

fe/fe-core/src/main/java/org/apache/doris/catalog/constraint/ForeignKeyConstraint.java

+4
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ public String getReferencedColumnName(String column) {
6262
return foreignToReference.get(column);
6363
}
6464

65+
public ImmutableMap<String, String> getForeignToReference() {
66+
return foreignToReference;
67+
}
68+
6569
public Map<Column, Column> getForeignToPrimary(TableIf curTable) {
6670
ImmutableMap.Builder<Column, Column> columnBuilder = new ImmutableMap.Builder<>();
6771
TableIf refTable = referencedTable.toTableIf();

fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java

+17
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@
8787
import org.apache.doris.nereids.rules.rewrite.PruneOlapScanPartition;
8888
import org.apache.doris.nereids.rules.rewrite.PruneOlapScanTablet;
8989
import org.apache.doris.nereids.rules.rewrite.PullUpCteAnchor;
90+
import org.apache.doris.nereids.rules.rewrite.PullUpJoinFromUnionAll;
9091
import org.apache.doris.nereids.rules.rewrite.PullUpProjectUnderApply;
9192
import org.apache.doris.nereids.rules.rewrite.PullUpProjectUnderLimit;
9293
import org.apache.doris.nereids.rules.rewrite.PullUpProjectUnderTopN;
@@ -288,6 +289,21 @@ public class Rewriter extends AbstractBatchJobExecutor {
288289

289290
// this rule should invoke after infer predicate and push down distinct, and before push down limit
290291
custom(RuleType.ELIMINATE_JOIN_BY_FOREIGN_KEY, EliminateJoinByFK::new),
292+
// this rule should be after topic "Column pruning and infer predicate"
293+
topic("Join pull up",
294+
topDown(
295+
new EliminateFilter(),
296+
new PushDownFilterThroughProject(),
297+
new MergeProjects()
298+
),
299+
topDown(
300+
new PullUpJoinFromUnionAll()
301+
),
302+
custom(RuleType.COLUMN_PRUNING, ColumnPruning::new),
303+
bottomUp(RuleSet.PUSH_DOWN_FILTERS),
304+
custom(RuleType.ELIMINATE_UNNECESSARY_PROJECT, EliminateUnnecessaryProject::new)
305+
),
306+
291307
topic("Limit optimization",
292308
// TODO: the logical plan should not contains any phase information,
293309
// we should refactor like AggregateStrategies, e.g. LimitStrategies,
@@ -340,6 +356,7 @@ public class Rewriter extends AbstractBatchJobExecutor {
340356
custom(RuleType.ELIMINATE_SORT, EliminateSort::new),
341357
bottomUp(new EliminateEmptyRelation())
342358
),
359+
343360
// this rule batch must keep at the end of rewrite to do some plan check
344361
topic("Final rewrite and check",
345362
custom(RuleType.CHECK_DATA_TYPES, CheckDataTypes::new),

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java

+1
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@ public enum RuleType {
249249

250250
// split limit
251251
SPLIT_LIMIT(RuleTypeClass.REWRITE),
252+
PULL_UP_JOIN_FROM_UNIONALL(RuleTypeClass.REWRITE),
252253
// limit push down
253254
PUSH_LIMIT_THROUGH_JOIN(RuleTypeClass.REWRITE),
254255
PUSH_LIMIT_THROUGH_PROJECT_JOIN(RuleTypeClass.REWRITE),

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateJoinByFK.java

+5-1
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,11 @@ private boolean isPredicateCompatible(BiMap<Slot, Slot> equalSlots, Map<Column,
326326
.map(e -> e.rewriteUp(
327327
s -> s instanceof Slot ? primarySlotToForeign.getOrDefault(s, (Slot) s) : s))
328328
.collect(Collectors.toSet());
329-
return columnWithPredicates.get(fp.getKey()).containsAll(primaryPredicates);
329+
if (columnWithPredicates.get(fp.getKey()) == null && !columnWithPredicates.isEmpty()) {
330+
return false;
331+
} else {
332+
return columnWithPredicates.get(fp.getKey()).containsAll(primaryPredicates);
333+
}
330334
});
331335
}
332336
}

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PullUpJoinFromUnionAll.java

+685
Large diffs are not rendered by default.

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSetOperation.java

-2
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@
4040
import org.apache.doris.nereids.util.TypeCoercionUtils;
4141
import org.apache.doris.qe.SessionVariable;
4242

43-
import com.google.common.base.Preconditions;
4443
import com.google.common.collect.ImmutableList;
4544
import com.google.common.collect.ImmutableList.Builder;
4645

@@ -125,7 +124,6 @@ public List<NamedExpression> buildNewOutputs() {
125124

126125
// If the right child is nullable, need to ensure that the left child is also nullable
127126
private List<Slot> resetNullableForLeftOutputs() {
128-
Preconditions.checkState(children.size() == 2);
129127
List<Slot> resetNullableForLeftOutputs = new ArrayList<>();
130128
for (int i = 0; i < child(1).getOutput().size(); ++i) {
131129
if (child(1).getOutput().get(i).nullable() && !child(0).getOutput().get(i).nullable()) {

regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query11.out

+42-46
Original file line numberDiff line numberDiff line change
@@ -2,64 +2,60 @@
22
-- !ds_shape_11 --
33
PhysicalCteAnchor ( cteId=CTEId#0 )
44
--PhysicalCteProducer ( cteId=CTEId#0 )
5-
----PhysicalUnion
6-
------PhysicalProject
7-
--------hashAgg[GLOBAL]
8-
----------PhysicalDistribute
9-
------------hashAgg[LOCAL]
5+
----PhysicalProject
6+
------hashJoin[INNER_JOIN] hashCondition=((PULL_UP_UNIFIED_OUTPUT_ALIAS = customer.c_customer_sk)) otherCondition=()
7+
--------PhysicalDistribute
8+
----------PhysicalProject
9+
------------PhysicalUnion
1010
--------------PhysicalProject
11-
----------------hashJoin[INNER_JOIN] hashCondition=((customer.c_customer_sk = store_sales.ss_customer_sk)) otherCondition=() build RFs:RF1 c_customer_sk->[ss_customer_sk]
11+
----------------hashAgg[GLOBAL]
1212
------------------PhysicalDistribute
13-
--------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
13+
--------------------hashAgg[LOCAL]
1414
----------------------PhysicalProject
15-
------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1
16-
----------------------PhysicalDistribute
17-
------------------------PhysicalProject
18-
--------------------------filter(d_year IN (1998, 1999))
19-
----------------------------PhysicalOlapScan[date_dim]
20-
------------------PhysicalDistribute
21-
--------------------PhysicalProject
22-
----------------------PhysicalOlapScan[customer]
23-
------PhysicalProject
24-
--------hashAgg[GLOBAL]
25-
----------PhysicalDistribute
26-
------------hashAgg[LOCAL]
15+
------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk]
16+
--------------------------PhysicalProject
17+
----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0
18+
--------------------------PhysicalDistribute
19+
----------------------------PhysicalProject
20+
------------------------------filter(d_year IN (1998, 1999))
21+
--------------------------------PhysicalOlapScan[date_dim]
2722
--------------PhysicalProject
28-
----------------hashJoin[INNER_JOIN] hashCondition=((customer.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[ws_bill_customer_sk]
23+
----------------hashAgg[GLOBAL]
2924
------------------PhysicalDistribute
30-
--------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk]
25+
--------------------hashAgg[LOCAL]
3126
----------------------PhysicalProject
32-
------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3
33-
----------------------PhysicalDistribute
34-
------------------------PhysicalProject
35-
--------------------------filter(d_year IN (1998, 1999))
36-
----------------------------PhysicalOlapScan[date_dim]
37-
------------------PhysicalDistribute
38-
--------------------PhysicalProject
39-
----------------------PhysicalOlapScan[customer]
27+
------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
28+
--------------------------PhysicalProject
29+
----------------------------PhysicalOlapScan[web_sales] apply RFs: RF1
30+
--------------------------PhysicalDistribute
31+
----------------------------PhysicalProject
32+
------------------------------filter(d_year IN (1998, 1999))
33+
--------------------------------PhysicalOlapScan[date_dim]
34+
--------PhysicalDistribute
35+
----------PhysicalProject
36+
------------PhysicalOlapScan[customer]
4037
--PhysicalResultSink
4138
----PhysicalTopN[MERGE_SORT]
4239
------PhysicalDistribute
4340
--------PhysicalTopN[LOCAL_SORT]
4441
----------PhysicalProject
45-
------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id)) otherCondition=((if((year_total > 0.00), (cast(year_total as DECIMALV3(38, 8)) / year_total), 0.000000) > if((year_total > 0.00), (cast(year_total as DECIMALV3(38, 8)) / year_total), 0.000000)))
46-
--------------PhysicalProject
47-
----------------hashJoin[INNER_JOIN] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=()
48-
------------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=()
49-
--------------------PhysicalDistribute
50-
----------------------PhysicalProject
51-
------------------------filter((t_s_firstyear.dyear = 1998) and (t_s_firstyear.sale_type = 's') and (t_s_firstyear.year_total > 0.00))
52-
--------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
53-
--------------------PhysicalDistribute
54-
----------------------PhysicalProject
55-
------------------------filter((t_w_firstyear.dyear = 1998) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.00))
56-
--------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
57-
------------------PhysicalDistribute
58-
--------------------PhysicalProject
59-
----------------------filter((t_s_secyear.dyear = 1999) and (t_s_secyear.sale_type = 's'))
60-
------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
42+
------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=((if((year_total > 0.00), (cast(year_total as DECIMALV3(38, 8)) / year_total), 0.000000) > if((year_total > 0.00), (cast(year_total as DECIMALV3(38, 8)) / year_total), 0.000000)))
6143
--------------PhysicalDistribute
6244
----------------PhysicalProject
63-
------------------filter((t_w_secyear.dyear = 1999) and (t_w_secyear.sale_type = 'w'))
45+
------------------filter((t_w_firstyear.dyear = 1998) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.00))
6446
--------------------PhysicalCteConsumer ( cteId=CTEId#0 )
47+
--------------hashJoin[INNER_JOIN] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=()
48+
----------------PhysicalDistribute
49+
------------------PhysicalProject
50+
--------------------filter((t_s_secyear.dyear = 1999) and (t_s_secyear.sale_type = 's'))
51+
----------------------PhysicalCteConsumer ( cteId=CTEId#0 )
52+
----------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id)) otherCondition=()
53+
------------------PhysicalDistribute
54+
--------------------PhysicalProject
55+
----------------------filter((t_w_secyear.dyear = 1999) and (t_w_secyear.sale_type = 'w'))
56+
------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
57+
------------------PhysicalDistribute
58+
--------------------PhysicalProject
59+
----------------------filter((t_s_firstyear.dyear = 1998) and (t_s_firstyear.sale_type = 's') and (t_s_firstyear.year_total > 0.00))
60+
------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
6561

0 commit comments

Comments
 (0)