Skip to content

Commit

Permalink
[Fix](nereids)make agg output unchanged after normalized repeat (apac…
Browse files Browse the repository at this point in the history
…he#36369)

cherry-pick apache#36207 to branch-2.0

The NormalizeRepeat rule can change the output of agg.
For example:

         SELECT
             col_int_undef_signed2 AS C1 ,
             col_int_undef_signed2
         FROM
             normalize_repeat_name_unchanged
         GROUP BY
         GROUPING SETS (
         (col_int_undef_signed2),
         (col_int_undef_signed2))

Before fixing the bug, the plan is:

LogicalResultSink[97] ( outputExprs=[C1#7, col_int_undef_signed2#1] )
      +--LogicalProject[94] ( distinct=false, projects=[C1#7, C1#7], excepts=[] )
         +--LogicalAggregate[93] ( groupByExpr=[C1#7, GROUPING_ID#8], outputExpr=[C1#7, GROUPING_ID#8], hasRepeat=true )
            +--LogicalRepeat ( groupingSets=[[C1#7], [C1#7]], outputExpressions=[C1#7, GROUPING_ID#8] )
               +--LogicalProject[91] ( distinct=false, projects=[col_int_undef_signed2#1 AS `C1`#7], excepts=[] )
                  +--LogicalOlapScan (  )

As a result, LogicalResultSink cannot find one of its columns and reports this error:
Input slot(s) not in childs output: col_int_undef_signed2#1 in plan:
LogicalResultSink[97] ( outputExprs=[C1#7, col_int_undef_signed2#1] )
child output is: [C1#7]

This PR keeps the aggregate output unchanged after the NormalizeRepeat rule runs.
After the fix, the plan is:

LogicalResultSink[97] ( outputExprs=[C1#7, col_int_undef_signed2#1] )
      +--LogicalProject[94] ( distinct=false, projects=[C1#7, C1#7 as `col_int_undef_signed2`#1], excepts=[] )
         +--LogicalAggregate[93] ( groupByExpr=[C1#7, GROUPING_ID#8], outputExpr=[C1#7, GROUPING_ID#8], hasRepeat=true )
            +--LogicalRepeat ( groupingSets=[[C1#7], [C1#7]], outputExpressions=[C1#7, GROUPING_ID#8] )
               +--LogicalProject[91] ( distinct=false, projects=[col_int_undef_signed2#1 AS `C1`#7], excepts=[] )
                  +--LogicalOlapScan (  )
  • Loading branch information
feiniaofeiafei authored Jun 19, 2024
1 parent 9589428 commit 2e2f102
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import org.apache.doris.nereids.util.PlanUtils;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableList.Builder;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
Expand Down Expand Up @@ -179,6 +180,7 @@ private LogicalAggregate<Plan> normalizeRepeat(LogicalRepeat<Plan> repeat) {
.addAll(groupingSetsUsedSlot)
.addAll(allVirtualSlots)
.build();
normalizedAggOutput = getExprIdUnchangedNormalizedAggOutput(normalizedAggOutput, repeat.getOutputExpressions());
return new LogicalAggregate<>(normalizedAggGroupBy, (List) normalizedAggOutput,
Optional.of(normalizedRepeat), normalizedRepeat);
}
Expand Down Expand Up @@ -276,4 +278,25 @@ private Expression normalizeGroupingScalarFunction(NormalizeToSlotContext contex
return expr;
}
}

private static List<NamedExpression> getExprIdUnchangedNormalizedAggOutput(
List<NamedExpression> normalizedAggOutput, List<NamedExpression> originalAggOutput) {
Builder<NamedExpression> builder = new ImmutableList.Builder<>();
for (int i = 0; i < originalAggOutput.size(); i++) {
NamedExpression e = normalizedAggOutput.get(i);
// process Expression like Alias(SlotReference#0)#0
if (e instanceof Alias && e.child(0) instanceof SlotReference) {
SlotReference slotReference = (SlotReference) e.child(0);
if (slotReference.getExprId().equals(e.getExprId())) {
e = slotReference;
}
}
// Make the output ExprId unchanged
if (!e.getExprId().equals(originalAggOutput.get(i).getExprId())) {
e = new Alias(originalAggOutput.get(i).getExprId(), e, originalAggOutput.get(i).getName());
}
builder.add(e);
}
return builder.build();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !test_name_unchange --
\N \N
\N \N
-2169155 -2169155
-2169155 -2169155
-1760025 -1760025
-1760025 -1760025
-27328 -27328
-27328 -27328
-23380 -23380
-23380 -23380
-23025 -23025
-23025 -23025
-127 -127
-127 -127
-88 -88
-88 -88
-73 -73
-73 -73
25 25
25 25
5694 5694
5694 5694
29932 29932
29932 29932
5907087 5907087
5907087 5907087
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
suite("grouping_normalize_test"){
sql "SET enable_nereids_planner=true"
sql "SET enable_fallback_to_original_planner=false"

sql "drop table if exists normalize_repeat_name_unchanged"
sql """create table normalize_repeat_name_unchanged (
col_int_undef_signed int/*agg_type_placeholder*/ ,
col_int_undef_signed2 int/*agg_type_placeholder*/ ,
col_float_undef_signed float/*agg_type_placeholder*/ ,
col_int_undef_signed3 int/*agg_type_placeholder*/ ,
col_int_undef_signed4 int/*agg_type_placeholder*/ ,
col_int_undef_signed5 int/*agg_type_placeholder*/ ,
pk int/*agg_type_placeholder*/
) engine=olap
distributed by hash(pk) buckets 10
properties("replication_num" = "1");"""
sql """
insert into normalize_repeat_name_unchanged(pk,col_int_undef_signed,col_int_undef_signed2,col_float_undef_signed,
col_int_undef_signed3,col_int_undef_signed4,col_int_undef_signed5) values (0,null,-27328,5595590,null,null,5767077),(1,3831,null,87,-14582,21,null),
(2,10131,5907087,28248,2473748,88,-18315),(3,2352090,5694,5173440,null,null,-31126),(4,-26805,29932,null,-55,3148,-6705245),(5,null,null,41,57,-3060427,null),
(6,118,25,3472000,-123,null,-2934940),(7,null,null,-109,112,-7344754,4326526),(8,null,-2169155,-19402,null,null,26943),(9,46,null,1736620,30084,13838,null),
(10,24708,null,null,-806832,-116,676),(11,2232,-23025,null,9665,-27413,13457),(12,-6,-127,-5007917,20521,-48,2709),(13,-72,-127,3258,null,-6394361,-5580),
(14,4494439,-1760025,-16580,66,6562396,-280256),(15,6099281,-73,-5376852,-303421,null,-1843),(16,122,-23380,null,7350221,111,null),
(17,null,null,11356,null,11799,108),(18,-91,-88,39,-29582,null,121),(19,4991662,null,-220,7593505,-54,4086882);"""

qt_test_name_unchange """
SELECT
col_int_undef_signed2 AS C1 ,
col_int_undef_signed2
FROM
normalize_repeat_name_unchanged
GROUP BY
GROUPING SETS (
(col_int_undef_signed2),
(col_int_undef_signed2))
order by 1,2
"""
}

0 comments on commit 2e2f102

Please sign in to comment.