@@ -774,7 +774,7 @@ Status PipelineXFragmentContext::_create_tree_helper(
         ObjectPool* pool, const std::vector<TPlanNode>& tnodes,
         const doris::TPipelineFragmentParams& request, const DescriptorTbl& descs,
         OperatorXPtr parent, int* node_idx, OperatorXPtr* root, PipelinePtr& cur_pipe,
-        int child_idx, const bool followed_by_shuffled_join) {
+        int child_idx, const bool followed_by_shuffled_operator) {
     // propagate error case
     if (*node_idx >= tnodes.size()) {
         // TODO: print thrift msg
@@ -785,11 +785,11 @@ Status PipelineXFragmentContext::_create_tree_helper(
     const TPlanNode& tnode = tnodes[*node_idx];
 
     int num_children = tnodes[*node_idx].num_children;
-    bool current_followed_by_shuffled_join = followed_by_shuffled_join;
+    bool current_followed_by_shuffled_operator = followed_by_shuffled_operator;
     OperatorXPtr op = nullptr;
     RETURN_IF_ERROR(_create_operator(pool, tnodes[*node_idx], request, descs, op, cur_pipe,
                                      parent == nullptr ? -1 : parent->node_id(), child_idx,
-                                     followed_by_shuffled_join));
+                                     followed_by_shuffled_operator));
 
     // assert(parent != nullptr || (node_idx == 0 && root_expr != nullptr));
     if (parent != nullptr) {
@@ -800,7 +800,7 @@ Status PipelineXFragmentContext::_create_tree_helper(
     }
 
     /**
-     * `ExchangeType::HASH_SHUFFLE` should be used if an operator is followed by a shuffled hash join.
+     * `ExchangeType::HASH_SHUFFLE` should be used if an operator is followed by a shuffled operator (shuffled hash join, union operator followed by co-located operators).
      *
      * For plan:
      * LocalExchange(id=0) -> Aggregation(id=1) -> ShuffledHashJoin(id=2)
@@ -814,8 +814,8 @@ Status PipelineXFragmentContext::_create_tree_helper(
             cur_pipe->operator_xs().empty()
                     ? cur_pipe->sink_x()->require_shuffled_data_distribution()
                     : op->require_shuffled_data_distribution();
-    current_followed_by_shuffled_join =
-            (followed_by_shuffled_join || op->is_shuffled_hash_join()) &&
+    current_followed_by_shuffled_operator =
+            (followed_by_shuffled_operator || op->is_shuffled_operator()) &&
             require_shuffled_data_distribution;
 
     cur_pipe->_name.push_back('-');
@@ -826,7 +826,7 @@ Status PipelineXFragmentContext::_create_tree_helper(
     for (int i = 0; i < num_children; i++) {
         ++*node_idx;
         RETURN_IF_ERROR(_create_tree_helper(pool, tnodes, request, descs, op, node_idx, nullptr,
-                                            cur_pipe, i, current_followed_by_shuffled_join));
+                                            cur_pipe, i, current_followed_by_shuffled_operator));
 
         // we are expecting a child, but have used all nodes
         // this means we have been given a bad tree and must fail
@@ -868,13 +868,13 @@ Status PipelineXFragmentContext::_add_local_exchange_impl(
     * `bucket_seq_to_instance_idx` is empty if no scan operator is contained in this fragment.
     * So co-located operators(e.g. Agg, Analytic) should use `HASH_SHUFFLE` instead of `BUCKET_HASH_SHUFFLE`.
     */
-    const bool followed_by_shuffled_join =
-            operator_xs.size() > idx ? operator_xs[idx]->followed_by_shuffled_join()
-                                     : cur_pipe->sink_x()->followed_by_shuffled_join();
+    const bool followed_by_shuffled_operator =
+            operator_xs.size() > idx ? operator_xs[idx]->followed_by_shuffled_operator()
+                                     : cur_pipe->sink_x()->followed_by_shuffled_operator();
     const bool should_disable_bucket_shuffle =
             bucket_seq_to_instance_idx.empty() &&
             shuffle_idx_to_instance_idx.find(-1) == shuffle_idx_to_instance_idx.end() &&
-            followed_by_shuffled_join;
+            followed_by_shuffled_operator;
     sink.reset(new LocalExchangeSinkOperatorX(
             sink_id, local_exchange_id,
             should_disable_bucket_shuffle ? _total_instances : _num_instances,
@@ -1050,7 +1050,7 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
                                                   const DescriptorTbl& descs, OperatorXPtr& op,
                                                   PipelinePtr& cur_pipe, int parent_idx,
                                                   int child_idx,
-                                                  const bool followed_by_shuffled_join) {
+                                                  const bool followed_by_shuffled_operator) {
     // We directly construct the operator from Thrift because the given array is in the order of preorder traversal.
     // Therefore, here we need to use a stack-like structure.
     _pipeline_parent_map.pop(cur_pipe, parent_idx, child_idx);
@@ -1124,7 +1124,7 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
             op.reset(new DistinctStreamingAggOperatorX(pool, next_operator_id(), tnode, descs,
                                                        _require_bucket_distribution));
             RETURN_IF_ERROR(cur_pipe->add_operator(op));
-            op->set_followed_by_shuffled_join(followed_by_shuffled_join);
+            op->set_followed_by_shuffled_operator(followed_by_shuffled_operator);
             _require_bucket_distribution =
                     _require_bucket_distribution || op->require_data_distribution();
         } else if (tnode.agg_node.__isset.use_streaming_preaggregation &&
@@ -1155,7 +1155,7 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
             sink.reset(new AggSinkOperatorX(pool, next_sink_operator_id(), tnode, descs,
                                             _require_bucket_distribution));
         }
-        sink->set_followed_by_shuffled_join(followed_by_shuffled_join);
+        sink->set_followed_by_shuffled_operator(followed_by_shuffled_operator);
         _require_bucket_distribution =
                 _require_bucket_distribution || sink->require_data_distribution();
         sink->set_dests_id({op->operator_id()});
@@ -1206,8 +1206,8 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
 
             _pipeline_parent_map.push(op->node_id(), cur_pipe);
             _pipeline_parent_map.push(op->node_id(), build_side_pipe);
-            sink->set_followed_by_shuffled_join(sink->is_shuffled_hash_join());
-            op->set_followed_by_shuffled_join(op->is_shuffled_hash_join());
+            sink->set_followed_by_shuffled_operator(sink->is_shuffled_operator());
+            op->set_followed_by_shuffled_operator(op->is_shuffled_operator());
         } else {
             op.reset(new HashJoinProbeOperatorX(pool, tnode, next_operator_id(), descs));
             RETURN_IF_ERROR(cur_pipe->add_operator(op));
@@ -1228,8 +1228,8 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
 
             _pipeline_parent_map.push(op->node_id(), cur_pipe);
             _pipeline_parent_map.push(op->node_id(), build_side_pipe);
-            sink->set_followed_by_shuffled_join(sink->is_shuffled_hash_join());
-            op->set_followed_by_shuffled_join(op->is_shuffled_hash_join());
+            sink->set_followed_by_shuffled_operator(sink->is_shuffled_operator());
+            op->set_followed_by_shuffled_operator(op->is_shuffled_operator());
         }
         _require_bucket_distribution =
                 _require_bucket_distribution || op->require_data_distribution();
@@ -1259,6 +1259,7 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
     case TPlanNodeType::UNION_NODE: {
         int child_count = tnode.num_children;
         op.reset(new UnionSourceOperatorX(pool, tnode, next_operator_id(), descs));
+        op->set_followed_by_shuffled_operator(_require_bucket_distribution);
         RETURN_IF_ERROR(cur_pipe->add_operator(op));
 
         const auto downstream_pipeline_id = cur_pipe->id();
@@ -1301,7 +1302,7 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
             sink.reset(new SortSinkOperatorX(pool, next_sink_operator_id(), tnode, descs,
                                              _require_bucket_distribution));
         }
-        sink->set_followed_by_shuffled_join(followed_by_shuffled_join);
+        sink->set_followed_by_shuffled_operator(followed_by_shuffled_operator);
         _require_bucket_distribution =
                 _require_bucket_distribution || sink->require_data_distribution();
         sink->set_dests_id({op->operator_id()});
@@ -1341,7 +1342,7 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
         DataSinkOperatorXPtr sink;
         sink.reset(new AnalyticSinkOperatorX(pool, next_sink_operator_id(), tnode, descs,
                                              _require_bucket_distribution));
-        sink->set_followed_by_shuffled_join(followed_by_shuffled_join);
+        sink->set_followed_by_shuffled_operator(followed_by_shuffled_operator);
         _require_bucket_distribution =
                 _require_bucket_distribution || sink->require_data_distribution();
         sink->set_dests_id({op->operator_id()});
@@ -1352,11 +1353,13 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN
 
     case TPlanNodeType::INTERSECT_NODE: {
         RETURN_IF_ERROR(_build_operators_for_set_operation_node<true>(
                 pool, tnode, descs, op, cur_pipe, parent_idx, child_idx));
+        op->set_followed_by_shuffled_operator(_require_bucket_distribution);
         break;
     }
     case TPlanNodeType::EXCEPT_NODE: {
         RETURN_IF_ERROR(_build_operators_for_set_operation_node<false>(
                 pool, tnode, descs, op, cur_pipe, parent_idx, child_idx));
+        op->set_followed_by_shuffled_operator(_require_bucket_distribution);
        break;
     }
     case TPlanNodeType::REPEAT_NODE: {
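
The sketch below is a minimal, self-contained illustration of the rule this patch generalizes, not Doris code: the types and functions (`Op`, `mark`, `choose_exchange_type`, `bucket_map_empty`) are invented for the example. It shows how the "followed by a shuffled operator" flag could propagate down a pre-order plan walk, and how an operator below a shuffled operator falls back to `HASH_SHUFFLE` when no bucket-to-instance mapping is available.

```cpp
#include <iostream>
#include <vector>

enum class ExchangeType { HASH_SHUFFLE, BUCKET_HASH_SHUFFLE };

struct Op {
    bool is_shuffled_operator = false;           // e.g. shuffled hash join, union feeding co-located ops
    bool require_shuffled_data_distribution = false;
    bool followed_by_shuffled_operator = false;  // filled in by mark() below
    std::vector<Op> children;
};

// Propagation sketch: the current operator sees the flag inherited from its
// ancestors; its children see the flag recomputed from the current operator.
void mark(Op& op, bool followed_by_shuffled_operator) {
    op.followed_by_shuffled_operator = followed_by_shuffled_operator;
    const bool current = (followed_by_shuffled_operator || op.is_shuffled_operator) &&
                         op.require_shuffled_data_distribution;
    for (Op& child : op.children) {
        mark(child, current);
    }
}

// Simplified exchange choice: with no bucket-to-instance mapping, an operator
// below a shuffled operator must use HASH_SHUFFLE instead of BUCKET_HASH_SHUFFLE.
ExchangeType choose_exchange_type(const Op& op, bool bucket_map_empty) {
    const bool should_disable_bucket_shuffle =
            bucket_map_empty && op.followed_by_shuffled_operator;
    return should_disable_bucket_shuffle ? ExchangeType::HASH_SHUFFLE
                                         : ExchangeType::BUCKET_HASH_SHUFFLE;
}

int main() {
    // Mirrors the comment's example plan: ShuffledHashJoin(id=2) above
    // Aggregation(id=1); the aggregation inherits the flag.
    Op join;
    join.is_shuffled_operator = true;
    join.require_shuffled_data_distribution = true;
    Op agg;
    agg.require_shuffled_data_distribution = true;
    join.children.push_back(agg);

    mark(join, /*followed_by_shuffled_operator=*/false);
    const bool uses_hash_shuffle =
            choose_exchange_type(join.children[0], /*bucket_map_empty=*/true) ==
            ExchangeType::HASH_SHUFFLE;
    std::cout << std::boolalpha << uses_hash_shuffle << "\n";  // prints: true
    return 0;
}
```

Under these assumptions, the aggregation below the shuffled hash join picks `HASH_SHUFFLE`, which is the behavior the rename from `followed_by_shuffled_join` to `followed_by_shuffled_operator` extends to union and set-operation nodes.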