@@ -667,7 +667,7 @@ Status PipelineFragmentContext::_create_tree_helper(ObjectPool* pool,
                                                      const DescriptorTbl& descs, OperatorPtr parent,
                                                      int* node_idx, OperatorPtr* root,
                                                      PipelinePtr& cur_pipe, int child_idx,
-                                                     const bool followed_by_shuffled_join) {
+                                                     const bool followed_by_shuffled_operator) {
     // propagate error case
     if (*node_idx >= tnodes.size()) {
         return Status::InternalError(
@@ -677,11 +677,11 @@ Status PipelineFragmentContext::_create_tree_helper(ObjectPool* pool,
     const TPlanNode& tnode = tnodes[*node_idx];
 
     int num_children = tnodes[*node_idx].num_children;
-    bool current_followed_by_shuffled_join = followed_by_shuffled_join;
+    bool current_followed_by_shuffled_operator = followed_by_shuffled_operator;
     OperatorPtr op = nullptr;
     RETURN_IF_ERROR(_create_operator(pool, tnodes[*node_idx], request, descs, op, cur_pipe,
                                      parent == nullptr ? -1 : parent->node_id(), child_idx,
-                                     followed_by_shuffled_join));
+                                     followed_by_shuffled_operator));
 
     // assert(parent != nullptr || (node_idx == 0 && root_expr != nullptr));
     if (parent != nullptr) {
@@ -691,7 +691,7 @@ Status PipelineFragmentContext::_create_tree_helper(ObjectPool* pool,
         *root = op;
     }
     /**
-     * `ExchangeType::HASH_SHUFFLE` should be used if an operator is followed by a shuffled hash join.
+     * `ExchangeType::HASH_SHUFFLE` should be used if an operator is followed by a shuffled operator (a shuffled hash join, or a union operator followed by co-located operators).
     *
     * For plan:
     * LocalExchange(id=0) -> Aggregation(id=1) -> ShuffledHashJoin(id=2)
@@ -704,15 +704,15 @@ Status PipelineFragmentContext::_create_tree_helper(ObjectPool* pool,
     auto require_shuffled_data_distribution =
             cur_pipe->operators().empty() ? cur_pipe->sink()->require_shuffled_data_distribution()
                                           : op->require_shuffled_data_distribution();
-    current_followed_by_shuffled_join =
-            (followed_by_shuffled_join || op->is_shuffled_hash_join()) &&
+    current_followed_by_shuffled_operator =
+            (followed_by_shuffled_operator || op->is_shuffled_operator()) &&
             require_shuffled_data_distribution;
 
     // rely on that tnodes is preorder of the plan
     for (int i = 0; i < num_children; i++) {
         ++*node_idx;
         RETURN_IF_ERROR(_create_tree_helper(pool, tnodes, request, descs, op, node_idx, nullptr,
-                                            cur_pipe, i, current_followed_by_shuffled_join));
+                                            cur_pipe, i, current_followed_by_shuffled_operator));
 
         // we are expecting a child, but have used all nodes
         // this means we have been given a bad tree and must fail
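
The propagation rule in the hunk above is easiest to see with a tiny standalone model. The sketch below is not Doris code: PlanNode, walk, and the example tree are hypothetical stand-ins; only the boolean rule ((parent flag || node is a shuffled operator) && node requires shuffled data distribution) and the preorder recursion over children are taken from the diff.

#include <cstdio>
#include <vector>

// Hypothetical, simplified plan node (not the real TPlanNode/OperatorXBase types).
struct PlanNode {
    const char* name;
    bool is_shuffled_operator;               // e.g. a shuffled hash join
    bool require_shuffled_data_distribution;
    std::vector<PlanNode> children;          // inputs of this operator
};

// Mirrors _create_tree_helper: the flag a child receives is true only if its parent
// chain leads to a shuffled operator AND the current node requires shuffled data
// distribution; otherwise the chain is broken and the flag resets to false.
void walk(const PlanNode& node, bool followed_by_shuffled_operator) {
    bool current = (followed_by_shuffled_operator || node.is_shuffled_operator) &&
                   node.require_shuffled_data_distribution;
    std::printf("children of %s see followed_by_shuffled_operator=%d\n", node.name, current);
    for (const PlanNode& child : node.children) {
        walk(child, current); // preorder, matching the order of the tnodes array
    }
}

int main() {
    // LocalExchange(id=0) -> Aggregation(id=1) -> ShuffledHashJoin(id=2), as in the comment
    // above: the join is the root of the tree and the local exchange is the deepest input.
    PlanNode tree{"ShuffledHashJoin", true, true,
                  {PlanNode{"Aggregation", false, true,
                            {PlanNode{"LocalExchange", false, false, {}}}}}};
    walk(tree, /*followed_by_shuffled_operator=*/false);
    return 0;
}
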
@@ -753,13 +753,13 @@ Status PipelineFragmentContext::_add_local_exchange_impl(
     * `bucket_seq_to_instance_idx` is empty if no scan operator is contained in this fragment.
     * So co-located operators(e.g. Agg, Analytic) should use `HASH_SHUFFLE` instead of `BUCKET_HASH_SHUFFLE`.
     */
-    const bool followed_by_shuffled_join = operators.size() > idx
-                                                   ? operators[idx]->followed_by_shuffled_join()
-                                                   : cur_pipe->sink()->followed_by_shuffled_join();
+    const bool followed_by_shuffled_operator =
+            operators.size() > idx ? operators[idx]->followed_by_shuffled_operator()
+                                   : cur_pipe->sink()->followed_by_shuffled_operator();
     const bool should_disable_bucket_shuffle =
             bucket_seq_to_instance_idx.empty() &&
             shuffle_idx_to_instance_idx.find(-1) == shuffle_idx_to_instance_idx.end() &&
-            followed_by_shuffled_join;
+            followed_by_shuffled_operator;
     sink.reset(new LocalExchangeSinkOperatorX(
             sink_id, local_exchange_id,
             should_disable_bucket_shuffle ? _total_instances : _num_instances,
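
For the HASH_SHUFFLE vs. BUCKET_HASH_SHUFFLE decision above, a condensed sketch may help. This is not the real _add_local_exchange_impl signature: the map types, the ExchangeKind enum, and pick_exchange are hypothetical; only the three-part condition behind should_disable_bucket_shuffle is copied from the diff.

#include <cstdio>
#include <map>

// Hypothetical enum standing in for the relevant ExchangeType values.
enum class ExchangeKind { BUCKET_HASH_SHUFFLE, HASH_SHUFFLE };

// Condenses the decision: fall back to plain HASH_SHUFFLE when no scan provided a
// bucket-to-instance mapping, the shuffle map has no -1 entry, and the data feeds a
// shuffled operator downstream.
ExchangeKind pick_exchange(const std::map<int, int>& bucket_seq_to_instance_idx,
                           const std::map<int, int>& shuffle_idx_to_instance_idx,
                           bool followed_by_shuffled_operator) {
    const bool should_disable_bucket_shuffle =
            bucket_seq_to_instance_idx.empty() &&
            shuffle_idx_to_instance_idx.find(-1) == shuffle_idx_to_instance_idx.end() &&
            followed_by_shuffled_operator;
    return should_disable_bucket_shuffle ? ExchangeKind::HASH_SHUFFLE
                                         : ExchangeKind::BUCKET_HASH_SHUFFLE;
}

int main() {
    // A fragment without a scan node: the bucket map is empty, so an operator that is
    // followed by a shuffled operator gets HASH_SHUFFLE (and the real sink is then sized
    // with _total_instances rather than _num_instances).
    std::map<int, int> bucket_seq_to_instance_idx;               // empty: no scan in this fragment
    std::map<int, int> shuffle_idx_to_instance_idx{{0, 0}, {1, 1}};
    ExchangeKind kind = pick_exchange(bucket_seq_to_instance_idx, shuffle_idx_to_instance_idx,
                                      /*followed_by_shuffled_operator=*/true);
    std::printf("HASH_SHUFFLE chosen: %d\n", kind == ExchangeKind::HASH_SHUFFLE);
    return 0;
}
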
@@ -1199,7 +1199,7 @@ Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNo
                                                  const DescriptorTbl& descs, OperatorPtr& op,
                                                  PipelinePtr& cur_pipe, int parent_idx,
                                                  int child_idx,
-                                                 const bool followed_by_shuffled_join) {
+                                                 const bool followed_by_shuffled_operator) {
     // We directly construct the operator from Thrift because the given array is in the order of preorder traversal.
     // Therefore, here we need to use a stack-like structure.
     _pipeline_parent_map.pop(cur_pipe, parent_idx, child_idx);
@@ -1321,15 +1321,15 @@ Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNo
 
             op.reset(new DistinctStreamingAggOperatorX(pool, next_operator_id(), tnode, descs,
                                                        _require_bucket_distribution));
-            op->set_followed_by_shuffled_join(false);
+            op->set_followed_by_shuffled_operator(false);
             _require_bucket_distribution = true;
             RETURN_IF_ERROR(new_pipe->add_operator(op));
             RETURN_IF_ERROR(cur_pipe->operators().front()->set_child(op));
             cur_pipe = new_pipe;
         } else {
             op.reset(new DistinctStreamingAggOperatorX(pool, next_operator_id(), tnode, descs,
                                                        _require_bucket_distribution));
-            op->set_followed_by_shuffled_join(followed_by_shuffled_join);
+            op->set_followed_by_shuffled_operator(followed_by_shuffled_operator);
             _require_bucket_distribution =
                     _require_bucket_distribution || op->require_data_distribution();
             RETURN_IF_ERROR(cur_pipe->add_operator(op));
@@ -1384,7 +1384,7 @@ Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNo
             sink.reset(new AggSinkOperatorX(pool, next_sink_operator_id(), tnode, descs,
                                             _require_bucket_distribution));
         }
-        sink->set_followed_by_shuffled_join(followed_by_shuffled_join);
+        sink->set_followed_by_shuffled_operator(followed_by_shuffled_operator);
         _require_bucket_distribution =
                 _require_bucket_distribution || sink->require_data_distribution();
         sink->set_dests_id({op->operator_id()});
@@ -1434,8 +1434,8 @@ Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNo
 
             _pipeline_parent_map.push(op->node_id(), cur_pipe);
             _pipeline_parent_map.push(op->node_id(), build_side_pipe);
-            sink->set_followed_by_shuffled_join(sink->is_shuffled_hash_join());
-            op->set_followed_by_shuffled_join(op->is_shuffled_hash_join());
+            sink->set_followed_by_shuffled_operator(sink->is_shuffled_operator());
+            op->set_followed_by_shuffled_operator(op->is_shuffled_operator());
         } else {
             op.reset(new HashJoinProbeOperatorX(pool, tnode, next_operator_id(), descs));
             RETURN_IF_ERROR(cur_pipe->add_operator(op));
@@ -1456,8 +1456,8 @@ Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNo
 
             _pipeline_parent_map.push(op->node_id(), cur_pipe);
             _pipeline_parent_map.push(op->node_id(), build_side_pipe);
-            sink->set_followed_by_shuffled_join(sink->is_shuffled_hash_join());
-            op->set_followed_by_shuffled_join(op->is_shuffled_hash_join());
+            sink->set_followed_by_shuffled_operator(sink->is_shuffled_operator());
+            op->set_followed_by_shuffled_operator(op->is_shuffled_operator());
         }
         _require_bucket_distribution =
                 _require_bucket_distribution || op->require_data_distribution();
@@ -1487,6 +1487,7 @@ Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNo
     case TPlanNodeType::UNION_NODE: {
         int child_count = tnode.num_children;
         op.reset(new UnionSourceOperatorX(pool, tnode, next_operator_id(), descs));
+        op->set_followed_by_shuffled_operator(_require_bucket_distribution);
         RETURN_IF_ERROR(cur_pipe->add_operator(op));
 
         const auto downstream_pipeline_id = cur_pipe->id();
@@ -1498,6 +1499,7 @@ Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNo
             _dag[downstream_pipeline_id].push_back(build_side_pipe->id());
             DataSinkOperatorPtr sink;
             sink.reset(new UnionSinkOperatorX(i, next_sink_operator_id(), pool, tnode, descs));
+            sink->set_followed_by_shuffled_operator(_require_bucket_distribution);
             sink->set_dests_id({op->operator_id()});
             RETURN_IF_ERROR(build_side_pipe->set_sink(sink));
             RETURN_IF_ERROR(build_side_pipe->sink()->init(tnode, _runtime_state.get()));
@@ -1531,7 +1533,7 @@ Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNo
             sink.reset(new SortSinkOperatorX(pool, next_sink_operator_id(), tnode, descs,
                                              _require_bucket_distribution));
         }
-        sink->set_followed_by_shuffled_join(followed_by_shuffled_join);
+        sink->set_followed_by_shuffled_operator(followed_by_shuffled_operator);
         _require_bucket_distribution =
                 _require_bucket_distribution || sink->require_data_distribution();
         sink->set_dests_id({op->operator_id()});
@@ -1571,7 +1573,7 @@ Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNo
         DataSinkOperatorPtr sink;
         sink.reset(new AnalyticSinkOperatorX(pool, next_sink_operator_id(), tnode, descs,
                                              _require_bucket_distribution));
-        sink->set_followed_by_shuffled_join(followed_by_shuffled_join);
+        sink->set_followed_by_shuffled_operator(followed_by_shuffled_operator);
         _require_bucket_distribution =
                 _require_bucket_distribution || sink->require_data_distribution();
         sink->set_dests_id({op->operator_id()});
@@ -1582,11 +1584,13 @@ Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNo
     case TPlanNodeType::INTERSECT_NODE: {
         RETURN_IF_ERROR(_build_operators_for_set_operation_node<true>(
                 pool, tnode, descs, op, cur_pipe, parent_idx, child_idx));
+        op->set_followed_by_shuffled_operator(_require_bucket_distribution);
        break;
     }
     case TPlanNodeType::EXCEPT_NODE: {
         RETURN_IF_ERROR(_build_operators_for_set_operation_node<false>(
                 pool, tnode, descs, op, cur_pipe, parent_idx, child_idx));
+        op->set_followed_by_shuffled_operator(_require_bucket_distribution);
        break;
     }
     case TPlanNodeType::REPEAT_NODE: {