Skip to content

Commit b982482

Browse files
committed
[fix](join) incorrect result of left semi/anti join with empty build side
1 parent b1c5747 commit b982482

File tree

3 files changed

+56
-4
lines changed

3 files changed

+56
-4
lines changed

be/src/vec/common/hash_table/hash_map.h

+35
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,9 @@ class JoinHashMapTable : public HashMapTable<Key, Cell, Hash, Grower, Allocator>
226226
template <int JoinOpType>
227227
void prepare_build(size_t num_elem, int batch_size, bool has_null_key) {
228228
_has_null_key = has_null_key;
229+
230+
// The first row on the build side is not actually from the build-side table, so num_elem <= 1 means the build side is empty.
231+
_empty_build_side = num_elem <= 1;
229232
max_batch_size = batch_size;
230233
bucket_size = calc_bucket_size(num_elem + 1);
231234
first.resize(bucket_size + 1);
@@ -262,6 +265,14 @@ class JoinHashMapTable : public HashMapTable<Key, Cell, Hash, Grower, Allocator>
262265
uint32_t* __restrict probe_idxs, bool& probe_visited,
263266
uint32_t* __restrict build_idxs,
264267
doris::vectorized::ColumnFilterHelper* mark_column) {
268+
if constexpr (JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
269+
if (_empty_build_side) {
270+
return _process_null_aware_left_anti_join_for_empty_build_side<
271+
JoinOpType, with_other_conjuncts, is_mark_join>(
272+
probe_idx, probe_rows, probe_idxs, build_idxs, mark_column);
273+
}
274+
}
275+
265276
if constexpr (is_mark_join) {
266277
return _find_batch_mark<JoinOpType, with_other_conjuncts>(
267278
keys, build_idx_map, probe_idx, probe_rows, probe_idxs, build_idxs,
@@ -367,6 +378,29 @@ class JoinHashMapTable : public HashMapTable<Key, Cell, Hash, Grower, Allocator>
367378
return std::tuple {probe_idx, 0U, matched_cnt};
368379
}
369380

381+
// Handles a batch of probe rows for NULL_AWARE_LEFT_ANTI_JOIN without looking
// at any keys or hash buckets: every probe row in [probe_idx, probe_rows) is
// emitted until max_batch_size rows have been collected.
//
// Template parameters:
//   JoinOpType           - must be NULL_AWARE_LEFT_ANTI_JOIN (enforced by the static_assert).
//   with_other_conjuncts - when true, the mark column is left untouched here.
//   is_mark_join         - when true, build_idxs is also filled for each emitted row.
//
// Parameters:
//   probe_idx    - index of the first probe row to emit.
//   probe_rows   - one past the last probe row index.
//   probe_idxs   - output; receives the index of each emitted probe row.
//   build_idxs   - output; written only for mark joins, always with 0.
//   mark_column  - written only when is_mark_join && !with_other_conjuncts.
//
// Returns a tuple {next probe index to process, 0U, number of rows emitted}.
template <int JoinOpType, bool with_other_conjuncts, bool is_mark_join>
382+
auto _process_null_aware_left_anti_join_for_empty_build_side(
383+
int probe_idx, int probe_rows, uint32_t* __restrict probe_idxs,
384+
uint32_t* __restrict build_idxs, doris::vectorized::ColumnFilterHelper* mark_column) {
385+
static_assert(JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN);
386+
auto matched_cnt = 0;
387+
const auto batch_size = max_batch_size;
388+
389+
// Emit probe rows one by one until the input is exhausted or the batch is full.
while (probe_idx < probe_rows && matched_cnt < batch_size) {
390+
probe_idxs[matched_cnt] = probe_idx++;
391+
if constexpr (is_mark_join) {
392+
// NOTE(review): presumably build row 0 is the placeholder row that is not from the build table -- confirm.
build_idxs[matched_cnt] = 0;
393+
}
394+
++matched_cnt;
395+
}
396+
397+
if constexpr (is_mark_join && !with_other_conjuncts) {
398+
// NOTE(review): presumably sets the mark of every emitted row to true -- confirm against ColumnFilterHelper::resize_fill.
mark_column->resize_fill(matched_cnt, 1);
399+
}
400+
401+
return std::tuple {probe_idx, 0U, matched_cnt};
402+
}
403+
370404
auto _find_batch_right_semi_anti(const Key* __restrict keys,
371405
const uint32_t* __restrict build_idx_map, int probe_idx,
372406
int probe_rows) {
@@ -532,6 +566,7 @@ class JoinHashMapTable : public HashMapTable<Key, Cell, Hash, Grower, Allocator>
532566
Cell cell;
533567
doris::vectorized::Arena* pool;
534568
bool _has_null_key = false;
569+
bool _empty_build_side = true;
535570
};
536571

537572
template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>,

regression-test/data/correctness_p0/test_null_aware_left_anti_join.out

+7
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,10 @@
99

1010
-- !select --
1111

12+
-- !anti_emtpy_right --
13+
\N
14+
1
15+
3
16+
17+
-- !semi_emtpy_right --
18+

regression-test/suites/correctness_p0/test_null_aware_left_anti_join.groovy

+14-4
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,21 @@ suite("test_null_aware_left_anti_join") {
6060
sql """ set parallel_pipeline_task_num=2; """
6161
qt_select """ select ${tableName2}.k1 from ${tableName2} where k1 not in (select ${tableName1}.k1 from ${tableName1}) order by ${tableName2}.k1; """
6262

63-
sql """
64-
drop table if exists ${tableName2};
63+
// In a left anti join, if the right side is empty, all rows of the left side (NULLs included) should be output.
64+
qt_anti_emtpy_right """
65+
select
66+
*
67+
from ${tableName1} t1 where k1 not in (
68+
select k1 from ${tableName2} t2 where t2.k1 > 2
69+
) order by 1;
6570
"""
6671

67-
sql """
68-
drop table if exists ${tableName1};
72+
// In a left semi join, if the right side is empty, no rows should be output.
73+
qt_semi_emtpy_right """
74+
select
75+
*
76+
from ${tableName1} t1 where k1 in (
77+
select k1 from ${tableName2} t2 where t2.k1 > 2
78+
) order by 1;
6979
"""
7080
}

0 commit comments

Comments
 (0)