Skip to content

Commit

Permalink
Fix filter bitvector init timing
Browse files Browse the repository at this point in the history
  • Loading branch information
zanmato1984 committed Jan 4, 2024
1 parent 2e496db commit cbf1b58
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 4 deletions.
7 changes: 5 additions & 2 deletions cpp/src/arrow/acero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,11 @@ add_arrow_acero_test(plan_test
add_arrow_acero_test(source_node_test SOURCES source_node_test.cc test_nodes.cc)
add_arrow_acero_test(fetch_node_test SOURCES fetch_node_test.cc test_nodes.cc)
add_arrow_acero_test(order_by_node_test SOURCES order_by_node_test.cc test_nodes.cc)
add_arrow_acero_test(hash_join_node_test SOURCES hash_join_node_test.cc
bloom_filter_test.cc)
add_arrow_acero_test(hash_join_node_test
SOURCES
hash_join_node_test.cc
bloom_filter_test.cc
swiss_join_test.cc)
add_arrow_acero_test(pivot_longer_node_test SOURCES pivot_longer_node_test.cc
test_nodes.cc)

Expand Down
6 changes: 4 additions & 2 deletions cpp/src/arrow/acero/swiss_join.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2244,6 +2244,10 @@ Status JoinProbeProcessor::OnNextBatch(int64_t thread_id,
match_iterator.SetLookupResult(
minibatch_size_next, minibatch_start, match_bitvector_buf.mutable_data(),
key_ids_buf.mutable_data(), no_duplicate_keys, hash_table_->key_to_payload());
if (!residual_filter_->IsTrivial()) {
std::memset(filtered_bitvector_buf.mutable_data(), 0,
bit_util::BytesForBits(minibatch_size_next));
}
int num_matches_next;
while (match_iterator.GetNextBatch(minibatch_size, &num_matches_next,
materialize_batch_ids_buf.mutable_data(),
Expand All @@ -2256,8 +2260,6 @@ Status JoinProbeProcessor::OnNextBatch(int64_t thread_id,
materialize_key_ids_buf.mutable_data(),
materialize_payload_ids_buf.mutable_data(), /*output_payload_ids=*/true,
!(no_duplicate_keys || no_payload_columns), temp_stack, &num_matches_next));
std::memset(filtered_bitvector_buf.mutable_data(), 0,
bit_util::BytesForBits(minibatch_size_next));
for (int i = 0; i < num_matches_next; ++i) {
int bit_idx = materialize_batch_ids_buf.mutable_data()[i] - minibatch_start;
bit_util::SetBitTo(filtered_bitvector_buf.mutable_data(), bit_idx, 1);
Expand Down
26 changes: 26 additions & 0 deletions cpp/src/arrow/acero/swiss_join_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include <gtest/gtest.h>

namespace arrow::acero {

// Empty placeholder: this registers the SwissJoin.ResidualFilter test case but
// its body contains no statements, so it passes without asserting anything.
// TODO(review): presumably meant to exercise the swiss join residual-filter
// path -- confirm the intended coverage and add real assertions.
TEST(SwissJoin, ResidualFilter) {
}

}  // namespace arrow::acero

0 comments on commit cbf1b58

Please sign in to comment.