Skip to content

Commit

Permalink
[opt](arm) Optimize the BlockBloomFilter::bucket_find on ARM platform… (
Browse files Browse the repository at this point in the history
#43508)

…s using NEON instructions. (#38888)
#38888
## Proposed changes

```
--------------------------------------------------------------
Benchmark                    Time             CPU   Iterations
--------------------------------------------------------------
BM_BucketFindNeon         8.14 ns         8.14 ns    344002441
BM_BucketFindNative       17.5 ns         17.5 ns    160152491
```
  • Loading branch information
Mryange authored Nov 10, 2024
1 parent 4c7e495 commit 8867a82
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 4 deletions.
36 changes: 35 additions & 1 deletion be/src/exprs/block_bloom_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,39 @@ class BlockBloomFilter {
return false;
}

#ifdef __ARM_NEON
void make_find_mask(uint32_t key, uint32x4_t* masks) const noexcept {
uint32x4_t hash_data_1 = vdupq_n_u32(key);
uint32x4_t hash_data_2 = vdupq_n_u32(key);

uint32x4_t rehash_1 = vld1q_u32(&kRehash[0]);
uint32x4_t rehash_2 = vld1q_u32(&kRehash[4]);

// masks[i] = key * kRehash[i];
hash_data_1 = vmulq_u32(rehash_1, hash_data_1);
hash_data_2 = vmulq_u32(rehash_2, hash_data_2);
// masks[i] = masks[i] >> shift_num;
hash_data_1 = vshrq_n_u32(hash_data_1, shift_num);
hash_data_2 = vshrq_n_u32(hash_data_2, shift_num);

const uint32x4_t ones = vdupq_n_u32(1);

// masks[i] = 0x1 << masks[i];
masks[0] = vshlq_u32(ones, reinterpret_cast<int32x4_t>(hash_data_1));
masks[1] = vshlq_u32(ones, reinterpret_cast<int32x4_t>(hash_data_2));
}
#else
void make_find_mask(uint32_t key, uint32_t* masks) const noexcept {
for (int i = 0; i < kBucketWords; ++i) {
masks[i] = key * kRehash[i];

masks[i] = masks[i] >> shift_num;

masks[i] = 0x1 << masks[i];
}
}
#endif

// Computes the logical OR of this filter with 'other' and stores the result in this
// filter.
// Notes:
Expand Down Expand Up @@ -163,7 +196,8 @@ class BlockBloomFilter {
// log2(number of bits in a BucketWord)
static constexpr int kLogBucketWordBits = 5;
static constexpr BucketWord kBucketWordMask = (1 << kLogBucketWordBits) - 1;

// (>> 27) is equivalent to (mod 32)
static constexpr auto shift_num = ((1 << kLogBucketWordBits) - kLogBucketWordBits);
// log2(number of bytes in a bucket)
static constexpr int kLogBucketByteSize = 5;
// Bucket size in bytes.
Expand Down
29 changes: 26 additions & 3 deletions be/src/exprs/block_bloom_filter_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -138,14 +138,37 @@ void BlockBloomFilter::bucket_insert(const uint32_t bucket_idx, const uint32_t h
}

bool BlockBloomFilter::bucket_find(const uint32_t bucket_idx, const uint32_t hash) const noexcept {
#if defined(__ARM_NEON)
uint32x4_t masks[2];

uint32x4_t directory_1 = vld1q_u32(&_directory[bucket_idx][0]);
uint32x4_t directory_2 = vld1q_u32(&_directory[bucket_idx][4]);

make_find_mask(hash, masks);
// The condition for returning true is that all the bits in _directory[bucket_idx][i] specified by masks[i] are 1.
// This can be equivalently expressed as all the bits in not( _directory[bucket_idx][i]) specified by masks[i] are 0.
// vbicq_u32(vec1, vec2) : Result of (vec1 AND NOT vec2)
// If true is returned, out_1 and out_2 should be all zeros.
uint32x4_t out_1 = vbicq_u32(masks[0], directory_1);
uint32x4_t out_2 = vbicq_u32(masks[1], directory_2);

out_1 = vorrq_u32(out_1, out_2);

uint32x2_t low = vget_low_u32(out_1);
uint32x2_t high = vget_high_u32(out_1);
low = vorr_u32(low, high);
uint32_t res = vget_lane_u32(low, 0) | vget_lane_u32(low, 1);
return !(res);
#else
uint32_t masks[kBucketWords];
make_find_mask(hash, masks);
for (int i = 0; i < kBucketWords; ++i) {
BucketWord hval = (kRehash[i] * hash) >> ((1 << kLogBucketWordBits) - kLogBucketWordBits);
hval = 1U << hval;
if (!(DCHECK_NOTNULL(_directory)[bucket_idx][i] & hval)) {
if ((DCHECK_NOTNULL(_directory)[bucket_idx][i] & masks[i]) == 0) {
return false;
}
}
return true;
#endif
}

void BlockBloomFilter::insert_no_avx2(const uint32_t hash) noexcept {
Expand Down

0 comments on commit 8867a82

Please sign in to comment.