Skip to content

Commit

Permalink
upd
Browse files Browse the repository at this point in the history
  • Loading branch information
Mryange committed Jul 25, 2024
1 parent 8735639 commit 6635a38
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 70 deletions.
26 changes: 12 additions & 14 deletions be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2200,19 +2200,17 @@ uint16_t SegmentIterator::_evaluate_vectorization_predicate(uint16_t* sel_rowid_
const uint32_t sel_end_simd = sel_pos + selected_size / SIMD_BYTES * SIMD_BYTES;

while (sel_pos < sel_end_simd) {
auto mask = simd::bytes32_mask_to_bits32_mask(_ret_flags.data() + sel_pos);
auto mask = simd::bytes_mask_to_bits_mask(_ret_flags.data() + sel_pos);
if (0 == mask) {
//pass
} else if (0xffffffff == mask) {
} else if (simd::bits_mask_all() == mask) {
for (uint32_t i = 0; i < SIMD_BYTES; i++) {
sel_rowid_idx[new_size++] = sel_pos + i;
}
} else {
while (mask) {
const size_t bit_pos = __builtin_ctzll(mask);
sel_rowid_idx[new_size++] = sel_pos + bit_pos;
mask = mask & (mask - 1);
}
simd::iterate_through_bits_mask(
[&](const size_t bit_pos) { sel_rowid_idx[new_size++] = sel_pos + bit_pos; },
mask);
}
sel_pos += SIMD_BYTES;
}
Expand Down Expand Up @@ -2686,19 +2684,19 @@ uint16_t SegmentIterator::_evaluate_common_expr_filter(uint16_t* sel_rowid_idx,
const uint32_t sel_end_simd = sel_pos + selected_size / SIMD_BYTES * SIMD_BYTES;

while (sel_pos < sel_end_simd) {
auto mask = simd::bytes32_mask_to_bits32_mask(filt_pos + sel_pos);
auto mask = simd::bytes_mask_to_bits_mask(filt_pos + sel_pos);
if (0 == mask) {
//pass
} else if (0xffffffff == mask) {
} else if (simd::bits_mask_all() == mask) {
for (uint32_t i = 0; i < SIMD_BYTES; i++) {
sel_rowid_idx[new_size++] = sel_rowid_idx[sel_pos + i];
}
} else {
while (mask) {
const size_t bit_pos = __builtin_ctzll(mask);
sel_rowid_idx[new_size++] = sel_rowid_idx[sel_pos + bit_pos];
mask = mask & (mask - 1);
}
simd::iterate_through_bits_mask(
[&](const size_t bit_pos) {
sel_rowid_idx[new_size++] = sel_rowid_idx[sel_pos + bit_pos];
},
mask);
}
sel_pos += SIMD_BYTES;
}
Expand Down
18 changes: 13 additions & 5 deletions be/src/util/simd/bits.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,8 @@
namespace doris {
namespace simd {

/// todo(zeno) Compile add avx512 parameter, modify it to bytes64_mask_to_bits64_mask
/// Transform 32-byte mask to 32-bit mask
inline uint32_t bytes32_mask_to_bits32_mask(const uint8_t* data) {
/// Currently, transforming a 32-byte mask to a 32-bit mask has a faster processing method in the ARM version.
inline auto bytes_mask_to_bits_mask(const uint8_t* data) {
#ifdef __AVX2__
auto zero32 = _mm256_setzero_si256();
uint32_t mask = static_cast<uint32_t>(_mm256_movemask_epi8(
Expand All @@ -51,8 +50,17 @@ inline uint32_t bytes32_mask_to_bits32_mask(const uint8_t* data) {
return mask;
}

inline uint32_t bytes32_mask_to_bits32_mask(const bool* data) {
return bytes32_mask_to_bits32_mask(reinterpret_cast<const uint8_t*>(data));
/// Returns the bit mask value 0xffffffff, i.e. a mask with its 32 low bits all set.
/// Callers compare a mask against this value to detect the "every bit set" case.
inline constexpr auto bits_mask_all() {
    // Keep the literal unsuffixed so the deduced return type is unchanged.
    constexpr auto every_bit_set = 0xffffffff;
    return every_bit_set;
}

/// Calls `func` once for every set bit in `mask`, passing the zero-based position of
/// that bit. Bits are visited from the least significant set bit upwards; `func` is
/// never called when `mask` is 0.
///
/// @param func callable invoked as `func(bit_pos)` with a `size_t` bit position
/// @param mask bit mask of the type returned by `bytes_mask_to_bits_mask`
template <typename Func>
void iterate_through_bits_mask(Func func, decltype(bytes_mask_to_bits_mask(nullptr)) mask) {
    // `mask & (mask - 1)` clears the lowest set bit, so each pass moves to the next one.
    for (; mask; mask &= (mask - 1)) {
        const size_t lowest_set_bit = __builtin_ctzll(mask);
        func(lowest_set_bit);
    }
}

inline size_t count_zero_num(const int8_t* __restrict data, size_t size) {
Expand Down
33 changes: 16 additions & 17 deletions be/src/vec/columns/column_decimal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -322,16 +322,14 @@ ColumnPtr ColumnDecimal<T>::filter(const IColumn::Filter& filt, ssize_t result_s
const UInt8* filt_end_sse = filt_pos + size / SIMD_BYTES * SIMD_BYTES;

while (filt_pos < filt_end_sse) {
uint32_t mask = simd::bytes32_mask_to_bits32_mask(filt_pos);

if (0xFFFFFFFF == mask) {
uint32_t mask = simd::bytes_mask_to_bits_mask(filt_pos);
if (0 == mask) {
//pass
} else if (simd::bits_mask_all() == mask) {
res_data.insert(data_pos, data_pos + SIMD_BYTES);
} else {
while (mask) {
const size_t idx = __builtin_ctzll(mask);
res_data.push_back(data_pos[idx]);
mask = mask & (mask - 1);
}
simd::iterate_through_bits_mask(
[&](const size_t bit_pos) { res_data.push_back(data_pos[bit_pos]); }, mask);
}

filt_pos += SIMD_BYTES;
Expand Down Expand Up @@ -367,18 +365,19 @@ size_t ColumnDecimal<T>::filter(const IColumn::Filter& filter) {
const UInt8* filter_end_sse = filter_pos + size / SIMD_BYTES * SIMD_BYTES;

while (filter_pos < filter_end_sse) {
uint32_t mask = simd::bytes32_mask_to_bits32_mask(filter_pos);

if (0xFFFFFFFF == mask) {
uint32_t mask = simd::bytes_mask_to_bits_mask(filter_pos);
if (0 == mask) {
//pass
} else if (simd::bits_mask_all() == mask) {
memmove(result_data, data_pos, sizeof(T) * SIMD_BYTES);
result_data += SIMD_BYTES;
} else {
while (mask) {
const size_t idx = __builtin_ctzll(mask);
*result_data = data_pos[idx];
++result_data;
mask = mask & (mask - 1);
}
simd::iterate_through_bits_mask(
[&](const size_t idx) {
*result_data = data_pos[idx];
++result_data;
},
mask);
}

filter_pos += SIMD_BYTES;
Expand Down
34 changes: 17 additions & 17 deletions be/src/vec/columns/column_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -389,16 +389,15 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter& filt, ssize_t result_si
const UInt8* filt_end_sse = filt_pos + size / SIMD_BYTES * SIMD_BYTES;

while (filt_pos < filt_end_sse) {
uint32_t mask = simd::bytes32_mask_to_bits32_mask(filt_pos);

if (0xFFFFFFFF == mask) {
uint32_t mask = simd::bytes_mask_to_bits_mask(filt_pos);
if (0 == mask) {
//pass
} else if (simd::bits_mask_all() == mask) {
res_data.insert(data_pos, data_pos + SIMD_BYTES);
} else {
while (mask) {
const size_t idx = __builtin_ctzll(mask);
res_data.push_back_without_reserve(data_pos[idx]);
mask = mask & (mask - 1);
}
simd::iterate_through_bits_mask(
[&](const size_t idx) { res_data.push_back_without_reserve(data_pos[idx]); },
mask);
}

filt_pos += SIMD_BYTES;
Expand Down Expand Up @@ -436,18 +435,19 @@ size_t ColumnVector<T>::filter(const IColumn::Filter& filter) {
const UInt8* filter_end_sse = filter_pos + size / SIMD_BYTES * SIMD_BYTES;

while (filter_pos < filter_end_sse) {
uint32_t mask = simd::bytes32_mask_to_bits32_mask(filter_pos);

if (0xFFFFFFFF == mask) {
uint32_t mask = simd::bytes_mask_to_bits_mask(filter_pos);
if (0 == mask) {
//pass
} else if (simd::bits_mask_all() == mask) {
memmove(result_data, data_pos, sizeof(T) * SIMD_BYTES);
result_data += SIMD_BYTES;
} else {
while (mask) {
const size_t idx = __builtin_ctzll(mask);
*result_data = data_pos[idx];
++result_data;
mask = mask & (mask - 1);
}
simd::iterate_through_bits_mask(
[&](const size_t idx) {
*result_data = data_pos[idx];
++result_data;
},
mask);
}

filter_pos += SIMD_BYTES;
Expand Down
33 changes: 16 additions & 17 deletions be/src/vec/columns/columns_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,10 @@ void filter_arrays_impl_generic(const PaddedPODArray<T>& src_elems,
const auto filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES;

while (filt_pos < filt_end_aligned) {
auto mask = simd::bytes32_mask_to_bits32_mask(filt_pos);

if (mask == 0xffffffff) {
auto mask = simd::bytes_mask_to_bits_mask(filt_pos);
if (0 == mask) {
//pass
} else if (mask == simd::bits_mask_all()) {
/// SIMD_BYTES consecutive rows pass the filter
const auto first = offsets_pos == offsets_begin;

Expand All @@ -203,11 +204,8 @@ void filter_arrays_impl_generic(const PaddedPODArray<T>& src_elems,
res_elems.resize(elems_size_old + chunk_size);
memcpy(&res_elems[elems_size_old], &src_elems[chunk_offset], chunk_size * sizeof(T));
} else {
while (mask) {
const size_t bit_pos = __builtin_ctzll(mask);
copy_array(offsets_pos + bit_pos);
mask = mask & (mask - 1);
}
simd::iterate_through_bits_mask(
[&](const size_t bit_pos) { copy_array(offsets_pos + bit_pos); }, mask);
}

filt_pos += SIMD_BYTES;
Expand Down Expand Up @@ -263,9 +261,10 @@ size_t filter_arrays_impl_generic_without_reserving(PaddedPODArray<T>& elems,
const auto filter_end_aligned = filter_pos + size / SIMD_BYTES * SIMD_BYTES;

while (filter_pos < filter_end_aligned) {
auto mask = simd::bytes32_mask_to_bits32_mask(filter_pos);

if (mask == 0xffffffff) {
auto mask = simd::bytes_mask_to_bits_mask(filter_pos);
if (0 == mask) {
//pass
} else if (mask == simd::bits_mask_all()) {
/// SIMD_BYTES consecutive rows pass the filter
const auto first = offsets_pos == offsets_begin;

Expand All @@ -281,12 +280,12 @@ size_t filter_arrays_impl_generic_without_reserving(PaddedPODArray<T>& elems,
result_data += chunk_size;
result_size += SIMD_BYTES;
} else {
while (mask) {
const size_t bit_pos = __builtin_ctzll(mask);
copy_array(offsets_pos + bit_pos);
++result_size;
mask = mask & (mask - 1);
}
simd::iterate_through_bits_mask(
[&](const size_t bit_pos) {
copy_array(offsets_pos + bit_pos);
++result_size;
},
mask);
}

filter_pos += SIMD_BYTES;
Expand Down

0 comments on commit 6635a38

Please sign in to comment.