Skip to content

Commit c4b4c5e

Browse files
committed
add comment
1 parent 745ffc4 commit c4b4c5e

File tree

2 files changed

+17
-9
lines changed

2 files changed

+17
-9
lines changed

be/src/util/simd/vstring_function.h

+6-2
Original file line numberDiff line numberDiff line change
@@ -187,8 +187,12 @@ class VStringFunctions {
187187
return p;
188188
}
189189

190-
static inline std::pair<size_t, size_t> skip_leading_utf8(const char* begin, const char* end,
191-
size_t n) {
190+
// Iterate over a UTF-8 string, stopping after at most n characters (n is a character count, not a byte count).
191+
// The function returns two values:
192+
// the first is the number of bytes traversed, and the second is the number of characters traversed.
193+
static inline std::pair<size_t, size_t> iterate_utf8_with_limit_length(const char* begin,
194+
const char* end,
195+
size_t n) {
192196
const char* p = begin;
193197
int char_size = 0;
194198

be/src/vec/functions/function_string.h

+11-7
Original file line numberDiff line numberDiff line change
@@ -1581,6 +1581,8 @@ class FunctionStringPad : public IFunction {
15811581
size_t input_rows_count) const {
15821582
std::vector<size_t> pad_index;
15831583
size_t const_pad_char_size = 0;
1584+
// If pad_const is true, the pad string is the same for every row, so pad_index is initialized only once here.
1585+
// The same logic applies to the if constexpr (!pad_const) condition below.
15841586
if constexpr (pad_const) {
15851587
const_pad_char_size = simd::VStringFunctions::get_char_len(
15861588
(const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
@@ -1608,12 +1610,14 @@ class FunctionStringPad : public IFunction {
16081610
const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
16091611
const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
16101612

1611-
auto [real_len, skip_chars] = simd::VStringFunctions::skip_leading_utf8(
1612-
(const char*)str_data, (const char*)str_data + str_len, len);
1613-
if (len <= skip_chars) {
1614-
buffer.reserve(buffer_len + real_len);
1615-
memcpy(buffer.data() + buffer_len, str_data, real_len);
1616-
buffer_len += real_len;
1613+
auto [iterate_byte_len, iterate_char_len] =
1614+
simd::VStringFunctions::iterate_utf8_with_limit_length(
1615+
(const char*)str_data, (const char*)str_data + str_len, len);
1616+
// If iterate_char_len equals len, the string has at least len characters, so only its first len characters are copied and no padding is added.
1617+
if (iterate_char_len == len) {
1618+
buffer.reserve(buffer_len + iterate_byte_len);
1619+
memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1620+
buffer_len += iterate_byte_len;
16171621
res_offsets[i] = buffer_len;
16181622
continue;
16191623
}
@@ -1630,7 +1634,7 @@ class FunctionStringPad : public IFunction {
16301634
res_offsets[i] = buffer_len;
16311635
continue;
16321636
}
1633-
const size_t str_char_size = skip_chars;
1637+
const size_t str_char_size = iterate_char_len;
16341638
const size_t pad_times = (len - str_char_size) / pad_char_size;
16351639
const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
16361640
const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;

0 commit comments

Comments
 (0)