Skip to content

Commit cd65796

Browse files
authored
[opt](inverted index) ignore_above only affects untokenized strings (#28819)
1 parent 619d500 commit cd65796

9 files changed: 32 additions (+32) and 26 deletions (-26).

be/src/olap/rowset/segment_v2/inverted_index_writer.cpp

+16 -10
Original file line number | Diff line number | Diff line change
@@ -295,12 +295,15 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
295295
"field or index writer is null in inverted index writer");
296296
}
297297
auto* v = (Slice*)values;
298+
auto ignore_above_value =
299+
get_parser_ignore_above_value_from_properties(_index_meta->properties());
300+
auto ignore_above = std::stoi(ignore_above_value);
298301
for (int i = 0; i < count; ++i) {
299-
auto ignore_above_value =
300-
get_parser_ignore_above_value_from_properties(_index_meta->properties());
301-
auto ignore_above = std::stoi(ignore_above_value);
302-
if (v->get_size() > ignore_above) {
303-
VLOG_DEBUG << "fulltext index value length can be at most 256, but got "
302+
// only ignore_above UNTOKENIZED strings
303+
if (_parser_type == InvertedIndexParserType::PARSER_NONE &&
304+
v->get_size() > ignore_above) {
305+
VLOG_DEBUG << "fulltext index value length can be at most "
306+
<< ignore_above_value << ", but got "
304307
<< "value length:" << v->get_size() << ", ignore this value";
305308
new_fulltext_field(empty_value.c_str(), 0);
306309
RETURN_IF_ERROR(add_null_document());
@@ -330,6 +333,9 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
330333
return Status::InternalError(
331334
"field or index writer is null in inverted index writer");
332335
}
336+
auto ignore_above_value =
337+
get_parser_ignore_above_value_from_properties(_index_meta->properties());
338+
auto ignore_above = std::stoi(ignore_above_value);
333339
for (int i = 0; i < count; ++i) {
334340
// offsets[i+1] is now row element count
335341
std::vector<std::string> strings;
@@ -346,11 +352,11 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
346352
}
347353

348354
auto value = join(strings, " ");
349-
auto ignore_above_value =
350-
get_parser_ignore_above_value_from_properties(_index_meta->properties());
351-
auto ignore_above = std::stoi(ignore_above_value);
352-
if (value.length() > ignore_above) {
353-
VLOG_DEBUG << "fulltext index value length can be at most 256, but got "
355+
// only ignore_above UNTOKENIZED strings
356+
if (_parser_type == InvertedIndexParserType::PARSER_NONE &&
357+
value.length() > ignore_above) {
358+
VLOG_DEBUG << "fulltext index value length can be at most "
359+
<< ignore_above_value << ", but got "
354360
<< "value length:" << value.length() << ", ignore this value";
355361
new_fulltext_field(empty_value.c_str(), 0);
356362
RETURN_IF_ERROR(add_null_document());

regression-test/suites/mysql_fulltext/ddl/large_records_t1_dk.sql

+2 -2
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t1_dk (
22
FTS_DOC_ID BIGINT NOT NULL,
33
a TEXT,
44
b TEXT,
5-
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
6-
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
5+
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
6+
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
77
)
88
DUPLICATE KEY(FTS_DOC_ID)
99
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3

regression-test/suites/mysql_fulltext/ddl/large_records_t1_uk.sql

+2 -2
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t1_uk (
22
FTS_DOC_ID BIGINT NOT NULL,
33
a TEXT,
44
b TEXT,
5-
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
6-
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
5+
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
6+
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
77
)
88
UNIQUE KEY(FTS_DOC_ID)
99
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3

regression-test/suites/mysql_fulltext/ddl/large_records_t2_dk.sql

+2 -2
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t2_dk (
22
FTS_DOC_ID BIGINT NOT NULL,
33
a TEXT,
44
b TEXT,
5-
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
6-
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
5+
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
6+
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
77
)
88
DUPLICATE KEY(FTS_DOC_ID)
99
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3

regression-test/suites/mysql_fulltext/ddl/large_records_t2_uk.sql

+2 -2
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t2_uk (
22
FTS_DOC_ID BIGINT NOT NULL,
33
a TEXT,
44
b TEXT,
5-
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
6-
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
5+
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
6+
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
77
)
88
UNIQUE KEY(FTS_DOC_ID)
99
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3

regression-test/suites/mysql_fulltext/ddl/large_records_t3_dk.sql

+2 -2
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t3_dk (
22
FTS_DOC_ID BIGINT NOT NULL,
33
a TEXT,
44
b TEXT,
5-
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
6-
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
5+
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
6+
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
77
)
88
DUPLICATE KEY(FTS_DOC_ID)
99
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3

regression-test/suites/mysql_fulltext/ddl/large_records_t3_uk.sql

+2 -2
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t3_uk (
22
FTS_DOC_ID BIGINT NOT NULL,
33
a TEXT,
44
b TEXT,
5-
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
6-
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
5+
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
6+
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
77
)
88
UNIQUE KEY(FTS_DOC_ID)
99
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3

regression-test/suites/mysql_fulltext/ddl/large_records_t4_dk.sql

+2 -2
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t4_dk (
22
FTS_DOC_ID BIGINT NOT NULL,
33
a TEXT,
44
b TEXT,
5-
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
6-
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
5+
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
6+
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
77
)
88
DUPLICATE KEY(FTS_DOC_ID)
99
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3

regression-test/suites/mysql_fulltext/ddl/large_records_t4_uk.sql

+2 -2
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t4_uk (
22
FTS_DOC_ID BIGINT NOT NULL,
33
a TEXT,
44
b TEXT,
5-
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
6-
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
5+
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
6+
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
77
)
88
UNIQUE KEY(FTS_DOC_ID)
99
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3

0 commit comments

Comments (0)