Skip to content

Commit ebe031c

Browse files
authored
[fix](inverted index) Fix match_regexp to correctly handle empty string patterns (#40659)
#39503
1 parent 52e13c9 commit ebe031c

File tree

5 files changed

+24
-23
lines changed

5 files changed

+24
-23
lines changed

be/src/vec/functions/match.cpp

-9
Original file line number | Diff line number | Diff line change
@@ -407,15 +407,6 @@ Status FunctionMatchRegexp::execute_match(FunctionContext* context, const std::s
407407
VLOG_DEBUG << "begin to run FunctionMatchRegexp::execute_match, parser_type: "
408408
<< inverted_index_parser_type_to_string(inverted_index_ctx->parser_type);
409409

410-
if (match_query_str.empty()) {
411-
VLOG_DEBUG << fmt::format(
412-
"token parser result is empty for query, "
413-
"please check your query: '{}' and index parser: '{}'",
414-
match_query_str,
415-
inverted_index_parser_type_to_string(inverted_index_ctx->parser_type));
416-
return Status::OK();
417-
}
418-
419410
const std::string& pattern = match_query_str;
420411

421412
hs_database_t* database = nullptr;

regression-test/data/inverted_index_p0/test_index_match_regexp.out

+3
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,9 @@
22
-- !sql --
33
1000
44

5+
-- !sql --
6+
1000
7+
58
-- !sql --
69
54
710

regression-test/data/inverted_index_p0/test_no_index_match.out

+3
Original file line number | Diff line number | Diff line change
@@ -20,3 +20,6 @@
2020
-- !sql --
2121
0
2222

23+
-- !sql --
24+
1000
25+

regression-test/suites/inverted_index_p0/test_index_match_regexp.groovy

+1
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,7 @@ suite("test_index_match_regexp", "p0"){
8080

8181
sql "sync"
8282

83+
qt_sql """ select count() from test_index_match_regexp where request match_regexp ''; """
8384
qt_sql """ select count() from test_index_match_regexp where request match_regexp '^h'; """
8485
qt_sql """ select count() from test_index_match_regexp where request match_regexp '^team'; """
8586
qt_sql """ select count() from test_index_match_regexp where request match_regexp 's\$'; """

regression-test/suites/inverted_index_p0/test_no_index_match.groovy

+17 -14
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818

1919
suite("test_no_index_match", "p0") {
2020
// define a sql table
21-
def testTable_unique = "httplogs_unique"
21+
def testTable = "test_no_index_match"
2222

2323
def create_httplogs_unique_table = {testTablex ->
2424
// multi-line sql
@@ -77,35 +77,38 @@ suite("test_no_index_match", "p0") {
7777
}
7878

7979
try {
80-
sql "DROP TABLE IF EXISTS ${testTable_unique}"
81-
create_httplogs_unique_table.call(testTable_unique)
82-
load_httplogs_data.call(testTable_unique, 'httplogs_unique', 'true', 'json', 'documents-1000.json')
80+
sql "DROP TABLE IF EXISTS ${testTable}"
81+
create_httplogs_unique_table.call(testTable)
82+
load_httplogs_data.call(testTable, 'test_no_index_match', 'true', 'json', 'documents-1000.json')
8383

84-
sql """ INSERT INTO ${testTable_unique} VALUES (1, '1', '', 1, 1); """
84+
sql """ INSERT INTO ${testTable} VALUES (1, '1', '', 1, 1); """
8585

8686
sql 'sync'
87+
sql """ set enable_common_expr_pushdown = true """
8788

8889
try {
89-
qt_sql """ select count() from ${testTable_unique} where (request match_any 'hm bg'); """
90-
qt_sql """ select count() from ${testTable_unique} where (request match_all 'hm bg'); """
91-
qt_sql """ select count() from ${testTable_unique} where (request match_phrase 'hm bg'); """
92-
qt_sql """ select count() from ${testTable_unique} where (request match_phrase_prefix 'hm b'); """
93-
qt_sql """ select count() from ${testTable_unique} where (request match_regexp 'la'); """
90+
qt_sql """ select count() from ${testTable} where (request match_any 'hm bg'); """
91+
qt_sql """ select count() from ${testTable} where (request match_all 'hm bg'); """
92+
qt_sql """ select count() from ${testTable} where (request match_phrase 'hm bg'); """
93+
qt_sql """ select count() from ${testTable} where (request match_phrase_prefix 'hm b'); """
94+
qt_sql """ select count() from ${testTable} where (request match_regexp 'la'); """
9495

95-
qt_sql """ select count() from ${testTable_unique} where (request match_phrase '欧冶工业品'); """
96-
qt_sql """ select count() from ${testTable_unique} where (request match_phrase_prefix '欧冶工业品'); """
96+
qt_sql """ select count() from ${testTable} where (request match_phrase '欧冶工业品'); """
97+
qt_sql """ select count() from ${testTable} where (request match_phrase_prefix '欧冶工业品'); """
98+
99+
qt_sql """ select count() from ${testTable} where (request match_regexp ''); """
97100
} finally {
98101
}
99102

100103
try {
101-
sql """ select /*+ SET_VAR(enable_match_without_inverted_index = 0) */ count() from ${testTable_unique} where (request match_phrase 'hm bg'); """
104+
sql """ select /*+ SET_VAR(enable_match_without_inverted_index = 0) */ count() from ${testTable} where (request match_phrase 'hm bg'); """
102105
} catch (Exception e) {
103106
log.info(e.getMessage());
104107
assertTrue(e.getMessage().contains("match_phrase not support execute_match"))
105108
}
106109

107110
try {
108-
sql """ select /*+ SET_VAR(enable_match_without_inverted_index = 0) */ count() from ${testTable_unique} where (request match_phrase_prefix 'hm b'); """
111+
sql """ select /*+ SET_VAR(enable_match_without_inverted_index = 0) */ count() from ${testTable} where (request match_phrase_prefix 'hm b'); """
109112
} catch (Exception e) {
110113
log.info(e.getMessage());
111114
assertTrue(e.getMessage().contains("match_phrase_prefix not support execute_match"))

0 commit comments

Comments
 (0)