Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fix](inverted index) multi match distinguishes the inverted index v1 and v2 #39149

Merged
merged 1 commit into from
Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions be/src/vec/functions/function_multi_match.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,15 @@ Status FunctionMultiMatch::eval_inverted_index(FunctionContext* context,

auto single_result = std::make_shared<roaring::Roaring>();
StringRef query_value(match_param->query.data());
RETURN_IF_ERROR(index_reader->query(opts.stats, opts.runtime_state,
std::to_string(column.unique_id()), &query_value,
query_type, single_result));
// The key handed to index_reader->query() depends on the inverted index
// storage format reported by the tablet schema:
//   - V1: the query is issued with column_name.
//   - V2: the query is issued with the column's unique id rendered as a
//         decimal string.
// NOTE(review): a storage format value other than V1 or V2 matches neither
// branch below, so no query is issued and single_result is left exactly as it
// was created. Confirm that skipping silently is the intended behavior for
// any other format value.
auto index_version = tablet_schema->get_inverted_index_storage_format();
if (index_version == InvertedIndexStorageFormatPB::V1) {
// V1 path: look the index up by column name.
RETURN_IF_ERROR(index_reader->query(opts.stats, opts.runtime_state, column_name,
&query_value, query_type, single_result));
} else if (index_version == InvertedIndexStorageFormatPB::V2) {
// V2 path: look the index up by the stringified column unique id.
RETURN_IF_ERROR(index_reader->query(opts.stats, opts.runtime_state,
std::to_string(column.unique_id()), &query_value,
query_type, single_result));
}
(*result) |= (*single_result);
}

Expand Down
24 changes: 24 additions & 0 deletions regression-test/data/inverted_index_p0/test_index_multi_match.out
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,27 @@
-- !sql --
44

-- !sql --
178

-- !sql --
180

-- !sql --
859

-- !sql --
44

-- !sql --
178

-- !sql --
180

-- !sql --
859

-- !sql --
44

Original file line number Diff line number Diff line change
Expand Up @@ -19,51 +19,37 @@
suite("test_index_multi_match", "p0"){
def indexTbName1 = "test_index_multi_match_1"
def indexTbName2 = "test_index_multi_match_2"
def indexTbName3 = "test_index_multi_match_3"
def indexTbName4 = "test_index_multi_match_4"

sql "DROP TABLE IF EXISTS ${indexTbName1}"
sql "DROP TABLE IF EXISTS ${indexTbName2}"
sql "DROP TABLE IF EXISTS ${indexTbName3}"
sql "DROP TABLE IF EXISTS ${indexTbName4}"

sql """
CREATE TABLE ${indexTbName1} (
`@timestamp` int(11) NULL COMMENT "",
`clientip` text NULL COMMENT "",
`request` text NULL COMMENT "",
`status` text NULL COMMENT "",
`size` text NULL COMMENT "",
INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
INDEX status_idx (`status`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
INDEX size_idx (`size`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
) ENGINE=OLAP
DUPLICATE KEY(`@timestamp`)
COMMENT "OLAP"
DISTRIBUTED BY RANDOM BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"disable_auto_compaction" = "true"
);
"""

sql """
CREATE TABLE ${indexTbName2} (
`@timestamp` int(11) NULL COMMENT "",
`clientip` text NULL COMMENT "",
`request` text NULL COMMENT "",
`status` text NULL COMMENT "",
`size` text NULL COMMENT "",
INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
INDEX status_idx (`status`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
INDEX size_idx (`size`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
) ENGINE=OLAP
DUPLICATE KEY(`@timestamp`)
COMMENT "OLAP"
DISTRIBUTED BY RANDOM BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"disable_auto_compaction" = "true"
);
"""
// Creates one test table for this suite.
//   table_name  - name substituted into the CREATE TABLE statement
//   idx_version - value written to the "inverted_index_storage_format"
//                 table property (the suite passes 'V1' or 'V2')
// The table has four text columns (clientip, request, status, size), each
// with an inverted index using the "english" parser and phrase support.
def create_table = {table_name, idx_version ->
    // Build the DDL first, then hand it to the framework's sql helper.
    def create_stmt = """
CREATE TABLE ${table_name} (
`@timestamp` int(11) NULL COMMENT "",
`clientip` text NULL COMMENT "",
`request` text NULL COMMENT "",
`status` text NULL COMMENT "",
`size` text NULL COMMENT "",
INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
INDEX status_idx (`status`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
INDEX size_idx (`size`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
) ENGINE=OLAP
DUPLICATE KEY(`@timestamp`)
COMMENT "OLAP"
DISTRIBUTED BY RANDOM BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"inverted_index_storage_format" = "${idx_version}",
"disable_auto_compaction" = "true"
);
"""
    sql create_stmt
}

def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
expected_succ_rows = -1, load_to_single_tablet = 'true' ->
Expand Down Expand Up @@ -103,20 +89,39 @@ suite("test_index_multi_match", "p0"){
}

try {
create_table(indexTbName1, 'V1')
create_table(indexTbName2, 'V2')
create_table(indexTbName3, 'V1')
create_table(indexTbName4, 'V2')

load_httplogs_data.call(indexTbName1, 'test_index_multi_match_1', 'true', 'json', 'documents-1000.json')
load_httplogs_data.call(indexTbName2, 'test_index_multi_match_2', 'true', 'json', 'documents-1000.json')
load_httplogs_data.call(indexTbName3, 'test_index_multi_match_3', 'true', 'json', 'documents-1000.json')
load_httplogs_data.call(indexTbName4, 'test_index_multi_match_4', 'true', 'json', 'documents-1000.json')

sql "sync"

sql """ set enable_common_expr_pushdown = true """

qt_sql """ select count() from ${indexTbName1} where (clientip match_phrase_prefix '2'); """
qt_sql """ select count() from ${indexTbName1} where (clientip match_phrase_prefix '2' or request match_phrase_prefix '2'); """
qt_sql """ select count() from ${indexTbName1} where (clientip match_phrase_prefix '2' or request match_phrase_prefix '2' or status match_phrase_prefix '2' or size match_phrase_prefix '2'); """
qt_sql """ select count() from ${indexTbName1} where (clientip match_phrase_prefix 'a' or request match_phrase_prefix 'a' or status match_phrase_prefix 'a' or size match_phrase_prefix 'a'); """

qt_sql """ select count() from ${indexTbName2} where multi_match(clientip, '', 'phrase_prefix', '2'); """
qt_sql """ select count() from ${indexTbName2} where multi_match(clientip, 'request', 'phrase_prefix', '2'); """
qt_sql """ select count() from ${indexTbName2} where multi_match(clientip, 'request, status, size', 'phrase_prefix', '2'); """
qt_sql """ select count() from ${indexTbName2} where multi_match(clientip, 'request, status, size', 'phrase_prefix', 'a'); """
qt_sql """ select count() from ${indexTbName2} where (clientip match_phrase_prefix '2'); """
qt_sql """ select count() from ${indexTbName2} where (clientip match_phrase_prefix '2' or request match_phrase_prefix '2'); """
qt_sql """ select count() from ${indexTbName2} where (clientip match_phrase_prefix '2' or request match_phrase_prefix '2' or status match_phrase_prefix '2' or size match_phrase_prefix '2'); """
qt_sql """ select count() from ${indexTbName2} where (clientip match_phrase_prefix 'a' or request match_phrase_prefix 'a' or status match_phrase_prefix 'a' or size match_phrase_prefix 'a'); """

qt_sql """ select count() from ${indexTbName3} where multi_match(clientip, '', 'phrase_prefix', '2'); """
qt_sql """ select count() from ${indexTbName3} where multi_match(clientip, 'request', 'phrase_prefix', '2'); """
qt_sql """ select count() from ${indexTbName3} where multi_match(clientip, 'request, status, size', 'phrase_prefix', '2'); """
qt_sql """ select count() from ${indexTbName3} where multi_match(clientip, 'request, status, size', 'phrase_prefix', 'a'); """

qt_sql """ select count() from ${indexTbName4} where multi_match(clientip, '', 'phrase_prefix', '2'); """
qt_sql """ select count() from ${indexTbName4} where multi_match(clientip, 'request', 'phrase_prefix', '2'); """
qt_sql """ select count() from ${indexTbName4} where multi_match(clientip, 'request, status, size', 'phrase_prefix', '2'); """
qt_sql """ select count() from ${indexTbName4} where multi_match(clientip, 'request, status, size', 'phrase_prefix', 'a'); """

} finally {
//try_sql("DROP TABLE IF EXISTS ${testTable}")
Expand Down
Loading