From f26066e9594d616f9adb602aa3185a8ccca942ae Mon Sep 17 00:00:00 2001 From: xufei Date: Tue, 8 Feb 2022 09:37:13 +0800 Subject: [PATCH 1/4] add support for regexp --- dbms/src/Common/OptimizedRegularExpression.h | 3 +- .../Common/OptimizedRegularExpression.inl.h | 17 +- .../DAGExpressionAnalyzerHelper.cpp | 28 +- .../Coprocessor/DAGExpressionAnalyzerHelper.h | 7 +- dbms/src/Flash/Coprocessor/DAGUtils.cpp | 4 +- dbms/src/Functions/FunctionsStringArray.h | 3 +- dbms/src/Functions/FunctionsStringSearch.cpp | 418 +++- dbms/src/Functions/FunctionsStringSearch.h | 46 +- dbms/src/Functions/FunctionsVisitParam.h | 12 +- dbms/src/Functions/Regexps.h | 5 +- dbms/src/Functions/tests/gtest_regexp.cpp | 2156 +++++++++++++++++ dbms/src/Storages/Transaction/Collator.h | 11 + tests/fullstack-test/expr/regexp.test | 57 + .../new_collation_fullstack/regexp.test | 69 + 14 files changed, 2723 insertions(+), 113 deletions(-) create mode 100644 dbms/src/Functions/tests/gtest_regexp.cpp create mode 100644 tests/fullstack-test/expr/regexp.test create mode 100644 tests/tidb-ci/new_collation_fullstack/regexp.test diff --git a/dbms/src/Common/OptimizedRegularExpression.h b/dbms/src/Common/OptimizedRegularExpression.h index 963de77ae08..6aa3c3f6942 100644 --- a/dbms/src/Common/OptimizedRegularExpression.h +++ b/dbms/src/Common/OptimizedRegularExpression.h @@ -48,7 +48,8 @@ class OptimizedRegularExpressionImpl { RE_CASELESS = 0x00000001, RE_NO_CAPTURE = 0x00000010, - RE_DOT_NL = 0x00000100 + RE_DOT_NL = 0x00000100, + RE_NO_OPTIMIZE = 0x00001000 }; using Match = OptimizedRegularExpressionDetails::Match; diff --git a/dbms/src/Common/OptimizedRegularExpression.inl.h b/dbms/src/Common/OptimizedRegularExpression.inl.h index 8d77036cfbe..18fd01bb2d7 100644 --- a/dbms/src/Common/OptimizedRegularExpression.inl.h +++ b/dbms/src/Common/OptimizedRegularExpression.inl.h @@ -262,10 +262,21 @@ void OptimizedRegularExpressionImpl::analyze( template 
OptimizedRegularExpressionImpl::OptimizedRegularExpressionImpl(const std::string & regexp_, int options) { - analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix); + if (options & RE_NO_OPTIMIZE) + { + /// query from TiDB, currently, since analyze does not handle all the cases, skip the optimization + /// to avoid im-compatible issues + is_trivial = false; + required_substring.clear(); + required_substring_is_prefix = false; + } + else + { + analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix); + } - /// Just three following options are supported - if (options & (~(RE_CASELESS | RE_NO_CAPTURE | RE_DOT_NL))) + /// Just four following options are supported + if (options & (~(RE_CASELESS | RE_NO_CAPTURE | RE_DOT_NL | RE_NO_OPTIMIZE))) throw Poco::Exception("OptimizedRegularExpression: Unsupported option."); is_case_insensitive = options & RE_CASELESS; diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp index 004380f4c4c..34a2e00c4f6 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp @@ -380,6 +380,30 @@ String DAGExpressionAnalyzerHelper::buildRoundFunction( return analyzer->applyFunction("tidbRoundWithFrac", argument_names, actions, getCollatorFromExpr(expr)); } +String DAGExpressionAnalyzerHelper::buildRegexpFunction( + DAGExpressionAnalyzer * analyzer, + const tipb::Expr & expr, + ExpressionActionsPtr & actions) +{ + const String & func_name = getFunctionName(expr); + Names argument_names; + for (const auto & child : expr.children()) + { + String name = analyzer->getActions(child, actions); + argument_names.push_back(name); + } + std::shared_ptr collator = getCollatorFromExpr(expr); + if (expr.sig() == tipb::ScalarFuncSig::RegexpReplaceSig || expr.sig() == tipb::ScalarFuncSig::RegexpSig) + { + /// according to 
https://github.com/pingcap/tidb/blob/v5.0.0/expression/builtin_like.go#L126, + /// For binary collation, it will use RegexpXXXSig, otherwise it will use RegexpXXXUTF8Sig + /// Need to set the collator explicitly because `getCollatorFromExpr` will return nullptr + /// if new collation is not enabled. + collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY); + } + return analyzer->applyFunction(func_name, argument_names, actions, collator); +} + DAGExpressionAnalyzerHelper::FunctionBuilderMap DAGExpressionAnalyzerHelper::function_builder_map( {{"in", DAGExpressionAnalyzerHelper::buildInFunction}, {"notIn", DAGExpressionAnalyzerHelper::buildInFunction}, @@ -401,6 +425,8 @@ DAGExpressionAnalyzerHelper::FunctionBuilderMap DAGExpressionAnalyzerHelper::fun {"leftUTF8", DAGExpressionAnalyzerHelper::buildLeftUTF8Function}, {"date_add", DAGExpressionAnalyzerHelper::buildDateAddOrSubFunction}, {"date_sub", DAGExpressionAnalyzerHelper::buildDateAddOrSubFunction}, + {"regexp", DAGExpressionAnalyzerHelper::buildRegexpFunction}, + {"replaceRegexpAll", DAGExpressionAnalyzerHelper::buildRegexpFunction}, {"tidbRound", DAGExpressionAnalyzerHelper::buildRoundFunction}}); -} // namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.h index 39cf6f216f8..173f061258e 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.h @@ -64,6 +64,11 @@ class DAGExpressionAnalyzerHelper const tipb::Expr & expr, const ExpressionActionsPtr & actions); + static String buildRegexpFunction( + DAGExpressionAnalyzer * analyzer, + const tipb::Expr & expr, + ExpressionActionsPtr & actions); + static String genFuncString( const String & func_name, const Names & argument_names, @@ -74,4 +79,4 @@ class DAGExpressionAnalyzerHelper static FunctionBuilderMap function_builder_map; }; -} // 
namespace DB \ No newline at end of file +} // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index 1b95af0dd9f..80568ee4b6b 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -412,8 +412,8 @@ const std::unordered_map scalar_func_map({ //{tipb::ScalarFuncSig::UUID, "cast"}, {tipb::ScalarFuncSig::LikeSig, "like3Args"}, - //{tipb::ScalarFuncSig::RegexpSig, "cast"}, - //{tipb::ScalarFuncSig::RegexpUTF8Sig, "cast"}, + {tipb::ScalarFuncSig::RegexpSig, "regexp"}, + {tipb::ScalarFuncSig::RegexpUTF8Sig, "regexp"}, //{tipb::ScalarFuncSig::JsonExtractSig, "cast"}, //{tipb::ScalarFuncSig::JsonUnquoteSig, "cast"}, diff --git a/dbms/src/Functions/FunctionsStringArray.h b/dbms/src/Functions/FunctionsStringArray.h index 0cac99aabfd..7899d421e19 100644 --- a/dbms/src/Functions/FunctionsStringArray.h +++ b/dbms/src/Functions/FunctionsStringArray.h @@ -280,7 +280,8 @@ class ExtractAllImpl + " of first argument of function " + getName() + ". Must be constant string.", ErrorCodes::ILLEGAL_COLUMN); - re = Regexps::get(col->getValue()); + int flags = OptimizedRegularExpression::RE_DOT_NL; + re = Regexps::get(col->getValue(), flags); capture = re->getNumberOfSubpatterns() > 0 ? 
1 : 0; matches.resize(capture + 1); diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp index 5222f96ea12..df32f50f005 100644 --- a/dbms/src/Functions/FunctionsStringSearch.cpp +++ b/dbms/src/Functions/FunctionsStringSearch.cpp @@ -158,11 +158,12 @@ struct PositionImpl const ColumnString::Offsets & offsets, const std::string & needle, const UInt8 escape_char, + const std::string & match_type, const TiDB::TiDBCollatorPtr & collator, PaddedPODArray & res) { - if (escape_char != CH_ESCAPE_CHAR || collator != nullptr) - throw Exception("PositionImpl don't support customized escape char and tidb collator", ErrorCodes::NOT_IMPLEMENTED); + if (escape_char != CH_ESCAPE_CHAR || !match_type.empty() || collator != nullptr) + throw Exception("PositionImpl don't support customized escape char/match_type argument/tidb collator", ErrorCodes::NOT_IMPLEMENTED); const UInt8 * begin = &data[0]; const UInt8 * pos = begin; const UInt8 * end = pos + data.size(); @@ -199,10 +200,10 @@ struct PositionImpl } /// Search for substring in string. 
- static void constantConstant(std::string data, std::string needle, const UInt8 escape_char, const TiDB::TiDBCollatorPtr & collator, UInt64 & res) + static void constantConstant(std::string data, std::string needle, const UInt8 escape_char, const std::string & match_type, const TiDB::TiDBCollatorPtr & collator, UInt64 & res) { - if (escape_char != CH_ESCAPE_CHAR || collator != nullptr) - throw Exception("PositionImpl don't support customized escape char and tidb collator", ErrorCodes::NOT_IMPLEMENTED); + if (escape_char != CH_ESCAPE_CHAR || !match_type.empty() || collator != nullptr) + throw Exception("PositionImpl don't support customized escape char/match_type argument/tidb collator", ErrorCodes::NOT_IMPLEMENTED); Impl::toLowerIfNeed(data); Impl::toLowerIfNeed(needle); @@ -219,11 +220,12 @@ struct PositionImpl const ColumnString::Chars_t & needle_data, const ColumnString::Offsets & needle_offsets, const UInt8 escape_char, + const std::string & match_type, const TiDB::TiDBCollatorPtr & collator, PaddedPODArray & res) { - if (escape_char != CH_ESCAPE_CHAR || collator != nullptr) - throw Exception("PositionImpl don't support customized escape char and tidb collator", ErrorCodes::NOT_IMPLEMENTED); + if (escape_char != CH_ESCAPE_CHAR || !match_type.empty() || collator != nullptr) + throw Exception("PositionImpl don't support customized escape char/match_type argument/tidb collator", ErrorCodes::NOT_IMPLEMENTED); ColumnString::Offset prev_haystack_offset = 0; ColumnString::Offset prev_needle_offset = 0; @@ -268,11 +270,12 @@ struct PositionImpl const ColumnString::Chars_t & needle_data, const ColumnString::Offsets & needle_offsets, const UInt8 escape_char, + const std::string & match_type, const TiDB::TiDBCollatorPtr & collator, PaddedPODArray & res) { - if (escape_char != CH_ESCAPE_CHAR || collator != nullptr) - throw Exception("PositionImpl don't support customized escape char and tidb collator", ErrorCodes::NOT_IMPLEMENTED); + if (escape_char != CH_ESCAPE_CHAR || 
!match_type.empty() || collator != nullptr) + throw Exception("PositionImpl don't support customized escape char/match_type argument/tidb collator", ErrorCodes::NOT_IMPLEMENTED); // NOTE You could use haystack indexing. But this is a rare case. ColumnString::Offset prev_needle_offset = 0; @@ -310,6 +313,56 @@ struct PositionImpl } }; +static String getRE2ModeModifiers(const std::string & match_type, const std::shared_ptr & collator) +{ + /// for regexp only ci/cs is supported + re2_st::RE2::Options options(re2_st::RE2::CannedOptions::DefaultOptions); + if (collator != nullptr && collator->isCI()) + options.set_case_sensitive(false); + + /// match_type can overwrite collator + if (!match_type.empty()) + { + for (const auto & c : match_type) + { + switch (c) + { + case 'i': + /// according to MySQL doc: if either argument is a binary string, the arguments are handled in + /// case-sensitive fashion as binary strings, even if match_type contains the i character. + /// However, test in MySQL 8.0.25 shows that i flag still take affect even if the collation is binary, + /// if (collator == nullptr || !collator->isBinary()) + options.set_case_sensitive(false); + break; + case 'c': + options.set_case_sensitive(true); + break; + case 's': + options.set_dot_nl(true); + break; + case 'm': + options.set_one_line(false); + break; + default: + throw Exception("Incorrect arguments to regexp related functions."); + } + } + } + if (!options.one_line() || options.dot_nl() || !options.case_sensitive()) + { + String mode_modifiers("(?"); + if (!options.one_line()) + mode_modifiers += "m"; + if (!options.case_sensitive()) + mode_modifiers += "i"; + if (options.dot_nl()) + mode_modifiers += "s"; + mode_modifiers += ")"; + return mode_modifiers; + } + else + return ""; +} /// Is the LIKE expression reduced to finding a substring in a string? 
inline bool likePatternIsStrstr(const String & pattern, String & res) @@ -361,9 +414,9 @@ String replaceEscapeChar(String & orig_string, UInt8 escape_char) for (size_t i = 0; i < orig_string.size(); i++) { auto c = orig_string[i]; - if (c == escape_char) + if (static_cast(c) == escape_char) { - if (i + 1 != orig_string.size() && orig_string[i + 1] == escape_char) + if (i + 1 != orig_string.size() && static_cast(orig_string[i + 1]) == escape_char) { // two successive escape char, which means it is trying to escape itself, just remove one i++; @@ -402,7 +455,7 @@ String replaceEscapeChar(String & orig_string, UInt8 escape_char) * NOTE: We want to run regexp search for whole block by one call (as implemented in function 'position') * but for that, regexp engine must support \0 bytes and their interpretation as string boundaries. */ -template +template struct MatchImpl { using ResultType = UInt8; @@ -412,10 +465,13 @@ struct MatchImpl const ColumnString::Offsets & offsets, const std::string & orig_pattern, UInt8 escape_char, + const std::string & match_type, const TiDB::TiDBCollatorPtr & collator, PaddedPODArray & res) { - if (collator != nullptr) + /// collation only take affect for like, for regexp collation is not + /// fully supported.(Only case sensitive/insensitive is supported) + if (like && collator != nullptr) { auto matcher = collator->pattern(); matcher->compile(orig_pattern, escape_char); @@ -472,7 +528,20 @@ struct MatchImpl { size_t size = offsets.size(); - const auto & regexp = Regexps::get(pattern); + int flags = 0; + if constexpr (for_tidb) + flags |= OptimizedRegularExpression::RE_NO_OPTIMIZE; + else + flags |= OptimizedRegularExpression::RE_DOT_NL; + + /// match_type can overwrite collator + if (!match_type.empty() || collator != nullptr) + { + String mode_modifiers = getRE2ModeModifiers(match_type, collator); + if (!mode_modifiers.empty()) + pattern = mode_modifiers + pattern; + } + const auto & regexp = Regexps::get(pattern, flags); std::string 
required_substring; bool is_trivial; @@ -579,10 +648,11 @@ struct MatchImpl const std::string & data, const std::string & orig_pattern, UInt8 escape_char, + const std::string & match_type, const TiDB::TiDBCollatorPtr & collator, UInt8 & res) { - if (collator != nullptr) + if (like && collator != nullptr) { auto matcher = collator->pattern(); matcher->compile(orig_pattern, escape_char); @@ -593,7 +663,20 @@ struct MatchImpl String pattern = orig_pattern; if (escape_char != CH_ESCAPE_CHAR) pattern = replaceEscapeChar(pattern, escape_char); - const auto & regexp = Regexps::get(pattern); + int flags = 0; + if constexpr (for_tidb) + flags |= OptimizedRegularExpression::RE_NO_OPTIMIZE; + else + flags |= OptimizedRegularExpression::RE_DOT_NL; + + /// match_type can overwrite collator + if (!match_type.empty() || collator != nullptr) + { + String mode_modifiers = getRE2ModeModifiers(match_type, collator); + if (!mode_modifiers.empty()) + pattern = mode_modifiers + pattern; + } + const auto & regexp = Regexps::get(pattern, flags); res = revert ^ regexp->match(data); } } @@ -604,6 +687,7 @@ struct MatchImpl const ColumnString::Chars_t & needle_data, const ColumnString::Offsets & needle_offsets, UInt8 escape_char, + const std::string & match_type, const TiDB::TiDBCollatorPtr & collator, PaddedPODArray & res) { @@ -619,7 +703,7 @@ struct MatchImpl // TODO: remove the copy, use raw char array directly std::string haystack_str(reinterpret_cast(&haystack_data[prev_haystack_offset]), haystack_size); std::string needle_str(reinterpret_cast(&needle_data[prev_needle_offset]), needle_size); - constantConstant(haystack_str, needle_str, escape_char, collator, res[i]); + constantConstant(haystack_str, needle_str, escape_char, match_type, collator, res[i]); prev_haystack_offset = haystack_offsets[i]; prev_needle_offset = needle_offsets[i]; } @@ -631,6 +715,7 @@ struct MatchImpl const ColumnString::Chars_t & needle_data, const ColumnString::Offsets & needle_offsets, UInt8 escape_char, + 
const std::string & match_type, const TiDB::TiDBCollatorPtr & collator, PaddedPODArray & res) { @@ -644,7 +729,7 @@ struct MatchImpl size_t needle_size = needle_offsets[i] - prev_needle_offset - 1; // TODO: remove the copy, use raw char array directly std::string needle_str(reinterpret_cast(&needle_data[prev_needle_offset]), needle_size); - constantConstant(haystack_data, needle_str, escape_char, collator, res[i]); + constantConstant(haystack_data, needle_str, escape_char, match_type, collator, res[i]); prev_needle_offset = needle_offsets[i]; } } @@ -662,7 +747,8 @@ struct ExtractImpl res_data.reserve(data.size() / 5); res_offsets.resize(offsets.size()); - const auto & regexp = Regexps::get(pattern); + int flags = OptimizedRegularExpression::RE_DOT_NL; + const auto & regexp = Regexps::get(pattern, flags); unsigned capture = regexp->getNumberOfSubpatterns() > 0 ? 1 : 0; OptimizedRegularExpression::MatchVec matches; @@ -719,7 +805,7 @@ struct ReplaceRegexpImpl { Instructions instructions; - String now = ""; + String now; for (size_t i = 0; i < s.size(); ++i) { if (s[i] == '\\' && i + 1 < s.size()) @@ -761,13 +847,24 @@ struct ReplaceRegexpImpl static void processString(const re2_st::StringPiece & input, ColumnString::Chars_t & res_data, ColumnString::Offset & res_offset, + const Int64 & pos, + const Int64 & occ, re2_st::RE2 & searcher, int num_captures, const Instructions & instructions) { re2_st::StringPiece matches[max_captures]; - size_t start_pos = 0; + size_t start_pos = pos <= 0 ? 
0 : pos - 1; + Int64 match_occ = 0; + size_t prefix_length = std::min(start_pos, static_cast(input.length())); + if (prefix_length > 0) + { + /// Copy prefix + res_data.resize(res_data.size() + prefix_length); + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data(), prefix_length); + res_offset += prefix_length; + } while (start_pos < static_cast(input.length())) { /// If no more replacements possible for current string @@ -775,34 +872,53 @@ struct ReplaceRegexpImpl if (searcher.Match(input, start_pos, input.length(), re2_st::RE2::Anchor::UNANCHORED, matches, num_captures)) { - const auto & match = matches[0]; - size_t bytes_to_copy = (match.data() - input.data()) - start_pos; + match_occ++; + /// if occ > 0, it will replace all the match expr, otherwise it only replace the occ-th match + if (occ == 0 || match_occ == occ) + { + const auto & match = matches[0]; + size_t bytes_to_copy = (match.data() - input.data()) - start_pos; - /// Copy prefix before matched regexp without modification - res_data.resize(res_data.size() + bytes_to_copy); - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + start_pos, bytes_to_copy); - res_offset += bytes_to_copy; - start_pos += bytes_to_copy + match.length(); + /// Copy prefix before matched regexp without modification + res_data.resize(res_data.size() + bytes_to_copy); + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + start_pos, bytes_to_copy); + res_offset += bytes_to_copy; + start_pos += bytes_to_copy + match.length(); - /// Do substitution instructions - for (const auto & it : instructions) - { - if (it.first >= 0) - { - res_data.resize(res_data.size() + matches[it.first].length()); - memcpy(&res_data[res_offset], matches[it.first].data(), matches[it.first].length()); - res_offset += matches[it.first].length(); - } - else + /// Do substitution instructions + for (const auto & it : instructions) { - res_data.resize(res_data.size() + it.second.size()); - 
memcpy(&res_data[res_offset], it.second.data(), it.second.size()); - res_offset += it.second.size(); + if (it.first >= 0) + { + res_data.resize(res_data.size() + matches[it.first].length()); + memcpy(&res_data[res_offset], matches[it.first].data(), matches[it.first].length()); + res_offset += matches[it.first].length(); + } + else + { + res_data.resize(res_data.size() + it.second.size()); + memcpy(&res_data[res_offset], it.second.data(), it.second.size()); + res_offset += it.second.size(); + } } - } - if (replace_one || match.length() == 0) /// Stop after match of zero length, to avoid infinite loop. - can_finish_current_string = true; + /// when occ > 0, just replace the occ-th match even if replace_one is false + if (replace_one || match.length() == 0) /// Stop after match of zero length, to avoid infinite loop. + can_finish_current_string = true; + } + else + { + const auto & match = matches[0]; + size_t bytes_to_copy = (match.data() - input.data()) - start_pos + match.length(); + + /// Copy the matched string without modification + res_data.resize(res_data.size() + bytes_to_copy); + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + start_pos, bytes_to_copy); + res_offset += bytes_to_copy; + start_pos += bytes_to_copy; + if (match.length() == 0) + can_finish_current_string = true; + } } else can_finish_current_string = true; @@ -827,6 +943,10 @@ struct ReplaceRegexpImpl const ColumnString::Offsets & offsets, const std::string & needle, const std::string & replacement, + const Int64 & pos, + const Int64 & occ, + const std::string & match_type, + std::shared_ptr collator, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -837,11 +957,22 @@ struct ReplaceRegexpImpl if (needle.empty()) { - /// TODO: copy all the data without changing - throw Exception("Length of the second argument of function replace must be greater than 0.", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + /// Copy all the data without changing. 
+ res_data.resize(data.size()); + const UInt8 * begin = &data[0]; + memcpy(&res_data[0], begin, data.size()); + memcpy(&res_offsets[0], &offsets[0], size * sizeof(UInt64)); + return; } - re2_st::RE2 searcher(needle); + String updated_needle = needle; + if (!match_type.empty() || collator != nullptr) + { + String mode_modifiers = getRE2ModeModifiers(match_type, collator); + if (!mode_modifiers.empty()) + updated_needle = mode_modifiers + updated_needle; + } + re2_st::RE2 searcher(updated_needle); int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, static_cast(max_captures)); Instructions instructions = createInstructions(replacement, num_captures); @@ -852,17 +983,21 @@ struct ReplaceRegexpImpl int from = i > 0 ? offsets[i - 1] : 0; re2_st::StringPiece input(reinterpret_cast(&data[0] + from), offsets[i] - from - 1); - processString(input, res_data, res_offset, searcher, num_captures, instructions); + processString(input, res_data, res_offset, pos, occ, searcher, num_captures, instructions); res_offsets[i] = res_offset; } } - static void vector_fixed(const ColumnString::Chars_t & data, - size_t n, - const std::string & needle, - const std::string & replacement, - ColumnString::Chars_t & res_data, - ColumnString::Offsets & res_offsets) + static void vectorFixed(const ColumnString::Chars_t & data, + size_t n, + const std::string & needle, + const std::string & replacement, + const Int64 & pos, + const Int64 & occ, + const std::string & match_type, + std::shared_ptr collator, + ColumnString::Chars_t & res_data, + ColumnString::Offsets & res_offsets) { ColumnString::Offset res_offset = 0; size_t size = data.size() / n; @@ -875,7 +1010,14 @@ struct ReplaceRegexpImpl throw Exception("Length of the second argument of function replace must be greater than 0.", ErrorCodes::ARGUMENT_OUT_OF_BOUND); } - re2_st::RE2 searcher(needle); + String updated_needle = needle; + if (!match_type.empty() || collator != nullptr) + { + String mode_modifiers = 
getRE2ModeModifiers(match_type, collator); + if (!mode_modifiers.empty()) + updated_needle = mode_modifiers + updated_needle; + } + re2_st::RE2 searcher(updated_needle); int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, static_cast(max_captures)); Instructions instructions = createInstructions(replacement, num_captures); @@ -885,10 +1027,21 @@ struct ReplaceRegexpImpl int from = i * n; re2_st::StringPiece input(reinterpret_cast(&data[0] + from), n); - processString(input, res_data, res_offset, searcher, num_captures, instructions); + processString(input, res_data, res_offset, pos, occ, searcher, num_captures, instructions); res_offsets[i] = res_offset; } } + static void constant(const String & input, const String & needle, const String & replacement, const Int64 & pos, const Int64 & occ, const String & match_type, std::shared_ptr collator, String & output) + { + ColumnString::Chars_t input_data; + input_data.insert(input_data.end(), input.begin(), input.end()); + ColumnString::Offsets input_offsets; + input_offsets.push_back(input_data.size() + 1); + ColumnString::Chars_t output_data; + ColumnString::Offsets output_offsets; + vector(input_data, input_offsets, needle, replacement, pos, occ, match_type, collator, output_data, output_offsets); + output = String(reinterpret_cast(&output_data[0]), output_offsets[0] - 1); + } }; @@ -904,6 +1057,10 @@ struct ReplaceStringImpl const ColumnString::Offsets & offsets, const std::string & needle, const std::string & replacement, + const Int64 & /* pos */, + const Int64 & /* occ */, + const std::string & /* match_type */, + std::shared_ptr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -982,12 +1139,16 @@ struct ReplaceStringImpl } } - static void vector_non_const_needle( + static void vectorNonConstNeedle( const ColumnString::Chars_t & data, const ColumnString::Offsets & offsets, const ColumnString::Chars_t & needle_chars, const ColumnString::Offsets & 
needle_offsets, const std::string & replacement, + const Int64 & /* pos */, + const Int64 & /* occ */, + const std::string & /* match_type */, + std::shared_ptr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1052,12 +1213,16 @@ struct ReplaceStringImpl } } - static void vector_non_const_replacement( + static void vectorNonConstReplacement( const ColumnString::Chars_t & data, const ColumnString::Offsets & offsets, const std::string & needle, const ColumnString::Chars_t & replacement_chars, const ColumnString::Offsets & replacement_offsets, + const Int64 & /* pos */, + const Int64 & /* occ */, + const std::string & /* match_type */, + std::shared_ptr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1139,13 +1304,17 @@ struct ReplaceStringImpl } } - static void vector_non_const_needle_replacement( + static void vectorNonConstNeedleReplacement( const ColumnString::Chars_t & data, const ColumnString::Offsets & offsets, const ColumnString::Chars_t & needle_chars, const ColumnString::Offsets & needle_offsets, const ColumnString::Chars_t & replacement_chars, const ColumnString::Offsets & replacement_offsets, + const Int64 & /* pos */, + const Int64 & /* occ */, + const std::string & /* match_type */, + std::shared_ptr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1213,12 +1382,16 @@ struct ReplaceStringImpl /// Note: this function converts fixed-length strings to variable-length strings /// and each variable-length string should ends with zero byte. 
- static void vector_fixed(const ColumnString::Chars_t & data, - size_t n, - const std::string & needle, - const std::string & replacement, - ColumnString::Chars_t & res_data, - ColumnString::Offsets & res_offsets) + static void vectorFixed(const ColumnString::Chars_t & data, + size_t n, + const std::string & needle, + const std::string & replacement, + const Int64 & /* pos */, + const Int64 & /* occ */, + const std::string & /* match_type */, + std::shared_ptr /* collator */, + ColumnString::Chars_t & res_data, + ColumnString::Offsets & res_offsets) { const UInt8 * begin = &data[0]; const UInt8 * pos = begin; @@ -1305,12 +1478,16 @@ struct ReplaceStringImpl } } - static void vector_fixed_non_const_needle( + static void vectorFixedNonConstNeedle( const ColumnString::Chars_t & data, size_t n, const ColumnString::Chars_t & needle_chars, const ColumnString::Offsets & needle_offsets, const std::string & replacement, + const Int64 & /* pos */, + const Int64 & /* occ */, + const std::string & /* match_type */, + std::shared_ptr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1376,12 +1553,16 @@ struct ReplaceStringImpl } } - static void vector_fixed_non_const_replacement( + static void vectorFixedNonConstReplacement( const ColumnString::Chars_t & data, size_t n, const std::string & needle, const ColumnString::Chars_t & replacement_chars, const ColumnString::Offsets & replacement_offsets, + const Int64 & /* pos */, + const Int64 & /* occ */, + const std::string & /* match_type */, + std::shared_ptr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1473,13 +1654,17 @@ struct ReplaceStringImpl } } - static void vector_fixed_non_const_needle_replacement( + static void vectorFixedNonConstNeedleReplacement( const ColumnString::Chars_t & data, size_t n, const ColumnString::Chars_t & needle_chars, const ColumnString::Offsets & needle_offsets, const ColumnString::Chars_t & replacement_chars, 
const ColumnString::Offsets & replacement_offsets, + const Int64 & /* pos */, + const Int64 & /* occ */, + const std::string & /* match_type */, + std::shared_ptr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1549,7 +1734,7 @@ struct ReplaceStringImpl } } - static void constant(const std::string & data, const std::string & needle, const std::string & replacement, std::string & res_data) + static void constant(const std::string & data, const std::string & needle, const std::string & replacement, const Int64 & /* pos */, const Int64 & /* occ */, const std::string & /* match_type */, std::shared_ptr /* collator */, std::string & res_data) { if (needle.empty()) { @@ -1596,11 +1781,31 @@ class FunctionStringReplace : public IFunction size_t getNumberOfArguments() const override { - return 3; + return 0; } + bool isVariadic() const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override + { + if constexpr (Impl::support_non_const_needle && Impl::support_non_const_replacement) + { + return {3, 4, 5}; + } + else if constexpr (Impl::support_non_const_needle) + { + return {2, 3, 4, 5}; + } + else if constexpr (Impl::support_non_const_replacement) + { + return {1, 3, 4, 5}; + } + else + { + return {1, 2, 3, 4, 5}; + } + } + void setCollator(const std::shared_ptr & collator_) override { collator = collator_; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { @@ -1616,6 +1821,18 @@ class FunctionStringReplace : public IFunction throw Exception("Illegal type " + arguments[2]->getName() + " of third argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + if (arguments.size() > 3 && !arguments[3]->isInteger()) + throw Exception("Illegal type " + arguments[2]->getName() + " of forth argument of 
function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (arguments.size() > 4 && !arguments[4]->isInteger()) + throw Exception("Illegal type " + arguments[2]->getName() + " of fifth argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (arguments.size() > 5 && !arguments[5]->isStringOrFixedString()) + throw Exception("Illegal type " + arguments[2]->getName() + " of sixth argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + return std::make_shared(); } @@ -1624,6 +1841,18 @@ class FunctionStringReplace : public IFunction const ColumnPtr & column_src = block.getByPosition(arguments[0]).column; const ColumnPtr & column_needle = block.getByPosition(arguments[1]).column; const ColumnPtr & column_replacement = block.getByPosition(arguments[2]).column; + const ColumnPtr column_pos = arguments.size() > 3 ? block.getByPosition(arguments[3]).column : nullptr; + const ColumnPtr column_occ = arguments.size() > 4 ? block.getByPosition(arguments[4]).column : nullptr; + const ColumnPtr column_match_type = arguments.size() > 5 ? block.getByPosition(arguments[5]).column : nullptr; + + if ((column_pos != nullptr && !column_pos->isColumnConst()) + || (column_occ != nullptr && !column_occ->isColumnConst()) + || (column_match_type != nullptr && !column_match_type->isColumnConst())) + throw Exception("4th, 5th, 6th arguments of function " + getName() + " must be constants."); + Int64 pos = column_pos == nullptr ? 1 : typeid_cast(column_pos.get())->getInt(0); + Int64 occ = column_occ == nullptr ? 0 : typeid_cast(column_occ.get())->getInt(0); + String match_type = column_match_type == nullptr ? 
"" : typeid_cast(column_match_type.get())->getValue(); + ColumnWithTypeAndName & column_result = block.getByPosition(result); bool needle_const = column_needle->isColumnConst(); @@ -1631,19 +1860,19 @@ class FunctionStringReplace : public IFunction if (needle_const && replacement_const) { - executeImpl(column_src, column_needle, column_replacement, column_result); + executeImpl(column_src, column_needle, column_replacement, pos, occ, match_type, column_result); } else if (needle_const) { - executeImplNonConstReplacement(column_src, column_needle, column_replacement, column_result); + executeImplNonConstReplacement(column_src, column_needle, column_replacement, pos, occ, match_type, column_result); } else if (replacement_const) { - executeImplNonConstNeedle(column_src, column_needle, column_replacement, column_result); + executeImplNonConstNeedle(column_src, column_needle, column_replacement, pos, occ, match_type, column_result); } else { - executeImplNonConstNeedleReplacement(column_src, column_needle, column_replacement, column_result); + executeImplNonConstNeedleReplacement(column_src, column_needle, column_replacement, pos, occ, match_type, column_result); } } @@ -1652,6 +1881,9 @@ class FunctionStringReplace : public IFunction const ColumnPtr & column_src, const ColumnPtr & column_needle, const ColumnPtr & column_replacement, + Int64 pos, + Int64 occ, + const String & match_type, ColumnWithTypeAndName & column_result) const { const ColumnConst * c1_const = typeid_cast(column_needle.get()); @@ -1662,13 +1894,13 @@ class FunctionStringReplace : public IFunction if (const ColumnString * col = checkAndGetColumn(column_src.get())) { auto col_res = ColumnString::create(); - Impl::vector(col->getChars(), col->getOffsets(), needle, replacement, col_res->getChars(), col_res->getOffsets()); + Impl::vector(col->getChars(), col->getOffsets(), needle, replacement, pos, occ, match_type, collator, col_res->getChars(), col_res->getOffsets()); column_result.column = 
std::move(col_res); } else if (const ColumnFixedString * col = checkAndGetColumn(column_src.get())) { auto col_res = ColumnString::create(); - Impl::vector_fixed(col->getChars(), col->getN(), needle, replacement, col_res->getChars(), col_res->getOffsets()); + Impl::vectorFixed(col->getChars(), col->getN(), needle, replacement, pos, occ, match_type, collator, col_res->getChars(), col_res->getOffsets()); column_result.column = std::move(col_res); } else @@ -1681,6 +1913,9 @@ class FunctionStringReplace : public IFunction const ColumnPtr & column_src, const ColumnPtr & column_needle, const ColumnPtr & column_replacement, + Int64 pos, + Int64 occ, + const String & match_type, ColumnWithTypeAndName & column_result) const { if constexpr (Impl::support_non_const_needle) @@ -1692,13 +1927,13 @@ class FunctionStringReplace : public IFunction if (const ColumnString * col = checkAndGetColumn(column_src.get())) { auto col_res = ColumnString::create(); - Impl::vector_non_const_needle(col->getChars(), col->getOffsets(), col_needle->getChars(), col_needle->getOffsets(), replacement, col_res->getChars(), col_res->getOffsets()); + Impl::vectorNonConstNeedle(col->getChars(), col->getOffsets(), col_needle->getChars(), col_needle->getOffsets(), replacement, pos, occ, match_type, collator, col_res->getChars(), col_res->getOffsets()); column_result.column = std::move(col_res); } else if (const ColumnFixedString * col = checkAndGetColumn(column_src.get())) { auto col_res = ColumnString::create(); - Impl::vector_fixed_non_const_needle(col->getChars(), col->getN(), col_needle->getChars(), col_needle->getOffsets(), replacement, col_res->getChars(), col_res->getOffsets()); + Impl::vectorFixedNonConstNeedle(col->getChars(), col->getN(), col_needle->getChars(), col_needle->getOffsets(), replacement, pos, occ, match_type, collator, col_res->getChars(), col_res->getOffsets()); column_result.column = std::move(col_res); } else @@ -1716,6 +1951,9 @@ class FunctionStringReplace : public IFunction 
const ColumnPtr & column_src, const ColumnPtr & column_needle, const ColumnPtr & column_replacement, + Int64 pos, + Int64 occ, + const String & match_type, ColumnWithTypeAndName & column_result) const { if constexpr (Impl::support_non_const_replacement) @@ -1727,13 +1965,13 @@ class FunctionStringReplace : public IFunction if (const ColumnString * col = checkAndGetColumn(column_src.get())) { auto col_res = ColumnString::create(); - Impl::vector_non_const_replacement(col->getChars(), col->getOffsets(), needle, col_replacement->getChars(), col_replacement->getOffsets(), col_res->getChars(), col_res->getOffsets()); + Impl::vectorNonConstReplacement(col->getChars(), col->getOffsets(), needle, col_replacement->getChars(), col_replacement->getOffsets(), pos, occ, match_type, collator, col_res->getChars(), col_res->getOffsets()); column_result.column = std::move(col_res); } else if (const ColumnFixedString * col = checkAndGetColumn(column_src.get())) { auto col_res = ColumnString::create(); - Impl::vector_fixed_non_const_replacement(col->getChars(), col->getN(), needle, col_replacement->getChars(), col_replacement->getOffsets(), col_res->getChars(), col_res->getOffsets()); + Impl::vectorFixedNonConstReplacement(col->getChars(), col->getN(), needle, col_replacement->getChars(), col_replacement->getOffsets(), pos, occ, match_type, collator, col_res->getChars(), col_res->getOffsets()); column_result.column = std::move(col_res); } else @@ -1751,6 +1989,9 @@ class FunctionStringReplace : public IFunction const ColumnPtr & column_src, const ColumnPtr & column_needle, const ColumnPtr & column_replacement, + Int64 pos, + Int64 occ, + const String & match_type, ColumnWithTypeAndName & column_result) const { if constexpr (Impl::support_non_const_needle && Impl::support_non_const_replacement) @@ -1761,13 +2002,13 @@ class FunctionStringReplace : public IFunction if (const ColumnString * col = checkAndGetColumn(column_src.get())) { auto col_res = ColumnString::create(); - 
Impl::vector_non_const_needle_replacement(col->getChars(), col->getOffsets(), col_needle->getChars(), col_needle->getOffsets(), col_replacement->getChars(), col_replacement->getOffsets(), col_res->getChars(), col_res->getOffsets()); + Impl::vectorNonConstNeedleReplacement(col->getChars(), col->getOffsets(), col_needle->getChars(), col_needle->getOffsets(), col_replacement->getChars(), col_replacement->getOffsets(), pos, occ, match_type, collator, col_res->getChars(), col_res->getOffsets()); column_result.column = std::move(col_res); } else if (const ColumnFixedString * col = checkAndGetColumn(column_src.get())) { auto col_res = ColumnString::create(); - Impl::vector_fixed_non_const_needle_replacement(col->getChars(), col->getN(), col_needle->getChars(), col_needle->getOffsets(), col_replacement->getChars(), col_replacement->getOffsets(), col_res->getChars(), col_res->getOffsets()); + Impl::vectorFixedNonConstNeedleReplacement(col->getChars(), col->getN(), col_needle->getChars(), col_needle->getOffsets(), col_replacement->getChars(), col_replacement->getOffsets(), pos, occ, match_type, collator, col_res->getChars(), col_res->getOffsets()); column_result.column = std::move(col_res); } else @@ -1780,6 +2021,7 @@ class FunctionStringReplace : public IFunction throw Exception("Argument at index 2 and 3 for function replace must be constant", ErrorCodes::ILLEGAL_COLUMN); } } + std::shared_ptr collator; }; struct NamePosition @@ -1802,6 +2044,12 @@ struct NameMatch { static constexpr auto name = "match"; }; + +struct NameTiDBMatch +{ + static constexpr auto name = "regexp"; +}; + struct NameLike { static constexpr auto name = "like"; @@ -1842,8 +2090,9 @@ using FunctionPositionCaseInsensitiveUTF8 = FunctionsStringSearch, NamePositionCaseInsensitiveUTF8>; using FunctionMatch = FunctionsStringSearch, NameMatch>; +using FunctionTiDBMatch = FunctionsStringSearch, NameTiDBMatch>; using FunctionLike = FunctionsStringSearch, NameLike>; -using FunctionLike3Args = 
FunctionsStringSearch, NameLike3Args, 3>; +using FunctionLike3Args = FunctionsStringSearch, NameLike3Args, true>; using FunctionNotLike = FunctionsStringSearch, NameNotLike>; using FunctionExtract = FunctionsStringSearchToString; using FunctionReplaceOne = FunctionStringReplace, NameReplaceOne>; @@ -1863,6 +2112,7 @@ void registerFunctionsStringSearch(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git a/dbms/src/Functions/FunctionsStringSearch.h b/dbms/src/Functions/FunctionsStringSearch.h index 9e6dfbd2c13..8a1eaef85bd 100644 --- a/dbms/src/Functions/FunctionsStringSearch.h +++ b/dbms/src/Functions/FunctionsStringSearch.h @@ -43,12 +43,11 @@ extern const int ILLEGAL_COLUMN; static const UInt8 CH_ESCAPE_CHAR = '\\'; -template +template class FunctionsStringSearch : public IFunction { public: static constexpr auto name = Name::name; - static constexpr auto has_3_args = (num_args == 3); static FunctionPtr create(const Context &) { return std::make_shared(); @@ -63,9 +62,12 @@ class FunctionsStringSearch : public IFunction size_t getNumberOfArguments() const override { - return num_args; + return 0; } + bool isVariadic() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {3}; } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!arguments[0]->isString()) @@ -77,10 +79,20 @@ class FunctionsStringSearch : public IFunction throw Exception( "Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - if (has_3_args && !arguments[2]->isInteger()) - throw Exception( - "Illegal type " + arguments[2]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + if constexpr (customize_escape_char) + { + if 
(!arguments[2]->isInteger()) + throw Exception( + "Illegal type " + arguments[2]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + else + { + if (arguments.size() > 2 && !arguments[2]->isString()) + throw Exception( + "Illegal type " + arguments[2]->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } return std::make_shared>(); } @@ -96,7 +108,8 @@ class FunctionsStringSearch : public IFunction const ColumnConst * col_needle_const = typeid_cast(&*column_needle); UInt8 escape_char = CH_ESCAPE_CHAR; - if (has_3_args) + String match_type = ""; + if constexpr (customize_escape_char) { const auto * col_escape_const = typeid_cast(&*block.getByPosition(arguments[2]).column); bool valid_args = true; @@ -122,12 +135,22 @@ class FunctionsStringSearch : public IFunction throw Exception("3rd arguments of function " + getName() + " must be constants and between 0 and 255."); } } + else + { + if (arguments.size() > 2) + { + auto * col_match_type_const = typeid_cast(&*block.getByPosition(arguments[2]).column); + if (col_match_type_const == nullptr) + throw Exception("Match type argument of function " + getName() + " must be constant"); + match_type = col_match_type_const->getValue(); + } + } if (col_haystack_const && col_needle_const) { ResultType res{}; String needle_string = col_needle_const->getValue(); - Impl::constantConstant(col_haystack_const->getValue(), needle_string, escape_char, collator, res); + Impl::constantConstant(col_haystack_const->getValue(), needle_string, escape_char, match_type, collator, res); block.getByPosition(result).column = block.getByPosition(result).type->createColumnConst(col_haystack_const->size(), toField(res)); return; } @@ -146,19 +169,20 @@ class FunctionsStringSearch : public IFunction col_needle_vector->getChars(), col_needle_vector->getOffsets(), escape_char, + match_type, collator, vec_res); else if (col_haystack_vector && col_needle_const) { 
String needle_string = col_needle_const->getValue(); - Impl::vectorConstant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), needle_string, escape_char, collator, vec_res); + Impl::vectorConstant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), needle_string, escape_char, match_type, collator, vec_res); } else if (col_haystack_const && col_needle_vector) { auto haystack = col_haystack_const->getValue(); const ColumnString::Chars_t & needle_chars = col_needle_vector->getChars(); const IColumn::Offsets & needle_offsets = col_needle_vector->getOffsets(); - Impl::constantVector(haystack, needle_chars, needle_offsets, escape_char, collator, vec_res); + Impl::constantVector(haystack, needle_chars, needle_offsets, escape_char, match_type, collator, vec_res); } else throw Exception("Illegal columns " + block.getByPosition(arguments[0]).column->getName() + " and " diff --git a/dbms/src/Functions/FunctionsVisitParam.h b/dbms/src/Functions/FunctionsVisitParam.h index 9078e79b111..a4ca69aa96d 100644 --- a/dbms/src/Functions/FunctionsVisitParam.h +++ b/dbms/src/Functions/FunctionsVisitParam.h @@ -171,10 +171,10 @@ struct ExtractParamImpl using ResultType = typename ParamExtractor::ResultType; /// It is assumed that `res` is the correct size and initialized with zeros. 
- static void vectorConstant(const ColumnString::Chars_t & data, const ColumnString::Offsets & offsets, std::string needle, const UInt8 escape_char, const TiDB::TiDBCollatorPtr & collator, PaddedPODArray & res) + static void vectorConstant(const ColumnString::Chars_t & data, const ColumnString::Offsets & offsets, std::string needle, const UInt8 escape_char, const std::string & match_type, const TiDB::TiDBCollatorPtr & collator, PaddedPODArray & res) { - if (escape_char != '\\' || collator != nullptr) - throw Exception("PositionImpl don't support customized escape char and tidb collator", ErrorCodes::NOT_IMPLEMENTED); + if (escape_char != '\\' || !match_type.empty() || collator != nullptr) + throw Exception("ExtractParamImpl don't support customized escape char/match_type/tidb collator", ErrorCodes::NOT_IMPLEMENTED); /// We are looking for a parameter simply as a substring of the form "name" needle = "\"" + needle + "\":"; @@ -210,10 +210,10 @@ struct ExtractParamImpl memset(&res[i], 0, (res.size() - i) * sizeof(res[0])); } - static void constantConstant(const std::string & data, std::string needle, const UInt8 escape_char, const TiDB::TiDBCollatorPtr & collator, ResultType & res) + static void constantConstant(const std::string & data, std::string needle, const UInt8 escape_char, const std::string & match_type, const TiDB::TiDBCollatorPtr & collator, ResultType & res) { - if (escape_char != '\\' || collator != nullptr) - throw Exception("PositionImpl don't support customized escape char and tidb collator", ErrorCodes::NOT_IMPLEMENTED); + if (escape_char != '\\' || !match_type.empty() || collator != nullptr) + throw Exception("ExtractParamImpl don't support customized escape char/match_type/tidb collator", ErrorCodes::NOT_IMPLEMENTED); needle = "\"" + needle + "\":"; size_t pos = data.find(needle); if (pos == std::string::npos) diff --git a/dbms/src/Functions/Regexps.h b/dbms/src/Functions/Regexps.h index cb44d751c89..3ec6a4ca15a 100644 --- 
a/dbms/src/Functions/Regexps.h +++ b/dbms/src/Functions/Regexps.h @@ -31,13 +31,12 @@ inline Regexp createRegexp(const std::string & pattern, int flags) } template -inline Pool::Pointer get(const std::string & pattern) +inline Pool::Pointer get(const std::string & pattern, int flags) { /// C++11 has thread-safe function-local statics on most modern compilers. static Pool known_regexps; /// Different variables for different pattern parameters. - return known_regexps.get(pattern, [&pattern] { - int flags = OptimizedRegularExpression::RE_DOT_NL; + return known_regexps.get(pattern, [&pattern, &flags] { if (no_capture) flags |= OptimizedRegularExpression::RE_NO_CAPTURE; diff --git a/dbms/src/Functions/tests/gtest_regexp.cpp b/dbms/src/Functions/tests/gtest_regexp.cpp new file mode 100644 index 00000000000..73c6ba36113 --- /dev/null +++ b/dbms/src/Functions/tests/gtest_regexp.cpp @@ -0,0 +1,2156 @@ +#include +#include +#include +#include + +#include +#include +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-compare" +#include + +#pragma GCC diagnostic pop + +namespace DB +{ +namespace tests +{ +class Regexp : public FunctionTest +{ +protected: + bool isColumnConstNull(const ColumnWithTypeAndName & column_with_type) + { + return column_with_type.column->isColumnConst() && column_with_type.column->isNullAt(0); + } + bool isColumnConstNotNull(const ColumnWithTypeAndName & column_with_type) + { + return column_with_type.column->isColumnConst() && !column_with_type.column->isNullAt(0); + } + bool isNullableColumnVector(const ColumnWithTypeAndName & column_with_type) + { + return !column_with_type.column->isColumnConst() && column_with_type.type->isNullable(); + } + template + ColumnWithTypeAndName createNullableVectorColumn(const InferredDataVector & vec, const std::vector & null_map) + { + using NullableType = Nullable; + InferredDataVector nullable_vec; + for (size_t i = 0; i < null_map.size(); i++) + { + if (null_map[i]) + 
nullable_vec.push_back({}); + else + nullable_vec.push_back(vec[i]); + } + return createColumn(nullable_vec); + } +}; + +TEST_F(Regexp, testRegexpMatchType) +{ + UInt8 res = false; + std::shared_ptr binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY); + std::shared_ptr ci_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI); + DB::MatchImpl::constantConstant("a\nB\n", "(?m)(?i)^b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a\nB\n", "^b", '\\', "mi", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a\nB\n", "^b", '\\', "m", ci_collator, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a\nB\n", "^b", '\\', "mi", binary_collator, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a\nB\n", "^b", '\\', "i", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a\nB\n", "^b", '\\', "m", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a\nB\n", "^a.*b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a\nB\n", "^a.*B", '\\', "s", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a\nB\n", "^a.*b", '\\', "is", nullptr, res); + ASSERT_TRUE(res == 1); +} + +TEST_F(Regexp, testRegexpMySQLFailedCases) +{ + UInt8 res = false; + /// result different from mysql 8.x + DB::MatchImpl::constantConstant("aa", "((((((((((a))))))))))\\10", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("AA", "(?i)((((((((((a))))))))))\\10", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nabb\n", "abb$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\na\n", "a$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\naa\n", "aa$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + 
DB::MatchImpl::constantConstant("b\nab\n", "ab$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a\nb\n", "(?m)b\\s^", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + /// back reference not supported in RE2 + // DB::MatchImpl::constantConstant("abcabc", "(abc)\\1", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcabc", "([a-c]*)\\1", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("a", "(a)|\\1", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("x", "(a)|\\1", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("ababbbcbc", "(([a-c])b*?\\2)*", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("ababbbcbc", "(([a-c])b*?\\2){3}", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("aaxabxbaxbbx", "((\\3|b)\\2(a)x)+", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("aaaxabaxbaaxbbax", "((\\3|b)\\2(a)x)+", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("bbaababbabaaaaabbaaaabba", "((\\3|b)\\2(a)){2,}", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("ABCABC", "(?i)(abc)\\1", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("ABCABC", "(?i)([a-c]*)\\1", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("aaaaaaaaaa", "^(a\\1?){4}$", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("aaaaaaaaa", "^(a\\1?){4}$", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("aaaaaaaaaaa", "^(a\\1?){4}$", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("Ab4ab", "(?i)(ab)\\d\\1", '\\', 
"", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("ab4Ab", "(?i)(ab)\\d\\1", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("aaaaaa", "^(a\\1?)(a\\1?)(a\\2?)(a\\3?)$", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("aaaaaa", "^(a\\1?){4}$", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abc", "^(?:b|a(?=(.)))*\\1", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("Oo", "(?i)^(o)(?!.*\\1)", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("abc12bc", "(.*)\\d+\\1", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("aaab", "(?=(a+?))(\\1ab)", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("aaab", "^(?=(a+?))\\1ab", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("2", "2(]*)?$\\1", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcab", "(\\w)?(abc)\\1b", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + /// invalid or unsupported Perl syntax: `(?!` + // DB::MatchImpl::constantConstant("abad", "a(?!b).", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + /// invalid or unsupported Perl syntax: `(?=` + // DB::MatchImpl::constantConstant("abad", "a(?=c|d).", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abad", "a(?=d).", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*)(?=c)", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*)(?=c)c", '\\', "", nullptr, res); /* Result: yB */ + // ; + // DB::MatchImpl::constantConstant("abcd", "(.*)(?=b|c)", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // 
DB::MatchImpl::constantConstant("abcd", "(.*)(?=b|c)c", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*)(?=c|b)", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*)(?=c|b)c", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*)(?=[bc])", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*)(?=[bc])c", '\\', "", nullptr, res); /* Result: yB */ + // ; + // DB::MatchImpl::constantConstant("abcd", "(.*?)(?=c)", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*?)(?=c)c", '\\', "", nullptr, res); /* Result: yB */ + // ; + // DB::MatchImpl::constantConstant("abcd", "(.*?)(?=b|c)", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*?)(?=b|c)c", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*?)(?=c|b)", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*?)(?=c|b)c", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*?)(?=[bc])", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*?)(?=[bc])c", '\\', "", nullptr, res); /* Result: yB */ + // ; + /// invalid or unsupported Perl syntax: `(?<` + // DB::MatchImpl::constantConstant("ab", "(?<=a)b", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("cb", "(?<=a)b", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b", "(?<=a)b", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("ab", "(?::constantConstant("cb", "(?::constantConstant("b", "(?::constantConstant("dbcb", "(?::constantConstant("dbaacb", 
"(?::constantConstant("dbcb", "(?::constantConstant("dbaacb", "(?::constantConstant("cdaccb", "(?::constantConstant("a", "$(?<=^(a))", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*)(?<=b)", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*)(?<=b)c", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*)(?<=b|c)", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*)(?<=b|c)c", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*)(?<=c|b)", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*)(?<=c|b)c", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*)(?<=[bc])", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*)(?<=[bc])c", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*?)(?<=b)", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*?)(?<=b)c", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*?)(?<=b|c)", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*?)(?<=b|c)c", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*?)(?<=c|b)", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*?)(?<=c|b)c", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*?)(?<=[bc])", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abcd", "(.*?)(?<=[bc])c", '\\', "", nullptr, res); + 
// ASSERT_TRUE(res == 1); + /// invalid or unsupported Perl syntax: `(?#` + // DB::MatchImpl::constantConstant("aaac", "^a(?#xxx){3}c", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("aaac", "(?x)^a (?#xxx) (?#yyy) {3}c", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + /// invalid or unsupported Perl syntax: `(?s` + // DB::MatchImpl::constantConstant("a\nb\nc\n", "((?s).)c(?!.)", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("a\nb\nc\n", "((?s)b.)c(?!.)", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + /// invalid or unsupported Perl syntax: `(?>` + // DB::MatchImpl::constantConstant("aaab", "(?>a+)b", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("aaab", "((?>a+)b)", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("aaab", "(?>(a+))b", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("((abc(ade)ufh()()x", "((?>[^()]+)|\\([^()]*\\))+", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("_I(round(xs * sz),1)", "round\\(((?>[^()]+))\\)", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + /// invalid escape sequence: `\Z` + // DB::MatchImpl::constantConstant("a\nb\n", "\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("b\na\n", "\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("b\na", "\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("a\nb\n", "(?m)\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("b\na\n", "(?m)\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("b\na", "(?m)\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // 
DB::MatchImpl::constantConstant("a\nb\n", "a\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\na\n", "a\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("b\na", "a\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("a\nb\n", "(?m)a\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\na\n", "(?m)a\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("b\na", "(?m)a\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("aa\nb\n", "aa\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\naa\n", "aa\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("b\naa", "aa\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("aa\nb\n", "(?m)aa\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\naa\n", "(?m)aa\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("b\naa", "(?m)aa\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("ac\nb\n", "aa\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nac\n", "aa\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nac", "aa\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("ac\nb\n", "(?m)aa\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nac\n", "(?m)aa\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nac", "(?m)aa\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // 
DB::MatchImpl::constantConstant("ca\nb\n", "aa\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nca\n", "aa\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nca", "aa\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("ca\nb\n", "(?m)aa\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nca\n", "(?m)aa\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nca", "(?m)aa\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("ab\nb\n", "ab\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nab\n", "ab\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("b\nab", "ab\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("ab\nb\n", "(?m)ab\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nab\n", "(?m)ab\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("b\nab", "(?m)ab\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("ac\nb\n", "ab\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nac\n", "ab\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("ac\nb\n", "(?m)ab\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nac\n", "(?m)ab\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nac", "(?m)ab\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("ca\nb\n", "ab\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 
0); + // DB::MatchImpl::constantConstant("b\nca\n", "ab\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nca", "ab\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("ca\nb\n", "(?m)ab\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nca\n", "(?m)ab\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nca", "(?m)ab\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("abb\nb\n", "abb\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nabb\n", "abb\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("b\nabb", "abb\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("abb\nb\n", "(?m)abb\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nabb\n", "(?m)abb\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("b\nabb", "(?m)abb\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("ac\nb\n", "abb\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nac\n", "abb\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nac", "abb\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("ac\nb\n", "(?m)abb\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nac\n", "(?m)abb\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nac", "(?m)abb\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("ca\nb\n", "abb\\Z", '\\', "", nullptr, res); 
+ // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nca\n", "abb\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nca", "abb\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("ca\nb\n", "(?m)abb\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nca\n", "(?m)abb\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nca", "(?m)abb\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + // DB::MatchImpl::constantConstant("b\nac", "ab\\Z", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + /// invalid or unsupported Perl syntax: `(?x` + // DB::MatchImpl::constantConstant("x ", "(?x)((?x:.) )", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + // DB::MatchImpl::constantConstant("x ", "(?x)((?-x:.) )", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + /// invalid or unsupported Perl syntax: `(?!` + // DB::MatchImpl::constantConstant("a\nxb\n", "(?m)(?!\\A)x", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + /// invalid character class range: `a-[` + // DB::MatchImpl::constantConstant("za-9z", "([a-[:digit:]]+)", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); + /// invalid escape sequence: `\G` + // DB::MatchImpl::constantConstant("aaaXbX", "\\GX.*X", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 0); + /// invalid escape sequence: `\R` + // DB::MatchImpl::constantConstant("abc\n123\n456\nxyz\n", "(?m)^\\d+\\R\\d+$", '\\', "", nullptr, res); + // ASSERT_TRUE(res == 1); +} + +TEST_F(Regexp, testRegexpMySQLCases) +{ + UInt8 res = false; + // Test based on https://github.com/mysql/mysql-server/blob/mysql-cluster-8.0.17/mysql-test/t/regular_expressions_func.test + DB::MatchImpl::constantConstant("abc", "abc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("xbc", "abc", '\\', "", nullptr, res); + 
ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("axc", "abc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("abx", "abc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("xabcy", "abc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ababc", "abc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "ab*c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "ab*bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abbc", "ab*bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abbbbc", "ab*bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abbbbc", ".{1}", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abbbbc", ".{3,4}", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abbbbc", "ab{0,}bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abbc", "ab+bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "ab+bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("abq", "ab+bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("abq", "ab{1,}bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("abbbbc", "ab+bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abbbbc", "ab{1,}bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abbbbc", "ab{1,3}bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abbbbc", "ab{3,4}bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + 
DB::MatchImpl::constantConstant("abbbbc", "ab{4,5}bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("abbc", "ab?bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "ab?bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "ab{0,1}bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abbbbc", "ab?bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("abc", "ab?c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "ab{0,1}c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "^abc$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abcc", "^abc$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("abcc", "^abc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aabc", "^abc$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("aabc", "abc$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aabcd", "abc$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("abc", "^", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "a.c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("axc", "a.c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("axyzc", "a.*c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("axyzd", "a.*c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("abc", "a[bc]d", '\\', "", nullptr, res); + 
ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("abd", "a[bc]d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abd", "a[b-d]e", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ace", "a[b-d]e", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aac", "a[b-d]", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a-", "a[-b]", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a-", "a[b-]", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // error ER_REGEXP_INVALID_RANGE + // DB::MatchImpl::constantConstant("-","a[b-a]",'\\',"",nullptr,res); /* Result: c */; + // error ER_REGEXP_MISSING_CLOSE_BRACKET + // DB::MatchImpl::constantConstant("-","a[]b",'\\',"",nullptr,res); /* Result: ci */; + // error ER_REGEXP_MISSING_CLOSE_BRACKET + // DB::MatchImpl::constantConstant("-","a[",'\\',"",nullptr,res); /* Result: c */; + // error ER_REGEXP_INVALID_BACK_REF + // DB::MatchImpl::constantConstant("-","\\1",'\\',"",nullptr,res); /* Result: c */; + // error ER_REGEXP_INVALID_BACK_REF + // DB::MatchImpl::constantConstant("-","\\2",'\\',"",nullptr,res); /* Result: c */; + // error ER_REGEXP_INVALID_BACK_REF + // DB::MatchImpl::constantConstant("-","(a)|\\2",'\\',"",nullptr,res); /* Result: c */; + DB::MatchImpl::constantConstant("a]", "a]", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a]b", "a[]]b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aed", "a[^bc]d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abd", "a[^bc]d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("adc", "a[^-b]c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a-c", "a[^-b]c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + 
DB::MatchImpl::constantConstant("a]c", "a[^]b]c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("adc", "a[^]b]c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a-", "\\ba\\b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("-a", "\\ba\\b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("-a-", "\\ba\\b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("xy", "\\by\\b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("yz", "\\by\\b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("xyz", "\\by\\b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a-", "\\Ba\\B", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("-a", "\\Ba\\B", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("-a-", "\\Ba\\B", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("xy", "\\By\\b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("yz", "\\by\\B", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("xyz", "\\By\\B", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a", "\\w", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("-", "\\w", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a", "\\W", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("-", "\\W", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a b", "a\\sb", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a-b", "a\\sb", '\\', "", nullptr, res); + ASSERT_TRUE(res == 
0); + DB::MatchImpl::constantConstant("a b", "a\\Sb", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a-b", "a\\Sb", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("1", "\\d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("-", "\\d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("1", "\\D", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("-", "\\D", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a", "[\\w]", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("-", "[\\w]", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a", "[\\W]", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("-", "[\\W]", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a b", "a[\\s]b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a-b", "a[\\s]b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a b", "a[\\S]b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a-b", "a[\\S]b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("1", "[\\d]", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("-", "[\\d]", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("1", "[\\D]", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("-", "[\\D]", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "ab|cd", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abcd", "ab|cd", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + 
DB::MatchImpl::constantConstant("def", "()ef", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // error ER_REGEXP_RULE_SYNTAX + // DB::MatchImpl::constantConstant("-","*a",'\\',"",nullptr,res); /* Result: c */; + // error ER_REGEXP_RULE_SYNTAX + // DB::MatchImpl::constantConstant("-","(*)b",'\\',"",nullptr,res); /* Result: c */; + DB::MatchImpl::constantConstant("b", "$b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + // error ER_REGEXP_BAD_ESCAPE_SEQUENCE + // DB::MatchImpl::constantConstant("-","a\\",'\\',"",nullptr,res); /* Result: c */; + DB::MatchImpl::constantConstant("a(b", "a\\(b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ab", "a\\(*b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a((b", "a\\(*b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a\\b", "a\\\\b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // error ER_REGEXP_MISMATCHED_PAREN + // DB::MatchImpl::constantConstant("-","abc)",'\\',"",nullptr,res); /* Result: c */; + // error ER_REGEXP_MISMATCHED_PAREN + // DB::MatchImpl::constantConstant("-","(abc",'\\',"",nullptr,res); /* Result: c */; + DB::MatchImpl::constantConstant("abc", "((a))", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "(a)b(c)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aabbabc", "a+b+c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aabbabc", "a{1,}b{1,}c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // error ER_REGEXP_RULE_SYNTAX + // DB::MatchImpl::constantConstant("-","a**",'\\',"",nullptr,res); /* Result: c */; + DB::MatchImpl::constantConstant("abcabc", "a.+?c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ab", "(a+|b)*", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ab", 
"(a+|b){0,}", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ab", "(a+|b)+", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ab", "(a+|b){1,}", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ab", "(a+|b)?", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ab", "(a+|b){0,1}", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // error ER_REGEXP_MISMATCHED_PAREN + // DB::MatchImpl::constantConstant("-",",'\\',"",nullptr,res);(",'\\',"",nullptr,res); /* Result: c */; + DB::MatchImpl::constantConstant("cde", "[^ab]*", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("", "abc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("", "a*", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abbbcd", "([abc])*d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abcd", "([abc])*bcd", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("e", "a|b|c|d|e", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ef", "(a|b|c|d|e)f", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abcdefg", "abcd*efg", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("xabyabbbz", "ab*", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("xayabbbz", "ab*", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abcde", "(ab|cd)e", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("hij", "[abhgefdc]ij", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abcde", "^(ab|cd)e", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + 
DB::MatchImpl::constantConstant("abcdef", "(abc|)ef", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abcd", "(a|b)c*d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "(ab|ab*)bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "a([bc]*)c*", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abcd", "a([bc]*)(c*d)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abcd", "a([bc]+)(c*d)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abcd", "a([bc]*)(c+d)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("adcdcde", "a[bcd]*dcdcde", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("adcdcde", "a[bcd]+dcdcde", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("abc", "(ab|a)b*c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abcd", "((a)(b)c)(d)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("alpha", "[a-zA-Z_][a-zA-Z0-9_]*", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abh", "^a(bc+|b[eh])g|.h$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("effgz", "(bc+d$|ef*g.|h?i(j|k))", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ij", "(bc+d$|ef*g.|h?i(j|k))", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("effg", "(bc+d$|ef*g.|h?i(j|k))", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("bcdd", "(bc+d$|ef*g.|h?i(j|k))", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("reffgz", "(bc+d$|ef*g.|h?i(j|k))", '\\', "", nullptr, res); + ASSERT_TRUE(res == 
1); + DB::MatchImpl::constantConstant("a", "((((((((((a))))))))))", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a", "(((((((((a)))))))))", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("uh-uh", "multiple words of text", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("multiple words, yeah", "multiple words", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abcde", "(.*)c(.*)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("(a, b)", "\\((.*), (.*)\\)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ab", "[k]", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("abcd", "abcd", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abcd", "a(bc)d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ac", "a[-]?c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b", "(a)|(b)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABC", "(?i)abc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("XBC", "(?i)abc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("AXC", "(?i)abc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ABX", "(?i)abc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("XABCY", "(?i)abc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABABC", "(?i)abc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABC", "(?i)ab*c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABC", "(?i)ab*bc", '\\', "", nullptr, res); + 
ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABBC", "(?i)ab*bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABBBBC", "(?i)ab*?bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABBBBC", "(?i)ab{0,}?bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABBC", "(?i)ab+?bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABC", "(?i)ab+bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ABQ", "(?i)ab+bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ABQ", "(?i)ab{1,}bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ABBBBC", "(?i)ab+bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABBBBC", "(?i)ab{1,}?bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABBBBC", "(?i)ab{1,3}?bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABBBBC", "(?i)ab{3,4}?bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABBBBC", "(?i)ab{4,5}?bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ABBC", "(?i)ab??bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABC", "(?i)ab??bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABC", "(?i)ab{0,1}?bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABBBBC", "(?i)ab??bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ABC", "(?i)ab??c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABC", "(?i)ab{0,1}?c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + 
DB::MatchImpl::constantConstant("ABC", "(?i)^abc$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABCC", "(?i)^abc$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ABCC", "(?i)^abc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("AABC", "(?i)^abc$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("AABC", "(?i)abc$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABC", "(?i)^", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABC", "(?i)$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABC", "(?i)a.c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("AXC", "(?i)a.c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("AXYZC", "(?i)a.*?c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("AXYZD", "(?i)a.*c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ABC", "(?i)a[bc]d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ABD", "(?i)a[bc]d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABD", "(?i)a[b-d]e", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ACE", "(?i)a[b-d]e", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("AAC", "(?i)a[b-d]", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("A-", "(?i)a[-b]", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("A-", "(?i)a[b-]", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // error ER_REGEXP_INVALID_RANGE + // DB::MatchImpl::constantConstant("-","(?i)a[b-a]",'\\',"",nullptr,res); /* Result: c */; + 
// error ER_REGEXP_MISSING_CLOSE_BRACKET + // DB::MatchImpl::constantConstant("-","(?i)a[]b",'\\',"",nullptr,res); /* Result: ci */; + // error ER_REGEXP_MISSING_CLOSE_BRACKET + // DB::MatchImpl::constantConstant("-","(?i)a[",'\\',"",nullptr,res); /* Result: c */; + DB::MatchImpl::constantConstant("A]", "(?i)a]", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("A]B", "(?i)a[]]b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("AED", "(?i)a[^bc]d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABD", "(?i)a[^bc]d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ADC", "(?i)a[^-b]c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("A-C", "(?i)a[^-b]c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("A]C", "(?i)a[^]b]c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ADC", "(?i)a[^]b]c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABC", "(?i)ab|cd", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABCD", "(?i)ab|cd", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("DEF", "(?i)()ef", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // error ER_REGEXP_RULE_SYNTAX + // DB::MatchImpl::constantConstant("-","(?i)*a",'\\',"",nullptr,res); /* Result: c */; + // error ER_REGEXP_RULE_SYNTAX + // DB::MatchImpl::constantConstant("-","(?i)(*)b",'\\',"",nullptr,res); /* Result: c */; + DB::MatchImpl::constantConstant("B", "(?i)$b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + // error ER_REGEXP_BAD_ESCAPE_SEQUENCE + // DB::MatchImpl::constantConstant("-","(?i)a\\",'\\',"",nullptr,res); /* Result: c */; + DB::MatchImpl::constantConstant("A(B", "(?i)a\\(b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + 
DB::MatchImpl::constantConstant("AB", "(?i)a\\(*b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("A((B", "(?i)a\\(*b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("A\\B", "(?i)a\\\\b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // error ER_REGEXP_MISMATCHED_PAREN + // DB::MatchImpl::constantConstant("-","(?i)abc)",'\\',"",nullptr,res); /* Result: c */; + // error ER_REGEXP_MISMATCHED_PAREN + // DB::MatchImpl::constantConstant("-","(?i)(abc",'\\',"",nullptr,res); /* Result: c */; + DB::MatchImpl::constantConstant("ABC", "(?i)((a))", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABC", "(?i)(a)b(c)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("AABBABC", "(?i)a+b+c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("AABBABC", "(?i)a{1,}b{1,}c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // error ER_REGEXP_RULE_SYNTAX + // DB::MatchImpl::constantConstant("-","(?i)a**",'\\',"",nullptr,res); /* Result: c */; + DB::MatchImpl::constantConstant("ABCABC", "(?i)a.+?c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABCABC", "(?i)a.*?c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABCABC", "(?i)a.{0,5}?c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("AB", "(?i)(a+|b)*", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("AB", "(?i)(a+|b){0,}", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("AB", "(?i)(a+|b)+", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("AB", "(?i)(a+|b){1,}", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("AB", "(?i)(a+|b)?", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + 
DB::MatchImpl::constantConstant("AB", "(?i)(a+|b){0,1}", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("AB", "(?i)(a+|b){0,1}?", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // error ER_REGEXP_MISMATCHED_PAREN + // DB::MatchImpl::constantConstant("-","(?i))(",'\\',"",nullptr,res); /* Result: c */; + DB::MatchImpl::constantConstant("CDE", "(?i)[^ab]*", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("", "(?i)abc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("", "(?i)a*", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABBBCD", "(?i)([abc])*d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABCD", "(?i)([abc])*bcd", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("E", "(?i)a|b|c|d|e", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("EF", "(?i)(a|b|c|d|e)f", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABCDEFG", "(?i)abcd*efg", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("XABYABBBZ", "(?i)ab*", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("XAYABBBZ", "(?i)ab*", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABCDE", "(?i)(ab|cd)e", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("HIJ", "(?i)[abhgefdc]ij", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABCDE", "(?i)^(ab|cd)e", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ABCDEF", "(?i)(abc|)ef", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABCD", "(?i)(a|b)c*d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABC", 
"(?i)(ab|ab*)bc", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABC", "(?i)a([bc]*)c*", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABCD", "(?i)a([bc]*)(c*d)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABCD", "(?i)a([bc]+)(c*d)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABCD", "(?i)a([bc]*)(c+d)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ADCDCDE", "(?i)a[bcd]*dcdcde", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ADCDCDE", "(?i)a[bcd]+dcdcde", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ABC", "(?i)(ab|a)b*c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABCD", "(?i)((a)(b)c)(d)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ALPHA", "(?i)[a-zA-Z_][a-zA-Z0-9_]*", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABH", "(?i)^a(bc+|b[eh])g|.h$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("EFFGZ", "(?i)(bc+d$|ef*g.|h?i(j|k))", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("IJ", "(?i)(bc+d$|ef*g.|h?i(j|k))", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("EFFG", "(?i)(bc+d$|ef*g.|h?i(j|k))", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("BCDD", "(?i)(bc+d$|ef*g.|h?i(j|k))", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("REFFGZ", "(?i)(bc+d$|ef*g.|h?i(j|k))", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("A", "(?i)((((((((((a))))))))))", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("A", 
"(?i)(((((((((a)))))))))", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("A", "(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("C", "(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("UH-UH", "(?i)multiple words of text", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("MULTIPLE WORDS, YEAH", "(?i)multiple words", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABCDE", "(?i)(.*)c(.*)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("(A, B)", "(?i)\\((.*), (.*)\\)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("AB", "(?i)[k]", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ABCD", "(?i)abcd", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ABCD", "(?i)a(bc)d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("AC", "(?i)a[-]?c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ace", "a(?:b|c|d)(.)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ace", "a(?:b|c|d)*(.)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ace", "a(?:b|c|d)+?(.)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("acdbcdbe", "a(?:b|c|d)+?(.)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("acdbcdbe", "a(?:b|c|d)+(.)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("acdbcdbe", "a(?:b|c|d){2}(.)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("acdbcdbe", "a(?:b|c|d){4,5}(.)", '\\', "", 
nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("acdbcdbe", "a(?:b|c|d){4,5}?(.)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("foobar", "((foo)|(bar))*", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // error ER_REGEXP_MISMATCHED_PAREN + // DB::MatchImpl::constantConstant("-",":(?:",'\\',"",nullptr,res); /* Result: c */; + DB::MatchImpl::constantConstant("acdbcdbe", "a(?:b|c|d){6,7}(.)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("acdbcdbe", "a(?:b|c|d){6,7}?(.)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("acdbcdbe", "a(?:b|c|d){5,6}(.)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("acdbcdbe", "a(?:b|c|d){5,6}?(.)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("acdbcdbe", "a(?:b|c|d){5,7}(.)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("acdbcdbe", "a(?:b|c|d){5,7}?(.)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ace", "a(?:b|(c|e){1,2}?|d)+?(.)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("AB", "^(.+)?B", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant(".", "^([^a-z])|(\\^)$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("<&OUT", "^[<>]&", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // # Not implemented + // error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("aaaaaaaaaa","^(a(?(1)\\1)){4}$",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + // # Not implemented + // error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("aaaaaaaaa","^(a(?(1)\\1)){4}$",'\\',"",nullptr,res); ASSERT_TRUE(res == 0); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // 
DB::MatchImpl::constantConstant("aaaaaaaaaaa","^(a(?(1)\\1)){4}$",'\\',"",nullptr,res); ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("aaaaaaaaa", "((a{4})+)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aaaaaaaaaa", "(((aa){2})+)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aaaaaaaaaa", "(((a{2}){2})+)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("foobar", "(?:(f)(o)(o)|(b)(a)(r))*", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // --error ER_REGEXP_RULE_SYNTAX + // DB::MatchImpl::constantConstant("-","(?<%)b",'\\',"",nullptr,res); /* Result: c */; + DB::MatchImpl::constantConstant("aba", "(?:..)*a", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aba", "(?:..)*?a", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "^(){3,5}", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aax", "^(a+)*ax", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aax", "^((a|b)+)*ax", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aax", "^((a|bc)+)*ax", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("cab", "(a|x)*ab", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("cab", "(a)*ab", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ab", "(?:(?i)a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ab", "((?i)a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("Ab", "(?:(?i)a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("Ab", "((?i)a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aB", "(?:(?i)a)b", 
'\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("aB", "((?i)a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ab", "(?i:a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ab", "((?i:a))b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("Ab", "(?i:a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("Ab", "((?i:a))b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aB", "(?i:a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("aB", "((?i:a))b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ab", "(?i)(?:(?-i)a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ab", "(?i)((?-i)a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aB", "(?i)(?:(?-i)a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aB", "(?i)((?-i)a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("Ab", "(?i)(?:(?-i)a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("Ab", "(?i)((?-i)a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("AB", "(?i)(?:(?-i)a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("AB", "(?i)((?-i)a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ab", "(?i)(?-i:a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ab", "(?i)((?-i:a))b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aB", "(?i)(?-i:a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aB", 
"(?i)((?-i:a))b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("Ab", "(?i)(?-i:a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("Ab", "(?i)((?-i:a))b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("AB", "(?i)(?-i:a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("AB", "(?i)((?-i:a))b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a\nB", "(?i)((?-i:a.))b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a\nB", "(?i)((?s-i:a.))b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("B\nB", "(?i)((?s-i:a.))b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant( + "cabbbb", + "(?:c|d)(?:)(?:a(?:)(?:b)(?:b(?:))(?:b(?:)(?:b)))", + '\\', + "", + nullptr, + res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + "(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb)))", + '\\', + "", + nullptr, + res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("foobar1234baz", "foo\\w*\\d{4}baz", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("cabd","a(?{})b",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("-","a(?{)b",'\\',"",nullptr,res); /* Result: c */; + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("-","a(?{{})b",'\\',"",nullptr,res); /* Result: c */; + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("-","a(?{}})b",'\\',"",nullptr,res); /* Result: c */; + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("-","a(?{"{"})b",'\\',"",nullptr,res); /* 
Result: c */; + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("cabd","a(?{"\\{"})b",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("-","a(?{"{"}})b",'\\',"",nullptr,res); /* Result: c */; + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("caxbd","a(?{$bl="\\{"}).b",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("x~~", "x(~~)*(?:(?:F)?)?", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a--", "^(?:a?b?)*$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a\nb\nc\n", "((?s)^a(.))((?m)^b$)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a\nb\nc\n", "((?m)^b$)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a\nb\n", "(?m)^b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a\nb\n", "(?m)^(b)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a\nb\n", "((?m)^b)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a\nb\n", "\n((?m)^b)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a\nb\nc\n", "^b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a\nb\nc\n", "()^b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a\nb\nc\n", "((?m)^b)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("a","(?(1)a|b)",'\\',"",nullptr,res); ASSERT_TRUE(res == 0); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("a","(?(1)b|a)",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + // # Not 
implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("a","(x)?(?(1)a|b)",'\\',"",nullptr,res); ASSERT_TRUE(res == 0); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("a","(x)?(?(1)b|a)",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("a","()?(?(1)b|a)",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("a","()(?(1)b|a)",'\\',"",nullptr,res); ASSERT_TRUE(res == 0); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("a","()?(?(1)a|b)",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("(blah)","^(\\()?blah(?(1)(\\)))$",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("blah","^(\\()?blah(?(1)(\\)))$",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("blah)","^(\\()?blah(?(1)(\\)))$",'\\',"",nullptr,res); ASSERT_TRUE(res == 0); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("(blah","^(\\()?blah(?(1)(\\)))$",'\\',"",nullptr,res); ASSERT_TRUE(res == 0); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("(blah)","^(\\(+)?blah(?(1)(\\)))$",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("blah","^(\\(+)?blah(?(1)(\\)))$",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // 
DB::MatchImpl::constantConstant("blah)","^(\\(+)?blah(?(1)(\\)))$",'\\',"",nullptr,res); ASSERT_TRUE(res == 0); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("(blah","^(\\(+)?blah(?(1)(\\)))$",'\\',"",nullptr,res); ASSERT_TRUE(res == 0); + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("a","(?(1?)a|b)",'\\',"",nullptr,res); /* Result: c */; + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("a","(?(1)a|b|c)",'\\',"",nullptr,res); /* Result: c */; + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("a","(?(?{0})a|b)",'\\',"",nullptr,res); ASSERT_TRUE(res == 0); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("a","(?(?{0})b|a)",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("a","(?(?{1})b|a)",'\\',"",nullptr,res); ASSERT_TRUE(res == 0); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("a","(?(?{1})a|b)",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("a","(?(?!a)a|b)",'\\',"",nullptr,res); ASSERT_TRUE(res == 0); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("a","(?(?!a)b|a)",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("a","(?(?=a)b|a)",'\\',"",nullptr,res); ASSERT_TRUE(res == 0); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("a","(?(?=a)a|b)",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("one:", "(\\w+:)+", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abcd:", 
"([\\w:]+::)?(\\w+)$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("abcd", "([\\w:]+::)?(\\w+)$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("xy:z:::abcd", "([\\w:]+::)?(\\w+)$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aexycd", "^[^bcd]*(c+)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("caab", "(a*)b+", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("yaaxxaaaacd","(?{$a=2})a*aa(?{local$a=$a+1})k*c(?{$b=$a})",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("yaaxxaaaacd","(?{$a=2})(a(?{local$a=$a+1}))*aak*c(?{$b=$a})",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aaab", "(>a+)ab", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a:[b]:", "([\\[:]+)", '\\', "", nullptr, res); /* Result: yi */ + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a=[b]=", "([\\[=]+)", '\\', "", nullptr, res); /* Result: yi */ + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a.[b].", "([\\[.]+)", '\\', "", nullptr, res); /* Result: yi */ + ASSERT_TRUE(res == 1); + // --error ER_REGEXP_MISSING_CLOSE_BRACKET + // DB::MatchImpl::constantConstant("-","[a[:xyz:",'\\',"",nullptr,res); /* Result: c */; + // --error ER_REGEXP_ILLEGAL_ARGUMENT + // DB::MatchImpl::constantConstant("-","[a[:xyz:]",'\\',"",nullptr,res); /* Result: c */; + DB::MatchImpl::constantConstant("abc", "[a\\[:]b[:c]", '\\', "", nullptr, res); /* Result: yi */ + ASSERT_TRUE(res == 1); + // --error ER_REGEXP_ILLEGAL_ARGUMENT + // DB::MatchImpl::constantConstant("pbaq","([a[:xyz:]b]+)",'\\',"",nullptr,res); /* Result: c */; + DB::MatchImpl::constantConstant("abc", "[a\\[:]b[:c]", '\\', 
"", nullptr, res); /* Result: iy */ + ASSERT_TRUE(res == 1); + // --error ER_REGEXP_ILLEGAL_ARGUMENT + // DB::MatchImpl::constantConstant("-","[[:foo:]]",'\\',"",nullptr,res); /* Result: c */; + // --error ER_REGEXP_ILLEGAL_ARGUMENT + // DB::MatchImpl::constantConstant("-","[[:^foo:]]",'\\',"",nullptr,res); /* Result: c */; + // --error ER_REGEXP_LOOK_BEHIND_LIMIT + // DB::MatchImpl::constantConstant("-","(?<=x+)y",'\\',"",nullptr,res); /* Result: c */; + // --error ER_REGEXP_MAX_LT_MIN + // DB::MatchImpl::constantConstant("-","a{37,17}",'\\',"",nullptr,res); /* Result: c */; + DB::MatchImpl::constantConstant("a\nb\n", "\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a\nb\n", "$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\na\n", "\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\na\n", "$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\na", "\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\na", "$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a\nb\n", "(?m)\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a\nb\n", "(?m)$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\na\n", "(?m)\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\na\n", "(?m)$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\na", "(?m)\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\na", "(?m)$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a\nb\n", "a\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a\nb\n", "a$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 
0); + DB::MatchImpl::constantConstant("b\na\n", "a\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\na", "a\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\na", "a$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a\nb\n", "(?m)a\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a\nb\n", "(?m)a$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\na\n", "(?m)a\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\na\n", "(?m)a$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\na", "(?m)a\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\na", "(?m)a$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aa\nb\n", "aa\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("aa\nb\n", "aa$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\naa\n", "aa\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\naa", "aa\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\naa", "aa$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aa\nb\n", "(?m)aa\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("aa\nb\n", "(?m)aa$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\naa\n", "(?m)aa\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\naa\n", "(?m)aa$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\naa", "(?m)aa\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + 
DB::MatchImpl::constantConstant("b\naa", "(?m)aa$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ac\nb\n", "aa\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ac\nb\n", "aa$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac\n", "aa\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac\n", "aa$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac", "aa\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac", "aa$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ac\nb\n", "(?m)aa\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ac\nb\n", "(?m)aa$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac\n", "(?m)aa\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac\n", "(?m)aa$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac", "(?m)aa\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac", "(?m)aa$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ca\nb\n", "aa\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ca\nb\n", "aa$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca\n", "aa\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca\n", "aa$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca", "aa\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca", "aa$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + 
DB::MatchImpl::constantConstant("ca\nb\n", "(?m)aa\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ca\nb\n", "(?m)aa$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca\n", "(?m)aa\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca\n", "(?m)aa$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca", "(?m)aa\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca", "(?m)aa$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ab\nb\n", "ab\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ab\nb\n", "ab$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nab\n", "ab\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nab", "ab\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\nab", "ab$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ab\nb\n", "(?m)ab\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ab\nb\n", "(?m)ab$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\nab\n", "(?m)ab\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nab\n", "(?m)ab$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\nab", "(?m)ab\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\nab", "(?m)ab$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ac\nb\n", "ab\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ac\nb\n", "ab$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + 
DB::MatchImpl::constantConstant("b\nac\n", "ab\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac\n", "ab$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac", "ab\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac", "ab$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ac\nb\n", "(?m)ab\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ac\nb\n", "(?m)ab$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac\n", "(?m)ab\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac\n", "(?m)ab$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac", "(?m)ab\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac", "(?m)ab$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ca\nb\n", "ab\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ca\nb\n", "ab$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca\n", "ab\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca\n", "ab$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca", "ab\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca", "ab$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ca\nb\n", "(?m)ab\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ca\nb\n", "(?m)ab$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca\n", "(?m)ab\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + 
DB::MatchImpl::constantConstant("b\nca\n", "(?m)ab$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca", "(?m)ab\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca", "(?m)ab$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("abb\nb\n", "abb\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("abb\nb\n", "abb$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nabb\n", "abb\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nabb", "abb\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\nabb", "abb$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abb\nb\n", "(?m)abb\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("abb\nb\n", "(?m)abb$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\nabb\n", "(?m)abb\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nabb\n", "(?m)abb$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\nabb", "(?m)abb\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b\nabb", "(?m)abb$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ac\nb\n", "abb\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ac\nb\n", "abb$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac\n", "abb\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac\n", "abb$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac", "abb\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res 
== 0); + DB::MatchImpl::constantConstant("b\nac", "abb$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ac\nb\n", "(?m)abb\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ac\nb\n", "(?m)abb$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac\n", "(?m)abb\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac\n", "(?m)abb$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac", "(?m)abb\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nac", "(?m)abb$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ca\nb\n", "abb\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ca\nb\n", "abb$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca\n", "abb\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca\n", "abb$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca", "abb\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca", "abb$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ca\nb\n", "(?m)abb\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ca\nb\n", "(?m)abb$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca\n", "(?m)abb\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca\n", "(?m)abb$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca", "(?m)abb\\z", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b\nca", "(?m)abb$", '\\', "", nullptr, res); + 
ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("ca", "(^|x)(c)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("x", "a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("yabz","a(?{$a=2;$b=3;($b)=$a})b",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("foo.bart", "foo.bart", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abcd\ndxxx", "(?m)^d[x][x][x]", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("xxxtt", "tt+$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("za-9z", "([a\\-\\d]+)", '\\', "", nullptr, res); /* Result: yi */ + ; + DB::MatchImpl::constantConstant("a0-za", "([\\d-z]+)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a0- z", "([\\d-\\s]+)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("=0-z=", "([[:digit:]-z]+)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("=0-z=", "([[:digit:]-[:alpha:]]+)", '\\', "", nullptr, res); /* Result: iy */ + ; + DB::MatchImpl::constantConstant("3.1415926", "(\\d+\\.\\d+)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("have a web browser", "(\\ba.{0,10}br)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("Changes", "(?i)\\.c(pp|xx|c)?$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("IO.c", "(?i)\\.c(pp|xx|c)?$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("IO.c", "(?i)(\\.c(pp|xx|c)?$)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("C:/", "^([a-z]:)", '\\', "", nullptr, res); + 
ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("\nx aa", "(?m)^\\S\\s+aa$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ab", "(^|a)b", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abac", "^([ab]*?)(b)?(c)$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a,b,c", "^(?:.,){2}c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a,b,c", "^(.,){2}c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a,b,c", "^(?:[^,]*,){2}c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a,b,c", "^([^,]*,){2}c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aaa,b,c,d", "^([^,]*,){3}d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aaa,b,c,d", "^([^,]*,){3,}d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aaa,b,c,d", "^([^,]*,){0,3}d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aaa,b,c,d", "^([^,]{1,3},){3}d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aaa,b,c,d", "^([^,]{1,3},){3,}d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aaa,b,c,d", "^([^,]{1,3},){0,3}d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aaa,b,c,d", "^([^,]{1,},){3}d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aaa,b,c,d", "^([^,]{1,},){3,}d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aaa,b,c,d", "^([^,]{1,},){0,3}d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aaa,b,c,d", "^([^,]{0,3},){3}d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + 
DB::MatchImpl::constantConstant("aaa,b,c,d", "^([^,]{0,3},){3,}d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aaa,b,c,d", "^([^,]{0,3},){0,3}d", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("", "(?i)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aba", "^(a(b)?)+$", '\\', "", nullptr, res); /* Result: yi */ + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("123\nabcabcabcabc\n", "(?m)^.{9}abc.*\n", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a", "^(a)?a$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("a","^(a)?(?(1)a|b)+$",'\\',"",nullptr,res); ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("x1", "^(0+)?(?:x(1))?", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant( + "012cxx0190", + "^([0-9a-fA-F]+)(?:x([0-9a-fA-F]+)?)(?:x([0-9a-fA-F]+))?", + '\\', + "", + nullptr, + res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("bbbac", "^(b+?|a){1,2}c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("bbbbac", "^(b+?|a){1,2}c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("cd. (A. Tw)", "\\((\\w\\. 
\\w+)\\)", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("aaaacccc", "((?:aaaa|bbbb)cccc)?", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("bbbbcccc", "((?:aaaa|bbbb)cccc)?", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a", "(a)?(a)+", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ab", "(ab)?(ab)+", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "(abc)?(abc)+", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a", "\\ba", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // # ?? Not supported + // --error ER_REGEXP_RULE_SYNTAX + // DB::MatchImpl::constantConstant("ab","^(a(??{"(?!)"})|(a)(?{1}))b",'\\',"",nullptr,res); /* Result: yi */; + DB::MatchImpl::constantConstant("AbCd", "ab(?i)cd", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("abCd", "ab(?i)cd", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("CD","(A|B)*(?(1)(CD)|(CD))",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("ABCD","(A|B)*(?(1)(CD)|(CD))",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("CD","(A|B)*?(?(1)(CD)|(CD))",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + // # Not implemented + // --error ER_REGEXP_UNIMPLEMENTED + // DB::MatchImpl::constantConstant("ABCD","(A|B)*?(?(1)(CD)|(CD))",'\\',"",nullptr,res); ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("foo\n bar", "(?m:(foo\\s*$))", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abcd", "(.*)c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + 
DB::MatchImpl::constantConstant("abcd", "(.*?)c", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + // # ?? not supported + // --error ER_REGEXP_RULE_SYNTAX + // DB::MatchImpl::constantConstant("x","(??{})",'\\',"",nullptr,res); /* Result: yi */; + DB::MatchImpl::constantConstant("abc", "a", '\\', "m", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "b", '\\', "m", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "c", '\\', "m", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "d", '\\', "m", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a", "a.*", '\\', "m", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("ab", "a.*", '\\', "m", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "A", '\\', "i", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abc", "A", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); +} + +/* Basic MatchImpl::constantConstant cases: literals, anchors and the dot, run with no collator, the BINARY collator and the UTF8MB4_GENERAL_CI collator, plus multi-byte input; malformed patterns are expected to throw. */ TEST_F(Regexp, testRegexpTiDBCase) +{ + UInt8 res; + std::shared_ptr binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY); + std::shared_ptr ci_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI); + DB::MatchImpl::constantConstant("a", "^$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("a", "a", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("b", "a", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("aA", "aA", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + /* same input and pattern under two collators: no match with BINARY, match with UTF8MB4_GENERAL_CI */ DB::MatchImpl::constantConstant("A", "^a$", '\\', "", binary_collator, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("A", "^a$", '\\', "", ci_collator, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("a", ".", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + 
DB::MatchImpl::constantConstant("ab", "^.$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("b", "..", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("aab", ".ab", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("abcd", ".*", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + /* the dot is expected to consume one whole multi-byte character: two characters fail ^.$ while a single one passes */ DB::MatchImpl::constantConstant("测试", "^.$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 0); + DB::MatchImpl::constantConstant("测", "^.$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + DB::MatchImpl::constantConstant("平凯星辰", "^平..辰$", '\\', "", nullptr, res); + ASSERT_TRUE(res == 1); + /* syntactically invalid patterns are expected to throw rather than report a result */ ASSERT_ANY_THROW((DB::MatchImpl::constantConstant("", "(", '\\', "", nullptr, res))); + ASSERT_ANY_THROW((DB::MatchImpl::constantConstant("", "(*", '\\', "", nullptr, res))); + ASSERT_ANY_THROW((DB::MatchImpl::constantConstant("", "[a", '\\', "", nullptr, res))); + ASSERT_ANY_THROW((DB::MatchImpl::constantConstant("", "\\", '\\', "", nullptr, res))); +} + +/* Runs the regexp function through executeFunction for const/const and vector/const argument shapes, with and without NULL arguments, with an optional match-type argument and with the BINARY collator. */ TEST_F(Regexp, testRegexp) +{ + std::shared_ptr binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY); + auto string_type = std::make_shared(); + auto nullable_string_type = makeNullable(string_type); + auto uint8_type = std::make_shared(); + auto nullable_uint8_type = makeNullable(uint8_type); + + /* row i pairs input_strings[i] with patterns[i] and match_types[i]; a 1 in the matching xxx_nulls vector marks the rows where that argument is NULL */ std::vector input_strings{"abc", "Abc", "a\nb\nc", "abcd", "hello, 平凯星辰"}; + std::vector input_string_nulls{0, 0, 0, 1, 0}; + + std::vector patterns{"^a", "abc$", "a.*B.*c", "^bc$", "平凯.*"}; + std::vector pattern_nulls{1, 0, 0, 0, 0}; + + std::vector match_types{"", "i", "ims", "i", ""}; + std::vector match_type_nulls{0, 1, 0, 0, 0}; + + /* expected results of the row-wise const cases: plain, with match type, and with match type under the BINARY collator */ std::vector results{1, 0, 0, 0, 1}; + std::vector results_with_match_type{1, 1, 1, 0, 1}; + std::vector results_with_match_type_collator{1, 0, 0, 0, 1}; + + /* expected results of the vector cases, where every input row is matched against patterns[0] */ std::vector vec_results{1, 0, 1, 1, 0}; + std::vector vec_results_with_match_type{1, 1, 1, 1, 0}; + std::vector 
vec_results_with_match_type_collator{1, 0, 1, 1, 0}; + + size_t row_size = input_string_nulls.size(); + + /* constant NULL columns used as the NULL argument or the expected NULL result in the cases below */ auto const_UInt8_null_column = createConstColumn>(row_size, {}); + auto const_string_null_column = createConstColumn>(row_size, {}); + /// case 1. regexp(const, const [, const]) + for (size_t i = 0; i < row_size; i++) + { + /// test regexp(const, const) + ASSERT_COLUMN_EQ(createConstColumn(row_size, results[i]), + executeFunction("regexp", createConstColumn(row_size, input_strings[i]), createConstColumn(row_size, patterns[i]))); + + /// test regexp(const, const, const) + ASSERT_COLUMN_EQ(createConstColumn(row_size, results_with_match_type[i]), + executeFunction("regexp", createConstColumn(row_size, input_strings[i]), createConstColumn(row_size, patterns[i]), createConstColumn(row_size, match_types[i]))); + + /// test regexp(const, const, const) with binary collator + ASSERT_COLUMN_EQ(createConstColumn(row_size, results_with_match_type_collator[i]), + executeFunction("regexp", {createConstColumn(row_size, input_strings[i]), createConstColumn(row_size, patterns[i]), createConstColumn(row_size, match_types[i])}, binary_collator)); + } + /// case 2. regexp(const, const [, const]) with null value + for (size_t i = 0; i < row_size; i++) + { + /// test regexp(const, const) + /* any NULL argument is expected to turn the whole result into the constant NULL column */ ASSERT_COLUMN_EQ(input_string_nulls[i] || pattern_nulls[i] ? const_UInt8_null_column : createConstColumn>(row_size, results[i]), + executeFunction("regexp", input_string_nulls[i] ? const_string_null_column : createConstColumn>(row_size, input_strings[i]), pattern_nulls[i] ? const_string_null_column : createConstColumn>(row_size, patterns[i]))); + + /// test regexp(const, const, const) + ASSERT_COLUMN_EQ(input_string_nulls[i] || pattern_nulls[i] || match_type_nulls[i] ? const_UInt8_null_column : createConstColumn>(row_size, results_with_match_type[i]), + executeFunction("regexp", input_string_nulls[i] ? const_string_null_column : createConstColumn>(row_size, input_strings[i]), pattern_nulls[i] ? 
const_string_null_column : createConstColumn>(row_size, patterns[i]), match_type_nulls[i] ? const_string_null_column : createConstColumn>(row_size, match_types[i]))); + + /// test regexp(const, const, const) with binary collator + ASSERT_COLUMN_EQ(input_string_nulls[i] || pattern_nulls[i] || match_type_nulls[i] ? const_UInt8_null_column : createConstColumn>(row_size, results_with_match_type_collator[i]), + executeFunction("regexp", {input_string_nulls[i] ? const_string_null_column : createConstColumn>(row_size, input_strings[i]), pattern_nulls[i] ? const_string_null_column : createConstColumn>(row_size, patterns[i]), match_type_nulls[i] ? const_string_null_column : createConstColumn>(row_size, match_types[i])}, binary_collator)); + } + /// case 3 regexp(vector, const[, const]) + { + /// test regexp(vector, const) + /* every input row is matched against the single constant pattern patterns[0] */ ASSERT_COLUMN_EQ(createColumn(vec_results), + executeFunction("regexp", createColumn(input_strings), createConstColumn(row_size, patterns[0]))); + + /// test regexp(vector, const, const) + ASSERT_COLUMN_EQ(createColumn(vec_results_with_match_type), + executeFunction("regexp", createColumn(input_strings), createConstColumn(row_size, patterns[0]), createConstColumn(row_size, "i"))); + + /// test regexp(vector, const, const) with binary collator + ASSERT_COLUMN_EQ(createColumn(vec_results_with_match_type_collator), + executeFunction("regexp", {createColumn(input_strings), createConstColumn(row_size, patterns[0]), createConstColumn(row_size, "i")}, binary_collator)); + } + /// case 4 regexp(vector, const[, const]) nullable + { + /* input_string_nulls doubles as the null map of both the input column and the expected result column */ ASSERT_COLUMN_EQ(createNullableVectorColumn(vec_results, input_string_nulls), + executeFunction("regexp", createNullableVectorColumn(input_strings, input_string_nulls), createConstColumn(row_size, patterns[0]))); + ASSERT_COLUMN_EQ(createNullableVectorColumn(vec_results_with_match_type, input_string_nulls), + executeFunction("regexp", createNullableVectorColumn(input_strings, input_string_nulls), createConstColumn(row_size, 
patterns[0]), createConstColumn(row_size, "i"))); + ASSERT_COLUMN_EQ(createNullableVectorColumn(vec_results_with_match_type_collator, input_string_nulls), + executeFunction("regexp", {createNullableVectorColumn(input_strings, input_string_nulls), createConstColumn(row_size, patterns[0]), createConstColumn(row_size, "i")}, binary_collator)); + } +} + +/* Matches five inputs against one long alternation of anchored digit-prefix patterns, for every pairing of the six input column kinds with the six pattern column kinds, and checks the nullability and shape of the result column. */ TEST_F(Regexp, testRegexpCustomerCases) +{ + String pattern = "^(53|94)[0-9]{10}$|" + "^(1200|1201|1202|1203|1204|1205|1206|1207|1208)[0-9]{8}$|" + "^54[0-9]{10}$|" + "^665[0-9]{9}$|" + "^63[0-9]{10}$|" + "^731[0-9]{11}$|" + "^73220[0-9]{9}$|" + "^73200[0-9]{9}$|" + "^73210[0-9]{9}$|" + "^771[0-9]{11}$|" + "^91[0-9]{10}$|" + "^73211[0-9]{9}$|" + "^781[0-9]{11}$|" + "^73222[0-9]{9}$|" + "^734[0-9]{11}$|" + "^75210[0-9]{9}$|" + "^73223[0-9]{9}$|" + "^73224[0-9]{9}$|" + "^882[0-9]{9}$|" + "^7777[0-9]{10}$|" + "^758[0-9]{11}$|" + "^759[0-9]{11}$|" + "^73226[0-9]{9}$|" + "^77761[0-9]{9}$|" + "^73227[0-9]{9}$|" + "^73225[0-9]{9}$|" + "^31111[0-9]{9}$|" + "^754[0-9]{11}$|" + "^755[0-9]{11}$|" + "^73228[0-9]{9}$|" + "^73229[0-9]{9}$|" + "^782[0-9]{11}$|" + "^756[0-9]{11}$"; + std::vector patterns{pattern, pattern, pattern, pattern, pattern}; + std::vector inputs{"73228012343218", "530101343498", "540101323298", "31111191919191", "78200000000000"}; + /// columnNothing, columnConstNull, columnConstNotNull, columnVectorNullable, columnVectorNotNull + ColumnsWithTypeAndName input_columns{createOnlyNullColumnConst(5), createConstColumn>(5, {}), createConstColumn>(5, inputs[0]), createConstColumn(5, inputs[0]), createColumn>({inputs[0], {}, {}, inputs[3], inputs[4]}), createColumn(inputs)}; + ColumnsWithTypeAndName pattern_columns{createOnlyNullColumnConst(5), createConstColumn>(5, {}), createConstColumn>(5, patterns[0]), createConstColumn(5, patterns[0]), createColumn>({patterns[0], {}, {}, patterns[3], patterns[4]}), createColumn(patterns)}; + for (const auto & input_column : input_columns) + { + for (const auto & pattern_column : 
pattern_columns) + { + /* an only-null argument is expected to give an only-null constant result */ if (input_column.type->onlyNull() || pattern_column.type->onlyNull()) + { + ASSERT_COLUMN_EQ(createOnlyNullColumnConst(5), + executeFunction("regexp", input_column, pattern_column)); + } + else if (isColumnConstNull(input_column) || isColumnConstNull(pattern_column)) + { + ASSERT_COLUMN_EQ(createConstColumn>(5, {}), + executeFunction("regexp", input_column, pattern_column)); + } + else if (isColumnConstNotNull(input_column) && isColumnConstNotNull(pattern_column)) + { + /* both arguments constant: the expected result is a constant 1 whose type is nullable only if an argument type is nullable */ if (input_column.type->isNullable() || pattern_column.type->isNullable()) + { + ASSERT_COLUMN_EQ(createConstColumn>(5, 1), + executeFunction("regexp", input_column, pattern_column)); + } + else + { + ASSERT_COLUMN_EQ(createConstColumn(5, 1), + executeFunction("regexp", input_column, pattern_column)); + } + } + else + { + /* remaining combinations: the expected result is a full (non-const) column */ bool result_nullable = input_column.type->isNullable() || pattern_column.type->isNullable(); + if (!result_nullable) + { + ASSERT_COLUMN_EQ(createColumn({1, 1, 1, 1, 1}), + executeFunction("regexp", input_column, pattern_column)); + } + else + { + /* the nullable vector columns hold NULL at rows 1 and 2, and the expected result is NULL at the same rows */ bool input_contains_null = isNullableColumnVector(input_column) || isNullableColumnVector(pattern_column); + if (input_contains_null) + { + ASSERT_COLUMN_EQ(createColumn>({1, {}, {}, 1, 1}), + executeFunction("regexp", input_column, pattern_column)); + } + else + { + ASSERT_COLUMN_EQ(createColumn>({1, 1, 1, 1, 1}), + executeFunction("regexp", input_column, pattern_column)); + } + } + } + } + } +} + +/* Checks ReplaceRegexpImpl::constant when flags come from the pattern itself, from the match-type string, or from the collator. */ TEST_F(Regexp, testRegexpReplaceMatchType) +{ + String res; + std::shared_ptr binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY); + std::shared_ptr ci_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI); + DB::ReplaceRegexpImpl::constant("a\nB\nc", "(?m)(?i)^b", "xxx", 1, 0, "", nullptr, res); + ASSERT_TRUE(res == "a\nxxx\nc"); + DB::ReplaceRegexpImpl::constant("a\nB\nc", "^b", "xxx", 1, 0, "mi", nullptr, res); + ASSERT_TRUE(res == "a\nxxx\nc"); + 
DB::ReplaceRegexpImpl::constant("a\nB\nc", "^b", "xxx", 1, 0, "m", ci_collator, res); + ASSERT_TRUE(res == "a\nxxx\nc"); + /* the BINARY collator is expected to keep matching case-sensitive even though the match type contains i */ DB::ReplaceRegexpImpl::constant("a\nB\nc", "^b", "xxx", 1, 0, "mi", binary_collator, res); + ASSERT_TRUE(res == "a\nB\nc"); + /* NOTE(review): with only the i flag this expects ^b to match at the start of the second line, i.e. without the m flag; confirm that line-start anchoring is intended here */ DB::ReplaceRegexpImpl::constant("a\nB\nc", "^b", "xxx", 1, 0, "i", nullptr, res); + ASSERT_TRUE(res == "a\nxxx\nc"); + DB::ReplaceRegexpImpl::constant("a\nB\nc", "^b", "xxx", 1, 0, "m", nullptr, res); + ASSERT_TRUE(res == "a\nB\nc"); + DB::ReplaceRegexpImpl::constant("a\nB\n", "^a.*b", "xxx", 1, 0, "", nullptr, res); + ASSERT_TRUE(res == "a\nB\n"); + /* with the s flag the dot also crosses the newline, so the match spans both lines */ DB::ReplaceRegexpImpl::constant("a\nB\n", "^a.*B", "xxx", 1, 0, "s", nullptr, res); + ASSERT_TRUE(res == "xxx\n"); + DB::ReplaceRegexpImpl::constant("a\nB\n", "^a.*b", "xxx", 1, 0, "is", nullptr, res); + ASSERT_TRUE(res == "xxx\n"); +} + +/* Replacement cases taken from the MySQL regular-expression test suite. The arguments after the replacement string are pos, occ, match type and collator: the expectations below show occ == 0 replacing every match from pos onward and occ == n replacing only the n-th one, with the text before pos kept unchanged. */ TEST_F(Regexp, testRegexpReplaceMySQLCases) +{ + // Test based on https://github.com/mysql/mysql-server/blob/mysql-cluster-8.0.17/mysql-test/t/regular_expressions_utf-8.test + String res; + DB::ReplaceRegexpImpl::constant("aaa", "a", "X", 1, 0, "", nullptr, res); + ASSERT_TRUE(res == "XXX"); + DB::ReplaceRegexpImpl::constant("abc", "b", "X", 1, 0, "", nullptr, res); + ASSERT_TRUE(res == "aXc"); + DB::ReplaceRegexpImpl::constant("aaabbccbbddaa", "b+", "X", 1, 1, "", nullptr, res); + ASSERT_TRUE(res == "aaaXccbbddaa"); + DB::ReplaceRegexpImpl::constant("aaabbccbbddaa", "b+", "X", 1, 2, "", nullptr, res); + ASSERT_TRUE(res == "aaabbccXddaa"); + /* the back-reference \1 re-inserts the captured run, so the second run of b must come back wrapped in angle brackets instead of disappearing */ DB::ReplaceRegexpImpl::constant("aaabbccbbddaa", "(b+)", "<\\1>", 1, 2, "", nullptr, res); + ASSERT_TRUE(res == "aaabbcc<bb>ddaa"); + DB::ReplaceRegexpImpl::constant("aaabbccbbddaa", "x+", "x", 1, 0, "", nullptr, res); + ASSERT_TRUE(res == "aaabbccbbddaa"); + DB::ReplaceRegexpImpl::constant("aaabbccbbddaa", "b+", "x", 1, 0, "", nullptr, res); + ASSERT_TRUE(res == "aaaxccxddaa"); + /* asking for an occurrence that does not exist leaves the input unchanged */ DB::ReplaceRegexpImpl::constant("aaab", "b", "x", 1, 2, "", nullptr, res); + ASSERT_TRUE(res == "aaab"); + 
DB::ReplaceRegexpImpl::constant("aaabccc", "b", "x", 1, 2, "", nullptr, res); + ASSERT_TRUE(res == "aaabccc"); + DB::ReplaceRegexpImpl::constant("abcbdb", "b", "X", 1, 0, "", nullptr, res); + ASSERT_TRUE(res == "aXcXdX"); + DB::ReplaceRegexpImpl::constant("aaabcbdb", "b", "X", 1, 0, "", nullptr, res); + ASSERT_TRUE(res == "aaaXcXdX"); + DB::ReplaceRegexpImpl::constant("aaabcbdb", "b", "X", 2, 0, "", nullptr, res); + ASSERT_TRUE(res == "aaaXcXdX"); + DB::ReplaceRegexpImpl::constant("aaabcbdb", "b", "X", 3, 0, "", nullptr, res); + ASSERT_TRUE(res == "aaaXcXdX"); + /* pos == 2 leaves the first character untouched and replaces from the second one on */ DB::ReplaceRegexpImpl::constant("aaa", "a", "X", 2, 0, "", nullptr, res); + ASSERT_TRUE(res == "aXX"); + DB::ReplaceRegexpImpl::constant("aaa", "a", "XX", 2, 0, "", nullptr, res); + ASSERT_TRUE(res == "aXXXX"); + DB::ReplaceRegexpImpl::constant("c b b", "^([[:alpha:]]+)[[:space:]].*$", "\\1", 1, 0, "", nullptr, res); + ASSERT_TRUE(res == "c"); + /* multi-byte input: the expectations imply that pos and occ count whole characters rather than bytes */ DB::ReplaceRegexpImpl::constant("\U0001F450\U0001F450\U0001F450", ".", "a", 2, 0, "", nullptr, res); + ASSERT_TRUE(res == "\U0001F450aa"); + DB::ReplaceRegexpImpl::constant("\U0001F450\U0001F450\U0001F450", ".", "a", 2, 2, "", nullptr, res); + ASSERT_TRUE(res == "\U0001F450\U0001F450a"); +} + +/* Column-level replacement test; it starts by declaring the per-row argument vectors together with xxx_nulls vectors of 0/1 flags. */ TEST_F(Regexp, testRegexpReplace) +{ + std::shared_ptr binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY); + auto string_type = std::make_shared(); + auto nullable_string_type = makeNullable(string_type); + auto uint8_type = std::make_shared(); + auto nullable_uint8_type = makeNullable(uint8_type); + + std::vector input_strings{"abb\nabbabb", "abbcabbabb", "abbabbabb", "ABBABBABB", "ABB\nABBABB"}; + std::vector input_string_nulls{0, 1, 0, 0, 0}; + + std::vector patterns{"^a.*", "bb", "abc", "abb", "abb.abb"}; + std::vector pattern_nulls{0, 0, 1, 0, 0}; + + std::vector replacements{"xxx", "xxx", "xxx", "xxx", "xxx"}; + std::vector replacement_nulls{0, 0, 1, 0, 0}; + + std::vector pos{1, 3, 2, 2, 1}; + std::vector pos_nulls{0, 0, 0, 1, 0}; + + 
std::vector occ{0, 2, 0, 0, 0}; + std::vector occ_nulls{1, 0, 0, 0, 0}; + + std::vector match_types{"is", "", "", "i", "ism"}; + std::vector match_type_nulls{1, 0, 0, 0, 0}; + + std::vector results{"xxx\nabbabb", "axxxcaxxxaxxx", "abbabbabb", "ABBABBABB", "ABB\nABBABB"}; + std::vector results_with_pos{"xxx\nabbabb", "abbcaxxxaxxx", "abbabbabb", "ABBABBABB", "ABB\nABBABB"}; + std::vector results_with_pos_occ{"xxx\nabbabb", "abbcabbaxxx", "abbabbabb", "ABBABBABB", "ABB\nABBABB"}; + std::vector results_with_pos_occ_match_type{"xxx", "abbcabbaxxx", "abbabbabb", "ABBxxxxxx", "xxxABB"}; + std::vector results_with_pos_occ_match_type_binary{"xxx", "abbcabbaxxx", "abbabbabb", "ABBABBABB", "ABB\nABBABB"}; + + std::vector vec_results{"xxx\nabbabb", "xxx", "xxx", "ABBABBABB", "ABB\nABBABB"}; + std::vector vec_results_with_pos{"xxx\nabbabb", "xxx", "xxx", "ABBABBABB", "ABB\nABBABB"}; + std::vector vec_results_with_pos_occ{"xxx\nabbabb", "xxx", "xxx", "ABBABBABB", "ABB\nABBABB"}; + std::vector vec_results_with_pos_occ_match_type{"xxx", "xxx", "xxx", "xxx", "xxx"}; + std::vector vec_results_with_pos_occ_match_type_binary{"xxx", "xxx", "xxx", "ABBABBABB", "ABB\nABBABB"}; + + size_t row_size = input_strings.size(); + auto const_string_null_column = createConstColumn>(row_size, {}); + auto const_int64_null_column = createConstColumn>(row_size, {}); + + /// case 1. 
regexp_replace(const, const, const [, const, const ,const]) + for (size_t i = 0; i < match_types.size(); i++) + { + /// test regexp_replace(str, pattern, replacement) + ASSERT_COLUMN_EQ(createConstColumn(row_size, results[i]), + executeFunction("replaceRegexpAll", createConstColumn(row_size, input_strings[i]), createConstColumn(row_size, patterns[i]), createConstColumn(row_size, replacements[i]))); + + /// test regexp_replace(str, pattern, replacement, pos) + ASSERT_COLUMN_EQ(createConstColumn(row_size, results_with_pos[i]), + executeFunction("replaceRegexpAll", createConstColumn(row_size, input_strings[i]), createConstColumn(row_size, patterns[i]), createConstColumn(row_size, replacements[i]), createConstColumn(row_size, pos[i]))); + + /// test regexp_replace(str, pattern, replacement, pos, occ) + ASSERT_COLUMN_EQ(createConstColumn(row_size, results_with_pos_occ[i]), + executeFunction("replaceRegexpAll", createConstColumn(row_size, input_strings[i]), createConstColumn(row_size, patterns[i]), createConstColumn(row_size, replacements[i]), createConstColumn(row_size, pos[i]), createConstColumn(row_size, occ[i]))); + + /// test regexp_replace(str, pattern, replacement, pos, occ, match_type) + ASSERT_COLUMN_EQ(createConstColumn(row_size, results_with_pos_occ_match_type[i]), + executeFunction("replaceRegexpAll", createConstColumn(row_size, input_strings[i]), createConstColumn(row_size, patterns[i]), createConstColumn(row_size, replacements[i]), createConstColumn(row_size, pos[i]), createConstColumn(row_size, occ[i]), createConstColumn(row_size, match_types[i]))); + + /// test regexp_replace(str, pattern, replacement, pos, occ, match_type) with binary collator + ASSERT_COLUMN_EQ(createConstColumn(row_size, results_with_pos_occ_match_type_binary[i]), + executeFunction("replaceRegexpAll", {createConstColumn(row_size, input_strings[i]), createConstColumn(row_size, patterns[i]), createConstColumn(row_size, replacements[i]), createConstColumn(row_size, pos[i]), 
createConstColumn(row_size, occ[i]), createConstColumn(row_size, match_types[i])}, binary_collator)); + } + + /// case 2. regexp_replace(const, const, const [, const, const ,const]) with null value + for (size_t i = 0; i < match_types.size(); i++) + { + /// test regexp_replace(str, pattern, replacement) + bool null_result = input_string_nulls[i] || pattern_nulls[i] || replacement_nulls[i]; + ASSERT_COLUMN_EQ(null_result ? const_string_null_column : createConstColumn>(row_size, results[i]), + executeFunction("replaceRegexpAll", input_string_nulls[i] ? const_string_null_column : createConstColumn>(row_size, input_strings[i]), pattern_nulls[i] ? const_string_null_column : createConstColumn>(row_size, patterns[i]), replacement_nulls[i] ? const_string_null_column : createConstColumn>(row_size, replacements[i]))); + + /// test regexp_replace(str, pattern, replacement, pos) + null_result = null_result || pos_nulls[i]; + ASSERT_COLUMN_EQ(null_result ? const_string_null_column : createConstColumn>(row_size, results_with_pos[i]), + executeFunction("replaceRegexpAll", input_string_nulls[i] ? const_string_null_column : createConstColumn>(row_size, input_strings[i]), pattern_nulls[i] ? const_string_null_column : createConstColumn>(row_size, patterns[i]), replacement_nulls[i] ? const_string_null_column : createConstColumn>(row_size, replacements[i]), pos_nulls[i] ? const_int64_null_column : createConstColumn>(row_size, pos[i]))); + + /// test regexp_replace(str, pattern, replacement, pos, occ) + null_result = null_result || occ_nulls[i]; + ASSERT_COLUMN_EQ(null_result ? const_string_null_column : createConstColumn>(row_size, results_with_pos_occ[i]), + executeFunction("replaceRegexpAll", input_string_nulls[i] ? const_string_null_column : createConstColumn>(row_size, input_strings[i]), pattern_nulls[i] ? const_string_null_column : createConstColumn>(row_size, patterns[i]), replacement_nulls[i] ? 
const_string_null_column : createConstColumn>(row_size, replacements[i]), pos_nulls[i] ? const_int64_null_column : createConstColumn>(row_size, pos[i]), occ_nulls[i] ? const_int64_null_column : createConstColumn>(row_size, occ[i]))); + + /// test regexp_replace(str, pattern, replacement, pos, occ, match_type) + null_result = null_result || match_type_nulls[i]; + ASSERT_COLUMN_EQ(null_result ? const_string_null_column : createConstColumn>(row_size, results_with_pos_occ_match_type[i]), + executeFunction("replaceRegexpAll", input_string_nulls[i] ? const_string_null_column : createConstColumn>(row_size, input_strings[i]), pattern_nulls[i] ? const_string_null_column : createConstColumn>(row_size, patterns[i]), replacement_nulls[i] ? const_string_null_column : createConstColumn>(row_size, replacements[i]), pos_nulls[i] ? const_int64_null_column : createConstColumn>(row_size, pos[i]), occ_nulls[i] ? const_int64_null_column : createConstColumn>(row_size, occ[i]), match_type_nulls[i] ? const_string_null_column : createConstColumn>(row_size, match_types[i]))); + + /// test regexp_replace(str, pattern, replacement, pos, occ, match_type) with binary collator + ASSERT_COLUMN_EQ(null_result ? const_string_null_column : createConstColumn>(row_size, results_with_pos_occ_match_type_binary[i]), + executeFunction("replaceRegexpAll", {input_string_nulls[i] ? const_string_null_column : createConstColumn>(row_size, input_strings[i]), pattern_nulls[i] ? const_string_null_column : createConstColumn>(row_size, patterns[i]), replacement_nulls[i] ? const_string_null_column : createConstColumn>(row_size, replacements[i]), pos_nulls[i] ? const_int64_null_column : createConstColumn>(row_size, pos[i]), occ_nulls[i] ? const_int64_null_column : createConstColumn>(row_size, occ[i]), match_type_nulls[i] ? 
const_string_null_column : createConstColumn>(row_size, match_types[i])}, binary_collator)); + } + + /// case 3 regexp_replace(vector, const, const[, const, const, const]) + { + /// test regexp_replace(str, pattern, replacement) + ASSERT_COLUMN_EQ(createColumn(vec_results), + executeFunction("replaceRegexpAll", createColumn(input_strings), createConstColumn(row_size, patterns[0]), createConstColumn(row_size, replacements[0]))); + + /// test regexp_replace(str, pattern, replacement, pos) + ASSERT_COLUMN_EQ(createColumn(vec_results_with_pos), + executeFunction("replaceRegexpAll", createColumn(input_strings), createConstColumn(row_size, patterns[0]), createConstColumn(row_size, replacements[0]), createConstColumn(row_size, pos[0]))); + + /// test regexp_replace(str, pattern, replacement, pos, occ) + ASSERT_COLUMN_EQ(createColumn(vec_results_with_pos_occ), + executeFunction("replaceRegexpAll", createColumn(input_strings), createConstColumn(row_size, patterns[0]), createConstColumn(row_size, replacements[0]), createConstColumn(row_size, pos[0]), createConstColumn(row_size, occ[0]))); + + /// test regexp_replace(str, pattern, replacement, pos, occ, match_type) + ASSERT_COLUMN_EQ(createColumn(vec_results_with_pos_occ_match_type), + executeFunction("replaceRegexpAll", createColumn(input_strings), createConstColumn(row_size, patterns[0]), createConstColumn(row_size, replacements[0]), createConstColumn(row_size, pos[0]), createConstColumn(row_size, occ[0]), createConstColumn(row_size, match_types[0]))); + + /// test regexp_replace(str, pattern, replacement, pos, occ, match_type) with binary collator + ASSERT_COLUMN_EQ(createColumn(vec_results_with_pos_occ_match_type_binary), + executeFunction("replaceRegexpAll", {createColumn(input_strings), createConstColumn(row_size, patterns[0]), createConstColumn(row_size, replacements[0]), createConstColumn(row_size, pos[0]), createConstColumn(row_size, occ[0]), createConstColumn(row_size, match_types[0])}, binary_collator)); + } + + 
/// case 4 regexp_replace(vector, const, const[, const, const, const]) with null value + { + /// test regexp_replace(str, pattern, replacement) + ASSERT_COLUMN_EQ(createNullableVectorColumn(vec_results, input_string_nulls), + executeFunction("replaceRegexpAll", createNullableVectorColumn(input_strings, input_string_nulls), createConstColumn(row_size, patterns[0]), createConstColumn(row_size, replacements[0]))); + + /// test regexp_replace(str, pattern, replacement, pos) + ASSERT_COLUMN_EQ(createNullableVectorColumn(vec_results_with_pos, input_string_nulls), + executeFunction("replaceRegexpAll", createNullableVectorColumn(input_strings, input_string_nulls), createConstColumn(row_size, patterns[0]), createConstColumn(row_size, replacements[0]), createConstColumn(row_size, pos[0]))); + + /// test regexp_replace(str, pattern, replacement, pos, occ) + ASSERT_COLUMN_EQ(createNullableVectorColumn(vec_results_with_pos_occ, input_string_nulls), + executeFunction("replaceRegexpAll", createNullableVectorColumn(input_strings, input_string_nulls), createConstColumn(row_size, patterns[0]), createConstColumn(row_size, replacements[0]), createConstColumn(row_size, pos[0]), createConstColumn(row_size, occ[0]))); + + + /// test regexp_replace(str, pattern, replacement, pos, occ, match_type) + ASSERT_COLUMN_EQ(createNullableVectorColumn(vec_results_with_pos_occ_match_type, input_string_nulls), + executeFunction("replaceRegexpAll", createNullableVectorColumn(input_strings, input_string_nulls), createConstColumn(row_size, patterns[0]), createConstColumn(row_size, replacements[0]), createConstColumn(row_size, pos[0]), createConstColumn(row_size, occ[0]), createConstColumn(row_size, match_types[0]))); + + + /// test regexp_replace(str, pattern, replacement, pos, occ, match_type) with binary collator + ASSERT_COLUMN_EQ(createNullableVectorColumn(vec_results_with_pos_occ_match_type_binary, input_string_nulls), + executeFunction("replaceRegexpAll", {createNullableVectorColumn(input_strings, 
input_string_nulls), createConstColumn(row_size, patterns[0]), createConstColumn(row_size, replacements[0]), createConstColumn(row_size, pos[0]), createConstColumn(row_size, occ[0]), createConstColumn(row_size, match_types[0])}, binary_collator)); + } +} +} // namespace tests +} // namespace DB \ No newline at end of file diff --git a/dbms/src/Storages/Transaction/Collator.h b/dbms/src/Storages/Transaction/Collator.h index 8c2005771ac..85dc2a82b20 100644 --- a/dbms/src/Storages/Transaction/Collator.h +++ b/dbms/src/Storages/Transaction/Collator.h @@ -53,6 +53,17 @@ class ITiDBCollator : public ICollator virtual StringRef sortKey(const char * s, size_t length, std::string & container) const = 0; virtual std::unique_ptr pattern() const = 0; int32_t getCollatorId() const { return collator_id; } + bool isBinary() const { return collator_id == BINARY; } + bool isCI() const + { + return collator_id == UTF8_UNICODE_CI || collator_id == UTF8_GENERAL_CI + || collator_id == UTF8MB4_UNICODE_CI || collator_id == UTF8MB4_GENERAL_CI; + } + bool isBin() const + { + return collator_id == UTF8_BIN || collator_id == UTF8MB4_BIN + || collator_id == ASCII_BIN || collator_id == LATIN1_BIN; + } protected: explicit ITiDBCollator(int32_t collator_id_) diff --git a/tests/fullstack-test/expr/regexp.test b/tests/fullstack-test/expr/regexp.test new file mode 100644 index 00000000000..9eefa82d159 --- /dev/null +++ b/tests/fullstack-test/expr/regexp.test @@ -0,0 +1,57 @@ +mysql> drop table if exists test.t +mysql> create table test.t (data varchar(30), data_not_null varchar(30) not null, pattern varchar(30), pattern_not_null varchar(30) not null); +mysql> insert into test.t values ('aaaa', 'AAAA', '^a.*', '^A.*'), ('abcd', 'abcd', null, '^a..d$'), (null, 'bbb', 'bb$', 'bb$'),('中文测试','中文测试','中文','^....$'),('中English混合','中English混合','^中English','^..nglish..$'); +mysql> alter table test.t set tiflash replica 1 +func> wait_table test t + +mysql> set @@tidb_isolation_read_engines='tiflash'; set 
@@tidb_enforce_mpp=1; select data regexp '^a', data_not_null regexp '^a' from test.t; ++------------------+---------------------------+ +| data regexp '^a' | data_not_null regexp '^a' | ++------------------+---------------------------+ +| 1 | 0 | +| 1 | 1 | +| NULL | 0 | +| 0 | 0 | +| 0 | 0 | ++------------------+---------------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp=1; select data regexp 'A$', data_not_null regexp 'A$' from test.t; ++------------------+---------------------------+ +| data regexp 'A$' | data_not_null regexp 'A$' | ++------------------+---------------------------+ +| 0 | 1 | +| 0 | 0 | +| NULL | 0 | +| 0 | 0 | +| 0 | 0 | ++------------------+---------------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp=1; select data regexp '^....$', data_not_null regexp '^....$' from test.t; ++----------------------+-------------------------------+ +| data regexp '^....$' | data_not_null regexp '^....$' | ++----------------------+-------------------------------+ +| 1 | 1 | +| 1 | 1 | +| NULL | 0 | +| 1 | 1 | +| 0 | 0 | ++----------------------+-------------------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp=1; select data regexp null, data_not_null regexp null from test.t; ++------------------+---------------------------+ +| data regexp null | data_not_null regexp null | ++------------------+---------------------------+ +| NULL | NULL | +| NULL | NULL | +| NULL | NULL | +| NULL | NULL | +| NULL | NULL | ++------------------+---------------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp=1; select data regexp pattern, data_not_null regexp pattern, data regexp pattern_not_null, data_not_null regexp pattern_not_null from test.t; ++---------------------+------------------------------+------------------------------+---------------------------------------+ +| data regexp pattern | data_not_null 
regexp pattern | data regexp pattern_not_null | data_not_null regexp pattern_not_null | ++---------------------+------------------------------+------------------------------+---------------------------------------+ +| 1 | 0 | 0 | 1 | +| NULL | NULL | 1 | 1 | +| NULL | 1 | NULL | 1 | +| 1 | 1 | 1 | 1 | +| 1 | 1 | 1 | 1 | ++---------------------+------------------------------+------------------------------+---------------------------------------+ + diff --git a/tests/tidb-ci/new_collation_fullstack/regexp.test b/tests/tidb-ci/new_collation_fullstack/regexp.test new file mode 100644 index 00000000000..91b03eec58b --- /dev/null +++ b/tests/tidb-ci/new_collation_fullstack/regexp.test @@ -0,0 +1,69 @@ +mysql> drop table if exists test.t +mysql> create table test.t (data varchar(30) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin, data_not_null varchar(30) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci not null, pattern varchar(30) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin, pattern_not_null varchar(30) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci not null); +mysql> insert into test.t values ('aaaa', 'AAAA', '^a.*', '^A.*'), ('abcd', 'abcd', null, '^a..d$'), (null, 'bbb', 'bb$', 'bb$'),('中文测试','中文测试','中文','^....$'),('中English混合','中English混合','^中english','^..nglisH..$'); +mysql> alter table test.t set tiflash replica 1 +func> wait_table test t + +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp=1; select data regexp '^a', data_not_null regexp '^a' from test.t; ++------------------+---------------------------+ +| data regexp '^a' | data_not_null regexp '^a' | ++------------------+---------------------------+ +| 1 | 1 | +| 1 | 1 | +| NULL | 0 | +| 0 | 0 | +| 0 | 0 | ++------------------+---------------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp=1; select data regexp 'A$', data_not_null regexp 'A$' from test.t; ++------------------+---------------------------+ +| data regexp 'A$' | data_not_null regexp 
'A$' | ++------------------+---------------------------+ +| 0 | 1 | +| 0 | 0 | +| NULL | 0 | +| 0 | 0 | +| 0 | 0 | ++------------------+---------------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp=1; select data regexp '^....$', data_not_null regexp '^....$' from test.t; ++----------------------+-------------------------------+ +| data regexp '^....$' | data_not_null regexp '^....$' | ++----------------------+-------------------------------+ +| 1 | 1 | +| 1 | 1 | +| NULL | 0 | +| 1 | 1 | +| 0 | 0 | ++----------------------+-------------------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp=1; select data regexp null, data_not_null regexp null from test.t; ++------------------+---------------------------+ +| data regexp null | data_not_null regexp null | ++------------------+---------------------------+ +| NULL | NULL | +| NULL | NULL | +| NULL | NULL | +| NULL | NULL | +| NULL | NULL | ++------------------+---------------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp=1; select data regexp pattern, data_not_null regexp pattern, data regexp pattern_not_null, data_not_null regexp pattern_not_null from test.t; ++---------------------+------------------------------+------------------------------+---------------------------------------+ +| data regexp pattern | data_not_null regexp pattern | data regexp pattern_not_null | data_not_null regexp pattern_not_null | ++---------------------+------------------------------+------------------------------+---------------------------------------+ +| 1 | 0 | 0 | 1 | +| NULL | NULL | 1 | 1 | +| NULL | 1 | NULL | 1 | +| 1 | 1 | 1 | 1 | +| 0 | 0 | 0 | 1 | ++---------------------+------------------------------+------------------------------+---------------------------------------+ + +mysql> drop table if exists test.t +mysql> create table test.t (data binary(10), pattern binary(10)); +mysql> insert into test.t 
values('aa','AA'),('aa','(?i)AA'); +mysql> alter table test.t set tiflash replica 1 +func> wait_table test t +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp=1; select data, pattern, data regexp pattern from test.t; ++------------------------+------------------------+---------------------+ +| data | pattern | data regexp pattern | ++------------------------+------------------------+---------------------+ +| aa\0\0\0\0\0\0\0\0 | AA\0\0\0\0\0\0\0\0 | 0 | +| aa\0\0\0\0\0\0\0\0 | (?i)AA\0\0\0\0 | 1 | ++------------------------+------------------------+---------------------+ From 01a923bc1501451bf032e3d8f2473597eac9629d Mon Sep 17 00:00:00 2001 From: xufei Date: Tue, 8 Feb 2022 12:00:50 +0800 Subject: [PATCH 2/4] refine code --- dbms/src/Functions/FunctionsStringSearch.cpp | 29 +++++++++++++++++--- dbms/src/Functions/FunctionsStringSearch.h | 15 ++++++---- dbms/src/Functions/FunctionsVisitParam.h | 4 +++ 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp index df32f50f005..c77f62cb56d 100644 --- a/dbms/src/Functions/FunctionsStringSearch.cpp +++ b/dbms/src/Functions/FunctionsStringSearch.cpp @@ -152,6 +152,10 @@ template struct PositionImpl { using ResultType = UInt64; + /// need customized escape char when do the string search + static const bool need_customized_escape_char = false; + /// support match type when do the string search, used in regexp + static const bool support_match_type = false; /// Find one substring in many strings. 
static void vectorConstant(const ColumnString::Chars_t & data, @@ -459,6 +463,10 @@ template struct MatchImpl { using ResultType = UInt8; + /// need customized escape char when do the string search + static const bool need_customized_escape_char = like && for_tidb; + /// support match type when do the string search, used in regexp + static const bool support_match_type = !like && for_tidb; static void vectorConstant( const ColumnString::Chars_t & data, @@ -738,6 +746,11 @@ struct MatchImpl struct ExtractImpl { + /// need customized escape char when do the string search + static const bool need_customized_escape_char = false; + /// support match type when do the string search, used in regexp + static const bool support_match_type = false; + static void vector(const ColumnString::Chars_t & data, const ColumnString::Offsets & offsets, const std::string & pattern, @@ -792,6 +805,10 @@ struct ReplaceRegexpImpl { static constexpr bool support_non_const_needle = false; static constexpr bool support_non_const_replacement = false; + /// need customized escape char when do the string search + static const bool need_customized_escape_char = false; + /// support match type when do the string search, used in regexp + static const bool support_match_type = true; /// Sequence of instructions, describing how to get resulting string. 
/// Each element is either: @@ -1052,6 +1069,10 @@ struct ReplaceStringImpl { static constexpr bool support_non_const_needle = true; static constexpr bool support_non_const_replacement = true; + /// need customized escape char during the string search + static const bool need_customized_escape_char = false; + /// support match type during the string search, used in regexp + static const bool support_match_type = false; static void vector(const ColumnString::Chars_t & data, const ColumnString::Offsets & offsets, @@ -2045,7 +2066,7 @@ struct NameMatch static constexpr auto name = "match"; }; -struct NameTiDBMatch +struct NameTiDBRegexp { static constexpr auto name = "regexp"; }; @@ -2090,9 +2111,9 @@ using FunctionPositionCaseInsensitiveUTF8 = FunctionsStringSearch, NamePositionCaseInsensitiveUTF8>; using FunctionMatch = FunctionsStringSearch, NameMatch>; -using FunctionTiDBMatch = FunctionsStringSearch, NameTiDBMatch>; +using FunctionTiDBRegexp = FunctionsStringSearch, NameTiDBRegexp>; using FunctionLike = FunctionsStringSearch, NameLike>; -using FunctionLike3Args = FunctionsStringSearch, NameLike3Args, true>; +using FunctionLike3Args = FunctionsStringSearch, NameLike3Args>; using FunctionNotLike = FunctionsStringSearch, NameNotLike>; using FunctionExtract = FunctionsStringSearchToString; using FunctionReplaceOne = FunctionStringReplace, NameReplaceOne>; @@ -2112,7 +2133,7 @@ void registerFunctionsStringSearch(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git a/dbms/src/Functions/FunctionsStringSearch.h b/dbms/src/Functions/FunctionsStringSearch.h index 8a1eaef85bd..dfbe61b1d34 100644 --- a/dbms/src/Functions/FunctionsStringSearch.h +++ b/dbms/src/Functions/FunctionsStringSearch.h @@ -43,9 +43,11 @@ extern const int ILLEGAL_COLUMN; static const UInt8 
CH_ESCAPE_CHAR = '\\'; -template +template class FunctionsStringSearch : public IFunction { + static_assert(!(Impl::need_customized_escape_char && Impl::support_match_type)); + public: static constexpr auto name = Name::name; static FunctionPtr create(const Context &) @@ -79,20 +81,23 @@ class FunctionsStringSearch : public IFunction throw Exception( "Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - if constexpr (customize_escape_char) + if constexpr (Impl::need_customized_escape_char || Impl::support_match_type) { if (!arguments[2]->isInteger()) throw Exception( "Illegal type " + arguments[2]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } - else + else if constexpr (Impl::support_match_type) { if (arguments.size() > 2 && !arguments[2]->isString()) throw Exception( "Illegal type " + arguments[2]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } + size_t max_arguments_number = Impl::need_customized_escape_char || Impl::support_match_type ? 
3 : 2; + if (arguments.size() > max_arguments_number) + throw Exception("Too many arguments, only " + std::to_string(max_arguments_number) + " argument is supported for function " + getName()); return std::make_shared>(); } @@ -109,7 +114,7 @@ class FunctionsStringSearch : public IFunction UInt8 escape_char = CH_ESCAPE_CHAR; String match_type = ""; - if constexpr (customize_escape_char) + if constexpr (Impl::need_customized_escape_char) { const auto * col_escape_const = typeid_cast(&*block.getByPosition(arguments[2]).column); bool valid_args = true; @@ -135,7 +140,7 @@ class FunctionsStringSearch : public IFunction throw Exception("3rd arguments of function " + getName() + " must be constants and between 0 and 255."); } } - else + else if constexpr (Impl::support_match_type) { if (arguments.size() > 2) { diff --git a/dbms/src/Functions/FunctionsVisitParam.h b/dbms/src/Functions/FunctionsVisitParam.h index a4ca69aa96d..3db8f49ef6f 100644 --- a/dbms/src/Functions/FunctionsVisitParam.h +++ b/dbms/src/Functions/FunctionsVisitParam.h @@ -169,6 +169,10 @@ template struct ExtractParamImpl { using ResultType = typename ParamExtractor::ResultType; + /// need customized escape char when do the string search + static const bool need_customized_escape_char = false; + /// support match type when do the string search, used in regexp + static const bool support_match_type = false; /// It is assumed that `res` is the correct size and initialized with zeros. 
static void vectorConstant(const ColumnString::Chars_t & data, const ColumnString::Offsets & offsets, std::string needle, const UInt8 escape_char, const std::string & match_type, const TiDB::TiDBCollatorPtr & collator, PaddedPODArray & res) From 6e9082a0b4635d6c388f58b0d8e42bce87f48fbd Mon Sep 17 00:00:00 2001 From: xufei Date: Mon, 14 Feb 2022 13:05:06 +0800 Subject: [PATCH 3/4] fix build --- .../DAGExpressionAnalyzerHelper.cpp | 2 +- dbms/src/Functions/FunctionsStringSearch.cpp | 30 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp index 34a2e00c4f6..79ca547d0ca 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp @@ -392,7 +392,7 @@ String DAGExpressionAnalyzerHelper::buildRegexpFunction( String name = analyzer->getActions(child, actions); argument_names.push_back(name); } - std::shared_ptr collator = getCollatorFromExpr(expr); + TiDB::TiDBCollatorPtr collator = getCollatorFromExpr(expr); if (expr.sig() == tipb::ScalarFuncSig::RegexpReplaceSig || expr.sig() == tipb::ScalarFuncSig::RegexpSig) { /// according to https://github.com/pingcap/tidb/blob/v5.0.0/expression/builtin_like.go#L126, diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp index c77f62cb56d..b466eb5273a 100644 --- a/dbms/src/Functions/FunctionsStringSearch.cpp +++ b/dbms/src/Functions/FunctionsStringSearch.cpp @@ -317,7 +317,7 @@ struct PositionImpl } }; -static String getRE2ModeModifiers(const std::string & match_type, const std::shared_ptr & collator) +static String getRE2ModeModifiers(const std::string & match_type, const TiDB::TiDBCollatorPtr collator) { /// for regexp only ci/cs is supported re2_st::RE2::Options options(re2_st::RE2::CannedOptions::DefaultOptions); @@ -963,7 +963,7 @@ struct ReplaceRegexpImpl 
const Int64 & pos, const Int64 & occ, const std::string & match_type, - std::shared_ptr collator, + TiDB::TiDBCollatorPtr collator, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1012,7 +1012,7 @@ struct ReplaceRegexpImpl const Int64 & pos, const Int64 & occ, const std::string & match_type, - std::shared_ptr collator, + TiDB::TiDBCollatorPtr collator, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1048,7 +1048,7 @@ struct ReplaceRegexpImpl res_offsets[i] = res_offset; } } - static void constant(const String & input, const String & needle, const String & replacement, const Int64 & pos, const Int64 & occ, const String & match_type, std::shared_ptr collator, String & output) + static void constant(const String & input, const String & needle, const String & replacement, const Int64 & pos, const Int64 & occ, const String & match_type, TiDB::TiDBCollatorPtr collator, String & output) { ColumnString::Chars_t input_data; input_data.insert(input_data.end(), input.begin(), input.end()); @@ -1081,7 +1081,7 @@ struct ReplaceStringImpl const Int64 & /* pos */, const Int64 & /* occ */, const std::string & /* match_type */, - std::shared_ptr /* collator */, + TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1169,7 +1169,7 @@ struct ReplaceStringImpl const Int64 & /* pos */, const Int64 & /* occ */, const std::string & /* match_type */, - std::shared_ptr /* collator */, + TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1243,7 +1243,7 @@ struct ReplaceStringImpl const Int64 & /* pos */, const Int64 & /* occ */, const std::string & /* match_type */, - std::shared_ptr /* collator */, + TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1335,7 +1335,7 @@ struct ReplaceStringImpl const Int64 & /* pos */, const Int64 & /* occ */, 
const std::string & /* match_type */, - std::shared_ptr /* collator */, + TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1410,7 +1410,7 @@ struct ReplaceStringImpl const Int64 & /* pos */, const Int64 & /* occ */, const std::string & /* match_type */, - std::shared_ptr /* collator */, + TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1508,7 +1508,7 @@ struct ReplaceStringImpl const Int64 & /* pos */, const Int64 & /* occ */, const std::string & /* match_type */, - std::shared_ptr /* collator */, + TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1583,7 +1583,7 @@ struct ReplaceStringImpl const Int64 & /* pos */, const Int64 & /* occ */, const std::string & /* match_type */, - std::shared_ptr /* collator */, + TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1685,7 +1685,7 @@ struct ReplaceStringImpl const Int64 & /* pos */, const Int64 & /* occ */, const std::string & /* match_type */, - std::shared_ptr /* collator */, + TiDB::TiDBCollatorPtr /* collator */, ColumnString::Chars_t & res_data, ColumnString::Offsets & res_offsets) { @@ -1755,7 +1755,7 @@ struct ReplaceStringImpl } } - static void constant(const std::string & data, const std::string & needle, const std::string & replacement, const Int64 & /* pos */, const Int64 & /* occ */, const std::string & /* match_type */, std::shared_ptr /* collator */, std::string & res_data) + static void constant(const std::string & data, const std::string & needle, const std::string & replacement, const Int64 & /* pos */, const Int64 & /* occ */, const std::string & /* match_type */, TiDB::TiDBCollatorPtr /* collator */, std::string & res_data) { if (needle.empty()) { @@ -1826,7 +1826,7 @@ class FunctionStringReplace : public IFunction return {1, 2, 3, 4, 5}; } } 
- void setCollator(const std::shared_ptr & collator_) override { collator = collator_; } + void setCollator(const TiDB::TiDBCollatorPtr & collator_) override { collator = collator_; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { @@ -2042,7 +2042,7 @@ class FunctionStringReplace : public IFunction throw Exception("Argument at index 2 and 3 for function replace must be constant", ErrorCodes::ILLEGAL_COLUMN); } } - std::shared_ptr collator; + TiDB::TiDBCollatorPtr collator; }; struct NamePosition From 06483f3bf8b6aa4e226ce5259ef1549bc3b96a5a Mon Sep 17 00:00:00 2001 From: xufei Date: Fri, 25 Feb 2022 15:36:00 +0800 Subject: [PATCH 4/4] address comments --- .../DAGExpressionAnalyzerHelper.cpp | 2 +- .../Coprocessor/DAGExpressionAnalyzerHelper.h | 2 +- dbms/src/Functions/FunctionsStringSearch.cpp | 15 +++++-- dbms/src/Functions/FunctionsStringSearch.h | 2 +- dbms/src/Functions/tests/gtest_regexp.cpp | 41 ++++++++++--------- tests/fullstack-test/expr/regexp.test | 12 ++++++ 6 files changed, 48 insertions(+), 26 deletions(-) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp index 79ca547d0ca..1585d6ffd79 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp @@ -383,7 +383,7 @@ String DAGExpressionAnalyzerHelper::buildRoundFunction( String DAGExpressionAnalyzerHelper::buildRegexpFunction( DAGExpressionAnalyzer * analyzer, const tipb::Expr & expr, - ExpressionActionsPtr & actions) + const ExpressionActionsPtr & actions) { const String & func_name = getFunctionName(expr); Names argument_names; diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.h index 173f061258e..83597d48a53 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.h 
@@ -67,7 +67,7 @@ class DAGExpressionAnalyzerHelper static String buildRegexpFunction( DAGExpressionAnalyzer * analyzer, const tipb::Expr & expr, - ExpressionActionsPtr & actions); + const ExpressionActionsPtr & actions); static String genFuncString( const String & func_name, diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp index b466eb5273a..87408fadb3f 100644 --- a/dbms/src/Functions/FunctionsStringSearch.cpp +++ b/dbms/src/Functions/FunctionsStringSearch.cpp @@ -317,10 +317,19 @@ struct PositionImpl } }; +static re2_st::RE2::Options getDefaultRe2Options() +{ + re2_st::RE2::Options options(re2_st::RE2::CannedOptions::DefaultOptions); + options.set_case_sensitive(true); + options.set_one_line(true); + options.set_dot_nl(false); + return options; +} + static String getRE2ModeModifiers(const std::string & match_type, const TiDB::TiDBCollatorPtr collator) { /// for regexp only ci/cs is supported - re2_st::RE2::Options options(re2_st::RE2::CannedOptions::DefaultOptions); + re2_st::RE2::Options options = getDefaultRe2Options(); if (collator != nullptr && collator->isCI()) options.set_case_sensitive(false); @@ -335,8 +344,8 @@ static String getRE2ModeModifiers(const std::string & match_type, const TiDB::Ti /// according to MySQL doc: if either argument is a binary string, the arguments are handled in /// case-sensitive fashion as binary strings, even if match_type contains the i character. 
/// However, test in MySQL 8.0.25 shows that i flag still take affect even if the collation is binary, - /// if (collator == nullptr || !collator->isBinary()) - options.set_case_sensitive(false); + if (collator == nullptr || !collator->isBinary()) + options.set_case_sensitive(false); break; case 'c': options.set_case_sensitive(true); diff --git a/dbms/src/Functions/FunctionsStringSearch.h b/dbms/src/Functions/FunctionsStringSearch.h index dfbe61b1d34..b5e47e36e82 100644 --- a/dbms/src/Functions/FunctionsStringSearch.h +++ b/dbms/src/Functions/FunctionsStringSearch.h @@ -81,7 +81,7 @@ class FunctionsStringSearch : public IFunction throw Exception( "Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - if constexpr (Impl::need_customized_escape_char || Impl::support_match_type) + if constexpr (Impl::need_customized_escape_char) { if (!arguments[2]->isInteger()) throw Exception( diff --git a/dbms/src/Functions/tests/gtest_regexp.cpp b/dbms/src/Functions/tests/gtest_regexp.cpp index 73c6ba36113..fea1b51ff56 100644 --- a/dbms/src/Functions/tests/gtest_regexp.cpp +++ b/dbms/src/Functions/tests/gtest_regexp.cpp @@ -3,7 +3,8 @@ #include #include -#include +/// this is a hack, include the cpp file so we can test MatchImpl directly +#include // NOLINT #include #include @@ -20,15 +21,15 @@ namespace tests class Regexp : public FunctionTest { protected: - bool isColumnConstNull(const ColumnWithTypeAndName & column_with_type) + static bool isColumnConstNull(const ColumnWithTypeAndName & column_with_type) { return column_with_type.column->isColumnConst() && column_with_type.column->isNullAt(0); } - bool isColumnConstNotNull(const ColumnWithTypeAndName & column_with_type) + static bool isColumnConstNotNull(const ColumnWithTypeAndName & column_with_type) { return column_with_type.column->isColumnConst() && !column_with_type.column->isNullAt(0); } - bool isNullableColumnVector(const ColumnWithTypeAndName & 
column_with_type) + static bool isNullableColumnVector(const ColumnWithTypeAndName & column_with_type) { return !column_with_type.column->isColumnConst() && column_with_type.type->isNullable(); } @@ -51,8 +52,8 @@ class Regexp : public FunctionTest TEST_F(Regexp, testRegexpMatchType) { UInt8 res = false; - std::shared_ptr binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY); - std::shared_ptr ci_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI); + const auto * binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY); + const auto * ci_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI); DB::MatchImpl::constantConstant("a\nB\n", "(?m)(?i)^b", '\\', "", nullptr, res); ASSERT_TRUE(res == 1); DB::MatchImpl::constantConstant("a\nB\n", "^b", '\\', "mi", nullptr, res); @@ -62,7 +63,7 @@ TEST_F(Regexp, testRegexpMatchType) DB::MatchImpl::constantConstant("a\nB\n", "^b", '\\', "mi", binary_collator, res); ASSERT_TRUE(res == 0); DB::MatchImpl::constantConstant("a\nB\n", "^b", '\\', "i", nullptr, res); - ASSERT_TRUE(res == 1); + ASSERT_TRUE(res == 0); DB::MatchImpl::constantConstant("a\nB\n", "^b", '\\', "m", nullptr, res); ASSERT_TRUE(res == 0); DB::MatchImpl::constantConstant("a\nB\n", "^a.*b", '\\', "", nullptr, res); @@ -1591,7 +1592,7 @@ TEST_F(Regexp, testRegexpMySQLCases) ASSERT_TRUE(res == 1); DB::MatchImpl::constantConstant("=0-z=", "([[:digit:]-[:alpha:]]+)", '\\', "", nullptr, res); /* Result: iy */ ; - DB::MatchImpl::constantConstant("3.1415926", "(\\d+\\.\\d+)", '\\', "", nullptr, res); + DB::MatchImpl::constantConstant("3.1415926", R"((\d+\.\d+))", '\\', "", nullptr, res); ASSERT_TRUE(res == 1); DB::MatchImpl::constantConstant("have a web browser", "(\\ba.{0,10}br)", '\\', "", nullptr, res); ASSERT_TRUE(res == 1); @@ -1666,7 +1667,7 @@ TEST_F(Regexp, testRegexpMySQLCases) ASSERT_TRUE(res == 1); DB::MatchImpl::constantConstant("bbbbac", 
"^(b+?|a){1,2}c", '\\', "", nullptr, res); ASSERT_TRUE(res == 1); - DB::MatchImpl::constantConstant("cd. (A. Tw)", "\\((\\w\\. \\w+)\\)", '\\', "", nullptr, res); + DB::MatchImpl::constantConstant("cd. (A. Tw)", R"(\((\w\. \w+)\))", '\\', "", nullptr, res); ASSERT_TRUE(res == 1); DB::MatchImpl::constantConstant("aaaacccc", "((?:aaaa|bbbb)cccc)?", '\\', "", nullptr, res); ASSERT_TRUE(res == 1); @@ -1729,8 +1730,8 @@ TEST_F(Regexp, testRegexpMySQLCases) TEST_F(Regexp, testRegexpTiDBCase) { UInt8 res; - std::shared_ptr binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY); - std::shared_ptr ci_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI); + const auto * binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY); + const auto * ci_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI); DB::MatchImpl::constantConstant("a", "^$", '\\', "", nullptr, res); ASSERT_TRUE(res == 0); DB::MatchImpl::constantConstant("a", "a", '\\', "", nullptr, res); @@ -1767,7 +1768,7 @@ TEST_F(Regexp, testRegexpTiDBCase) TEST_F(Regexp, testRegexp) { - std::shared_ptr binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY); + const auto * binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY); auto string_type = std::make_shared(); auto nullable_string_type = makeNullable(string_type); auto uint8_type = std::make_shared(); @@ -1792,7 +1793,7 @@ TEST_F(Regexp, testRegexp) size_t row_size = input_string_nulls.size(); - auto const_UInt8_null_column = createConstColumn>(row_size, {}); + auto const_uint8_null_column = createConstColumn>(row_size, {}); auto const_string_null_column = createConstColumn>(row_size, {}); /// case 1. 
regexp(const, const [, const]) for (size_t i = 0; i < row_size; i++) @@ -1813,15 +1814,15 @@ TEST_F(Regexp, testRegexp) for (size_t i = 0; i < row_size; i++) { /// test regexp(const, const) - ASSERT_COLUMN_EQ(input_string_nulls[i] || pattern_nulls[i] ? const_UInt8_null_column : createConstColumn>(row_size, results[i]), + ASSERT_COLUMN_EQ(input_string_nulls[i] || pattern_nulls[i] ? const_uint8_null_column : createConstColumn>(row_size, results[i]), executeFunction("regexp", input_string_nulls[i] ? const_string_null_column : createConstColumn>(row_size, input_strings[i]), pattern_nulls[i] ? const_string_null_column : createConstColumn>(row_size, patterns[i]))); /// test regexp(const, const, const) - ASSERT_COLUMN_EQ(input_string_nulls[i] || pattern_nulls[i] || match_type_nulls[i] ? const_UInt8_null_column : createConstColumn>(row_size, results_with_match_type[i]), + ASSERT_COLUMN_EQ(input_string_nulls[i] || pattern_nulls[i] || match_type_nulls[i] ? const_uint8_null_column : createConstColumn>(row_size, results_with_match_type[i]), executeFunction("regexp", input_string_nulls[i] ? const_string_null_column : createConstColumn>(row_size, input_strings[i]), pattern_nulls[i] ? const_string_null_column : createConstColumn>(row_size, patterns[i]), match_type_nulls[i] ? const_string_null_column : createConstColumn>(row_size, match_types[i]))); /// test regexp(const, const, const) with binary collator - ASSERT_COLUMN_EQ(input_string_nulls[i] || pattern_nulls[i] || match_type_nulls[i] ? const_UInt8_null_column : createConstColumn>(row_size, results_with_match_type_collator[i]), + ASSERT_COLUMN_EQ(input_string_nulls[i] || pattern_nulls[i] || match_type_nulls[i] ? const_uint8_null_column : createConstColumn>(row_size, results_with_match_type_collator[i]), executeFunction("regexp", {input_string_nulls[i] ? const_string_null_column : createConstColumn>(row_size, input_strings[i]), pattern_nulls[i] ? 
const_string_null_column : createConstColumn>(row_size, patterns[i]), match_type_nulls[i] ? const_string_null_column : createConstColumn>(row_size, match_types[i])}, binary_collator)); } /// case 3 regexp(vector, const[, const]) @@ -1946,8 +1947,8 @@ TEST_F(Regexp, testRegexpCustomerCases) TEST_F(Regexp, testRegexpReplaceMatchType) { String res; - std::shared_ptr binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY); - std::shared_ptr ci_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI); + const auto * binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY); + const auto * ci_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI); DB::ReplaceRegexpImpl::constant("a\nB\nc", "(?m)(?i)^b", "xxx", 1, 0, "", nullptr, res); ASSERT_TRUE(res == "a\nxxx\nc"); DB::ReplaceRegexpImpl::constant("a\nB\nc", "^b", "xxx", 1, 0, "mi", nullptr, res); @@ -1957,7 +1958,7 @@ TEST_F(Regexp, testRegexpReplaceMatchType) DB::ReplaceRegexpImpl::constant("a\nB\nc", "^b", "xxx", 1, 0, "mi", binary_collator, res); ASSERT_TRUE(res == "a\nB\nc"); DB::ReplaceRegexpImpl::constant("a\nB\nc", "^b", "xxx", 1, 0, "i", nullptr, res); - ASSERT_TRUE(res == "a\nxxx\nc"); + ASSERT_TRUE(res == "a\nB\nc"); DB::ReplaceRegexpImpl::constant("a\nB\nc", "^b", "xxx", 1, 0, "m", nullptr, res); ASSERT_TRUE(res == "a\nB\nc"); DB::ReplaceRegexpImpl::constant("a\nB\n", "^a.*b", "xxx", 1, 0, "", nullptr, res); @@ -2012,7 +2013,7 @@ TEST_F(Regexp, testRegexpReplaceMySQLCases) TEST_F(Regexp, testRegexpReplace) { - std::shared_ptr binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY); + const auto * binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY); auto string_type = std::make_shared(); auto nullable_string_type = makeNullable(string_type); auto uint8_type = std::make_shared(); diff --git a/tests/fullstack-test/expr/regexp.test 
b/tests/fullstack-test/expr/regexp.test index 9eefa82d159..3f69ad50218 100644 --- a/tests/fullstack-test/expr/regexp.test +++ b/tests/fullstack-test/expr/regexp.test @@ -55,3 +55,15 @@ mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp=1; se | 1 | 1 | 1 | 1 | +---------------------+------------------------------+------------------------------+---------------------------------------+ +mysql> drop table if exists test.t +mysql> create table test.t (data varchar(30), pattern varchar(30)); +mysql> insert into test.t values ('', ''), ('abcd', 'abcd'); +mysql> alter table test.t set tiflash replica 1 +func> wait_table test t +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp=1; select data regexp pattern, data regexp '', '' regexp pattern from test.t; ++---------------------+----------------+-------------------+ +| data regexp pattern | data regexp '' | '' regexp pattern | ++---------------------+----------------+-------------------+ +| 1 | 1 | 1 | +| 1 | 1 | 0 | ++---------------------+----------------+-------------------+