Skip to content

Commit

Permalink
address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
windtalker committed Feb 25, 2022
1 parent 6e9082a commit 06483f3
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 26 deletions.
2 changes: 1 addition & 1 deletion dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ String DAGExpressionAnalyzerHelper::buildRoundFunction(
String DAGExpressionAnalyzerHelper::buildRegexpFunction(
DAGExpressionAnalyzer * analyzer,
const tipb::Expr & expr,
ExpressionActionsPtr & actions)
const ExpressionActionsPtr & actions)
{
const String & func_name = getFunctionName(expr);
Names argument_names;
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class DAGExpressionAnalyzerHelper
static String buildRegexpFunction(
DAGExpressionAnalyzer * analyzer,
const tipb::Expr & expr,
ExpressionActionsPtr & actions);
const ExpressionActionsPtr & actions);

static String genFuncString(
const String & func_name,
Expand Down
15 changes: 12 additions & 3 deletions dbms/src/Functions/FunctionsStringSearch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,10 +317,19 @@ struct PositionImpl
}
};

static re2_st::RE2::Options getDefaultRe2Options()
{
re2_st::RE2::Options options(re2_st::RE2::CannedOptions::DefaultOptions);
options.set_case_sensitive(true);
options.set_one_line(true);
options.set_dot_nl(false);
return options;
}

static String getRE2ModeModifiers(const std::string & match_type, const TiDB::TiDBCollatorPtr collator)
{
/// for regexp only ci/cs is supported
re2_st::RE2::Options options(re2_st::RE2::CannedOptions::DefaultOptions);
re2_st::RE2::Options options = getDefaultRe2Options();
if (collator != nullptr && collator->isCI())
options.set_case_sensitive(false);

Expand All @@ -335,8 +344,8 @@ static String getRE2ModeModifiers(const std::string & match_type, const TiDB::Ti
/// according to MySQL doc: if either argument is a binary string, the arguments are handled in
/// case-sensitive fashion as binary strings, even if match_type contains the i character.
/// However, test in MySQL 8.0.25 shows that i flag still take affect even if the collation is binary,
/// if (collator == nullptr || !collator->isBinary())
options.set_case_sensitive(false);
if (collator == nullptr || !collator->isBinary())
options.set_case_sensitive(false);
break;
case 'c':
options.set_case_sensitive(true);
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Functions/FunctionsStringSearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ class FunctionsStringSearch : public IFunction
throw Exception(
"Illegal type " + arguments[1]->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if constexpr (Impl::need_customized_escape_char || Impl::support_match_type)
if constexpr (Impl::need_customized_escape_char)
{
if (!arguments[2]->isInteger())
throw Exception(
Expand Down
41 changes: 21 additions & 20 deletions dbms/src/Functions/tests/gtest_regexp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
#include <Storages/Transaction/Collator.h>
#include <TestUtils/FunctionTestUtils.h>

#include <Functions/FunctionsStringSearch.cpp>
/// this is a hack, include the cpp file so we can test MatchImpl directly
#include <Functions/FunctionsStringSearch.cpp> // NOLINT
#include <string>
#include <vector>

Expand All @@ -20,15 +21,15 @@ namespace tests
class Regexp : public FunctionTest
{
protected:
bool isColumnConstNull(const ColumnWithTypeAndName & column_with_type)
static bool isColumnConstNull(const ColumnWithTypeAndName & column_with_type)
{
return column_with_type.column->isColumnConst() && column_with_type.column->isNullAt(0);
}
bool isColumnConstNotNull(const ColumnWithTypeAndName & column_with_type)
static bool isColumnConstNotNull(const ColumnWithTypeAndName & column_with_type)
{
return column_with_type.column->isColumnConst() && !column_with_type.column->isNullAt(0);
}
bool isNullableColumnVector(const ColumnWithTypeAndName & column_with_type)
static bool isNullableColumnVector(const ColumnWithTypeAndName & column_with_type)
{
return !column_with_type.column->isColumnConst() && column_with_type.type->isNullable();
}
Expand All @@ -51,8 +52,8 @@ class Regexp : public FunctionTest
TEST_F(Regexp, testRegexpMatchType)
{
UInt8 res = false;
std::shared_ptr<TiDB::ITiDBCollator> binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY);
std::shared_ptr<TiDB::ITiDBCollator> ci_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI);
const auto * binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY);
const auto * ci_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI);
DB::MatchImpl<false, false, true>::constantConstant("a\nB\n", "(?m)(?i)^b", '\\', "", nullptr, res);
ASSERT_TRUE(res == 1);
DB::MatchImpl<false, false, true>::constantConstant("a\nB\n", "^b", '\\', "mi", nullptr, res);
Expand All @@ -62,7 +63,7 @@ TEST_F(Regexp, testRegexpMatchType)
DB::MatchImpl<false, false, true>::constantConstant("a\nB\n", "^b", '\\', "mi", binary_collator, res);
ASSERT_TRUE(res == 0);
DB::MatchImpl<false, false, true>::constantConstant("a\nB\n", "^b", '\\', "i", nullptr, res);
ASSERT_TRUE(res == 1);
ASSERT_TRUE(res == 0);
DB::MatchImpl<false, false, true>::constantConstant("a\nB\n", "^b", '\\', "m", nullptr, res);
ASSERT_TRUE(res == 0);
DB::MatchImpl<false, false, true>::constantConstant("a\nB\n", "^a.*b", '\\', "", nullptr, res);
Expand Down Expand Up @@ -1591,7 +1592,7 @@ TEST_F(Regexp, testRegexpMySQLCases)
ASSERT_TRUE(res == 1);
DB::MatchImpl<false, false, true>::constantConstant("=0-z=", "([[:digit:]-[:alpha:]]+)", '\\', "", nullptr, res); /* Result: iy */
;
DB::MatchImpl<false, false, true>::constantConstant("3.1415926", "(\\d+\\.\\d+)", '\\', "", nullptr, res);
DB::MatchImpl<false, false, true>::constantConstant("3.1415926", R"((\d+\.\d+))", '\\', "", nullptr, res);
ASSERT_TRUE(res == 1);
DB::MatchImpl<false, false, true>::constantConstant("have a web browser", "(\\ba.{0,10}br)", '\\', "", nullptr, res);
ASSERT_TRUE(res == 1);
Expand Down Expand Up @@ -1666,7 +1667,7 @@ TEST_F(Regexp, testRegexpMySQLCases)
ASSERT_TRUE(res == 1);
DB::MatchImpl<false, false, true>::constantConstant("bbbbac", "^(b+?|a){1,2}c", '\\', "", nullptr, res);
ASSERT_TRUE(res == 1);
DB::MatchImpl<false, false, true>::constantConstant("cd. (A. Tw)", "\\((\\w\\. \\w+)\\)", '\\', "", nullptr, res);
DB::MatchImpl<false, false, true>::constantConstant("cd. (A. Tw)", R"(\((\w\. \w+)\))", '\\', "", nullptr, res);
ASSERT_TRUE(res == 1);
DB::MatchImpl<false, false, true>::constantConstant("aaaacccc", "((?:aaaa|bbbb)cccc)?", '\\', "", nullptr, res);
ASSERT_TRUE(res == 1);
Expand Down Expand Up @@ -1729,8 +1730,8 @@ TEST_F(Regexp, testRegexpMySQLCases)
TEST_F(Regexp, testRegexpTiDBCase)
{
UInt8 res;
std::shared_ptr<TiDB::ITiDBCollator> binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY);
std::shared_ptr<TiDB::ITiDBCollator> ci_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI);
const auto * binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY);
const auto * ci_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI);
DB::MatchImpl<false, false, true>::constantConstant("a", "^$", '\\', "", nullptr, res);
ASSERT_TRUE(res == 0);
DB::MatchImpl<false, false, true>::constantConstant("a", "a", '\\', "", nullptr, res);
Expand Down Expand Up @@ -1767,7 +1768,7 @@ TEST_F(Regexp, testRegexpTiDBCase)

TEST_F(Regexp, testRegexp)
{
std::shared_ptr<TiDB::ITiDBCollator> binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY);
const auto * binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY);
auto string_type = std::make_shared<DataTypeString>();
auto nullable_string_type = makeNullable(string_type);
auto uint8_type = std::make_shared<DataTypeUInt8>();
Expand All @@ -1792,7 +1793,7 @@ TEST_F(Regexp, testRegexp)

size_t row_size = input_string_nulls.size();

auto const_UInt8_null_column = createConstColumn<Nullable<UInt8>>(row_size, {});
auto const_uint8_null_column = createConstColumn<Nullable<UInt8>>(row_size, {});
auto const_string_null_column = createConstColumn<Nullable<String>>(row_size, {});
/// case 1. regexp(const, const [, const])
for (size_t i = 0; i < row_size; i++)
Expand All @@ -1813,15 +1814,15 @@ TEST_F(Regexp, testRegexp)
for (size_t i = 0; i < row_size; i++)
{
/// test regexp(const, const)
ASSERT_COLUMN_EQ(input_string_nulls[i] || pattern_nulls[i] ? const_UInt8_null_column : createConstColumn<Nullable<UInt8>>(row_size, results[i]),
ASSERT_COLUMN_EQ(input_string_nulls[i] || pattern_nulls[i] ? const_uint8_null_column : createConstColumn<Nullable<UInt8>>(row_size, results[i]),
executeFunction("regexp", input_string_nulls[i] ? const_string_null_column : createConstColumn<Nullable<String>>(row_size, input_strings[i]), pattern_nulls[i] ? const_string_null_column : createConstColumn<Nullable<String>>(row_size, patterns[i])));

/// test regexp(const, const, const)
ASSERT_COLUMN_EQ(input_string_nulls[i] || pattern_nulls[i] || match_type_nulls[i] ? const_UInt8_null_column : createConstColumn<Nullable<UInt8>>(row_size, results_with_match_type[i]),
ASSERT_COLUMN_EQ(input_string_nulls[i] || pattern_nulls[i] || match_type_nulls[i] ? const_uint8_null_column : createConstColumn<Nullable<UInt8>>(row_size, results_with_match_type[i]),
executeFunction("regexp", input_string_nulls[i] ? const_string_null_column : createConstColumn<Nullable<String>>(row_size, input_strings[i]), pattern_nulls[i] ? const_string_null_column : createConstColumn<Nullable<String>>(row_size, patterns[i]), match_type_nulls[i] ? const_string_null_column : createConstColumn<Nullable<String>>(row_size, match_types[i])));

/// test regexp(const, const, const) with binary collator
ASSERT_COLUMN_EQ(input_string_nulls[i] || pattern_nulls[i] || match_type_nulls[i] ? const_UInt8_null_column : createConstColumn<Nullable<UInt8>>(row_size, results_with_match_type_collator[i]),
ASSERT_COLUMN_EQ(input_string_nulls[i] || pattern_nulls[i] || match_type_nulls[i] ? const_uint8_null_column : createConstColumn<Nullable<UInt8>>(row_size, results_with_match_type_collator[i]),
executeFunction("regexp", {input_string_nulls[i] ? const_string_null_column : createConstColumn<Nullable<String>>(row_size, input_strings[i]), pattern_nulls[i] ? const_string_null_column : createConstColumn<Nullable<String>>(row_size, patterns[i]), match_type_nulls[i] ? const_string_null_column : createConstColumn<Nullable<String>>(row_size, match_types[i])}, binary_collator));
}
/// case 3 regexp(vector, const[, const])
Expand Down Expand Up @@ -1946,8 +1947,8 @@ TEST_F(Regexp, testRegexpCustomerCases)
TEST_F(Regexp, testRegexpReplaceMatchType)
{
String res;
std::shared_ptr<TiDB::ITiDBCollator> binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY);
std::shared_ptr<TiDB::ITiDBCollator> ci_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI);
const auto * binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY);
const auto * ci_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI);
DB::ReplaceRegexpImpl<false>::constant("a\nB\nc", "(?m)(?i)^b", "xxx", 1, 0, "", nullptr, res);
ASSERT_TRUE(res == "a\nxxx\nc");
DB::ReplaceRegexpImpl<false>::constant("a\nB\nc", "^b", "xxx", 1, 0, "mi", nullptr, res);
Expand All @@ -1957,7 +1958,7 @@ TEST_F(Regexp, testRegexpReplaceMatchType)
DB::ReplaceRegexpImpl<false>::constant("a\nB\nc", "^b", "xxx", 1, 0, "mi", binary_collator, res);
ASSERT_TRUE(res == "a\nB\nc");
DB::ReplaceRegexpImpl<false>::constant("a\nB\nc", "^b", "xxx", 1, 0, "i", nullptr, res);
ASSERT_TRUE(res == "a\nxxx\nc");
ASSERT_TRUE(res == "a\nB\nc");
DB::ReplaceRegexpImpl<false>::constant("a\nB\nc", "^b", "xxx", 1, 0, "m", nullptr, res);
ASSERT_TRUE(res == "a\nB\nc");
DB::ReplaceRegexpImpl<false>::constant("a\nB\n", "^a.*b", "xxx", 1, 0, "", nullptr, res);
Expand Down Expand Up @@ -2012,7 +2013,7 @@ TEST_F(Regexp, testRegexpReplaceMySQLCases)

TEST_F(Regexp, testRegexpReplace)
{
std::shared_ptr<TiDB::ITiDBCollator> binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY);
const auto * binary_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY);
auto string_type = std::make_shared<DataTypeString>();
auto nullable_string_type = makeNullable(string_type);
auto uint8_type = std::make_shared<DataTypeUInt8>();
Expand Down
12 changes: 12 additions & 0 deletions tests/fullstack-test/expr/regexp.test
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,15 @@ mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp=1; se
| 1 | 1 | 1 | 1 |
+---------------------+------------------------------+------------------------------+---------------------------------------+

mysql> drop table if exists test.t
mysql> create table test.t (data varchar(30), pattern varchar(30));
mysql> insert into test.t values ('', ''), ('abcd', 'abcd');
mysql> alter table test.t set tiflash replica 1
func> wait_table test t
mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp=1; select data regexp pattern, data regexp '', '' regexp pattern from test.t;
+---------------------+----------------+-------------------+
| data regexp pattern | data regexp '' | '' regexp pattern |
+---------------------+----------------+-------------------+
| 1 | 1 | 1 |
| 1 | 1 | 0 |
+---------------------+----------------+-------------------+

0 comments on commit 06483f3

Please sign in to comment.