Skip to content

Commit

Permalink
Removed invalid check and added bad utf-8 tests
Browse files Browse the repository at this point in the history
  • Loading branch information
codyschierbeck committed Mar 6, 2024
1 parent 0548319 commit f086999
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 9 deletions.
12 changes: 3 additions & 9 deletions velox/functions/sparksql/RegexFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,15 +173,9 @@ struct RegexpReplaceFunction {
return true;
}

if (stringInput.size() == 0) {
if (pattern.size() == 0 && position == 1) {
result = replace;
return true;
}
if (pattern.size() > 0) {
result = stringInput;
return true;
}
if (stringInput.size() == 0 && pattern.size() == 0 && position == 1) {
result = replace;
return true;
}
return false;
}
Expand Down
13 changes: 13 additions & 0 deletions velox/functions/sparksql/tests/RegexFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,19 @@ TEST_F(RegexFunctionsTest, regexpReplaceSimple) {
auto result = testRegexpReplace("Hello World", "l", "L");
EXPECT_EQ(result, output);
}
// Exercises regexp_replace with byte sequences that are not valid UTF-8 and
// then with a well-formed counterpart.
TEST_F(RegexFunctionsTest, badUTF8) {
  // Neither of these byte strings is well-formed UTF-8, so the row-based
  // helper is expected to fail with an "invalid UTF-8" error.
  const std::string malformedInput = "\xF0\x82\x82\xAC";
  const std::string malformedPattern = "\xF0\x82";
  VELOX_ASSERT_THROW(
      testingRegexpReplaceRows(
          {malformedInput}, {malformedPattern}, {"Bad"}),
      "invalid UTF-8");

  // Per the original author's note, Python converts the values above into the
  // ones below and then completes regexp_replace. NOTE(review): the exact
  // Python conversion is not visible here -- confirm before relying on it.
  // With well-formed input, removing the pattern bytes leaves only the
  // trailing two bytes.
  const std::string wellFormedInput = "\xc3\xb0\xc2\xac";
  const std::string wellFormedPattern = "\xc3\xb0";
  const auto replaced =
      testRegexpReplace(wellFormedInput, wellFormedPattern, "");
  EXPECT_EQ(replaced, "\xc2\xac");
}


TEST_F(RegexFunctionsTest, regexpReplaceSimplePosition) {
std::string output = "Hello WorLd";
Expand Down

0 comments on commit f086999

Please sign in to comment.