Skip to content

Commit

Permalink
YQL-18464: Fix Remove UDFs for NonASCII input (ydb-platform#4794)
Browse files — browse the repository at this point in the history
(cherry picked from ydb-platform#4789)
  • Loading branch information
igormunkin authored May 23, 2024
1 parent e38e6ea commit 4137016
Show file tree
Hide file tree
Showing 10 changed files with 155 additions and 14 deletions.
24 changes: 12 additions & 12 deletions ydb/library/yql/udfs/common/string/string_udf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -538,11 +538,11 @@ namespace {
std::string input(args[0].AsStringRef());
const std::string_view remove(args[1].AsStringRef());
std::array<bool, 256> chars{};
for (const char c : remove) {
for (const ui8 c : remove) {
chars[c] = true;
}
size_t tpos = 0;
for (const char c : input) {
for (const ui8 c : input) {
if (!chars[c]) {
input[tpos++] = c;
}
Expand All @@ -562,11 +562,11 @@ namespace {
std::string input(arg1.AsStringRef());
const std::string_view remove(arg2.AsStringRef());
std::array<bool, 256> chars{};
for (const char c : remove) {
for (const ui8 c : remove) {
chars[c] = true;
}
size_t tpos = 0;
for (const char c : input) {
for (const ui8 c : input) {
if (!chars[c]) {
input[tpos++] = c;
}
Expand All @@ -586,11 +586,11 @@ namespace {
std::string input(args[0].AsStringRef());
const std::string_view remove(args[1].AsStringRef());
std::array<bool, 256> chars{};
for (const char c : remove) {
for (const ui8 c : remove) {
chars[c] = true;
}
for (auto it = input.cbegin(); it != input.cend(); ++it) {
if (chars[*it]) {
if (chars[static_cast<ui8>(*it)]) {
input.erase(it);
return valueBuilder->NewString(input);
}
Expand All @@ -606,11 +606,11 @@ namespace {
std::string input(arg1.AsStringRef());
const std::string_view remove(arg2.AsStringRef());
std::array<bool, 256> chars{};
for (const char c : remove) {
for (const ui8 c : remove) {
chars[c] = true;
}
for (auto it = input.cbegin(); it != input.cend(); ++it) {
if (chars[*it]) {
if (chars[static_cast<ui8>(*it)]) {
input.erase(it);
return sink(TBlockItem(input));
}
Expand All @@ -626,11 +626,11 @@ namespace {
std::string input(args[0].AsStringRef());
const std::string_view remove(args[1].AsStringRef());
std::array<bool, 256> chars{};
for (const char c : remove) {
for (const ui8 c : remove) {
chars[c] = true;
}
for (auto it = input.crbegin(); it != input.crend(); ++it) {
if (chars[*it]) {
if (chars[static_cast<ui8>(*it)]) {
input.erase(input.crend() - it - 1, 1);
return valueBuilder->NewString(input);
}
Expand All @@ -646,11 +646,11 @@ namespace {
std::string input(arg1.AsStringRef());
const std::string_view remove(arg2.AsStringRef());
std::array<bool, 256> chars{};
for (const char c : remove) {
for (const ui8 c : remove) {
chars[c] = true;
}
for (auto it = input.crbegin(); it != input.crend(); ++it) {
if (chars[*it]) {
if (chars[static_cast<ui8>(*it)]) {
input.erase(input.crend() - it - 1, 1);
return sink(TBlockItem(input));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@
"";
%false;
"2"
];
[
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
%false;
"23"
]
]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,27 @@
"DataType";
"String"
]
];
[
"hwruall";
[
"DataType";
"String"
]
];
[
"hwrufirst";
[
"DataType";
"String"
]
];
[
"hwrulast";
[
"DataType";
"String"
]
]
]
]
Expand All @@ -75,6 +96,9 @@
"fda";
"fds";
"fdsa";
"fdsa";
"fdsa";
"fdsa";
"fdsa"
];
[
Expand All @@ -85,6 +109,9 @@
"swedfg";
"awedfg";
"aswedfg";
"aswedfg";
"aswedfg";
"aswedfg";
"aswedfg"
];
[
Expand All @@ -95,6 +122,9 @@
"sdadsaasd";
"asdadsaad";
"asdadsaasd";
"asdadsaasd";
"asdadsaasd";
"asdadsaasd";
"asdadsaasd"
];
[
Expand All @@ -105,6 +135,9 @@
"gdfsassas";
"gdsfsassa";
"gdsfsassas";
"gdsfsassas";
"gdsfsassas";
"gdsfsassas";
"gdsfsassas"
];
[
Expand All @@ -115,7 +148,23 @@
"";
"";
"";
"";
"";
"";
""
];
[
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!";
"\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!"
]
]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,16 @@
"";
"";
""
];
[
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"
]
]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,17 @@
"-1";
"-1";
"2"
];
[
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
%false;
%false;
%false;
%false;
%false;
"-1";
"-1";
"23"
]
]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,27 @@
"DataType";
"String"
]
];
[
"hwruall";
[
"DataType";
"String"
]
];
[
"hwrufirst";
[
"DataType";
"String"
]
];
[
"hwrulast";
[
"DataType";
"String"
]
]
]
]
Expand All @@ -75,6 +96,9 @@
"fda";
"fds";
"fdsa";
"fdsa";
"fdsa";
"fdsa";
"fdsa"
];
[
Expand All @@ -85,6 +109,9 @@
"swedfg";
"awedfg";
"aswedfg";
"aswedfg";
"aswedfg";
"aswedfg";
"aswedfg"
];
[
Expand All @@ -95,6 +122,9 @@
"sdadsaasd";
"asdadsaad";
"asdadsaasd";
"asdadsaasd";
"asdadsaasd";
"asdadsaasd";
"asdadsaasd"
];
[
Expand All @@ -105,6 +135,9 @@
"gdfsassas";
"gdsfsassa";
"gdsfsassas";
"gdsfsassas";
"gdsfsassas";
"gdsfsassas";
"gdsfsassas"
];
[
Expand All @@ -115,7 +148,23 @@
"";
"";
"";
"";
"";
"";
""
];
[
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!";
"\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!"
]
]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,16 @@
"";
"";
""
];
[
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`";
"`\xD0\x9F\xD1\x80\xD0\xB8\xD0\xB2\xD0\xB5\xD1\x82, \xD0\xBC\xD0\xB8\xD1\x80!`"
]
]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,8 @@ SELECT
String::RemoveFirst(value, "as") AS first2,
String::RemoveLast(value, "as") AS last2,
String::RemoveFirst(value, "") AS first3,
String::RemoveLast(value, "") AS last3
String::RemoveLast(value, "") AS last3,
String::RemoveAll(value, "`") AS hwruall,
String::RemoveFirst(value, "`") AS hwrufirst,
String::RemoveLast(value, "`") AS hwrulast,
FROM Input;
5 changes: 4 additions & 1 deletion ydb/library/yql/udfs/common/string/test/cases/Remove.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,8 @@ SELECT
String::RemoveFirst(value, "as") AS first2,
String::RemoveLast(value, "as") AS last2,
String::RemoveFirst(value, "") AS first3,
String::RemoveLast(value, "") AS last3
String::RemoveLast(value, "") AS last3,
String::RemoveAll(value, "`") AS hwruall,
String::RemoveFirst(value, "`") AS hwrufirst,
String::RemoveLast(value, "`") AS hwrulast,
FROM Input;
1 change: 1 addition & 0 deletions ydb/library/yql/udfs/common/string/test/cases/default.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
{"key"="3";"subkey"="3";"value"="asdadsaasd"};
{"key"="4";"subkey"="4";"value"="gdsfsassas"};
{"key"="5";"subkey"="5";"value"=""};
{"key"="6";"subkey"="6";"value"="`Привет, мир!`"};

0 comments on commit 4137016

Please sign in to comment.