Skip to content

Commit

Permalink
Save work.
Browse files Browse the repository at this point in the history
  • Loading branch information
kgpai committed Nov 13, 2023
1 parent f489e36 commit 736a1e3
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 63 deletions.
78 changes: 66 additions & 12 deletions velox/functions/prestosql/URLFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,31 @@ FOLLY_ALWAYS_INLINE void urlEscape(TOutString& output, const TInString& input) {
output.resize(outIndex);
}

template<typename TInString>
FOLLY_ALWAYS_INLINE bool urlUnescapeCheck(const TInString& input) {
const char* p = input.data();
const char* end = p + input.size();
char buf[3];
buf[2] = '\0';
char* endptr;
for (; p < end; ++p) {
if (*p == '%') {
if (p + 2 < end) {
buf[0] = p[1];
buf[1] = p[2];
strtol(buf, &endptr, 16);
p += 2;
if (endptr != buf + 2) {
return false;
}
} else {
return false;
}
}
}
return true;
}

template <typename TOutString, typename TInString>
FOLLY_ALWAYS_INLINE void urlUnescape(
TOutString& output,
Expand Down Expand Up @@ -143,6 +168,11 @@ struct UrlExtractProtocolFunction {
FOLLY_ALWAYS_INLINE bool call(
out_type<Varchar>& result,
const arg_type<Varchar>& url) {
auto validUrl = urlUnescapeCheck(url);
if (!validUrl) {
result.setEmpty();
return false;
}
boost::cmatch match;
if (!parse(url, match)) {
result.setEmpty();
Expand All @@ -166,6 +196,11 @@ struct UrlExtractFragmentFunction {
FOLLY_ALWAYS_INLINE bool call(
out_type<Varchar>& result,
const arg_type<Varchar>& url) {
auto validUrl = urlUnescapeCheck(url);
if (!validUrl) {
result.setEmpty();
return false;
}
boost::cmatch match;
if (!parse(url, match)) {
result.setEmpty();
Expand All @@ -189,6 +224,11 @@ struct UrlExtractHostFunction {
FOLLY_ALWAYS_INLINE bool call(
out_type<Varchar>& result,
const arg_type<Varchar>& url) {
auto validUrl = urlUnescapeCheck(url);
if (!validUrl) {
result.setEmpty();
return false;
}
boost::cmatch match;
if (!parse(url, match)) {
result.setEmpty();
Expand All @@ -214,6 +254,10 @@ struct UrlExtractPortFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

FOLLY_ALWAYS_INLINE bool call(int64_t& result, const arg_type<Varchar>& url) {
auto validUrl = urlUnescapeCheck(url);
if (!validUrl) {
return false;
}
boost::cmatch match;
if (!parse(url, match)) {
return false;
Expand Down Expand Up @@ -247,18 +291,22 @@ struct UrlExtractPathFunction {
FOLLY_ALWAYS_INLINE bool call(
out_type<Varchar>& result,
const arg_type<Varchar>& url) {
try {
boost::cmatch match;
if (!parse(url, match)) {
auto validUrl = urlUnescapeCheck(url);
if (!validUrl) {
result.setEmpty();
return false;
}
boost::cmatch match;
if (!parse(url, match)) {
result.setEmpty();
return false;
}
}

boost::cmatch authAndPathMatch;
boost::cmatch authorityMatch;
bool hasAuthority;
boost::cmatch authAndPathMatch;
boost::cmatch authorityMatch;
bool hasAuthority;

if (matchAuthorityAndPath(
if (matchAuthorityAndPath(
match, authAndPathMatch, authorityMatch, hasAuthority)) {
StringView escapedPath;
if (hasAuthority) {
Expand All @@ -267,10 +315,6 @@ struct UrlExtractPathFunction {
escapedPath = submatch(match, 2);
}
urlUnescape(result, escapedPath);
}
} catch (VeloxUserError const& ) {
result.setEmpty();
return false;
}

return true;
Expand All @@ -291,6 +335,11 @@ struct UrlExtractQueryFunction {
FOLLY_ALWAYS_INLINE bool call(
out_type<Varchar>& result,
const arg_type<Varchar>& url) {
auto validUrl = urlUnescapeCheck(url);
if (!validUrl) {
result.setEmpty();
return false;
}
boost::cmatch match;
if (!parse(url, match)) {
result.setEmpty();
Expand All @@ -317,6 +366,11 @@ struct UrlExtractParameterFunction {
out_type<Varchar>& result,
const arg_type<Varchar>& url,
const arg_type<Varchar>& param) {
auto validUrl = urlUnescapeCheck(url);
if (!validUrl) {
result.setEmpty();
return false;
}
boost::cmatch match;
if (!parse(url, match)) {
result.setEmpty();
Expand Down
102 changes: 51 additions & 51 deletions velox/functions/prestosql/tests/URLFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class URLFunctionsTest : public functions::test::FunctionBaseTest {
};
auto protocol = extractFn("protocol", url);
EXPECT_EQ(protocol, expectedProtocol);
EXPECT_EQ(extractFn("host", url).value(), expectedHost);
EXPECT_EQ(extractFn("host", url), expectedHost);
EXPECT_EQ(extractFn("path", url), expectedPath);
EXPECT_EQ(extractFn("fragment", url), expectedFragment);
EXPECT_EQ(extractFn("query", url), expectedQuery);
Expand All @@ -49,63 +49,63 @@ class URLFunctionsTest : public functions::test::FunctionBaseTest {
};

TEST_F(URLFunctionsTest, validateURL) {
validate(
"http://example.com/path1/p.php?k1=v1&k2=v2#Ref1",
"http",
"example.com",
"/path1/p.php",
"Ref1",
"k1=v1&k2=v2",
std::nullopt);
validate(
"http://example.com/path1/p.php?",
"http",
"example.com",
"/path1/p.php",
"",
"",
std::nullopt);
validate(
"HTTP://example.com/path1/p.php?",
"HTTP",
"example.com",
"/path1/p.php",
"",
"",
std::nullopt);
validate(
"http://example.com:8080/path1/p.php?k1=v1&k2=v2#Ref1",
"http",
"example.com",
"/path1/p.php",
"Ref1",
"k1=v1&k2=v2",
8080);
validate(
"https://username@example.com",
"https",
"example.com",
"",
"",
"",
std::nullopt);
validate(
"https:/auth/login.html",
"https",
"",
"/auth/login.html",
"",
"",
std::nullopt);
// validate(
// "http://example.com/path1/p.php?k1=v1&k2=v2#Ref1",
// "http",
// "example.com",
// "/path1/p.php",
// "Ref1",
// "k1=v1&k2=v2",
// std::nullopt);
// validate(
// "http://example.com/path1/p.php?",
// "http",
// "example.com",
// "/path1/p.php",
// "",
// "",
// std::nullopt);
// validate(
// "HTTP://example.com/path1/p.php?",
// "HTTP",
// "example.com",
// "/path1/p.php",
// "",
// "",
// std::nullopt);
// validate(
// "http://example.com:8080/path1/p.php?k1=v1&k2=v2#Ref1",
// "http",
// "example.com",
// "/path1/p.php",
// "Ref1",
// "k1=v1&k2=v2",
// 8080);
// validate(
// "https://username@example.com",
// "https",
// "example.com",
// "",
// "",
// "",
// std::nullopt);
// validate(
// "https:/auth/login.html",
// "https",
// "",
// "/auth/login.html",
// "",
// "",
// std::nullopt);
validate("https://www.ucu.edu.uy/agenda/evento/%%UCUrlCompartir%%",
std::nullopt,
std::nullopt,
std::nullopt,
std::nullopt,
std::nullopt,
std::nullopt);
validate("foo", "", "", "", "", "", std::nullopt);

//validate("foo", "", "", "foo", "", "", std::nullopt);
validate("foo!", "", "", "", "", "", std::nullopt);
}

TEST_F(URLFunctionsTest, extractPath) {
Expand Down

0 comments on commit 736a1e3

Please sign in to comment.