diff --git a/ydb/core/kqp/ut/federated_query/s3/kqp_federated_query_ut.cpp b/ydb/core/kqp/ut/federated_query/s3/kqp_federated_query_ut.cpp index b216d24f96f9..ac02ef8a5fe5 100644 --- a/ydb/core/kqp/ut/federated_query/s3/kqp_federated_query_ut.cpp +++ b/ydb/core/kqp/ut/federated_query/s3/kqp_federated_query_ut.cpp @@ -20,11 +20,22 @@ using namespace NTestUtils; using namespace fmt::literals; Y_UNIT_TEST_SUITE(KqpFederatedQuery) { + TString GetSymbolsString(char start, char end, const TString& skip = "") { + TStringBuilder result; + for (char symbol = start; symbol <= end; ++symbol) { + if (skip.Contains(symbol)) { + continue; + } + result << symbol; + } + return result; + } + Y_UNIT_TEST(ExecuteScriptWithExternalTableResolve) { const TString externalDataSourceName = "/Root/external_data_source"; const TString externalTableName = "/Root/test_binding_resolve"; const TString bucket = "test_bucket1"; - const TString object = "test_object"; + const TString object = TStringBuilder() << "test_" << GetSymbolsString(' ', '~', "{}") << "_object"; CreateBucketWithObject(bucket, object, TEST_CONTENT); @@ -49,7 +60,7 @@ Y_UNIT_TEST_SUITE(KqpFederatedQuery) { "external_source"_a = externalDataSourceName, "external_table"_a = externalTableName, "location"_a = GetBucketLocation(bucket), - "object"_a = object + "object"_a = EscapeC(object) ); auto result = session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); @@ -930,7 +941,7 @@ Y_UNIT_TEST_SUITE(KqpFederatedQuery) { const TString externalDataSourceName = "/Root/external_data_source"; const TString externalTableName = "/Root/test_binding_resolve"; const TString bucket = "test_bucket1"; - const TString object = "year=1/month=2/test_object"; + const TString object = TStringBuilder() << "year=1/month=2/test_" << GetSymbolsString(' ', '~') << "_object"; const TString content = "data,year,month\ntest,1,2"; CreateBucketWithObject(bucket, object, content); diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_raw_read_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_raw_read_actor.cpp index d5bfdd479e2f..29381cdad996 100644 --- a/ydb/library/yql/providers/s3/actors/yql_s3_raw_read_actor.cpp +++ b/ydb/library/yql/providers/s3/actors/yql_s3_raw_read_actor.cpp @@ -167,7 +167,7 @@ class TS3ReadActor : public NActors::TActorBootstrapped, public ID const auto& authInfo = Credentials.GetAuthInfo(); LOG_D("TS3ReadActor", "Download: " << url << ", ID: " << id << ", request id: [" << requestId << "]"); Gateway->Download( - UrlEscapeRet(url, true), + NS3Util::UrlEscapeRet(url), IHTTPGateway::MakeYcHeaders(requestId, authInfo.GetToken(), {}, authInfo.GetAwsUserPwd(), authInfo.GetAwsSigV4()), 0U, std::min(object.GetSize(), SizeLimit), diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp index de1fcc7a1b4d..c8dc7c4cb7ea 100644 --- a/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp +++ b/ydb/library/yql/providers/s3/actors/yql_s3_read_actor.cpp @@ -186,7 +186,7 @@ struct TRetryStuff { const TString& requestId, const IHTTPGateway::TRetryPolicy::TPtr& retryPolicy ) : Gateway(std::move(gateway)) - , Url(UrlEscapeRet(url, true)) + , Url(NS3Util::UrlEscapeRet(url)) , Headers(headers) , Offset(0U) , SizeLimit(sizeLimit) diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_write_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_write_actor.cpp index 76b65478da26..6c26518dfe90 100644 --- a/ydb/library/yql/providers/s3/actors/yql_s3_write_actor.cpp +++ b/ydb/library/yql/providers/s3/actors/yql_s3_write_actor.cpp @@ -578,7 +578,7 @@ class TS3WriteActor : public TActorBootstrapped, public IDqComput Gateway, Credentials, key, - UrlEscapeRet(Url + Path + key + MakeOutputName() + Extension, true), + NS3Util::UrlEscapeRet(Url + Path + key + MakeOutputName() + Extension), Compression, RetryPolicy, DirtyWrite, Token); keyIt->second.emplace_back(fileWrite.get()); diff --git a/ydb/library/yql/providers/s3/common/ut/ya.make b/ydb/library/yql/providers/s3/common/ut/ya.make new file mode 100644 index 000000000000..61c94727884b --- /dev/null +++ b/ydb/library/yql/providers/s3/common/ut/ya.make @@ -0,0 +1,12 @@ +UNITTEST_FOR(ydb/library/yql/providers/s3/common) + +SRCS( + util_ut.cpp +) + +PEERDIR( + ydb/library/yql/public/udf/service/stub + ydb/library/yql/sql/pg_dummy +) + +END() diff --git a/ydb/library/yql/providers/s3/common/util.cpp b/ydb/library/yql/providers/s3/common/util.cpp index b1947f1eb5ff..594040188745 100644 --- a/ydb/library/yql/providers/s3/common/util.cpp +++ b/ydb/library/yql/providers/s3/common/util.cpp @@ -1,7 +1,35 @@ #include "util.h" +#include + + namespace NYql::NS3Util { +namespace { + +inline char d2x(unsigned x) { + return (char)((x < 10) ? ('0' + x) : ('A' + x - 10)); +} + +char* UrlEscape(char* to, const char* from) { + while (*from) { + if (*from == '%' || *from == '#' || *from == '?' || (unsigned char)*from <= ' ' || (unsigned char)*from > '~') { + *to++ = '%'; + *to++ = d2x((unsigned char)*from >> 4); + *to++ = d2x((unsigned char)*from & 0xF); + } else { + *to++ = *from; + } + ++from; + } + + *to = 0; + + return to; +} + +} + TIssues AddParentIssue(const TStringBuilder& prefix, TIssues&& issues) { if (!issues) { return TIssues{}; @@ -13,4 +41,11 @@ TIssues AddParentIssue(const TStringBuilder& prefix, TIssues&& issues) { return TIssues{result}; } +TString UrlEscapeRet(const TStringBuf from) { + TString to; + to.ReserveAndResize(CgiEscapeBufLen(from.size())); + to.resize(UrlEscape(to.begin(), from.begin()) - to.data()); + return to; +} + } diff --git a/ydb/library/yql/providers/s3/common/util.h b/ydb/library/yql/providers/s3/common/util.h index a8086b5558ed..f49ab1096bb1 100644 --- a/ydb/library/yql/providers/s3/common/util.h +++ b/ydb/library/yql/providers/s3/common/util.h @@ -7,4 +7,9 @@ namespace NYql::NS3Util { TIssues AddParentIssue(const TStringBuilder& prefix, TIssues&& issues); +// Like UrlEscape with forceEscape = true +// from ydb/library/cpp/string_utils/quote/quote.h, but also escapes: +// '#', '?' +TString UrlEscapeRet(const TStringBuf from); + } diff --git a/ydb/library/yql/providers/s3/common/util_ut.cpp b/ydb/library/yql/providers/s3/common/util_ut.cpp new file mode 100644 index 000000000000..2dcbf47ceef3 --- /dev/null +++ b/ydb/library/yql/providers/s3/common/util_ut.cpp @@ -0,0 +1,33 @@ +#include "util.h" + +#include +#include + + +namespace NYql::NS3Util { + +Y_UNIT_TEST_SUITE(TestS3UrlEscape) { + // Tests on force UrlEscape copied from library/cpp/string_utils/quote/quote_ut.cpp + Y_UNIT_TEST(EscapeEscapedForce) { + TString s; + + s = "hello%3dworld"; + UNIT_ASSERT_VALUES_EQUAL(NS3Util::UrlEscapeRet(s), "hello%253dworld"); + } + + Y_UNIT_TEST(EscapeUnescapeForceRet) { + TString s; + + s = "hello%3dworld"; + UNIT_ASSERT_VALUES_EQUAL(UrlUnescapeRet(NS3Util::UrlEscapeRet(s)), "hello%3dworld"); + } + + // Test additional symbols escape + Y_UNIT_TEST(EscapeAdditionalSymbols) { + TString s = "hello#?world"; + + UNIT_ASSERT_VALUES_EQUAL(NS3Util::UrlEscapeRet(s), "hello%23%3Fworld"); + } +} + +} // namespace NYql::NS3Util diff --git a/ydb/library/yql/providers/s3/common/ya.make b/ydb/library/yql/providers/s3/common/ya.make index 426cb2339bbb..6aa2ee7748a1 100644 --- a/ydb/library/yql/providers/s3/common/ya.make +++ b/ydb/library/yql/providers/s3/common/ya.make @@ -34,3 +34,7 @@ IF (CLANG AND NOT WITH_VALGRIND) ENDIF() END() + +RECURSE_FOR_TESTS( + ut +) diff --git a/ydb/library/yql/providers/s3/object_listers/yql_s3_list.cpp b/ydb/library/yql/providers/s3/object_listers/yql_s3_list.cpp index 32f2df4629b0..be5fd6134c2e 100644 --- a/ydb/library/yql/providers/s3/object_listers/yql_s3_list.cpp +++ b/ydb/library/yql/providers/s3/object_listers/yql_s3_list.cpp @@ -255,7 +255,7 @@ class TS3Lister : public IS3Lister { MakeFilter(listingRequest.Pattern, listingRequest.PatternType, sharedCtx); auto request = listingRequest; - request.Url = UrlEscapeRet(request.Url, true); + request.Url = NS3Util::UrlEscapeRet(request.Url); auto ctx = TListingContext{ std::move(sharedCtx), std::move(filter),