From d3f247f60912e322c083ffb847e81d6d59ba5169 Mon Sep 17 00:00:00 2001 From: Pisarenko Grigoriy <79596613+GrigoriyPA@users.noreply.github.com> Date: Tue, 20 Aug 2024 13:34:48 +0300 Subject: [PATCH] YQ-3154 improve error in s3 applicator actor (#8008) --- .../kqp_finalize_script_actor.cpp | 6 +- .../s3/actors/yql_s3_applicator_actor.cpp | 60 +++++++++++++++---- 2 files changed, 54 insertions(+), 12 deletions(-) diff --git a/ydb/core/kqp/finalize_script_service/kqp_finalize_script_actor.cpp b/ydb/core/kqp/finalize_script_service/kqp_finalize_script_actor.cpp index 9650e2809d3d..4c68e24c7792 100644 --- a/ydb/core/kqp/finalize_script_service/kqp_finalize_script_actor.cpp +++ b/ydb/core/kqp/finalize_script_service/kqp_finalize_script_actor.cpp @@ -179,7 +179,11 @@ class TScriptFinalizerActor : public TActorBootstrapped { void Handle(NFq::TEvents::TEvEffectApplicationResult::TPtr& ev) { if (ev->Get()->FatalError) { - FinishScriptFinalization(Ydb::StatusIds::BAD_REQUEST, std::move(ev->Get()->Issues)); + NYql::TIssue rootIssue("Failed to commit/abort s3 multipart uploads"); + for (const NYql::TIssue& issue : ev->Get()->Issues) { + rootIssue.AddSubIssue(MakeIntrusive(issue)); + } + FinishScriptFinalization(Ydb::StatusIds::BAD_REQUEST, {rootIssue}); } else { FinishScriptFinalization(); } diff --git a/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp b/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp index e8a84a6e6916..fbd1a4c296b0 100644 --- a/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp +++ b/ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp @@ -273,16 +273,43 @@ class TS3ApplicatorActor : public NActors::TActorBootstrappedCreateRetryState()->GetNextRetryDelay(curlResponseCode, httpResponseCode); - Issues.AddIssue(TStringBuilder() << "Retry operation " << operationName << ", curl error: " << curl_easy_strerror(curlResponseCode) << ", http code: " << httpResponseCode << ", url: " << url); + bool RetryOperation(IHTTPGateway::TResult&& operationResult, const TString& url, const TString& operationName) { + const auto curlResponseCode = operationResult.CurlResponseCode; + const auto httpResponseCode = operationResult.Content.HttpResponseCode; + const auto result = RetryCount && GetRetryState(operationName)->GetNextRetryDelay(curlResponseCode, httpResponseCode); + + NYql::TIssues issues = std::move(operationResult.Issues); + TStringBuilder errorMessage = TStringBuilder() << "Retry operation " << operationName << ", curl error: " << curl_easy_strerror(curlResponseCode) << ", url: " << url; + if (const TString errorText = operationResult.Content.Extract()) { + TString errorCode; + TString message; + if (!ParseS3ErrorResponse(errorText, errorCode, message)) { + message = errorText; + } + issues.AddIssues(BuildIssues(httpResponseCode, errorCode, message)); + } else { + errorMessage << ", HTTP code: " << httpResponseCode; + } + + if (issues) { + RetryIssues.AddIssues(NS3Util::AddParentIssue(errorMessage, std::move(issues))); + } else { + RetryIssues.AddIssue(errorMessage); + } + if (result) { RetryCount--; } else { - Finish(true, RetryCount - ? TString("Number of retries exceeded limit per operation") - : TStringBuilder() << "Number of retries exceeded global limit in " << GLOBAL_RETRY_LIMIT << " retries"); + Issues.AddIssues(NS3Util::AddParentIssue( + RetryCount + ? TStringBuilder() << "Number of retries exceeded limit for operation " << operationName + : TStringBuilder() << "Number of retries exceeded global limit in " << GLOBAL_RETRY_LIMIT << " retries", + NYql::TIssues(RetryIssues) + )); + RetryIssues.Clear(); + Finish(true); } + return result; } @@ -377,7 +404,7 @@ class TS3ApplicatorActor : public NActors::TActorBootstrappedGet()->State->BuildUrl(); LOG_D("CommitMultipartUpload ERROR " << url); - if (RetryOperation(result.CurlResponseCode, result.Content.HttpResponseCode, url, "CommitMultipartUpload")) { + if (RetryOperation(std::move(result), url, "CommitMultipartUpload")) { PushCommitMultipartUpload(ev->Get()->State); } } @@ -422,7 +449,9 @@ class TS3ApplicatorActor : public NActors::TActorBootstrappedGet()->State->Url + ev->Get()->State->Prefix; if (!UnknownPrefixes.contains(prefix)) { UnknownPrefixes.insert(prefix); - Issues.AddIssue(TIssue("Unknown uncommitted upload with prefix: " + prefix)); + TIssue issue(TStringBuilder() << "Unknown uncommitted upload with prefix: " << prefix); + issue.SetCode(NYql::DEFAULT_ERROR, NYql::TSeverityIds::S_INFO); + Issues.AddIssue(std::move(issue)); } } else { pos += KeyPrefix.size(); @@ -452,7 +481,7 @@ class TS3ApplicatorActor : public NActors::TActorBootstrappedGet()->State->BuildUrl(); LOG_D("ListMultipartUploads ERROR " << url); - if (RetryOperation(result.CurlResponseCode, result.Content.HttpResponseCode, url, "ListMultipartUploads")) { + if (RetryOperation(std::move(result), url, "ListMultipartUploads")) { PushListMultipartUploads(ev->Get()->State); } } @@ -476,7 +505,7 @@ class TS3ApplicatorActor : public NActors::TActorBootstrappedGet()->State->BuildUrl(); LOG_D("AbortMultipartUpload ERROR " << url); - if (RetryOperation(result.CurlResponseCode, result.Content.HttpResponseCode, url, "AbortMultipartUpload")) { + if (RetryOperation(std::move(result), url, "AbortMultipartUpload")) { PushAbortMultipartUpload(ev->Get()->State); } } @@ -527,7 +556,7 @@ class TS3ApplicatorActor : public NActors::TActorBootstrappedGet()->State->BuildUrl(); LOG_D("ListParts ERROR " << url); - if (RetryOperation(result.CurlResponseCode, result.Content.HttpResponseCode, url, "ListParts")) { + if (RetryOperation(std::move(result), url, "ListParts")) { PushListParts(ev->Get()->State); } } @@ -597,6 +626,13 @@ class TS3ApplicatorActor : public NActors::TActorBootstrappedSend(new NActors::IEventHandle(selfId, {}, new TEvPrivate::TEvListParts(state, std::move(result)))); } + IHTTPGateway::TRetryPolicy::IRetryState::TPtr& GetRetryState(const TString& operationName) { + if (const auto it = RetryStates.find(operationName); it != RetryStates.end()) { + return it->second; + } + return RetryStates.insert({operationName, RetryPolicy->CreateRetryState()}).first->second; + } + private: NActors::TActorId ParentId; IHTTPGateway::TPtr Gateway; @@ -609,11 +645,13 @@ class TS3ApplicatorActor : public NActors::TActorBootstrapped RetryStates; ui64 HttpRequestInflight = 0; ui64 RetryCount; THashSet UnknownPrefixes; THashSet CommitUploads; NYql::TIssues Issues; + NYql::TIssues RetryIssues; std::queue RequestQueue; bool ApplicationFinished = false; };