From c90b67702ba1bb752c570ee95af956b38ffcc48b Mon Sep 17 00:00:00 2001 From: Alexey Ozeritskiy Date: Sun, 26 May 2024 14:13:33 +0100 Subject: [PATCH 1/2] Use primary_keys in CBO Statistics for s3 provider --- .../s3/provider/yql_s3_dq_integration.cpp | 22 +++++++++++++++++-- .../providers/s3/provider/yql_s3_provider.h | 1 + 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp index 40bdbb464d04..084eae615cab 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp @@ -15,8 +15,10 @@ #include #include #include +#include #include +#include namespace NYql { @@ -179,7 +181,7 @@ class TS3DqIntegration: public TDqIntegrationBase { TMaybe ReadStatistics(const TExprNode::TPtr& sourceWrap, TExprContext& ctx) override { Y_UNUSED(ctx); double size = 0; - double cols = 0; + int cols = 0; double rows = 0; if (const auto& maybeParseSettings = TMaybeNode(sourceWrap->Child(0))) { const auto& parseSettings = maybeParseSettings.Cast(); @@ -195,12 +197,28 @@ class TS3DqIntegration: public TDqIntegrationBase { } } + TVector* primaryKey = nullptr; + if (auto constraints = GetSetting(parseSettings.Settings().Ref(), "constraints"sv)) { + auto node = NYT::NodeFromYsonString(constraints->Child(1)->Content()); + auto* primaryKeyNode = node.AsMap().FindPtr("primary_key"); + if (primaryKeyNode) { + TVector parsed; + for (auto col : primaryKeyNode->AsList()) { + parsed.push_back(col.AsString()); + } + State_->PrimaryKeys.emplace_back(std::move(parsed)); + primaryKey = &State_->PrimaryKeys.back(); + } + } + if (parseSettings.RowType().Maybe()) { cols = parseSettings.RowType().Ptr()->ChildrenSize(); } rows = size / 1024; // magic estimate - return TOptimizerStatistics(BaseTable, rows, cols, size); + return primaryKey + ? TOptimizerStatistics(BaseTable, rows, cols, size, size) + : TOptimizerStatistics(BaseTable, rows, cols, size, size, *primaryKey); } else { return Nothing(); } diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_provider.h b/ydb/library/yql/providers/s3/provider/yql_s3_provider.h index dd02c29e0445..49c805c707f3 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_provider.h +++ b/ydb/library/yql/providers/s3/provider/yql_s3_provider.h @@ -29,6 +29,7 @@ struct TS3State : public TThrRefBase ISecuredServiceAccountCredentialsFactory::TPtr CredentialsFactory; IHTTPGateway::TPtr Gateway; ui32 ExecutorPoolId = 0; + std::list> PrimaryKeys; }; TDataProviderInitializer GetS3DataProviderInitializer(IHTTPGateway::TPtr gateway, ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory = nullptr, bool allowLocalFiles = false); From 53b40461ade35ab7a68c00a5344b65338825cc5d Mon Sep 17 00:00:00 2001 From: Alexey Ozeritskiy Date: Sun, 26 May 2024 14:19:38 +0100 Subject: [PATCH 2/2] Fix --- .../yql/providers/s3/provider/yql_s3_dq_integration.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp index 084eae615cab..67853af5ebeb 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp @@ -217,8 +217,8 @@ class TS3DqIntegration: public TDqIntegrationBase { rows = size / 1024; // magic estimate return primaryKey - ? TOptimizerStatistics(BaseTable, rows, cols, size, size) - : TOptimizerStatistics(BaseTable, rows, cols, size, size, *primaryKey); + ? TOptimizerStatistics(BaseTable, rows, cols, size, size, *primaryKey) + : TOptimizerStatistics(BaseTable, rows, cols, size, size); } else { return Nothing(); }