From bea3bec1e5ac8959011f5dde6842cd9ed2adfcf1 Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Wed, 17 Apr 2024 16:23:06 +0300 Subject: [PATCH] Refactoring to perform cardinality estimation specifically for YT. --- .../kqp/opt/kqp_statistics_transformer.cpp | 18 +-- ydb/core/kqp/opt/logical/kqp_opt_cbo.h | 4 +- .../yql/core/cbo/cbo_optimizer_new.cpp | 125 +++++++++++++++- ydb/library/yql/core/cbo/cbo_optimizer_new.h | 135 +++++++++--------- ydb/library/yql/core/cbo/ya.make | 2 + ydb/library/yql/core/yql_cost_function.cpp | 112 +-------------- ydb/library/yql/core/yql_cost_function.h | 45 +++--- ydb/library/yql/core/yql_statistics.cpp | 18 +++ ydb/library/yql/core/yql_statistics.h | 23 ++- ydb/library/yql/dq/opt/dq_cbo_ut.cpp | 46 +++--- ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h | 4 +- .../yql/dq/opt/dq_opt_join_cost_based.cpp | 5 +- .../yql/dq/opt/dq_opt_join_tree_node.cpp | 2 +- ydb/library/yql/dq/opt/dq_opt_stat.cpp | 14 +- ydb/library/yql/dq/opt/dq_opt_stat.h | 2 + .../yql/providers/dq/opt/logical_optimize.cpp | 2 +- .../dq/provider/yql_dq_datasource.cpp | 2 +- .../dq/provider/yql_dq_statistics.cpp | 2 +- .../s3/provider/yql_s3_dq_integration.cpp | 2 +- .../yt/provider/ut/yql_yt_cbo_ut.cpp | 2 +- .../yt/provider/yql_yt_dq_integration.cpp | 2 +- .../yt/provider/yql_yt_join_reorder.cpp | 2 +- 22 files changed, 311 insertions(+), 258 deletions(-) diff --git a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp index 248cddf6a5ea..ea72af3515e2 100644 --- a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp +++ b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp @@ -43,8 +43,7 @@ void InferStatisticsForReadTable(const TExprNode::TPtr& input, TTypeAnnotationCo YQL_CLOG(TRACE, CoreDq) << "Infer statistics for read table, nrows:" << nRows << ", nattrs: " << nAttrs; - auto outputStats = TOptimizerStatistics(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0.0, tableData.Metadata->KeyColumnNames); - typeCtx->SetStats(input.Get(), std::make_shared(outputStats)); + typeCtx->SetStats(input.Get(), std::make_shared(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0.0, tableData.Metadata->KeyColumnNames)); } /** @@ -63,8 +62,7 @@ void InferStatisticsForKqpTable(const TExprNode::TPtr& input, TTypeAnnotationCon int nAttrs = tableData.Metadata->Columns.size(); YQL_CLOG(TRACE, CoreDq) << "Infer statistics for table: " << path.Value() << ", nrows: " << nRows << ", nattrs: " << nAttrs << ", nKeyColumns: " << tableData.Metadata->KeyColumnNames.size(); - auto outputStats = TOptimizerStatistics(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0.0, tableData.Metadata->KeyColumnNames); - typeCtx->SetStats(input.Get(), std::make_shared(outputStats)); + typeCtx->SetStats(input.Get(), std::make_shared(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0.0, tableData.Metadata->KeyColumnNames)); } /** @@ -84,8 +82,7 @@ void InferStatisticsForSteamLookup(const TExprNode::TPtr& input, TTypeAnnotation auto inputStats = typeCtx->GetStats(streamLookup.Table().Raw()); auto byteSize = inputStats->ByteSize * (nAttrs / (double) inputStats->Ncols); - auto outputStats = TOptimizerStatistics(EStatisticsType::BaseTable, inputStats->Nrows, nAttrs, byteSize, 0, inputStats->KeyColumns); - typeCtx->SetStats(input.Get(), std::make_shared(outputStats)); + typeCtx->SetStats(input.Get(), std::make_shared(EStatisticsType::BaseTable, inputStats->Nrows, nAttrs, byteSize, 0, inputStats->KeyColumns)); } /** @@ -116,8 +113,7 @@ void InferStatisticsForLookupTable(const TExprNode::TPtr& input, TTypeAnnotation byteSize = 10; } - auto outputStats = TOptimizerStatistics(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0, inputStats->KeyColumns); - typeCtx->SetStats(input.Get(), std::make_shared(outputStats)); + typeCtx->SetStats(input.Get(), std::make_shared(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0, inputStats->KeyColumns)); } /** @@ -151,8 +147,7 @@ void InferStatisticsForRowsSourceSettings(const TExprNode::TPtr& input, TTypeAnn double cost = inputStats->Cost; double byteSize = inputStats->ByteSize * (nAttrs / (double)inputStats->Ncols); - auto outputStats = TOptimizerStatistics(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, cost, inputStats->KeyColumns); - typeCtx->SetStats(input.Get(), std::make_shared(outputStats)); + typeCtx->SetStats(input.Get(), std::make_shared(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, cost, inputStats->KeyColumns)); } /** @@ -160,8 +155,7 @@ void InferStatisticsForRowsSourceSettings(const TExprNode::TPtr& input, TTypeAnn * Currently we just make up a number for cardinality (5) and set cost to 0 */ void InferStatisticsForIndexLookup(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) { - auto outputStats = TOptimizerStatistics(EStatisticsType::BaseTable, 5, 5, 20, 0.0); - typeCtx->SetStats(input.Get(), std::make_shared(outputStats)); + typeCtx->SetStats(input.Get(), std::make_shared(EStatisticsType::BaseTable, 5, 5, 20, 0.0)); } /*** diff --git a/ydb/core/kqp/opt/logical/kqp_opt_cbo.h b/ydb/core/kqp/opt/logical/kqp_opt_cbo.h index 88e81461acdb..30a0498c55a2 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_cbo.h +++ b/ydb/core/kqp/opt/logical/kqp_opt_cbo.h @@ -20,7 +20,7 @@ struct TKqpRelOptimizerNode : public NYql::TRelOptimizerNode { /** * KQP Specific cost function and join applicability cost function */ -struct TKqpProviderContext : public NYql::IProviderContext { +struct TKqpProviderContext : public NYql::TBaseProviderContext { TKqpProviderContext(const TKqpOptimizeContext& kqpCtx, const int optLevel) : KqpCtx(kqpCtx), OptLevel(optLevel) {} virtual bool IsJoinApplicable(const std::shared_ptr& left, @@ -35,4 +35,4 @@ struct TKqpProviderContext : public NYql::IProviderContext { int OptLevel; }; -} \ No newline at end of file +} diff --git a/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp b/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp index 9fbfda733e2f..ea4a2331c660 100644 --- a/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp +++ b/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp @@ -8,6 +8,9 @@ #include +const TString& ToString(NYql::EJoinKind); +const TString& ToString(NYql::EJoinAlgoType); + namespace NYql { using namespace NYql::NDq; @@ -89,7 +92,8 @@ void TJoinOptimizerNode::Print(std::stringstream& stream, int ntabs) { stream << " "; } - stream << "Join: (" << ConvertToJoinString(JoinType) << "," << ConvertToJoinAlgoString(JoinAlgo) << ") "; + stream << "Join: (" << ToString(JoinType) << "," << ToString(JoinAlgo) << ") "; + for (auto c : JoinConditions){ stream << c.first.RelName << "." << c.first.AttributeName << "=" << c.second.RelName << "." @@ -109,4 +113,123 @@ void TJoinOptimizerNode::Print(std::stringstream& stream, int ntabs) { RightArg->Print(stream, ntabs+1); } +bool IsPKJoin(const TOptimizerStatistics& stats, const TVector& joinKeys) { + if (stats.KeyColumns.size() == 0) { + return false; + } + + for(size_t i = 0; i < stats.KeyColumns.size(); i++){ + if (std::find(joinKeys.begin(), joinKeys.end(), stats.KeyColumns[i]) == joinKeys.end()) { + return false; + } + } + return true; +} + +bool TBaseProviderContext::IsJoinApplicable(const std::shared_ptr& left, + const std::shared_ptr& right, + const std::set>& joinConditions, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, + EJoinAlgoType joinAlgo) { + + Y_UNUSED(left); + Y_UNUSED(right); + Y_UNUSED(joinConditions); + Y_UNUSED(leftJoinKeys); + Y_UNUSED(rightJoinKeys); + + return joinAlgo == EJoinAlgoType::MapJoin; +} + +double TBaseProviderContext::ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgo) const { + Y_UNUSED(outputByteSize); + Y_UNUSED(joinAlgo); + return leftStats.Nrows + 2.0 * rightStats.Nrows + outputRows; +} + +/** + * Compute the cost and output cardinality of a join + * + * Currently a very basic computation targeted at GraceJoin + * + * The build is on the right side, so we make the build side a bit more expensive than the probe +*/ +TOptimizerStatistics TBaseProviderContext::ComputeJoinStats( + const TOptimizerStatistics& leftStats, + const TOptimizerStatistics& rightStats, + const std::set>& joinConditions, + EJoinAlgoType joinAlgo) const +{ + TVector leftJoinKeys; + TVector rightJoinKeys; + + for (auto c : joinConditions) { + leftJoinKeys.emplace_back(c.first.AttributeName); + rightJoinKeys.emplace_back(c.second.AttributeName); + } + + return ComputeJoinStats(leftStats, rightStats, leftJoinKeys, rightJoinKeys, joinAlgo); +} + +TOptimizerStatistics TBaseProviderContext::ComputeJoinStats( + const TOptimizerStatistics& leftStats, + const TOptimizerStatistics& rightStats, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, + EJoinAlgoType joinAlgo) const +{ + double newCard; + EStatisticsType outputType; + bool leftKeyColumns = false; + bool rightKeyColumns = false; + double selectivity = 1.0; + + + if (IsPKJoin(rightStats,rightJoinKeys)) { + newCard = leftStats.Nrows * rightStats.Selectivity; + selectivity = leftStats.Selectivity * rightStats.Selectivity; + leftKeyColumns = true; + if (leftStats.Type == EStatisticsType::BaseTable){ + outputType = EStatisticsType::FilteredFactTable; + } else { + outputType = leftStats.Type; + } + } + else if (IsPKJoin(leftStats,leftJoinKeys)) { + newCard = rightStats.Nrows; + newCard = rightStats.Nrows * leftStats.Selectivity; + selectivity = leftStats.Selectivity * rightStats.Selectivity; + + rightKeyColumns = true; + if (rightStats.Type == EStatisticsType::BaseTable){ + outputType = EStatisticsType::FilteredFactTable; + } else { + outputType = rightStats.Type; + } + } + else { + newCard = 0.2 * leftStats.Nrows * rightStats.Nrows; + outputType = EStatisticsType::ManyManyJoin; + } + + int newNCols = leftStats.Ncols + rightStats.Ncols; + double newByteSize = leftStats.Nrows ? (leftStats.ByteSize / leftStats.Nrows) * newCard : 0 + + rightStats.Nrows ? (rightStats.ByteSize / rightStats.Nrows) * newCard : 0; + + double cost = ComputeJoinCost(leftStats, rightStats, newCard, newByteSize, joinAlgo) + + leftStats.Cost + rightStats.Cost; + + auto result = TOptimizerStatistics(outputType, newCard, newNCols, newByteSize, cost, + leftKeyColumns ? leftStats.KeyColumns : ( rightKeyColumns ? rightStats.KeyColumns : TOptimizerStatistics::EmptyColumns)); + result.Selectivity = selectivity; + return result; +} + +const TBaseProviderContext& TBaseProviderContext::Instance() { + static TBaseProviderContext staticContext; + return staticContext; +} + + } // namespace NYql diff --git a/ydb/library/yql/core/cbo/cbo_optimizer_new.h b/ydb/library/yql/core/cbo/cbo_optimizer_new.h index 63450edcc8fe..0e8103151c2c 100644 --- a/ydb/library/yql/core/cbo/cbo_optimizer_new.h +++ b/ydb/library/yql/core/cbo/cbo_optimizer_new.h @@ -10,13 +10,12 @@ #include #include - namespace NYql { /** * OptimizerNodes are the internal representations of operators inside the * Cost-based optimizer. Currently we only support RelOptimizerNode - a node that - * is an input relation to the equi-join, and JoinOptimizerNode - an inner join + * is an input relation to the equi-join, and JoinOptimizerNode - an inner join * that connects two sets of relations. */ enum EOptimizerNodeKind: ui32 @@ -35,13 +34,76 @@ struct IBaseOptimizerNode { std::shared_ptr Stats; IBaseOptimizerNode(EOptimizerNodeKind k) : Kind(k) {} - IBaseOptimizerNode(EOptimizerNodeKind k, std::shared_ptr s) : + IBaseOptimizerNode(EOptimizerNodeKind k, std::shared_ptr s) : Kind(k), Stats(s) {} virtual TVector Labels()=0; virtual void Print(std::stringstream& stream, int ntabs=0)=0; }; + +/** + * This is a temporary structure for KQP provider + * We will soon be supporting multiple providers and we will need to design + * some interfaces to pass provider-specific context to the optimizer +*/ +struct IProviderContext { + virtual ~IProviderContext() = default; + + virtual double ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgol) const = 0; + + virtual TOptimizerStatistics ComputeJoinStats( + const TOptimizerStatistics& leftStats, + const TOptimizerStatistics& rightStats, + const std::set>& joinConditions, EJoinAlgoType joinAlgo) const = 0; + + virtual TOptimizerStatistics ComputeJoinStats( + const TOptimizerStatistics& leftStats, + const TOptimizerStatistics& rightStats, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, + EJoinAlgoType joinAlgo) const = 0; + + virtual bool IsJoinApplicable(const std::shared_ptr& left, + const std::shared_ptr& right, + const std::set>& joinConditions, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, + EJoinAlgoType joinAlgo) = 0; +}; + +/** + * Default provider context with default cost and stats computation. +*/ + +struct TBaseProviderContext : public IProviderContext { + TBaseProviderContext() {} + + double ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgo) const override; + + bool IsJoinApplicable(const std::shared_ptr& left, + const std::shared_ptr& right, + const std::set>& joinConditions, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, + EJoinAlgoType joinAlgo) override; + + virtual TOptimizerStatistics ComputeJoinStats( + const TOptimizerStatistics& leftStats, + const TOptimizerStatistics& rightStats, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, + EJoinAlgoType joinAlgo) const override; + + virtual TOptimizerStatistics ComputeJoinStats( + const TOptimizerStatistics& leftStats, + const TOptimizerStatistics& rightStats, + const std::set>& joinConditions, + EJoinAlgoType joinAlgo) const override; + + static const TBaseProviderContext& Instance(); +}; + /** * RelOptimizerNode adds a label to base class * This is the label assinged to the input by equi-Join @@ -52,9 +114,9 @@ struct TRelOptimizerNode : public IBaseOptimizerNode { // Temporary solution to check if a LookupJoin is possible in KQP //void* Expr; - TRelOptimizerNode(TString label, std::shared_ptr stats) : + TRelOptimizerNode(TString label, std::shared_ptr stats) : IBaseOptimizerNode(RelNodeType, stats), Label(label) { } - //TRelOptimizerNode(TString label, std::shared_ptr stats, const TExprNode::TPtr expr) : + //TRelOptimizerNode(TString label, std::shared_ptr stats, const TExprNode::TPtr expr) : // IBaseOptimizerNode(RelNodeType, stats), Label(label), Expr(expr) { } virtual ~TRelOptimizerNode() {} @@ -79,61 +141,6 @@ enum EJoinKind: ui32 EJoinKind ConvertToJoinKind(const TString& joinString); TString ConvertToJoinString(const EJoinKind kind); -/** - * This is a temporary structure for KQP provider - * We will soon be supporting multiple providers and we will need to design - * some interfaces to pass provider-specific context to the optimizer -*/ -struct IProviderContext { - virtual ~IProviderContext() = default; - - virtual double ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgol) const = 0; - - virtual bool IsJoinApplicable(const std::shared_ptr& left, - const std::shared_ptr& right, - const std::set>& joinConditions, - const TVector& leftJoinKeys, - const TVector& rightJoinKeys, - EJoinAlgoType joinAlgo) = 0; - -}; - -/** - * Temporary solution for default provider context -*/ - -struct TDummyProviderContext : public IProviderContext { - TDummyProviderContext() {} - - double ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgo) const override { - Y_UNUSED(outputByteSize); - Y_UNUSED(joinAlgo); - return leftStats.Nrows + 2.0 * rightStats.Nrows + outputRows; - } - - bool IsJoinApplicable(const std::shared_ptr& left, - const std::shared_ptr& right, - const std::set>& joinConditions, - const TVector& leftJoinKeys, - const TVector& rightJoinKeys, - EJoinAlgoType joinAlgo) override { - - Y_UNUSED(left); - Y_UNUSED(right); - Y_UNUSED(joinConditions); - Y_UNUSED(leftJoinKeys); - Y_UNUSED(rightJoinKeys); - - return joinAlgo == EJoinAlgoType::MapJoin; - } - - static const TDummyProviderContext& instance() { - static TDummyProviderContext staticContext; - return staticContext; - } - -}; - /** * JoinOptimizerNode records the left and right arguments of the join * as well as the set of join conditions. @@ -150,11 +157,11 @@ struct TJoinOptimizerNode : public IBaseOptimizerNode { EJoinAlgoType JoinAlgo; bool IsReorderable; - TJoinOptimizerNode(const std::shared_ptr& left, - const std::shared_ptr& right, + TJoinOptimizerNode(const std::shared_ptr& left, + const std::shared_ptr& right, const std::set>& joinConditions, - const EJoinKind joinType, - const EJoinAlgoType joinAlgo, + const EJoinKind joinType, + const EJoinAlgoType joinAlgo, bool nonReorderable=false); virtual ~TJoinOptimizerNode() {} virtual TVector Labels(); diff --git a/ydb/library/yql/core/cbo/ya.make b/ydb/library/yql/core/cbo/ya.make index d0986d4f6420..15e4923b631b 100644 --- a/ydb/library/yql/core/cbo/ya.make +++ b/ydb/library/yql/core/cbo/ya.make @@ -4,6 +4,8 @@ SRCS( cbo_optimizer_new.cpp ) +GENERATE_ENUM_SERIALIZATION(cbo_optimizer_new.h) + END() RECURSE_FOR_TESTS( diff --git a/ydb/library/yql/core/yql_cost_function.cpp b/ydb/library/yql/core/yql_cost_function.cpp index 078acd70bfb6..7dfe08a2cdd7 100644 --- a/ydb/library/yql/core/yql_cost_function.cpp +++ b/ydb/library/yql/core/yql_cost_function.cpp @@ -1,32 +1,7 @@ -#include "yql_cost_function.h" - -#include - -using namespace NYql; - -namespace { - -THashMap JoinAlgoMap = { - {"Undefined",EJoinAlgoType::Undefined}, - {"LookupJoin",EJoinAlgoType::LookupJoin}, - {"MapJoin",EJoinAlgoType::MapJoin}, - {"GraceJoin",EJoinAlgoType::GraceJoin}, - {"StreamLookupJoin",EJoinAlgoType::StreamLookupJoin}}; -bool IsPKJoin(const TOptimizerStatistics& stats, const TVector& joinKeys) { - if (stats.KeyColumns.size()==0) { - return false; - } - - for(size_t i=0; i& leftJoinKeys, const TVector& rightJoinKeys, EJoinAlgoType joinAlgo, const IProviderContext& ctx) { - - double newCard; - EStatisticsType outputType; - bool leftKeyColumns = false; - bool rightKeyColumns = false; - double selectivity = 1.0; - - - if (IsPKJoin(rightStats,rightJoinKeys)) { - newCard = leftStats.Nrows * rightStats.Selectivity; - selectivity = leftStats.Selectivity * rightStats.Selectivity; - leftKeyColumns = true; - if (leftStats.Type == EStatisticsType::BaseTable){ - outputType = EStatisticsType::FilteredFactTable; - } else { - outputType = leftStats.Type; - } - } - else if (IsPKJoin(leftStats,leftJoinKeys)) { - newCard = rightStats.Nrows; - newCard = rightStats.Nrows * leftStats.Selectivity; - selectivity = leftStats.Selectivity * rightStats.Selectivity; - - rightKeyColumns = true; - if (rightStats.Type == EStatisticsType::BaseTable){ - outputType = EStatisticsType::FilteredFactTable; - } else { - outputType = rightStats.Type; - } - } - else { - newCard = 0.2 * leftStats.Nrows * rightStats.Nrows; - outputType = EStatisticsType::ManyManyJoin; - } - - int newNCols = leftStats.Ncols + rightStats.Ncols; - double newByteSize = leftStats.Nrows ? (leftStats.ByteSize / leftStats.Nrows) * newCard : 0 + - rightStats.Nrows ? (rightStats.ByteSize / rightStats.Nrows) * newCard : 0; - - double cost = ctx.ComputeJoinCost(leftStats, rightStats, newCard, newByteSize, joinAlgo) - + leftStats.Cost + rightStats.Cost; - - auto result = TOptimizerStatistics(outputType, newCard, newNCols, newByteSize, cost, - leftKeyColumns ? leftStats.KeyColumns : ( rightKeyColumns ? rightStats.KeyColumns : TOptimizerStatistics::EmptyColumns)); - result.Selectivity = selectivity; - return result; -} - - -TOptimizerStatistics NYql::ComputeJoinStats(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, - const std::set>& joinConditions, EJoinAlgoType joinAlgo, const IProviderContext& ctx) { - - TVector leftJoinKeys; - TVector rightJoinKeys; - - for (auto c : joinConditions) { - leftJoinKeys.emplace_back(c.first.AttributeName); - rightJoinKeys.emplace_back(c.second.AttributeName); - } - - return ComputeJoinStats(leftStats, rightStats, leftJoinKeys, rightJoinKeys, joinAlgo, ctx); -} - +} // namespace NYql diff --git a/ydb/library/yql/core/yql_cost_function.h b/ydb/library/yql/core/yql_cost_function.h index dcacef50a82a..778ee6276a6c 100644 --- a/ydb/library/yql/core/yql_cost_function.h +++ b/ydb/library/yql/core/yql_cost_function.h @@ -16,17 +16,30 @@ namespace NYql { struct IProviderContext; -namespace NDq { +enum class EJoinAlgoType { + Undefined, + LookupJoin, + MapJoin, + GraceJoin, + StreamLookupJoin, //Right part can be updated during an operation. Used mainly for joining streams with lookup tables. Currently impplemented in Dq by LookupInputTransform + MergeJoin // To be used in YT +}; + +//StreamLookupJoin is not a subject for CBO and not not included here +static constexpr auto AllJoinAlgos = { EJoinAlgoType::MapJoin, EJoinAlgoType::GraceJoin, EJoinAlgoType::LookupJoin, EJoinAlgoType::MergeJoin }; + +namespace NDq { + /** - * Join column is a struct that records the relation label and + * Join column is a struct that records the relation label and * attribute name, used in join conditions */ struct TJoinColumn { TString RelName; TString AttributeName; - TJoinColumn(TString relName, TString attributeName) : RelName(relName), - AttributeName(attributeName) {} + TJoinColumn(TString relName, TString attributeName) : RelName(relName), + AttributeName(std::move(attributeName)) {} bool operator == (const TJoinColumn& other) const { return RelName == other.RelName && AttributeName == other.AttributeName; @@ -43,26 +56,6 @@ struct TJoinColumn { bool operator < (const TJoinColumn& c1, const TJoinColumn& c2); -} - -enum class EJoinAlgoType { - Undefined, - LookupJoin, - MapJoin, - GraceJoin, - StreamLookupJoin //Right part can be updated during an operation. Used mainly for joining streams with lookup tables. Currently impplemented in Dq by LookupInputTransform -}; - -TString ConvertToJoinAlgoString(EJoinAlgoType joinAlgo); - -//StreamLookupJoin is not a subject for CBO and not not included here -static constexpr auto AllJoinAlgos = { EJoinAlgoType::MapJoin, EJoinAlgoType::GraceJoin, EJoinAlgoType::LookupJoin }; - -TOptimizerStatistics ComputeJoinStats(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, - const std::set>& joinConditions, EJoinAlgoType joinAlgo, const IProviderContext& ctx); - -TOptimizerStatistics ComputeJoinStats(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, - const TVector& leftJoinKeys, const TVector& rightJoinKeys, EJoinAlgoType joinAlgo, const IProviderContext& ctx); - -} +} // namespace NDq +} // namespace NYql diff --git a/ydb/library/yql/core/yql_statistics.cpp b/ydb/library/yql/core/yql_statistics.cpp index f7f07ce85877..f28080b31b9d 100644 --- a/ydb/library/yql/core/yql_statistics.cpp +++ b/ydb/library/yql/core/yql_statistics.cpp @@ -27,6 +27,24 @@ bool TOptimizerStatistics::Empty() const { return ! (Nrows || Ncols || Cost); } +TOptimizerStatistics::TOptimizerStatistics( + EStatisticsType type, + double nrows, + int ncols, + double byteSize, + double cost, + const TVector& keyColumns, + std::unique_ptr specific) + : Type(type) + , Nrows(nrows) + , Ncols(ncols) + , ByteSize(byteSize) + , Cost(cost) + , KeyColumns(keyColumns) + , Specific(std::move(specific)) +{ +} + TOptimizerStatistics& TOptimizerStatistics::operator+=(const TOptimizerStatistics& other) { Nrows += other.Nrows; Ncols += other.Ncols; diff --git a/ydb/library/yql/core/yql_statistics.h b/ydb/library/yql/core/yql_statistics.h index e92473fb2791..abb3b65e230f 100644 --- a/ydb/library/yql/core/yql_statistics.h +++ b/ydb/library/yql/core/yql_statistics.h @@ -13,6 +13,12 @@ enum EStatisticsType : ui32 { ManyManyJoin }; +// Providers may subclass this struct to associate specific statistics, useful to +// derive stats for higher-level operators in the plan. +struct IProviderStatistics { + virtual ~IProviderStatistics() {} +}; + /** * Optimizer Statistics struct records per-table and per-column statistics * for the current operator in the plan. Currently, only Nrows and Ncols are @@ -28,14 +34,19 @@ struct TOptimizerStatistics { double Cost = 0; double Selectivity = 1.0; const TVector& KeyColumns; + std::unique_ptr Specific; + TOptimizerStatistics(TOptimizerStatistics&&) = default; TOptimizerStatistics() : KeyColumns(EmptyColumns) {} - TOptimizerStatistics(double nrows, int ncols): Nrows(nrows), Ncols(ncols), KeyColumns(EmptyColumns) {} - TOptimizerStatistics(double nrows, int ncols, double cost): Nrows(nrows), Ncols(ncols), Cost(cost), KeyColumns(EmptyColumns) {} - TOptimizerStatistics(EStatisticsType type, double nrows, int ncols, double cost): Type(type), Nrows(nrows), Ncols(ncols), Cost(cost), KeyColumns(EmptyColumns) {} - TOptimizerStatistics(EStatisticsType type, double nrows, int ncols, double byteSize, double cost): Type(type), Nrows(nrows), Ncols(ncols), ByteSize(byteSize), Cost(cost), KeyColumns(EmptyColumns) {} - TOptimizerStatistics(EStatisticsType type, double nrows, int ncols, double cost, const TVector& keyColumns): Type(type), Nrows(nrows), Ncols(ncols), Cost(cost), KeyColumns(keyColumns) {} - TOptimizerStatistics(EStatisticsType type, double nrows, int ncols, double byteSize, double cost, const TVector& keyColumns): Type(type), Nrows(nrows), Ncols(ncols), ByteSize(byteSize), Cost(cost), KeyColumns(keyColumns) {} + + TOptimizerStatistics( + EStatisticsType type, + double nrows = 0.0, + int ncols = 0, + double byteSize = 0.0, + double cost = 0.0, + const TVector& keyColumns = EmptyColumns, + std::unique_ptr specific = nullptr); TOptimizerStatistics& operator+=(const TOptimizerStatistics& other); bool Empty() const; diff --git a/ydb/library/yql/dq/opt/dq_cbo_ut.cpp b/ydb/library/yql/dq/opt/dq_cbo_ut.cpp index 21302b4af36c..3bd72d7fe449 100644 --- a/ydb/library/yql/dq/opt/dq_cbo_ut.cpp +++ b/ydb/library/yql/dq/opt/dq_cbo_ut.cpp @@ -32,16 +32,18 @@ TExprNode::TPtr MakeLabel(TExprContext& ctx, const std::vector& vars Y_UNIT_TEST_SUITE(DQCBO) { Y_UNIT_TEST(Empty) { - TDummyProviderContext pctx; + TBaseProviderContext pctx; std::unique_ptr optimizer = std::unique_ptr(MakeNativeOptimizerNew(pctx, 100000)); } Y_UNIT_TEST(JoinSearch2Rels) { - TDummyProviderContext pctx; + TBaseProviderContext pctx; std::unique_ptr optimizer = std::unique_ptr(MakeNativeOptimizerNew(pctx, 100000)); - auto rel1 = std::make_shared("a", std::make_shared(100000, 1, 1000000)); - auto rel2 = std::make_shared("b", std::make_shared(1000000, 1, 9000009)); + auto rel1 = std::make_shared("a", + std::make_shared(BaseTable, 100000, 1, 0, 1000000)); + auto rel2 = std::make_shared("b", + std::make_shared(BaseTable, 1000000, 1, 0, 9000009)); std::set> joinConditions; joinConditions.insert({ @@ -59,7 +61,7 @@ Y_UNIT_TEST(JoinSearch2Rels) { auto res = optimizer->JoinSearch(op); std::stringstream ss; res->Print(ss); - TString expected = R"__(Join: (Inner,MapJoin) b.1=a.1, + TString expected = R"__(Join: (InnerJoin,MapJoin) b.1=a.1, Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10 Rel: b Type: BaseTable, Nrows: 1e+06, Ncols: 1, ByteSize: 0, Cost: 9.00001e+06 @@ -71,12 +73,15 @@ Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10 } Y_UNIT_TEST(JoinSearch3Rels) { - TDummyProviderContext pctx; + TBaseProviderContext pctx; std::unique_ptr optimizer = std::unique_ptr(MakeNativeOptimizerNew(pctx, 100000)); - auto rel1 = std::make_shared("a", std::make_shared(100000, 1, 1000000)); - auto rel2 = std::make_shared("b", std::make_shared(1000000, 1, 9000009)); - auto rel3 = std::make_shared("c", std::make_shared(10000, 1, 9009)); + auto rel1 = std::make_shared("a", + std::make_shared(BaseTable, 100000, 1, 0, 1000000)); + auto rel2 = std::make_shared("b", + std::make_shared(BaseTable, 1000000, 1, 0, 9000009)); + auto rel3 = std::make_shared("c", + std::make_shared(BaseTable, 10000, 1, 0, 9009)); std::set> joinConditions; joinConditions.insert({ @@ -108,9 +113,9 @@ Y_UNIT_TEST(JoinSearch3Rels) { std::stringstream ss; res->Print(ss); - TString expected = R"__(Join: (Inner,MapJoin) a.1=b.1,a.1=c.1, + TString expected = R"__(Join: (InnerJoin,MapJoin) a.1=b.1,a.1=c.1, Type: ManyManyJoin, Nrows: 4e+13, Ncols: 3, ByteSize: 0, Cost: 4.004e+13 - Join: (Inner,MapJoin) b.1=a.1, + Join: (InnerJoin,MapJoin) b.1=a.1, Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10 Rel: b Type: BaseTable, Nrows: 1e+06, Ncols: 1, ByteSize: 0, Cost: 9.00001e+06 @@ -147,11 +152,14 @@ Y_UNIT_TEST(RelCollector) { TVector> rels; UNIT_ASSERT(DqCollectJoinRelationsWithStats(rels, typeCtx, equiJoin, [&](auto, auto, auto, auto) {}) == false); - typeCtx.StatisticsMap[tables[1].Ptr()->Child(0)] = std::make_shared(1, 1, 1); + typeCtx.StatisticsMap[tables[1].Ptr()->Child(0)] = + std::make_shared(BaseTable, 1, 1, 1); UNIT_ASSERT(DqCollectJoinRelationsWithStats(rels, typeCtx, equiJoin, [&](auto, auto, auto, auto) {}) == false); - typeCtx.StatisticsMap[tables[0].Ptr()->Child(0)] = std::make_shared(1, 1, 1); - typeCtx.StatisticsMap[tables[2].Ptr()->Child(0)] = std::make_shared(1, 1, 1); + typeCtx.StatisticsMap[tables[0].Ptr()->Child(0)] = + std::make_shared(BaseTable, 1, 1, 1); + typeCtx.StatisticsMap[tables[2].Ptr()->Child(0)] = + std::make_shared(BaseTable, 1, 1, 1); TVector labels; UNIT_ASSERT(DqCollectJoinRelationsWithStats(rels, typeCtx, equiJoin, [&](auto, auto label, auto, auto) { labels.emplace_back(label); }) == true); @@ -199,8 +207,10 @@ void _DqOptimizeEquiJoinWithCosts(const std::function& optFact joinArgs.emplace_back(joinTree); joinArgs.emplace_back(settings); - typeCtx.StatisticsMap[tables[0].Ptr()->Child(0)] = std::make_shared(1, 1, 1); - typeCtx.StatisticsMap[tables[1].Ptr()->Child(0)] = std::make_shared(1, 1, 1); + typeCtx.StatisticsMap[tables[0].Ptr()->Child(0)] = + std::make_shared(BaseTable, 1, 1, 1); + typeCtx.StatisticsMap[tables[1].Ptr()->Child(0)] = + std::make_shared(BaseTable, 1, 1, 1); TCoEquiJoin equiJoin = Build(ctx, pos) .Add(joinArgs) @@ -227,7 +237,7 @@ void _DqOptimizeEquiJoinWithCosts(const std::function& optFact Y_UNIT_TEST(DqOptimizeEquiJoinWithCostsNative) { TExprContext ctx; - TDummyProviderContext pctx; + TBaseProviderContext pctx; std::function optFactory = [&]() { return MakeNativeOptimizerNew(pctx, 100000); }; @@ -236,7 +246,7 @@ Y_UNIT_TEST(DqOptimizeEquiJoinWithCostsNative) { Y_UNIT_TEST(DqOptimizeEquiJoinWithCostsPG) { TExprContext ctx; - TDummyProviderContext pctx; + TBaseProviderContext pctx; std::function log = [&](auto str) { Cerr << str; }; diff --git a/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h b/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h index 54ceda217902..a724d3a28465 100644 --- a/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h +++ b/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h @@ -405,7 +405,7 @@ template std::shared_ptr TDPHypS for (auto joinAlgo : AllJoinAlgos) { if (ctx.IsJoinApplicable(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinAlgo)){ - auto cost = ComputeJoinStats(*left->Stats, *right->Stats, leftJoinKeys, rightJoinKeys, joinAlgo, ctx).Cost; + auto cost = ctx.ComputeJoinStats(*left->Stats, *right->Stats, leftJoinKeys, rightJoinKeys, joinAlgo).Cost; if (cost < bestCost) { bestCost = cost; bestAlgo = joinAlgo; @@ -415,7 +415,7 @@ template std::shared_ptr TDPHypS if (isCommutative) { if (ctx.IsJoinApplicable(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinAlgo)){ - auto cost = ComputeJoinStats(*right->Stats, *left->Stats, rightJoinKeys, leftJoinKeys, joinAlgo, ctx).Cost; + auto cost = ctx.ComputeJoinStats(*right->Stats, *left->Stats, rightJoinKeys, leftJoinKeys, joinAlgo).Cost; if (cost < bestCost) { bestCost = cost; bestAlgo = joinAlgo; diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp index 4f1c66a01c6a..d4993062abca 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp @@ -197,13 +197,12 @@ void ComputeStatistics(const std::shared_ptr& join, IProvide ComputeStatistics(static_pointer_cast(join->RightArg), ctx); } join->Stats = std::make_shared( - ComputeJoinStats( + ctx.ComputeJoinStats( *join->LeftArg->Stats, *join->RightArg->Stats, join->LeftJoinKeys, join->RightJoinKeys, - EJoinAlgoType::GraceJoin, - ctx + EJoinAlgoType::GraceJoin ) ); } diff --git a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp index 1c4db89bb627..181ba5db080c 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp @@ -13,7 +13,7 @@ std::shared_ptr MakeJoinInternal( IProviderContext& ctx) { auto res = std::make_shared(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, joinAlgo); - res->Stats = std::make_shared(ComputeJoinStats(*left->Stats, *right->Stats, leftJoinKeys, rightJoinKeys, joinAlgo, ctx)); + res->Stats = std::make_shared(ctx.ComputeJoinStats(*left->Stats, *right->Stats, leftJoinKeys, rightJoinKeys, joinAlgo)); return res; } diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp index f5f5e185bb7d..6ffc0ef4e9e4 100644 --- a/ydb/library/yql/dq/opt/dq_opt_stat.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp @@ -161,8 +161,8 @@ void InferStatisticsForMapJoin(const TExprNode::TPtr& input, TTypeAnnotationCont rightJoinKeys.push_back(RemoveAliases(join.RightKeysColumnNames().Item(i).StringValue())); } - typeCtx->SetStats(join.Raw(), std::make_shared( - ComputeJoinStats(*leftStats, *rightStats, leftJoinKeys, rightJoinKeys, EJoinAlgoType::MapJoin, ctx))); + typeCtx->SetStats(join.Raw(), std::make_shared( + ctx.ComputeJoinStats(*leftStats, *rightStats, leftJoinKeys, rightJoinKeys, EJoinAlgoType::MapJoin))); } /** @@ -194,7 +194,7 @@ void InferStatisticsForGraceJoin(const TExprNode::TPtr& input, TTypeAnnotationCo } typeCtx->SetStats(join.Raw(), std::make_shared( - ComputeJoinStats(*leftStats, *rightStats, leftJoinKeys, rightJoinKeys, EJoinAlgoType::GraceJoin, ctx))); + ctx.ComputeJoinStats(*leftStats, *rightStats, leftJoinKeys, rightJoinKeys, EJoinAlgoType::GraceJoin))); } /** @@ -241,7 +241,7 @@ void InferStatisticsForFlatMap(const TExprNode::TPtr& input, TTypeAnnotationCont auto outputStats = TOptimizerStatistics(inputStats->Type, inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->ByteSize * selectivity, inputStats->Cost, inputStats->KeyColumns ); outputStats.Selectivity *= selectivity; - typeCtx->SetStats(input.Get(), std::make_shared(outputStats) ); + typeCtx->SetStats(input.Get(), std::make_shared(std::move(outputStats)) ); } else if (flatmap.Lambda().Body().Maybe() || flatmap.Lambda().Body().Maybe().Input().Maybe() || @@ -283,7 +283,7 @@ void InferStatisticsForFilter(const TExprNode::TPtr& input, TTypeAnnotationConte auto outputStats = TOptimizerStatistics(inputStats->Type, inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->ByteSize * selectivity, inputStats->Cost, inputStats->KeyColumns); outputStats.Selectivity *= selectivity; - typeCtx->SetStats(input.Get(), std::make_shared(outputStats) ); + typeCtx->SetStats(input.Get(), std::make_shared(std::move(outputStats)) ); } /** @@ -365,8 +365,8 @@ void InferStatisticsForAsList(const TExprNode::TPtr& input, TTypeAnnotationConte if (input->ChildrenSize() && input->Child(0)->IsCallable("AsStruct")) { nAttrs = input->Child(0)->ChildrenSize(); } - auto outputStats = TOptimizerStatistics(EStatisticsType::BaseTable, nRows, nAttrs, nRows*nAttrs, 0.0); - typeCtx->SetStats(input.Get(), std::make_shared(outputStats)); + typeCtx->SetStats(input.Get(), std::make_shared( + EStatisticsType::BaseTable, nRows, nAttrs, nRows*nAttrs, 0.0)); } /*** diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.h b/ydb/library/yql/dq/opt/dq_opt_stat.h index 31013988c3f5..1505f383d933 100644 --- a/ydb/library/yql/dq/opt/dq_opt_stat.h +++ b/ydb/library/yql/dq/opt/dq_opt_stat.h @@ -1,3 +1,5 @@ +#pragma once + #include "dq_opt.h" #include diff --git a/ydb/library/yql/providers/dq/opt/logical_optimize.cpp b/ydb/library/yql/providers/dq/opt/logical_optimize.cpp index 18421c5df911..2bbaa480b5b2 100644 --- a/ydb/library/yql/providers/dq/opt/logical_optimize.cpp +++ b/ydb/library/yql/providers/dq/opt/logical_optimize.cpp @@ -141,7 +141,7 @@ class TDqsLogicalOptProposalTransformer : public TOptimizeTransformerBase { }; std::unique_ptr opt; - TDummyProviderContext pctx; + TBaseProviderContext pctx; switch (TypesCtx.CostBasedOptimizer) { case ECostBasedOptimizerType::Native: diff --git a/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp b/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp index 886d5787b45f..798cc3847630 100644 --- a/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp +++ b/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp @@ -50,7 +50,7 @@ class TDqDataProviderSource: public TDataProviderBase { , ExecTransformer_([this, execTransformerFactory] () { return THolder(execTransformerFactory(State_)); }) , TypeAnnotationTransformer_([] () { return CreateDqsDataSourceTypeAnnotationTransformer(); }) , ConstraintsTransformer_([] () { return CreateDqDataSourceConstraintTransformer(); }) - , StatisticsTransformer_([this]() { return CreateDqsStatisticsTransformer(State_, TDummyProviderContext::instance()); }) + , StatisticsTransformer_([this]() { return CreateDqsStatisticsTransformer(State_, TBaseProviderContext::Instance()); }) { } TStringBuf GetName() const override { diff --git a/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp b/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp index 36b3e4603155..9c9559927c50 100644 --- a/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp +++ b/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp @@ -36,7 +36,7 @@ class TDqsStatisticsTransformer : public NDq::TDqStatisticsTransformerBase { if (auto dqIntegration = (*datasource)->GetDqIntegration()) { auto stat = dqIntegration->ReadStatistics(node, ctx); if (stat) { - State->TypeCtx->SetStats(input.Get(), std::move(std::make_shared(*stat))); + State->TypeCtx->SetStats(input.Get(), std::make_shared(std::move(*stat))); } } } else { diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp index 94e93ee2b1f5..40bdbb464d04 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_dq_integration.cpp @@ -200,7 +200,7 @@ class TS3DqIntegration: public TDqIntegrationBase { } rows = size / 1024; // magic estimate - return TOptimizerStatistics(rows, cols, size); + return TOptimizerStatistics(BaseTable, rows, cols, size); } else { return Nothing(); } diff --git a/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp b/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp index 047920cc71b2..b225d0ed62dd 100644 --- a/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp +++ b/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp @@ -81,7 +81,7 @@ Y_UNIT_TEST(NonReordable) { joinConditions.insert({NDq::TJoinColumn{"a", "b"}, NDq::TJoinColumn{"a","c"}}); auto root = std::make_shared( left, right, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::GraceJoin, true); - TDummyProviderContext optCtx; + TBaseProviderContext optCtx; std::unique_ptr opt = std::unique_ptr(NDq::MakeNativeOptimizerNew(optCtx, 1024)); auto result = opt->JoinSearch(root); diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_dq_integration.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_dq_integration.cpp index 49f559b65182..56142de6b7c1 100644 --- a/ydb/library/yql/providers/yt/provider/yql_yt_dq_integration.cpp +++ b/ydb/library/yql/providers/yt/provider/yql_yt_dq_integration.cpp @@ -413,7 +413,7 @@ class TYtDqIntegration: public TDqIntegrationBase { TMaybe ReadStatistics(const TExprNode::TPtr& read, TExprContext& ctx) override { Y_UNUSED(ctx); - TOptimizerStatistics stat(0, 0); + TOptimizerStatistics stat; if (auto maybeRead = TMaybeNode(read)) { auto input = maybeRead.Cast().Input(); for (auto section: input) { diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp index b4caccc4a9ae..53e8f50e46f5 100644 --- a/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp +++ b/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp @@ -46,7 +46,7 @@ void DebugPrint(TYtJoinNode::TPtr node, TExprContext& ctx, int level) { } } -class TYtProviderContext: public TDummyProviderContext { +class TYtProviderContext: public TBaseProviderContext { public: TYtProviderContext() { }