From b38cf1e4093c12732fb775e4fb6f3180f42ad721 Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Wed, 17 Apr 2024 16:23:06 +0300 Subject: [PATCH 1/6] Refactoring to perform cardinality estimation specifically for YT. --- .../yql/core/cbo/cbo_optimizer_new.cpp | 119 +++++++++++++++ ydb/library/yql/core/cbo/cbo_optimizer_new.h | 136 +++++++++--------- ydb/library/yql/core/yql_cost_function.cpp | 103 +------------ ydb/library/yql/core/yql_cost_function.h | 40 +++--- .../yql/dq/opt/dq_opt_join_cost_based.cpp | 3 +- ydb/library/yql/dq/opt/dq_opt_stat.cpp | 4 +- ydb/library/yql/dq/opt/dq_opt_stat.h | 2 + 7 files changed, 216 insertions(+), 191 deletions(-) diff --git a/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp b/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp index 9fbfda733e2f..d317ecafad77 100644 --- a/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp +++ b/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp @@ -109,4 +109,123 @@ void TJoinOptimizerNode::Print(std::stringstream& stream, int ntabs) { RightArg->Print(stream, ntabs+1); } +bool IsPKJoin(const TOptimizerStatistics& stats, const TVector& joinKeys) { + if (stats.KeyColumns.size()==0) { + return false; + } + + for(size_t i=0; i& left, + const std::shared_ptr& right, + const std::set>& joinConditions, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, + EJoinAlgoType joinAlgo) { + + Y_UNUSED(left); + Y_UNUSED(right); + Y_UNUSED(joinConditions); + Y_UNUSED(leftJoinKeys); + Y_UNUSED(rightJoinKeys); + + return joinAlgo == EJoinAlgoType::MapJoin; +} + +double TDummyProviderContext::ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgo) const { + Y_UNUSED(outputByteSize); + Y_UNUSED(joinAlgo); + return leftStats.Nrows + 2.0 * rightStats.Nrows + outputRows; +} + +/** + * Compute the cost and output cardinality of a join + * + * Currently a very basic computation targeted at GraceJoin + * + * 
The build is on the right side, so we make the build side a bit more expensive than the probe +*/ +TOptimizerStatistics TDummyProviderContext::ComputeJoinStats( + const TOptimizerStatistics& leftStats, + const TOptimizerStatistics& rightStats, + const std::set>& joinConditions, + EJoinAlgoType joinAlgo) const +{ + TVector leftJoinKeys; + TVector rightJoinKeys; + + for (auto c : joinConditions) { + leftJoinKeys.emplace_back(c.first.AttributeName); + rightJoinKeys.emplace_back(c.second.AttributeName); + } + + return ComputeJoinStats(leftStats, rightStats, leftJoinKeys, rightJoinKeys, joinAlgo); +} + +TOptimizerStatistics TDummyProviderContext::ComputeJoinStats( + const TOptimizerStatistics& leftStats, + const TOptimizerStatistics& rightStats, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, + EJoinAlgoType joinAlgo) const +{ + double newCard; + EStatisticsType outputType; + bool leftKeyColumns = false; + bool rightKeyColumns = false; + double selectivity = 1.0; + + + if (IsPKJoin(rightStats,rightJoinKeys)) { + newCard = leftStats.Nrows * rightStats.Selectivity; + selectivity = leftStats.Selectivity * rightStats.Selectivity; + leftKeyColumns = true; + if (leftStats.Type == EStatisticsType::BaseTable){ + outputType = EStatisticsType::FilteredFactTable; + } else { + outputType = leftStats.Type; + } + } + else if (IsPKJoin(leftStats,leftJoinKeys)) { + newCard = rightStats.Nrows; + newCard = rightStats.Nrows * leftStats.Selectivity; + selectivity = leftStats.Selectivity * rightStats.Selectivity; + + rightKeyColumns = true; + if (rightStats.Type == EStatisticsType::BaseTable){ + outputType = EStatisticsType::FilteredFactTable; + } else { + outputType = rightStats.Type; + } + } + else { + newCard = 0.2 * leftStats.Nrows * rightStats.Nrows; + outputType = EStatisticsType::ManyManyJoin; + } + + int newNCols = leftStats.Ncols + rightStats.Ncols; + double newByteSize = leftStats.Nrows ? (leftStats.ByteSize / leftStats.Nrows) * newCard : 0 + + rightStats.Nrows ? 
(rightStats.ByteSize / rightStats.Nrows) * newCard : 0; + + double cost = ComputeJoinCost(leftStats, rightStats, newCard, newByteSize, joinAlgo) + + leftStats.Cost + rightStats.Cost; + + auto result = TOptimizerStatistics(outputType, newCard, newNCols, newByteSize, cost, + leftKeyColumns ? leftStats.KeyColumns : ( rightKeyColumns ? rightStats.KeyColumns : TOptimizerStatistics::EmptyColumns)); + result.Selectivity = selectivity; + return result; +} + +const TDummyProviderContext& TDummyProviderContext::instance() { + static TDummyProviderContext staticContext; + return staticContext; +} + + } // namespace NYql diff --git a/ydb/library/yql/core/cbo/cbo_optimizer_new.h b/ydb/library/yql/core/cbo/cbo_optimizer_new.h index 63450edcc8fe..b224577964d9 100644 --- a/ydb/library/yql/core/cbo/cbo_optimizer_new.h +++ b/ydb/library/yql/core/cbo/cbo_optimizer_new.h @@ -10,13 +10,12 @@ #include #include - namespace NYql { /** * OptimizerNodes are the internal representations of operators inside the * Cost-based optimizer. Currently we only support RelOptimizerNode - a node that - * is an input relation to the equi-join, and JoinOptimizerNode - an inner join + * is an input relation to the equi-join, and JoinOptimizerNode - an inner join * that connects two sets of relations. 
*/ enum EOptimizerNodeKind: ui32 @@ -35,49 +34,13 @@ struct IBaseOptimizerNode { std::shared_ptr Stats; IBaseOptimizerNode(EOptimizerNodeKind k) : Kind(k) {} - IBaseOptimizerNode(EOptimizerNodeKind k, std::shared_ptr s) : + IBaseOptimizerNode(EOptimizerNodeKind k, std::shared_ptr s) : Kind(k), Stats(s) {} virtual TVector Labels()=0; virtual void Print(std::stringstream& stream, int ntabs=0)=0; }; -/** - * RelOptimizerNode adds a label to base class - * This is the label assinged to the input by equi-Join -*/ -struct TRelOptimizerNode : public IBaseOptimizerNode { - TString Label; - - // Temporary solution to check if a LookupJoin is possible in KQP - //void* Expr; - - TRelOptimizerNode(TString label, std::shared_ptr stats) : - IBaseOptimizerNode(RelNodeType, stats), Label(label) { } - //TRelOptimizerNode(TString label, std::shared_ptr stats, const TExprNode::TPtr expr) : - // IBaseOptimizerNode(RelNodeType, stats), Label(label), Expr(expr) { } - virtual ~TRelOptimizerNode() {} - - virtual TVector Labels(); - virtual void Print(std::stringstream& stream, int ntabs=0); -}; - -enum EJoinKind: ui32 -{ - InnerJoin, - LeftJoin, - RightJoin, - OuterJoin, - LeftOnly, - RightOnly, - LeftSemi, - RightSemi, - Cross, - Exclusion -}; - -EJoinKind ConvertToJoinKind(const TString& joinString); -TString ConvertToJoinString(const EJoinKind kind); /** * This is a temporary structure for KQP provider @@ -89,8 +52,20 @@ struct IProviderContext { virtual double ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgol) const = 0; - virtual bool IsJoinApplicable(const std::shared_ptr& left, - const std::shared_ptr& right, + virtual TOptimizerStatistics ComputeJoinStats( + const TOptimizerStatistics& leftStats, + const TOptimizerStatistics& rightStats, + const std::set>& joinConditions, EJoinAlgoType joinAlgo) const = 0; + + virtual TOptimizerStatistics ComputeJoinStats( + 
const TOptimizerStatistics& leftStats, + const TOptimizerStatistics& rightStats, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, + EJoinAlgoType joinAlgo) const = 0; + + virtual bool IsJoinApplicable(const std::shared_ptr& left, + const std::shared_ptr& right, const std::set>& joinConditions, const TVector& leftJoinKeys, const TVector& rightJoinKeys, @@ -105,35 +80,68 @@ struct IProviderContext { struct TDummyProviderContext : public IProviderContext { TDummyProviderContext() {} - double ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgo) const override { - Y_UNUSED(outputByteSize); - Y_UNUSED(joinAlgo); - return leftStats.Nrows + 2.0 * rightStats.Nrows + outputRows; - } + double ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgo) const override; - bool IsJoinApplicable(const std::shared_ptr& left, - const std::shared_ptr& right, + bool IsJoinApplicable(const std::shared_ptr& left, + const std::shared_ptr& right, const std::set>& joinConditions, const TVector& leftJoinKeys, const TVector& rightJoinKeys, - EJoinAlgoType joinAlgo) override { + EJoinAlgoType joinAlgo) override; + + virtual TOptimizerStatistics ComputeJoinStats( + const TOptimizerStatistics& leftStats, + const TOptimizerStatistics& rightStats, + const TVector& leftJoinKeys, + const TVector& rightJoinKeys, + EJoinAlgoType joinAlgo) const override; + + virtual TOptimizerStatistics ComputeJoinStats( + const TOptimizerStatistics& leftStats, + const TOptimizerStatistics& rightStats, + const std::set>& joinConditions, + EJoinAlgoType joinAlgo) const override; - Y_UNUSED(left); - Y_UNUSED(right); - Y_UNUSED(joinConditions); - Y_UNUSED(leftJoinKeys); - Y_UNUSED(rightJoinKeys); + static const TDummyProviderContext& instance(); +}; - return joinAlgo == 
EJoinAlgoType::MapJoin; - } +/** + * RelOptimizerNode adds a label to base class + * This is the label assigned to the input by equi-Join +*/ +struct TRelOptimizerNode : public IBaseOptimizerNode { + TString Label; + + // Temporary solution to check if a LookupJoin is possible in KQP + //void* Expr; + + TRelOptimizerNode(TString label, std::shared_ptr stats) : + IBaseOptimizerNode(RelNodeType, stats), Label(label) { } + //TRelOptimizerNode(TString label, std::shared_ptr stats, const TExprNode::TPtr expr) : + // IBaseOptimizerNode(RelNodeType, stats), Label(label), Expr(expr) { } + virtual ~TRelOptimizerNode() {} - static const TDummyProviderContext& instance() { - static TDummyProviderContext staticContext; - return staticContext; - } + virtual TVector Labels(); + virtual void Print(std::stringstream& stream, int ntabs=0); +}; +enum EJoinKind: ui32 +{ + InnerJoin, + LeftJoin, + RightJoin, + OuterJoin, + LeftOnly, + RightOnly, + LeftSemi, + RightSemi, + Cross, + Exclusion }; +EJoinKind ConvertToJoinKind(const TString& joinString); +TString ConvertToJoinString(const EJoinKind kind); + /** * JoinOptimizerNode records the left and right arguments of the join * as well as the set of join conditions. 
@@ -150,11 +158,11 @@ struct TJoinOptimizerNode : public IBaseOptimizerNode { EJoinAlgoType JoinAlgo; bool IsReorderable; - TJoinOptimizerNode(const std::shared_ptr& left, - const std::shared_ptr& right, + TJoinOptimizerNode(const std::shared_ptr& left, + const std::shared_ptr& right, const std::set>& joinConditions, - const EJoinKind joinType, - const EJoinAlgoType joinAlgo, + const EJoinKind joinType, + const EJoinAlgoType joinAlgo, bool nonReorderable=false); virtual ~TJoinOptimizerNode() {} virtual TVector Labels(); diff --git a/ydb/library/yql/core/yql_cost_function.cpp b/ydb/library/yql/core/yql_cost_function.cpp index 078acd70bfb6..105e792ea44a 100644 --- a/ydb/library/yql/core/yql_cost_function.cpp +++ b/ydb/library/yql/core/yql_cost_function.cpp @@ -1,32 +1,7 @@ -#include "yql_cost_function.h" - -#include - -using namespace NYql; - -namespace { - -THashMap JoinAlgoMap = { - {"Undefined",EJoinAlgoType::Undefined}, - {"LookupJoin",EJoinAlgoType::LookupJoin}, - {"MapJoin",EJoinAlgoType::MapJoin}, - {"GraceJoin",EJoinAlgoType::GraceJoin}, - {"StreamLookupJoin",EJoinAlgoType::StreamLookupJoin}}; -bool IsPKJoin(const TOptimizerStatistics& stats, const TVector& joinKeys) { - if (stats.KeyColumns.size()==0) { - return false; - } - - for(size_t i=0; i& leftJoinKeys, const TVector& rightJoinKeys, EJoinAlgoType joinAlgo, const IProviderContext& ctx) { - - double newCard; - EStatisticsType outputType; - bool leftKeyColumns = false; - bool rightKeyColumns = false; - double selectivity = 1.0; - - - if (IsPKJoin(rightStats,rightJoinKeys)) { - newCard = leftStats.Nrows * rightStats.Selectivity; - selectivity = leftStats.Selectivity * rightStats.Selectivity; - leftKeyColumns = true; - if (leftStats.Type == EStatisticsType::BaseTable){ - outputType = EStatisticsType::FilteredFactTable; - } else { - outputType = leftStats.Type; - } - } - else if (IsPKJoin(leftStats,leftJoinKeys)) { - newCard = rightStats.Nrows; - newCard = rightStats.Nrows * leftStats.Selectivity; - 
selectivity = leftStats.Selectivity * rightStats.Selectivity; - - rightKeyColumns = true; - if (rightStats.Type == EStatisticsType::BaseTable){ - outputType = EStatisticsType::FilteredFactTable; - } else { - outputType = rightStats.Type; - } - } - else { - newCard = 0.2 * leftStats.Nrows * rightStats.Nrows; - outputType = EStatisticsType::ManyManyJoin; - } - - int newNCols = leftStats.Ncols + rightStats.Ncols; - double newByteSize = leftStats.Nrows ? (leftStats.ByteSize / leftStats.Nrows) * newCard : 0 + - rightStats.Nrows ? (rightStats.ByteSize / rightStats.Nrows) * newCard : 0; - - double cost = ctx.ComputeJoinCost(leftStats, rightStats, newCard, newByteSize, joinAlgo) - + leftStats.Cost + rightStats.Cost; - - auto result = TOptimizerStatistics(outputType, newCard, newNCols, newByteSize, cost, - leftKeyColumns ? leftStats.KeyColumns : ( rightKeyColumns ? rightStats.KeyColumns : TOptimizerStatistics::EmptyColumns)); - result.Selectivity = selectivity; - return result; -} - - -TOptimizerStatistics NYql::ComputeJoinStats(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, - const std::set>& joinConditions, EJoinAlgoType joinAlgo, const IProviderContext& ctx) { - - TVector leftJoinKeys; - TVector rightJoinKeys; - - for (auto c : joinConditions) { - leftJoinKeys.emplace_back(c.first.AttributeName); - rightJoinKeys.emplace_back(c.second.AttributeName); - } - - return ComputeJoinStats(leftStats, rightStats, leftJoinKeys, rightJoinKeys, joinAlgo, ctx); -} - +} // namespace NYql diff --git a/ydb/library/yql/core/yql_cost_function.h b/ydb/library/yql/core/yql_cost_function.h index dcacef50a82a..030774f3d303 100644 --- a/ydb/library/yql/core/yql_cost_function.h +++ b/ydb/library/yql/core/yql_cost_function.h @@ -16,16 +16,28 @@ namespace NYql { struct IProviderContext; -namespace NDq { +enum class EJoinAlgoType { + Undefined, + LookupJoin, + MapJoin, + GraceJoin, + StreamLookupJoin //Right part can be updated during an operation. 
Used mainly for joining streams with lookup tables. Currently implemented in Dq by LookupInputTransform +}; + +//StreamLookupJoin is not a subject for CBO and is not included here +static constexpr auto AllJoinAlgos = { EJoinAlgoType::MapJoin, EJoinAlgoType::GraceJoin, EJoinAlgoType::LookupJoin }; + +namespace NDq { + /** - * Join column is a struct that records the relation label and + * Join column is a struct that records the relation label and * attribute name, used in join conditions */ struct TJoinColumn { TString RelName; TString AttributeName; - TJoinColumn(TString relName, TString attributeName) : RelName(relName), + TJoinColumn(TString relName, TString attributeName) : RelName(relName), AttributeName(attributeName) {} bool operator == (const TJoinColumn& other) const { @@ -43,26 +55,8 @@ struct TJoinColumn { bool operator < (const TJoinColumn& c1, const TJoinColumn& c2); -} - -enum class EJoinAlgoType { - Undefined, - LookupJoin, - MapJoin, - GraceJoin, - StreamLookupJoin //Right part can be updated during an operation. Used mainly for joining streams with lookup tables. 
Currently impplemented in Dq by LookupInputTransform -}; +} // namespace NDq TString ConvertToJoinAlgoString(EJoinAlgoType joinAlgo); -//StreamLookupJoin is not a subject for CBO and not not included here -static constexpr auto AllJoinAlgos = { EJoinAlgoType::MapJoin, EJoinAlgoType::GraceJoin, EJoinAlgoType::LookupJoin }; - -TOptimizerStatistics ComputeJoinStats(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, - const std::set>& joinConditions, EJoinAlgoType joinAlgo, const IProviderContext& ctx); - -TOptimizerStatistics ComputeJoinStats(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, - const TVector& leftJoinKeys, const TVector& rightJoinKeys, EJoinAlgoType joinAlgo, const IProviderContext& ctx); - -} - +} // namespace NYql diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp index 809c88611fb9..54354d27bc10 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp @@ -197,13 +197,12 @@ void ComputeStatistics(const std::shared_ptr& join, IProvide ComputeStatistics(static_pointer_cast(join->RightArg), ctx); } join->Stats = std::make_shared( - ComputeJoinStats( + ctx.ComputeJoinStats( *join->LeftArg->Stats, *join->RightArg->Stats, join->LeftJoinKeys, join->RightJoinKeys, EJoinAlgoType::GraceJoin, - ctx ) ); } diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp index f5f5e185bb7d..21dde19f8a28 100644 --- a/ydb/library/yql/dq/opt/dq_opt_stat.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp @@ -162,7 +162,7 @@ void InferStatisticsForMapJoin(const TExprNode::TPtr& input, TTypeAnnotationCont } typeCtx->SetStats(join.Raw(), std::make_shared( - ComputeJoinStats(*leftStats, *rightStats, leftJoinKeys, rightJoinKeys, EJoinAlgoType::MapJoin, ctx))); + ctx.ComputeJoinStats(*leftStats, *rightStats, leftJoinKeys, rightJoinKeys, EJoinAlgoType::MapJoin))); } 
/** @@ -194,7 +194,7 @@ void InferStatisticsForGraceJoin(const TExprNode::TPtr& input, TTypeAnnotationCo } typeCtx->SetStats(join.Raw(), std::make_shared( - ComputeJoinStats(*leftStats, *rightStats, leftJoinKeys, rightJoinKeys, EJoinAlgoType::GraceJoin, ctx))); + ctx.ComputeJoinStats(*leftStats, *rightStats, leftJoinKeys, rightJoinKeys, EJoinAlgoType::GraceJoin))); } /** diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.h b/ydb/library/yql/dq/opt/dq_opt_stat.h index 31013988c3f5..1505f383d933 100644 --- a/ydb/library/yql/dq/opt/dq_opt_stat.h +++ b/ydb/library/yql/dq/opt/dq_opt_stat.h @@ -1,3 +1,5 @@ +#pragma once + #include "dq_opt.h" #include From 3fa1fe2fd333bf7acdeb3256d209af40bee18ea2 Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Thu, 18 Apr 2024 17:16:37 +0300 Subject: [PATCH 2/6] make it compile --- ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h | 4 ++-- ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp | 2 +- ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h b/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h index da042c374058..370ca0804d74 100644 --- a/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h +++ b/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h @@ -405,7 +405,7 @@ template std::shared_ptr TDPHypS for (auto joinAlgo : AllJoinAlgos) { if (ctx.IsJoinApplicable(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinAlgo)){ - auto cost = ComputeJoinStats(*left->Stats, *right->Stats, leftJoinKeys, rightJoinKeys, joinAlgo, ctx).Cost; + auto cost = ctx.ComputeJoinStats(*left->Stats, *right->Stats, leftJoinKeys, rightJoinKeys, joinAlgo).Cost; if (cost < bestCost) { bestCost = cost; bestAlgo = joinAlgo; @@ -415,7 +415,7 @@ template std::shared_ptr TDPHypS if (isCommutative) { if (ctx.IsJoinApplicable(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinAlgo)){ - auto cost = ComputeJoinStats(*right->Stats, *left->Stats, rightJoinKeys, 
leftJoinKeys, joinAlgo, ctx).Cost; + auto cost = ctx.ComputeJoinStats(*right->Stats, *left->Stats, rightJoinKeys, leftJoinKeys, joinAlgo).Cost; if (cost < bestCost) { bestCost = cost; bestAlgo = joinAlgo; diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp index 54354d27bc10..fd5ed9f2c238 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp @@ -202,7 +202,7 @@ void ComputeStatistics(const std::shared_ptr& join, IProvide *join->RightArg->Stats, join->LeftJoinKeys, join->RightJoinKeys, - EJoinAlgoType::GraceJoin, + EJoinAlgoType::GraceJoin ) ); } diff --git a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp index 1c4db89bb627..181ba5db080c 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp @@ -13,7 +13,7 @@ std::shared_ptr MakeJoinInternal( IProviderContext& ctx) { auto res = std::make_shared(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, joinAlgo); - res->Stats = std::make_shared(ComputeJoinStats(*left->Stats, *right->Stats, leftJoinKeys, rightJoinKeys, joinAlgo, ctx)); + res->Stats = std::make_shared(ctx.ComputeJoinStats(*left->Stats, *right->Stats, leftJoinKeys, rightJoinKeys, joinAlgo)); return res; } From 27ad45db503dc8f5d0829cf8eef59bf2bcf90bba Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Thu, 18 Apr 2024 19:34:18 +0300 Subject: [PATCH 3/6] Add specific statistic --- ydb/library/yql/core/yql_statistics.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ydb/library/yql/core/yql_statistics.h b/ydb/library/yql/core/yql_statistics.h index e92473fb2791..5d2c6a1c91df 100644 --- a/ydb/library/yql/core/yql_statistics.h +++ b/ydb/library/yql/core/yql_statistics.h @@ -13,6 +13,12 @@ enum EStatisticsType : ui32 { ManyManyJoin }; +// Providers may subclass this struct to associate specific 
statistics, useful to +// derive stats for higher-level operators in the plan. +struct IProviderStatistics { + virtual ~IProviderStatistics() {} +}; + /** * Optimizer Statistics struct records per-table and per-column statistics * for the current operator in the plan. Currently, only Nrows and Ncols are @@ -28,6 +34,7 @@ struct TOptimizerStatistics { double Cost = 0; double Selectivity = 1.0; const TVector& KeyColumns; + const IProviderStatistics* Specific = nullptr; TOptimizerStatistics() : KeyColumns(EmptyColumns) {} TOptimizerStatistics(double nrows, int ncols): Nrows(nrows), Ncols(ncols), KeyColumns(EmptyColumns) {} @@ -36,6 +43,8 @@ struct TOptimizerStatistics { TOptimizerStatistics(EStatisticsType type, double nrows, int ncols, double byteSize, double cost): Type(type), Nrows(nrows), Ncols(ncols), ByteSize(byteSize), Cost(cost), KeyColumns(EmptyColumns) {} TOptimizerStatistics(EStatisticsType type, double nrows, int ncols, double cost, const TVector& keyColumns): Type(type), Nrows(nrows), Ncols(ncols), Cost(cost), KeyColumns(keyColumns) {} TOptimizerStatistics(EStatisticsType type, double nrows, int ncols, double byteSize, double cost, const TVector& keyColumns): Type(type), Nrows(nrows), Ncols(ncols), ByteSize(byteSize), Cost(cost), KeyColumns(keyColumns) {} + TOptimizerStatistics(EStatisticsType type, double nrows, int ncols, double byteSize, double cost, const TVector& keyColumns, IProviderStatistics* specific) + : Type(type), Nrows(nrows), Ncols(ncols), ByteSize(byteSize), Cost(cost), KeyColumns(keyColumns), Specific(specific) {} TOptimizerStatistics& operator+=(const TOptimizerStatistics& other); bool Empty() const; From 184a8b737c5a72b2ee2ee1dad5c7f8b313c76294 Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Fri, 19 Apr 2024 16:25:41 +0300 Subject: [PATCH 4/6] use move ctor --- ydb/library/yql/core/yql_statistics.h | 3 ++- ydb/library/yql/dq/opt/dq_opt_stat.cpp | 12 ++++++------ .../yql/providers/dq/provider/yql_dq_statistics.cpp | 2 +- 3 files 
changed, 9 insertions(+), 8 deletions(-) diff --git a/ydb/library/yql/core/yql_statistics.h b/ydb/library/yql/core/yql_statistics.h index 5d2c6a1c91df..0d21e827adec 100644 --- a/ydb/library/yql/core/yql_statistics.h +++ b/ydb/library/yql/core/yql_statistics.h @@ -34,8 +34,9 @@ struct TOptimizerStatistics { double Cost = 0; double Selectivity = 1.0; const TVector& KeyColumns; - const IProviderStatistics* Specific = nullptr; + std::unique_ptr Specific; + TOptimizerStatistics(TOptimizerStatistics&&) = default; TOptimizerStatistics() : KeyColumns(EmptyColumns) {} TOptimizerStatistics(double nrows, int ncols): Nrows(nrows), Ncols(ncols), KeyColumns(EmptyColumns) {} TOptimizerStatistics(double nrows, int ncols, double cost): Nrows(nrows), Ncols(ncols), Cost(cost), KeyColumns(EmptyColumns) {} diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp index 21dde19f8a28..6ffc0ef4e9e4 100644 --- a/ydb/library/yql/dq/opt/dq_opt_stat.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp @@ -161,8 +161,8 @@ void InferStatisticsForMapJoin(const TExprNode::TPtr& input, TTypeAnnotationCont rightJoinKeys.push_back(RemoveAliases(join.RightKeysColumnNames().Item(i).StringValue())); } - typeCtx->SetStats(join.Raw(), std::make_shared( - ctx.ComputeJoinStats(*leftStats, *rightStats, leftJoinKeys, rightJoinKeys, EJoinAlgoType::MapJoin))); + typeCtx->SetStats(join.Raw(), std::make_shared( + ctx.ComputeJoinStats(*leftStats, *rightStats, leftJoinKeys, rightJoinKeys, EJoinAlgoType::MapJoin))); } /** @@ -241,7 +241,7 @@ void InferStatisticsForFlatMap(const TExprNode::TPtr& input, TTypeAnnotationCont auto outputStats = TOptimizerStatistics(inputStats->Type, inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->ByteSize * selectivity, inputStats->Cost, inputStats->KeyColumns ); outputStats.Selectivity *= selectivity; - typeCtx->SetStats(input.Get(), std::make_shared(outputStats) ); + typeCtx->SetStats(input.Get(), std::make_shared(std::move(outputStats)) 
); } else if (flatmap.Lambda().Body().Maybe() || flatmap.Lambda().Body().Maybe().Input().Maybe() || @@ -283,7 +283,7 @@ void InferStatisticsForFilter(const TExprNode::TPtr& input, TTypeAnnotationConte auto outputStats = TOptimizerStatistics(inputStats->Type, inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->ByteSize * selectivity, inputStats->Cost, inputStats->KeyColumns); outputStats.Selectivity *= selectivity; - typeCtx->SetStats(input.Get(), std::make_shared(outputStats) ); + typeCtx->SetStats(input.Get(), std::make_shared(std::move(outputStats)) ); } /** @@ -365,8 +365,8 @@ void InferStatisticsForAsList(const TExprNode::TPtr& input, TTypeAnnotationConte if (input->ChildrenSize() && input->Child(0)->IsCallable("AsStruct")) { nAttrs = input->Child(0)->ChildrenSize(); } - auto outputStats = TOptimizerStatistics(EStatisticsType::BaseTable, nRows, nAttrs, nRows*nAttrs, 0.0); - typeCtx->SetStats(input.Get(), std::make_shared(outputStats)); + typeCtx->SetStats(input.Get(), std::make_shared( + EStatisticsType::BaseTable, nRows, nAttrs, nRows*nAttrs, 0.0)); } /*** diff --git a/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp b/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp index 36b3e4603155..9c9559927c50 100644 --- a/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp +++ b/ydb/library/yql/providers/dq/provider/yql_dq_statistics.cpp @@ -36,7 +36,7 @@ class TDqsStatisticsTransformer : public NDq::TDqStatisticsTransformerBase { if (auto dqIntegration = (*datasource)->GetDqIntegration()) { auto stat = dqIntegration->ReadStatistics(node, ctx); if (stat) { - State->TypeCtx->SetStats(input.Get(), std::move(std::make_shared(*stat))); + State->TypeCtx->SetStats(input.Get(), std::make_shared(std::move(*stat))); } } } else { From a0ef79232925fd3f749ac5a3766e080b857df87c Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Fri, 19 Apr 2024 19:06:26 +0300 Subject: [PATCH 5/6] TKqpProviderContext sublasses from NYql::TDummyProviderContext 
--- ydb/core/kqp/opt/logical/kqp_opt_cbo.h | 4 ++-- ydb/library/yql/core/cbo/cbo_optimizer_new.h | 1 - ydb/library/yql/core/yql_cost_function.cpp | 13 ++++++++++++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/ydb/core/kqp/opt/logical/kqp_opt_cbo.h b/ydb/core/kqp/opt/logical/kqp_opt_cbo.h index 88e81461acdb..53ea02c9b8fd 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_cbo.h +++ b/ydb/core/kqp/opt/logical/kqp_opt_cbo.h @@ -20,7 +20,7 @@ struct TKqpRelOptimizerNode : public NYql::TRelOptimizerNode { /** * KQP Specific cost function and join applicability cost function */ -struct TKqpProviderContext : public NYql::IProviderContext { +struct TKqpProviderContext : public NYql::TDummyProviderContext { TKqpProviderContext(const TKqpOptimizeContext& kqpCtx, const int optLevel) : KqpCtx(kqpCtx), OptLevel(optLevel) {} virtual bool IsJoinApplicable(const std::shared_ptr& left, @@ -35,4 +35,4 @@ struct TKqpProviderContext : public NYql::IProviderContext { int OptLevel; }; -} \ No newline at end of file +} diff --git a/ydb/library/yql/core/cbo/cbo_optimizer_new.h b/ydb/library/yql/core/cbo/cbo_optimizer_new.h index b224577964d9..737b41bced6f 100644 --- a/ydb/library/yql/core/cbo/cbo_optimizer_new.h +++ b/ydb/library/yql/core/cbo/cbo_optimizer_new.h @@ -70,7 +70,6 @@ struct IProviderContext { const TVector& leftJoinKeys, const TVector& rightJoinKeys, EJoinAlgoType joinAlgo) = 0; - }; /** diff --git a/ydb/library/yql/core/yql_cost_function.cpp b/ydb/library/yql/core/yql_cost_function.cpp index 105e792ea44a..7f0e8477d2b5 100644 --- a/ydb/library/yql/core/yql_cost_function.cpp +++ b/ydb/library/yql/core/yql_cost_function.cpp @@ -3,6 +3,17 @@ namespace NYql { +namespace { + +THashMap JoinAlgoMap = { + {"Undefined",EJoinAlgoType::Undefined}, + {"LookupJoin",EJoinAlgoType::LookupJoin}, + {"MapJoin",EJoinAlgoType::MapJoin}, + {"GraceJoin",EJoinAlgoType::GraceJoin}, + {"StreamLookupJoin",EJoinAlgoType::StreamLookupJoin}}; + +} // namespace + bool NDq::operator < (const 
NDq::TJoinColumn& c1, const NDq::TJoinColumn& c2) { if (c1.RelName < c2.RelName){ return true; @@ -12,7 +23,7 @@ bool NDq::operator < (const NDq::TJoinColumn& c1, const NDq::TJoinColumn& c2) { return false; } -TString NYql::ConvertToJoinAlgoString(EJoinAlgoType joinAlgo) { +TString ConvertToJoinAlgoString(EJoinAlgoType joinAlgo) { for (const auto& [k,v] : JoinAlgoMap) { if (v == joinAlgo) { return k; From 22d2ca8f0f1e81440b30cc7ed943eac1e55cbdf1 Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Fri, 19 Apr 2024 19:32:40 +0300 Subject: [PATCH 6/6] TDummyProviderContext -> TBaseProviderContext --- .../kqp/opt/kqp_statistics_transformer.cpp | 18 ++++++------------ ydb/core/kqp/opt/logical/kqp_opt_cbo.h | 2 +- ydb/library/yql/core/cbo/cbo_optimizer_new.cpp | 12 ++++++------ ydb/library/yql/core/cbo/cbo_optimizer_new.h | 8 ++++---- ydb/library/yql/dq/opt/dq_cbo_ut.cpp | 10 +++++----- .../yql/providers/dq/opt/logical_optimize.cpp | 2 +- .../dq/provider/yql_dq_datasource.cpp | 2 +- .../providers/yt/provider/ut/yql_yt_cbo_ut.cpp | 2 +- .../yt/provider/yql_yt_join_reorder.cpp | 2 +- 9 files changed, 26 insertions(+), 32 deletions(-) diff --git a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp index 248cddf6a5ea..ea72af3515e2 100644 --- a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp +++ b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp @@ -43,8 +43,7 @@ void InferStatisticsForReadTable(const TExprNode::TPtr& input, TTypeAnnotationCo YQL_CLOG(TRACE, CoreDq) << "Infer statistics for read table, nrows:" << nRows << ", nattrs: " << nAttrs; - auto outputStats = TOptimizerStatistics(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0.0, tableData.Metadata->KeyColumnNames); - typeCtx->SetStats(input.Get(), std::make_shared(outputStats)); + typeCtx->SetStats(input.Get(), std::make_shared(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0.0, tableData.Metadata->KeyColumnNames)); } /** @@ -63,8 +62,7 @@ void 
InferStatisticsForKqpTable(const TExprNode::TPtr& input, TTypeAnnotationCon int nAttrs = tableData.Metadata->Columns.size(); YQL_CLOG(TRACE, CoreDq) << "Infer statistics for table: " << path.Value() << ", nrows: " << nRows << ", nattrs: " << nAttrs << ", nKeyColumns: " << tableData.Metadata->KeyColumnNames.size(); - auto outputStats = TOptimizerStatistics(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0.0, tableData.Metadata->KeyColumnNames); - typeCtx->SetStats(input.Get(), std::make_shared(outputStats)); + typeCtx->SetStats(input.Get(), std::make_shared(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0.0, tableData.Metadata->KeyColumnNames)); } /** @@ -84,8 +82,7 @@ void InferStatisticsForSteamLookup(const TExprNode::TPtr& input, TTypeAnnotation auto inputStats = typeCtx->GetStats(streamLookup.Table().Raw()); auto byteSize = inputStats->ByteSize * (nAttrs / (double) inputStats->Ncols); - auto outputStats = TOptimizerStatistics(EStatisticsType::BaseTable, inputStats->Nrows, nAttrs, byteSize, 0, inputStats->KeyColumns); - typeCtx->SetStats(input.Get(), std::make_shared(outputStats)); + typeCtx->SetStats(input.Get(), std::make_shared(EStatisticsType::BaseTable, inputStats->Nrows, nAttrs, byteSize, 0, inputStats->KeyColumns)); } /** @@ -116,8 +113,7 @@ void InferStatisticsForLookupTable(const TExprNode::TPtr& input, TTypeAnnotation byteSize = 10; } - auto outputStats = TOptimizerStatistics(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0, inputStats->KeyColumns); - typeCtx->SetStats(input.Get(), std::make_shared(outputStats)); + typeCtx->SetStats(input.Get(), std::make_shared(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0, inputStats->KeyColumns)); } /** @@ -151,8 +147,7 @@ void InferStatisticsForRowsSourceSettings(const TExprNode::TPtr& input, TTypeAnn double cost = inputStats->Cost; double byteSize = inputStats->ByteSize * (nAttrs / (double)inputStats->Ncols); - auto outputStats = TOptimizerStatistics(EStatisticsType::BaseTable, nRows, 
nAttrs, byteSize, cost, inputStats->KeyColumns); - typeCtx->SetStats(input.Get(), std::make_shared(outputStats)); + typeCtx->SetStats(input.Get(), std::make_shared(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, cost, inputStats->KeyColumns)); } /** @@ -160,8 +155,7 @@ void InferStatisticsForRowsSourceSettings(const TExprNode::TPtr& input, TTypeAnn * Currently we just make up a number for cardinality (5) and set cost to 0 */ void InferStatisticsForIndexLookup(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) { - auto outputStats = TOptimizerStatistics(EStatisticsType::BaseTable, 5, 5, 20, 0.0); - typeCtx->SetStats(input.Get(), std::make_shared(outputStats)); + typeCtx->SetStats(input.Get(), std::make_shared(EStatisticsType::BaseTable, 5, 5, 20, 0.0)); } /*** diff --git a/ydb/core/kqp/opt/logical/kqp_opt_cbo.h b/ydb/core/kqp/opt/logical/kqp_opt_cbo.h index 53ea02c9b8fd..30a0498c55a2 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_cbo.h +++ b/ydb/core/kqp/opt/logical/kqp_opt_cbo.h @@ -20,7 +20,7 @@ struct TKqpRelOptimizerNode : public NYql::TRelOptimizerNode { /** * KQP Specific cost function and join applicability cost function */ -struct TKqpProviderContext : public NYql::TDummyProviderContext { +struct TKqpProviderContext : public NYql::TBaseProviderContext { TKqpProviderContext(const TKqpOptimizeContext& kqpCtx, const int optLevel) : KqpCtx(kqpCtx), OptLevel(optLevel) {} virtual bool IsJoinApplicable(const std::shared_ptr& left, diff --git a/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp b/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp index d317ecafad77..31cfffeaf9eb 100644 --- a/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp +++ b/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp @@ -122,7 +122,7 @@ bool IsPKJoin(const TOptimizerStatistics& stats, const TVector& joinKey return true; } -bool TDummyProviderContext::IsJoinApplicable(const std::shared_ptr& left, +bool TBaseProviderContext::IsJoinApplicable(const std::shared_ptr& left, const 
std::shared_ptr& right, const std::set>& joinConditions, const TVector& leftJoinKeys, @@ -138,7 +138,7 @@ bool TDummyProviderContext::IsJoinApplicable(const std::shared_ptr>& joinConditions, @@ -168,7 +168,7 @@ TOptimizerStatistics TDummyProviderContext::ComputeJoinStats( return ComputeJoinStats(leftStats, rightStats, leftJoinKeys, rightJoinKeys, joinAlgo); } -TOptimizerStatistics TDummyProviderContext::ComputeJoinStats( +TOptimizerStatistics TBaseProviderContext::ComputeJoinStats( const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const TVector& leftJoinKeys, @@ -222,8 +222,8 @@ TOptimizerStatistics TDummyProviderContext::ComputeJoinStats( return result; } -const TDummyProviderContext& TDummyProviderContext::instance() { - static TDummyProviderContext staticContext; +const TBaseProviderContext& TBaseProviderContext::instance() { + static TBaseProviderContext staticContext; return staticContext; } diff --git a/ydb/library/yql/core/cbo/cbo_optimizer_new.h b/ydb/library/yql/core/cbo/cbo_optimizer_new.h index 737b41bced6f..19985a0bb549 100644 --- a/ydb/library/yql/core/cbo/cbo_optimizer_new.h +++ b/ydb/library/yql/core/cbo/cbo_optimizer_new.h @@ -73,11 +73,11 @@ struct IProviderContext { }; /** - * Temporary solution for default provider context + * Default provider context with default cost and stats computation. 
*/ -struct TDummyProviderContext : public IProviderContext { - TDummyProviderContext() {} +struct TBaseProviderContext : public IProviderContext { + TBaseProviderContext() {} double ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgo) const override; @@ -101,7 +101,7 @@ struct TDummyProviderContext : public IProviderContext { const std::set>& joinConditions, EJoinAlgoType joinAlgo) const override; - static const TDummyProviderContext& instance(); + static const TBaseProviderContext& instance(); }; /** diff --git a/ydb/library/yql/dq/opt/dq_cbo_ut.cpp b/ydb/library/yql/dq/opt/dq_cbo_ut.cpp index 21302b4af36c..6f9715afdbbb 100644 --- a/ydb/library/yql/dq/opt/dq_cbo_ut.cpp +++ b/ydb/library/yql/dq/opt/dq_cbo_ut.cpp @@ -32,12 +32,12 @@ TExprNode::TPtr MakeLabel(TExprContext& ctx, const std::vector& vars Y_UNIT_TEST_SUITE(DQCBO) { Y_UNIT_TEST(Empty) { - TDummyProviderContext pctx; + TBaseProviderContext pctx; std::unique_ptr optimizer = std::unique_ptr(MakeNativeOptimizerNew(pctx, 100000)); } Y_UNIT_TEST(JoinSearch2Rels) { - TDummyProviderContext pctx; + TBaseProviderContext pctx; std::unique_ptr optimizer = std::unique_ptr(MakeNativeOptimizerNew(pctx, 100000)); auto rel1 = std::make_shared("a", std::make_shared(100000, 1, 1000000)); @@ -71,7 +71,7 @@ Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10 } Y_UNIT_TEST(JoinSearch3Rels) { - TDummyProviderContext pctx; + TBaseProviderContext pctx; std::unique_ptr optimizer = std::unique_ptr(MakeNativeOptimizerNew(pctx, 100000)); auto rel1 = std::make_shared("a", std::make_shared(100000, 1, 1000000)); @@ -227,7 +227,7 @@ void _DqOptimizeEquiJoinWithCosts(const std::function& optFact Y_UNIT_TEST(DqOptimizeEquiJoinWithCostsNative) { TExprContext ctx; - TDummyProviderContext pctx; + TBaseProviderContext pctx; std::function optFactory = [&]() { return MakeNativeOptimizerNew(pctx, 100000); 
}; @@ -236,7 +236,7 @@ Y_UNIT_TEST(DqOptimizeEquiJoinWithCostsNative) { Y_UNIT_TEST(DqOptimizeEquiJoinWithCostsPG) { TExprContext ctx; - TDummyProviderContext pctx; + TBaseProviderContext pctx; std::function log = [&](auto str) { Cerr << str; }; diff --git a/ydb/library/yql/providers/dq/opt/logical_optimize.cpp b/ydb/library/yql/providers/dq/opt/logical_optimize.cpp index 18421c5df911..2bbaa480b5b2 100644 --- a/ydb/library/yql/providers/dq/opt/logical_optimize.cpp +++ b/ydb/library/yql/providers/dq/opt/logical_optimize.cpp @@ -141,7 +141,7 @@ class TDqsLogicalOptProposalTransformer : public TOptimizeTransformerBase { }; std::unique_ptr opt; - TDummyProviderContext pctx; + TBaseProviderContext pctx; switch (TypesCtx.CostBasedOptimizer) { case ECostBasedOptimizerType::Native: diff --git a/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp b/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp index 886d5787b45f..7b73882529f7 100644 --- a/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp +++ b/ydb/library/yql/providers/dq/provider/yql_dq_datasource.cpp @@ -50,7 +50,7 @@ class TDqDataProviderSource: public TDataProviderBase { , ExecTransformer_([this, execTransformerFactory] () { return THolder(execTransformerFactory(State_)); }) , TypeAnnotationTransformer_([] () { return CreateDqsDataSourceTypeAnnotationTransformer(); }) , ConstraintsTransformer_([] () { return CreateDqDataSourceConstraintTransformer(); }) - , StatisticsTransformer_([this]() { return CreateDqsStatisticsTransformer(State_, TDummyProviderContext::instance()); }) + , StatisticsTransformer_([this]() { return CreateDqsStatisticsTransformer(State_, TBaseProviderContext::instance()); }) { } TStringBuf GetName() const override { diff --git a/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp b/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp index 047920cc71b2..080166abe8a4 100644 --- a/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp +++ 
b/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp @@ -81,7 +81,7 @@ Y_UNIT_TEST(NonReordable) { joinConditions.insert({NDq::TJoinColumn{"a", "b"}, NDq::TJoinColumn{"a","c"}}); auto root = std::make_shared( left, right, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::GraceJoin, true); - TDummyProviderContext optCtx; + TBaseProviderContext optCtx; std::unique_ptr opt = std::unique_ptr(NDq::MakeNativeOptimizerNew(optCtx, 1024)); auto result = opt->JoinSearch(root); diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp index b4caccc4a9ae..53e8f50e46f5 100644 --- a/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp +++ b/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp @@ -46,7 +46,7 @@ void DebugPrint(TYtJoinNode::TPtr node, TExprContext& ctx, int level) { } } -class TYtProviderContext: public TDummyProviderContext { +class TYtProviderContext: public TBaseProviderContext { public: TYtProviderContext() { }