Skip to content

Commit

Permalink
Merge 27ad45d into ced8265
Browse files Browse the repository at this point in the history
  • Loading branch information
alephonea authored Apr 19, 2024
2 parents ced8265 + 27ad45d commit 005c0f1
Show file tree
Hide file tree
Showing 10 changed files with 229 additions and 195 deletions.
119 changes: 119 additions & 0 deletions ydb/library/yql/core/cbo/cbo_optimizer_new.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,123 @@ void TJoinOptimizerNode::Print(std::stringstream& stream, int ntabs) {
RightArg->Print(stream, ntabs+1);
}

bool IsPKJoin(const TOptimizerStatistics& stats, const TVector<TString>& joinKeys) {
if (stats.KeyColumns.size()==0) {
return false;
}

for(size_t i=0; i<stats.KeyColumns.size(); i++){
if (std::find(joinKeys.begin(), joinKeys.end(), stats.KeyColumns[i]) == joinKeys.end()) {
return false;
}
}
return true;
}

/**
 * Decide whether a join algorithm can be used for the given join
 *
 * The dummy provider does not inspect the inputs, the join conditions or the
 * join keys: only MapJoin is reported as applicable
 */
bool TDummyProviderContext::IsJoinApplicable(
    const std::shared_ptr<IBaseOptimizerNode>& left,
    const std::shared_ptr<IBaseOptimizerNode>& right,
    const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
    const TVector<TString>& leftJoinKeys,
    const TVector<TString>& rightJoinKeys,
    EJoinAlgoType joinAlgo)
{
    Y_UNUSED(left);
    Y_UNUSED(right);
    Y_UNUSED(joinConditions);
    Y_UNUSED(leftJoinKeys);
    Y_UNUSED(rightJoinKeys);

    switch (joinAlgo) {
        case EJoinAlgoType::MapJoin:
            return true;
        default:
            return false;
    }
}

/**
 * Compute the cost of a single join from the row counts of its inputs and output
 *
 * The rows of the right input are weighted twice as much as the rows of the
 * left input; the output byte size and the join algorithm are not used
 */
double TDummyProviderContext::ComputeJoinCost(
    const TOptimizerStatistics& leftStats,
    const TOptimizerStatistics& rightStats,
    const double outputRows,
    const double outputByteSize,
    EJoinAlgoType joinAlgo) const
{
    Y_UNUSED(outputByteSize);
    Y_UNUSED(joinAlgo);

    const double inputRowsCost = leftStats.Nrows + 2.0 * rightStats.Nrows;
    return inputRowsCost + outputRows;
}

/**
 * Compute the cost and output cardinality of a join
 *
 * Currently a very basic computation targeted at GraceJoin
 *
 * The build is on the right side, so we make the build side a bit more expensive than the probe
 *
 * This overload extracts the attribute names of the left and right columns of
 * every join condition and delegates to the overload that takes join key lists
 */
TOptimizerStatistics TDummyProviderContext::ComputeJoinStats(
    const TOptimizerStatistics& leftStats,
    const TOptimizerStatistics& rightStats,
    const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
    EJoinAlgoType joinAlgo) const
{
    TVector<TString> leftJoinKeys;
    TVector<TString> rightJoinKeys;
    leftJoinKeys.reserve(joinConditions.size());
    rightJoinKeys.reserve(joinConditions.size());

    // Iterate by const reference: copying each pair of join columns is unnecessary
    for (const auto& condition : joinConditions) {
        leftJoinKeys.emplace_back(condition.first.AttributeName);
        rightJoinKeys.emplace_back(condition.second.AttributeName);
    }

    return ComputeJoinStats(leftStats, rightStats, leftJoinKeys, rightJoinKeys, joinAlgo);
}

/**
 * Compute the statistics of a join result from the statistics of its inputs
 *
 * Three cases are distinguished:
 *  - the right join keys cover the key columns of the right input: the output
 *    cardinality is the left row count scaled by the right selectivity and the
 *    result keeps the key columns of the left input
 *  - otherwise, the left join keys cover the key columns of the left input:
 *    the symmetric case, the result keeps the key columns of the right input
 *  - otherwise: the cardinality is estimated as 0.2 * left rows * right rows,
 *    the result is a ManyManyJoin with no key columns and selectivity 1.0
 *
 * The cost of the result is the cost of this join plus the accumulated costs
 * of both inputs
 */
TOptimizerStatistics TDummyProviderContext::ComputeJoinStats(
    const TOptimizerStatistics& leftStats,
    const TOptimizerStatistics& rightStats,
    const TVector<TString>& leftJoinKeys,
    const TVector<TString>& rightJoinKeys,
    EJoinAlgoType joinAlgo) const
{
    double newCard = 0.0;
    EStatisticsType outputType = EStatisticsType::ManyManyJoin;
    bool leftKeyColumns = false;
    bool rightKeyColumns = false;
    double selectivity = 1.0;

    if (IsPKJoin(rightStats, rightJoinKeys)) {
        newCard = leftStats.Nrows * rightStats.Selectivity;
        selectivity = leftStats.Selectivity * rightStats.Selectivity;
        leftKeyColumns = true;
        if (leftStats.Type == EStatisticsType::BaseTable) {
            outputType = EStatisticsType::FilteredFactTable;
        } else {
            outputType = leftStats.Type;
        }
    } else if (IsPKJoin(leftStats, leftJoinKeys)) {
        newCard = rightStats.Nrows * leftStats.Selectivity;
        selectivity = leftStats.Selectivity * rightStats.Selectivity;
        rightKeyColumns = true;
        if (rightStats.Type == EStatisticsType::BaseTable) {
            outputType = EStatisticsType::FilteredFactTable;
        } else {
            outputType = rightStats.Type;
        }
    } else {
        newCard = 0.2 * leftStats.Nrows * rightStats.Nrows;
        outputType = EStatisticsType::ManyManyJoin;
    }

    const int newNCols = leftStats.Ncols + rightStats.Ncols;

    // Each side contributes its average row size times the output cardinality.
    // The two terms are computed separately: written as a single expression
    // "a ? x : 0 + b ? y : 0" parses as "a ? x : ((0 + b) ? y : 0)", which
    // silently drops the right side whenever the left side has rows.
    const double leftByteSize = (leftStats.Nrows != 0)
        ? (leftStats.ByteSize / leftStats.Nrows) * newCard
        : 0.0;
    const double rightByteSize = (rightStats.Nrows != 0)
        ? (rightStats.ByteSize / rightStats.Nrows) * newCard
        : 0.0;
    const double newByteSize = leftByteSize + rightByteSize;

    const double cost = ComputeJoinCost(leftStats, rightStats, newCard, newByteSize, joinAlgo)
        + leftStats.Cost + rightStats.Cost;

    auto result = TOptimizerStatistics(outputType, newCard, newNCols, newByteSize, cost,
        leftKeyColumns ? leftStats.KeyColumns : ( rightKeyColumns ? rightStats.KeyColumns : TOptimizerStatistics::EmptyColumns));
    result.Selectivity = selectivity;
    return result;
}

/**
 * Access the shared dummy provider context
 *
 * The context is a function-local static object, so every call returns a
 * reference to the same instance
 */
const TDummyProviderContext& TDummyProviderContext::instance() {
    static TDummyProviderContext sharedContext;
    return sharedContext;
}


} // namespace NYql
136 changes: 72 additions & 64 deletions ydb/library/yql/core/cbo/cbo_optimizer_new.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,12 @@
#include <map>
#include <sstream>


namespace NYql {

/**
* OptimizerNodes are the internal representations of operators inside the
* Cost-based optimizer. Currently we only support RelOptimizerNode - a node that
* is an input relation to the equi-join, and JoinOptimizerNode - an inner join
* is an input relation to the equi-join, and JoinOptimizerNode - an inner join
* that connects two sets of relations.
*/
enum EOptimizerNodeKind: ui32
Expand All @@ -35,49 +34,13 @@ struct IBaseOptimizerNode {
std::shared_ptr<TOptimizerStatistics> Stats;

IBaseOptimizerNode(EOptimizerNodeKind k) : Kind(k) {}
IBaseOptimizerNode(EOptimizerNodeKind k, std::shared_ptr<TOptimizerStatistics> s) :
IBaseOptimizerNode(EOptimizerNodeKind k, std::shared_ptr<TOptimizerStatistics> s) :
Kind(k), Stats(s) {}

virtual TVector<TString> Labels()=0;
virtual void Print(std::stringstream& stream, int ntabs=0)=0;
};

/**
* RelOptimizerNode adds a label to base class
* This is the label assigned to the input by equi-Join
*
* The node is constructed with the RelNodeType kind and the statistics
* passed to the constructor
*/
struct TRelOptimizerNode : public IBaseOptimizerNode {
// Label of the input relation
TString Label;

// Temporary solution to check if a LookupJoin is possible in KQP
//void* Expr;

// Creates a relation node with the given label and statistics
TRelOptimizerNode(TString label, std::shared_ptr<TOptimizerStatistics> stats) :
IBaseOptimizerNode(RelNodeType, stats), Label(label) { }
//TRelOptimizerNode(TString label, std::shared_ptr<TOptimizerStatistics> stats, const TExprNode::TPtr expr) :
// IBaseOptimizerNode(RelNodeType, stats), Label(label), Expr(expr) { }
virtual ~TRelOptimizerNode() {}

// Implementations of the pure virtual interface of IBaseOptimizerNode
virtual TVector<TString> Labels();
virtual void Print(std::stringstream& stream, int ntabs=0);
};

/**
* Kinds of join known to the optimizer, stored as ui32
* See ConvertToJoinKind / ConvertToJoinString for the conversions
* from and to the string form
*/
enum EJoinKind: ui32
{
InnerJoin,
LeftJoin,
RightJoin,
OuterJoin,
LeftOnly,
RightOnly,
LeftSemi,
RightSemi,
Cross,
Exclusion
};

EJoinKind ConvertToJoinKind(const TString& joinString);
TString ConvertToJoinString(const EJoinKind kind);

/**
* This is a temporary structure for KQP provider
Expand All @@ -89,8 +52,20 @@ struct IProviderContext {

virtual double ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgol) const = 0;

virtual bool IsJoinApplicable(const std::shared_ptr<IBaseOptimizerNode>& left,
const std::shared_ptr<IBaseOptimizerNode>& right,
virtual TOptimizerStatistics ComputeJoinStats(
const TOptimizerStatistics& leftStats,
const TOptimizerStatistics& rightStats,
const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions, EJoinAlgoType joinAlgo) const = 0;

virtual TOptimizerStatistics ComputeJoinStats(
const TOptimizerStatistics& leftStats,
const TOptimizerStatistics& rightStats,
const TVector<TString>& leftJoinKeys,
const TVector<TString>& rightJoinKeys,
EJoinAlgoType joinAlgo) const = 0;

virtual bool IsJoinApplicable(const std::shared_ptr<IBaseOptimizerNode>& left,
const std::shared_ptr<IBaseOptimizerNode>& right,
const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
const TVector<TString>& leftJoinKeys,
const TVector<TString>& rightJoinKeys,
Expand All @@ -105,35 +80,68 @@ struct IProviderContext {
struct TDummyProviderContext : public IProviderContext {
TDummyProviderContext() {}

double ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgo) const override {
Y_UNUSED(outputByteSize);
Y_UNUSED(joinAlgo);
return leftStats.Nrows + 2.0 * rightStats.Nrows + outputRows;
}
double ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgo) const override;

bool IsJoinApplicable(const std::shared_ptr<IBaseOptimizerNode>& left,
const std::shared_ptr<IBaseOptimizerNode>& right,
bool IsJoinApplicable(const std::shared_ptr<IBaseOptimizerNode>& left,
const std::shared_ptr<IBaseOptimizerNode>& right,
const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
const TVector<TString>& leftJoinKeys,
const TVector<TString>& rightJoinKeys,
EJoinAlgoType joinAlgo) override {
EJoinAlgoType joinAlgo) override;

virtual TOptimizerStatistics ComputeJoinStats(
const TOptimizerStatistics& leftStats,
const TOptimizerStatistics& rightStats,
const TVector<TString>& leftJoinKeys,
const TVector<TString>& rightJoinKeys,
EJoinAlgoType joinAlgo) const override;

virtual TOptimizerStatistics ComputeJoinStats(
const TOptimizerStatistics& leftStats,
const TOptimizerStatistics& rightStats,
const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
EJoinAlgoType joinAlgo) const override;

Y_UNUSED(left);
Y_UNUSED(right);
Y_UNUSED(joinConditions);
Y_UNUSED(leftJoinKeys);
Y_UNUSED(rightJoinKeys);
static const TDummyProviderContext& instance();
};

return joinAlgo == EJoinAlgoType::MapJoin;
}
/**
* RelOptimizerNode adds a label to base class
* This is the label assigned to the input by equi-Join
*/
struct TRelOptimizerNode : public IBaseOptimizerNode {
TString Label;

// Temporary solution to check if a LookupJoin is possible in KQP
//void* Expr;

TRelOptimizerNode(TString label, std::shared_ptr<TOptimizerStatistics> stats) :
IBaseOptimizerNode(RelNodeType, stats), Label(label) { }
//TRelOptimizerNode(TString label, std::shared_ptr<TOptimizerStatistics> stats, const TExprNode::TPtr expr) :
// IBaseOptimizerNode(RelNodeType, stats), Label(label), Expr(expr) { }
virtual ~TRelOptimizerNode() {}

static const TDummyProviderContext& instance() {
static TDummyProviderContext staticContext;
return staticContext;
}
virtual TVector<TString> Labels();
virtual void Print(std::stringstream& stream, int ntabs=0);
};

/**
* Kinds of join known to the optimizer, stored as ui32
* See ConvertToJoinKind / ConvertToJoinString for the conversions
* from and to the string form
*/
enum EJoinKind: ui32
{
InnerJoin,
LeftJoin,
RightJoin,
OuterJoin,
LeftOnly,
RightOnly,
LeftSemi,
RightSemi,
Cross,
Exclusion
};

EJoinKind ConvertToJoinKind(const TString& joinString);
TString ConvertToJoinString(const EJoinKind kind);

/**
* JoinOptimizerNode records the left and right arguments of the join
* as well as the set of join conditions.
Expand All @@ -150,11 +158,11 @@ struct TJoinOptimizerNode : public IBaseOptimizerNode {
EJoinAlgoType JoinAlgo;
bool IsReorderable;

TJoinOptimizerNode(const std::shared_ptr<IBaseOptimizerNode>& left,
const std::shared_ptr<IBaseOptimizerNode>& right,
TJoinOptimizerNode(const std::shared_ptr<IBaseOptimizerNode>& left,
const std::shared_ptr<IBaseOptimizerNode>& right,
const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
const EJoinKind joinType,
const EJoinAlgoType joinAlgo,
const EJoinKind joinType,
const EJoinAlgoType joinAlgo,
bool nonReorderable=false);
virtual ~TJoinOptimizerNode() {}
virtual TVector<TString> Labels();
Expand Down
Loading

0 comments on commit 005c0f1

Please sign in to comment.