Skip to content

Commit

Permalink
Fixed a TPCH11 bug, losing JoinKind
Browse files Browse the repository at this point in the history
  • Loading branch information
pavelvelikhov committed Jan 30, 2024
1 parent b3763f7 commit 4f48f82
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 12 deletions.
84 changes: 83 additions & 1 deletion ydb/core/kqp/ut/join/kqp_join_order_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -866,7 +866,89 @@ limit 20;
Cout << result.GetPlan();
}
}

Y_UNIT_TEST(TPCH11) {

auto kikimr = GetKikimrWithJoinSettings();
auto db = kikimr.GetTableClient();
auto session = db.CreateSession().GetValueSync().GetSession();

CreateSampleTable(session);

/* join with parameters */
{
const TString query = Q_(R"(
-- TPC-H/TPC-R Important Stock Identification Query (Q11)
-- TPC TPC-H Parameter Substitution (Version 2.17.2 build 0)
-- using 1680793381 as a seed to the RNG
$join1 = (
select
ps.ps_partkey as ps_partkey,
ps.ps_supplycost as ps_supplycost,
ps.ps_availqty as ps_availqty,
s.s_nationkey as s_nationkey
from
`/Root/partsupp` as ps
join
`/Root/supplier` as s
on
ps.ps_suppkey = s.s_suppkey
);
$join2 = (
select
j.ps_partkey as ps_partkey,
j.ps_supplycost as ps_supplycost,
j.ps_availqty as ps_availqty,
j.s_nationkey as s_nationkey
from
$join1 as j
join
`/Root/nation` as n
on
n.n_nationkey = j.s_nationkey
where
n.n_name = 'CANADA'
);
$threshold = (
select
sum(ps_supplycost * ps_availqty) * 0.0001000000 as threshold
from
$join2
);
$values = (
select
ps_partkey,
sum(ps_supplycost * ps_availqty) as value
from
$join2
group by
ps_partkey
);
select
v.ps_partkey as ps_partkey,
v.value as value
from
$values as v
cross join
$threshold as t
where
v.value > t.threshold
order by
value desc;
)");

auto result = session.ExplainDataQuery(query).ExtractValueSync();

UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS);

NJson::TJsonValue plan;
NJson::ReadJsonTree(result.GetPlan(), &plan, true);
Cout << result.GetPlan();
}
}
}
}
}

2 changes: 1 addition & 1 deletion ydb/library/yql/core/yql_cost_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ enum EJoinAlgoType {
LookupJoin
};

static const EJoinAlgoType AllJoinTypes[] = { DictJoin, MapJoin, GraceJoin, LookupJoin };
static const EJoinAlgoType AllJoinAlgos[] = { DictJoin, MapJoin, GraceJoin, LookupJoin };

TOptimizerStatistics ComputeJoinStats(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats,
const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions, EJoinAlgoType joinAlgo, const IProviderContext& ctx);
Expand Down
22 changes: 12 additions & 10 deletions ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,11 @@ void ComputeJoinConditions(const TCoEquiJoinTuple& joinTuple,
std::shared_ptr<TJoinOptimizerNode> MakeJoin(std::shared_ptr<IBaseOptimizerNode> left,
std::shared_ptr<IBaseOptimizerNode> right,
const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
EJoinKind joinKind,
EJoinAlgoType joinAlgo,
IProviderContext& ctx) {

auto res = std::make_shared<TJoinOptimizerNode>(left, right, joinConditions, EJoinKind::InnerJoin, joinAlgo);
auto res = std::make_shared<TJoinOptimizerNode>(left, right, joinConditions, joinKind, joinAlgo);
res->Stats = std::make_shared<TOptimizerStatistics>( ComputeJoinStats(*left->Stats, *right->Stats, joinConditions, joinAlgo, ctx));
return res;
}
Expand All @@ -117,12 +118,12 @@ std::shared_ptr<TJoinOptimizerNode> PickBestJoin(std::shared_ptr<IBaseOptimizerN

auto res = std::shared_ptr<TJoinOptimizerNode>();

for ( auto joinType : AllJoinTypes ) {
auto p1 = ctx.IsJoinApplicable(left, right, leftJoinConditions, joinType) ?
MakeJoin(left, right, leftJoinConditions, joinType, ctx) :
for ( auto joinAlgo : AllJoinAlgos ) {
auto p1 = ctx.IsJoinApplicable(left, right, leftJoinConditions, joinAlgo) ?
MakeJoin(left, right, leftJoinConditions, EJoinKind::InnerJoin, joinAlgo, ctx) :
std::shared_ptr<TJoinOptimizerNode>();
auto p2 = ctx.IsJoinApplicable(right, left, rightJoinConditions, joinType) ?
MakeJoin(right, left, rightJoinConditions, joinType, ctx) :
auto p2 = ctx.IsJoinApplicable(right, left, rightJoinConditions, joinAlgo) ?
MakeJoin(right, left, rightJoinConditions, EJoinKind::InnerJoin, joinAlgo, ctx) :
std::shared_ptr<TJoinOptimizerNode>();

if (p1) {
Expand Down Expand Up @@ -155,13 +156,14 @@ std::shared_ptr<TJoinOptimizerNode> PickBestJoin(std::shared_ptr<IBaseOptimizerN
std::shared_ptr<TJoinOptimizerNode> PickBestNonReorderabeJoin(std::shared_ptr<IBaseOptimizerNode> left,
std::shared_ptr<IBaseOptimizerNode> right,
const std::set<std::pair<TJoinColumn, TJoinColumn>>& leftJoinConditions,
EJoinKind joinKind,
IProviderContext& ctx) {

auto res = std::shared_ptr<TJoinOptimizerNode>();

for ( auto joinType : AllJoinTypes ) {
auto p = ctx.IsJoinApplicable(left, right, leftJoinConditions, joinType) ?
MakeJoin(left, right, leftJoinConditions, joinType, ctx) :
for ( auto joinAlgo : AllJoinAlgos ) {
auto p = ctx.IsJoinApplicable(left, right, leftJoinConditions, joinAlgo) ?
MakeJoin(left, right, leftJoinConditions, joinKind, joinAlgo, ctx) :
std::shared_ptr<TJoinOptimizerNode>();

if (p) {
Expand Down Expand Up @@ -1009,7 +1011,7 @@ void ComputeStatistics(const std::shared_ptr<TJoinOptimizerNode>& join, IProvide
*/
std::shared_ptr<TJoinOptimizerNode> OptimizeSubtree(const std::shared_ptr<TJoinOptimizerNode>& joinTree, ui32 maxDPccpDPTableSize, IProviderContext& ctx) {
if (!joinTree->IsReorderable) {
return PickBestNonReorderabeJoin(joinTree->LeftArg, joinTree->RightArg, joinTree->JoinConditions, ctx);
return PickBestNonReorderabeJoin(joinTree->LeftArg, joinTree->RightArg, joinTree->JoinConditions, joinTree->JoinType, ctx);
}

TGraph<64> joinGraph;
Expand Down

0 comments on commit 4f48f82

Please sign in to comment.