Skip to content

Commit

Permalink
planner: variable tidb_opt_enable_hash_join to skip hash join (#46575) (
Browse files Browse the repository at this point in the history
#47359)

close #46695
  • Loading branch information
qw4990 authored Oct 8, 2023
1 parent 62a0305 commit c9dae10
Show file tree
Hide file tree
Showing 7 changed files with 195 additions and 12 deletions.
14 changes: 9 additions & 5 deletions planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ func (p *LogicalJoin) GetMergeJoin(prop *property.PhysicalProperty, schema *expr
// If the TiDB_SMJ hint is present, an enforced merge join should also be considered,
// because we can't trust lhsChildProperty completely.
if (p.preferJoinType&preferMergeJoin) > 0 ||
(p.preferJoinType&preferNoHashJoin) > 0 { // if hash join is not allowed, generate as many other types of join as possible to avoid 'cant-find-plan' error.
p.shouldSkipHashJoin() { // if hash join is not allowed, generate as many other types of join as possible to avoid 'cant-find-plan' error.
joins = append(joins, p.getEnforcedMergeJoin(prop, schema, statsInfo)...)
}

Expand Down Expand Up @@ -388,6 +388,10 @@ var ForceUseOuterBuild4Test = atomic.NewBool(false)
// TODO: use hint and remove this variable
var ForcedHashLeftJoin4Test = atomic.NewBool(false)

// shouldSkipHashJoin reports whether hash join should be avoided for this join.
// It returns true when the preferNoHashJoin bit is set in p.preferJoinType, or
// when the session variable DisableHashJoin is true.
func (p *LogicalJoin) shouldSkipHashJoin() bool {
	// The per-join preference is checked first; session variables are only
	// consulted when that bit is not set.
	if p.preferJoinType&preferNoHashJoin > 0 {
		return true
	}
	sessVars := p.SCtx().GetSessionVars()
	return sessVars.DisableHashJoin
}

func (p *LogicalJoin) getHashJoins(prop *property.PhysicalProperty) (joins []PhysicalPlan, forced bool) {
if !prop.IsSortItemEmpty() { // hash join doesn't promise any orders
return
Expand Down Expand Up @@ -448,12 +452,12 @@ func (p *LogicalJoin) getHashJoins(prop *property.PhysicalProperty) (joins []Phy
}

forced = (p.preferJoinType&preferHashJoin > 0) || forceLeftToBuild || forceRightToBuild
noHashJoin := (p.preferJoinType & preferNoHashJoin) > 0
if !forced && noHashJoin {
if !forced && p.shouldSkipHashJoin() {
return nil, false
} else if forced && noHashJoin {
} else if forced && p.shouldSkipHashJoin() {
p.ctx.GetSessionVars().StmtCtx.AppendWarning(ErrInternal.GenWithStack(
"Some HASH_JOIN and NO_HASH_JOIN hints conflict, NO_HASH_JOIN is ignored"))
"A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, " +
"or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence."))
}
return
}
Expand Down
13 changes: 13 additions & 0 deletions planner/core/rule_join_reorder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,19 @@ func TestNoHashJoinHint(t *testing.T) {
runJoinReorderTestData(t, tk, "TestNoHashJoinHint")
}

// TestOptEnableHashJoin runs the "TestOptEnableHashJoin" join-reorder test data
// with the session variable tidb_opt_enable_hash_join set to off.
func TestOptEnableHashJoin(t *testing.T) {
	tk := testkit.NewTestKit(t, testkit.CreateMockStore(t))

	// Setup statements are executed in order: pick the database, turn the
	// variable off, then create the four tables used by the test cases.
	setupStmts := []string{
		"use test",
		"set tidb_opt_enable_hash_join=off",
		"create table t1(a int, b int, key(a));",
		"create table t2(a int, b int, key(a));",
		"create table t3(a int, b int, key(a));",
		"create table t4(a int, b int, key(a));",
	}
	for _, stmt := range setupStmts {
		tk.MustExec(stmt)
	}

	runJoinReorderTestData(t, tk, "TestOptEnableHashJoin")
}

func TestNoMergeJoinHint(t *testing.T) {
store := testkit.CreateMockStore(t)
tk := testkit.NewTestKit(t, store)
Expand Down
25 changes: 20 additions & 5 deletions planner/core/testdata/join_reorder_suite_in.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,21 @@
"select /*+ no_hash_join(t2) */ * from t1 right join t2 on t1.a=t2.a"
]
},
{
"name": "TestOptEnableHashJoin",
"cases": [
"select * from t1, t2",
"select * from t1, t2 where t1.a=t2.a",
"select * from t1, t2 where t1.b=t2.b",
"select * from t1, t2 where t1.a=t2.a and t1.b=t2.b",
"select * from t1 left join t2 on t1.b=t2.b",
"select * from t1 left join t2 on t1.a=t2.a",
"select * from t1 right join t2 on t1.b=t2.b",
"select * from t1 right join t2 on t1.a=t2.a",
"select /*+ hash_join(t1) */ * from t1, t2",
"select /*+ hash_join(t2) */ * from t1, t2"
]
},
{
"name": "TestNoIndexJoinHint",
"cases": [
Expand Down Expand Up @@ -509,11 +524,11 @@
{
"name": "TestAdditionOtherConditionsRemained4OuterJoin",
"cases": [
"SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` LEFT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC",
"SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` RIGHT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC",
"explain format = 'brief' SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` LEFT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC;",
// The where clause should be a Selection out of joins.
"explain format='brief' select * from t left join t1 on t.a=t1.a inner join t2 on t.a=t2.a and t2.c = 100 left join t3 on t2.a=t3.a and t3.b > 1 left join t4 on t2.a = t4.a where (t2.b > 100 or t.a > 10 or t1.b < 10)"
"SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` LEFT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC",
"SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` RIGHT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC",
"explain format = 'brief' SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` LEFT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC;",
// The where clause should be a Selection out of joins.
"explain format='brief' select * from t left join t1 on t.a=t1.a inner join t2 on t.a=t2.a and t2.c = 100 left join t3 on t2.a=t3.a and t3.b > 1 left join t4 on t2.a = t4.a where (t2.b > 100 or t.a > 10 or t1.b < 10)"
]
}
]
144 changes: 142 additions & 2 deletions planner/core/testdata/join_reorder_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -609,7 +609,7 @@
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": [
"Warning 1815 Some HASH_JOIN and NO_HASH_JOIN hints conflict, NO_HASH_JOIN is ignored"
"Warning 1815 A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence."
]
},
{
Expand All @@ -622,7 +622,7 @@
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": [
"Warning 1815 Some HASH_JOIN and NO_HASH_JOIN hints conflict, NO_HASH_JOIN is ignored"
"Warning 1815 A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence."
]
},
{
Expand Down Expand Up @@ -747,6 +747,146 @@
}
]
},
{
"Name": "TestOptEnableHashJoin",
"Cases": [
{
"SQL": "select * from t1, t2",
"Plan": [
"MergeJoin 100000000.00 root inner join",
"├─TableReader(Build) 10000.00 root data:TableFullScan",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
"└─TableReader(Probe) 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1, t2 where t1.a=t2.a",
"Plan": [
"IndexHashJoin 12487.50 root inner join, inner:IndexLookUp, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)",
"├─TableReader(Build) 9990.00 root data:Selection",
"│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))",
"│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 12487.50 root ",
" ├─Selection(Build) 12487.50 cop[tikv] not(isnull(test.t2.a))",
" │ └─IndexRangeScan 12500.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo",
" └─TableRowIDScan(Probe) 12487.50 cop[tikv] table:t2 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1, t2 where t1.b=t2.b",
"Plan": [
"MergeJoin 12487.50 root inner join, left key:test.t1.b, right key:test.t2.b",
"├─Sort(Build) 9990.00 root test.t2.b",
"│ └─TableReader 9990.00 root data:Selection",
"│ └─Selection 9990.00 cop[tikv] not(isnull(test.t2.b))",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
"└─Sort(Probe) 9990.00 root test.t1.b",
" └─TableReader 9990.00 root data:Selection",
" └─Selection 9990.00 cop[tikv] not(isnull(test.t1.b))",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1, t2 where t1.a=t2.a and t1.b=t2.b",
"Plan": [
"IndexHashJoin 12475.01 root inner join, inner:IndexLookUp, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a), eq(test.t1.b, test.t2.b)",
"├─TableReader(Build) 9980.01 root data:Selection",
"│ └─Selection 9980.01 cop[tikv] not(isnull(test.t1.a)), not(isnull(test.t1.b))",
"│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 12475.01 root ",
" ├─Selection(Build) 12487.50 cop[tikv] not(isnull(test.t2.a))",
" │ └─IndexRangeScan 12500.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo",
" └─Selection(Probe) 12475.01 cop[tikv] not(isnull(test.t2.b))",
" └─TableRowIDScan 12487.50 cop[tikv] table:t2 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1 left join t2 on t1.b=t2.b",
"Plan": [
"MergeJoin 12487.50 root left outer join, left key:test.t1.b, right key:test.t2.b",
"├─Sort(Build) 9990.00 root test.t2.b",
"│ └─TableReader 9990.00 root data:Selection",
"│ └─Selection 9990.00 cop[tikv] not(isnull(test.t2.b))",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
"└─Sort(Probe) 10000.00 root test.t1.b",
" └─TableReader 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1 left join t2 on t1.a=t2.a",
"Plan": [
"IndexHashJoin 12487.50 root left outer join, inner:IndexLookUp, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)",
"├─TableReader(Build) 10000.00 root data:TableFullScan",
"│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 12487.50 root ",
" ├─Selection(Build) 12487.50 cop[tikv] not(isnull(test.t2.a))",
" │ └─IndexRangeScan 12500.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo",
" └─TableRowIDScan(Probe) 12487.50 cop[tikv] table:t2 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1 right join t2 on t1.b=t2.b",
"Plan": [
"MergeJoin 12487.50 root right outer join, left key:test.t1.b, right key:test.t2.b",
"├─Sort(Build) 9990.00 root test.t1.b",
"│ └─TableReader 9990.00 root data:Selection",
"│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.b))",
"│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo",
"└─Sort(Probe) 10000.00 root test.t2.b",
" └─TableReader 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1 right join t2 on t1.a=t2.a",
"Plan": [
"IndexHashJoin 12487.50 root right outer join, inner:IndexLookUp, outer key:test.t2.a, inner key:test.t1.a, equal cond:eq(test.t2.a, test.t1.a)",
"├─TableReader(Build) 10000.00 root data:TableFullScan",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 12487.50 root ",
" ├─Selection(Build) 12487.50 cop[tikv] not(isnull(test.t1.a))",
" │ └─IndexRangeScan 12500.00 cop[tikv] table:t1, index:a(a) range: decided by [eq(test.t1.a, test.t2.a)], keep order:false, stats:pseudo",
" └─TableRowIDScan(Probe) 12487.50 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select /*+ hash_join(t1) */ * from t1, t2",
"Plan": [
"HashJoin 100000000.00 root CARTESIAN inner join",
"├─TableReader(Build) 10000.00 root data:TableFullScan",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
"└─TableReader(Probe) 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": [
"Warning 1815 A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence."
]
},
{
"SQL": "select /*+ hash_join(t2) */ * from t1, t2",
"Plan": [
"HashJoin 100000000.00 root CARTESIAN inner join",
"├─TableReader(Build) 10000.00 root data:TableFullScan",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
"└─TableReader(Probe) 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": [
"Warning 1815 A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence."
]
}
]
},
{
"Name": "TestNoIndexJoinHint",
"Cases": [
Expand Down
3 changes: 3 additions & 0 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -895,6 +895,9 @@ type SessionVars struct {
// EnableOuterJoinReorder enables TiDB to involve the outer join into the join reorder.
EnableOuterJoinReorder bool

// DisableHashJoin indicates whether to disable hash join.
DisableHashJoin bool

// OptimizerEnableNAAJ enables TiDB to use null-aware anti join.
OptimizerEnableNAAJ bool

Expand Down
4 changes: 4 additions & 0 deletions sessionctx/variable/sysvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,10 @@ var defaultSysVars = []*SysVar{
s.EnableOuterJoinReorder = TiDBOptOn(val)
return nil
}},
{Scope: ScopeGlobal | ScopeSession, Name: TiDBOptEnableHashJoin, Value: BoolToOnOff(DefTiDBOptEnableHashJoin), Type: TypeBool, SetSession: func(s *SessionVars, val string) error {
s.DisableHashJoin = !TiDBOptOn(val)
return nil
}},
{Scope: ScopeGlobal | ScopeSession, Name: TiDBOptimizerEnableNAAJ, Value: BoolToOnOff(DefTiDBEnableNAAJ), Type: TypeBool, SetSession: func(s *SessionVars, val string) error {
s.OptimizerEnableNAAJ = TiDBOptOn(val)
return nil
Expand Down
4 changes: 4 additions & 0 deletions sessionctx/variable/tidb_vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,9 @@ const (
// we'll choose a rather time-consuming algorithm to calculate the join order.
TiDBOptJoinReorderThreshold = "tidb_opt_join_reorder_threshold"

// TiDBOptEnableHashJoin indicates whether to enable hash join.
TiDBOptEnableHashJoin = "tidb_opt_enable_hash_join"

// TiDBSlowQueryFile indicates which slow query log file for SLOW_QUERY table to parse.
TiDBSlowQueryFile = "tidb_slow_query_file"

Expand Down Expand Up @@ -982,6 +985,7 @@ const (
DefTiDBOptimizerSelectivityLevel = 0
DefTiDBOptimizerEnableNewOFGB = false
DefTiDBEnableOuterJoinReorder = true
DefTiDBOptEnableHashJoin = true
DefTiDBEnableNAAJ = false
DefTiDBAllowBatchCop = 1
DefTiDBAllowMPPExecution = true
Expand Down

0 comments on commit c9dae10

Please sign in to comment.