From c9dae103646132de96afe68a7132213e8cb3484d Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Sun, 8 Oct 2023 18:36:23 +0800 Subject: [PATCH] planner: variable tidb_opt_enable_hash_join to skip hash join (#46575) (#47359) close pingcap/tidb#46695 --- planner/core/exhaust_physical_plans.go | 14 +- planner/core/rule_join_reorder_test.go | 13 ++ .../core/testdata/join_reorder_suite_in.json | 25 ++- .../core/testdata/join_reorder_suite_out.json | 144 +++++++++++++++++- sessionctx/variable/session.go | 3 + sessionctx/variable/sysvar.go | 4 + sessionctx/variable/tidb_vars.go | 4 + 7 files changed, 195 insertions(+), 12 deletions(-) diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go index bea3f3070c956..a9e233fedc684 100644 --- a/planner/core/exhaust_physical_plans.go +++ b/planner/core/exhaust_physical_plans.go @@ -241,7 +241,7 @@ func (p *LogicalJoin) GetMergeJoin(prop *property.PhysicalProperty, schema *expr // If TiDB_SMJ hint is existed, it should consider enforce merge join, // because we can't trust lhsChildProperty completely. if (p.preferJoinType&preferMergeJoin) > 0 || - (p.preferJoinType&preferNoHashJoin) > 0 { // if hash join is not allowed, generate as many other types of join as possible to avoid 'cant-find-plan' error. + p.shouldSkipHashJoin() { // if hash join is not allowed, generate as many other types of join as possible to avoid 'cant-find-plan' error. joins = append(joins, p.getEnforcedMergeJoin(prop, schema, statsInfo)...) } @@ -388,6 +388,10 @@ var ForceUseOuterBuild4Test = atomic.NewBool(false) // TODO: use hint and remove this variable var ForcedHashLeftJoin4Test = atomic.NewBool(false) +func (p *LogicalJoin) shouldSkipHashJoin() bool { + return (p.preferJoinType&preferNoHashJoin) > 0 || (p.SCtx().GetSessionVars().DisableHashJoin) +} + func (p *LogicalJoin) getHashJoins(prop *property.PhysicalProperty) (joins []PhysicalPlan, forced bool) { if !prop.IsSortItemEmpty() { // hash join doesn't promise any orders return @@ -448,12 +452,12 @@ func (p *LogicalJoin) getHashJoins(prop *property.PhysicalProperty) (joins []Phy } forced = (p.preferJoinType&preferHashJoin > 0) || forceLeftToBuild || forceRightToBuild - noHashJoin := (p.preferJoinType & preferNoHashJoin) > 0 - if !forced && noHashJoin { + if !forced && p.shouldSkipHashJoin() { return nil, false - } else if forced && noHashJoin { + } else if forced && p.shouldSkipHashJoin() { p.ctx.GetSessionVars().StmtCtx.AppendWarning(ErrInternal.GenWithStack( - "Some HASH_JOIN and NO_HASH_JOIN hints conflict, NO_HASH_JOIN is ignored")) + "A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, " + + "or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence.")) } return } diff --git a/planner/core/rule_join_reorder_test.go b/planner/core/rule_join_reorder_test.go index 66081bd68f337..0b94448b671b6 100644 --- a/planner/core/rule_join_reorder_test.go +++ b/planner/core/rule_join_reorder_test.go @@ -73,6 +73,19 @@ func TestNoHashJoinHint(t *testing.T) { runJoinReorderTestData(t, tk, "TestNoHashJoinHint") } +// test the global/session variable tidb_opt_enable_hash_join being set to no +func TestOptEnableHashJoin(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("set tidb_opt_enable_hash_join=off") + tk.MustExec("create table t1(a int, b int, key(a));") + tk.MustExec("create table t2(a int, b int, key(a));") + tk.MustExec("create table t3(a int, b int, key(a));") + tk.MustExec("create table t4(a int, b int, key(a));") + runJoinReorderTestData(t, tk, "TestOptEnableHashJoin") +} + func TestNoMergeJoinHint(t *testing.T) { store := testkit.CreateMockStore(t) tk := testkit.NewTestKit(t, store) diff --git a/planner/core/testdata/join_reorder_suite_in.json b/planner/core/testdata/join_reorder_suite_in.json index 36ba9202b73d3..9fc3a45bb6180 100644 --- a/planner/core/testdata/join_reorder_suite_in.json +++ b/planner/core/testdata/join_reorder_suite_in.json @@ -47,6 +47,21 @@ "select /*+ no_hash_join(t2) */ * from t1 right join t2 on t1.a=t2.a" ] }, + { + "name": "TestOptEnableHashJoin", + "cases": [ + "select * from t1, t2", + "select * from t1, t2 where t1.a=t2.a", + "select * from t1, t2 where t1.b=t2.b", + "select * from t1, t2 where t1.a=t2.a and t1.b=t2.b", + "select * from t1 left join t2 on t1.b=t2.b", + "select * from t1 left join t2 on t1.a=t2.a", + "select * from t1 right join t2 on t1.b=t2.b", + "select * from t1 right join t2 on t1.a=t2.a", + "select /*+ hash_join(t1) */ * from t1, t2", + "select /*+ hash_join(t2) */ * from t1, t2" + ] + }, { "name": "TestNoIndexJoinHint", "cases": [ @@ -509,11 +524,11 @@ { "name": "TestAdditionOtherConditionsRemained4OuterJoin", "cases": [ - "SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` LEFT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC", - "SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` RIGHT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC", - "explain format = 'brief' SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` LEFT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC;", - // The where clause should be a Selection out of joins. - "explain format='brief' select * from t left join t1 on t.a=t1.a inner join t2 on t.a=t2.a and t2.c = 100 left join t3 on t2.a=t3.a and t3.b > 1 left join t4 on t2.a = t4.a where (t2.b > 100 or t.a > 10 or t1.b < 10)" + "SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` LEFT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC", + "SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` RIGHT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC", + "explain format = 'brief' SELECT `queries_identifier`.`id`, `queries_identifier`.`name` FROM `queries_identifier` LEFT OUTER JOIN `queries_channel` ON (`queries_identifier`.`id` = `queries_channel`.`identifier_id`) INNER JOIN `queries_program` ON (`queries_identifier`.`id` = `queries_program`.`identifier_id`) WHERE ((`queries_channel`.`id` = 5 AND `queries_program`.`id` = 9) OR `queries_program`.`id` = 8) ORDER BY `queries_identifier`.`id` ASC;", + // The where clause should be a Selection out of joins. + "explain format='brief' select * from t left join t1 on t.a=t1.a inner join t2 on t.a=t2.a and t2.c = 100 left join t3 on t2.a=t3.a and t3.b > 1 left join t4 on t2.a = t4.a where (t2.b > 100 or t.a > 10 or t1.b < 10)" ] } ] diff --git a/planner/core/testdata/join_reorder_suite_out.json b/planner/core/testdata/join_reorder_suite_out.json index 80fc203c6f177..bd6e06d73ac23 100644 --- a/planner/core/testdata/join_reorder_suite_out.json +++ b/planner/core/testdata/join_reorder_suite_out.json @@ -609,7 +609,7 @@ " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Warning": [ - "Warning 1815 Some HASH_JOIN and NO_HASH_JOIN hints conflict, NO_HASH_JOIN is ignored" + "Warning 1815 A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence." ] }, { @@ -622,7 +622,7 @@ " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" ], "Warning": [ - "Warning 1815 Some HASH_JOIN and NO_HASH_JOIN hints conflict, NO_HASH_JOIN is ignored" + "Warning 1815 A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence." ] }, { @@ -747,6 +747,146 @@ } ] }, + { + "Name": "TestOptEnableHashJoin", + "Cases": [ + { + "SQL": "select * from t1, t2", + "Plan": [ + "MergeJoin 100000000.00 root inner join", + "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "└─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Warning": null + }, + { + "SQL": "select * from t1, t2 where t1.a=t2.a", + "Plan": [ + "IndexHashJoin 12487.50 root inner join, inner:IndexLookUp, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)", + "├─TableReader(Build) 9990.00 root data:Selection", + "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─IndexLookUp(Probe) 12487.50 root ", + " ├─Selection(Build) 12487.50 cop[tikv] not(isnull(test.t2.a))", + " │ └─IndexRangeScan 12500.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 12487.50 cop[tikv] table:t2 keep order:false, stats:pseudo" + ], + "Warning": null + }, + { + "SQL": "select * from t1, t2 where t1.b=t2.b", + "Plan": [ + "MergeJoin 12487.50 root inner join, left key:test.t1.b, right key:test.t2.b", + "├─Sort(Build) 9990.00 root test.t2.b", + "│ └─TableReader 9990.00 root data:Selection", + "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t2.b))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "└─Sort(Probe) 9990.00 root test.t1.b", + " └─TableReader 9990.00 root data:Selection", + " └─Selection 9990.00 cop[tikv] not(isnull(test.t1.b))", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Warning": null + }, + { + "SQL": "select * from t1, t2 where t1.a=t2.a and t1.b=t2.b", + "Plan": [ + "IndexHashJoin 12475.01 root inner join, inner:IndexLookUp, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a), eq(test.t1.b, test.t2.b)", + "├─TableReader(Build) 9980.01 root data:Selection", + "│ └─Selection 9980.01 cop[tikv] not(isnull(test.t1.a)), not(isnull(test.t1.b))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─IndexLookUp(Probe) 12475.01 root ", + " ├─Selection(Build) 12487.50 cop[tikv] not(isnull(test.t2.a))", + " │ └─IndexRangeScan 12500.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo", + " └─Selection(Probe) 12475.01 cop[tikv] not(isnull(test.t2.b))", + " └─TableRowIDScan 12487.50 cop[tikv] table:t2 keep order:false, stats:pseudo" + ], + "Warning": null + }, + { + "SQL": "select * from t1 left join t2 on t1.b=t2.b", + "Plan": [ + "MergeJoin 12487.50 root left outer join, left key:test.t1.b, right key:test.t2.b", + "├─Sort(Build) 9990.00 root test.t2.b", + "│ └─TableReader 9990.00 root data:Selection", + "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t2.b))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "└─Sort(Probe) 10000.00 root test.t1.b", + " └─TableReader 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Warning": null + }, + { + "SQL": "select * from t1 left join t2 on t1.a=t2.a", + "Plan": [ + "IndexHashJoin 12487.50 root left outer join, inner:IndexLookUp, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)", + "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─IndexLookUp(Probe) 12487.50 root ", + " ├─Selection(Build) 12487.50 cop[tikv] not(isnull(test.t2.a))", + " │ └─IndexRangeScan 12500.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 12487.50 cop[tikv] table:t2 keep order:false, stats:pseudo" + ], + "Warning": null + }, + { + "SQL": "select * from t1 right join t2 on t1.b=t2.b", + "Plan": [ + "MergeJoin 12487.50 root right outer join, left key:test.t1.b, right key:test.t2.b", + "├─Sort(Build) 9990.00 root test.t1.b", + "│ └─TableReader 9990.00 root data:Selection", + "│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.b))", + "│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo", + "└─Sort(Probe) 10000.00 root test.t2.b", + " └─TableReader 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo" + ], + "Warning": null + }, + { + "SQL": "select * from t1 right join t2 on t1.a=t2.a", + "Plan": [ + "IndexHashJoin 12487.50 root right outer join, inner:IndexLookUp, outer key:test.t2.a, inner key:test.t1.a, equal cond:eq(test.t2.a, test.t1.a)", + "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "└─IndexLookUp(Probe) 12487.50 root ", + " ├─Selection(Build) 12487.50 cop[tikv] not(isnull(test.t1.a))", + " │ └─IndexRangeScan 12500.00 cop[tikv] table:t1, index:a(a) range: decided by [eq(test.t1.a, test.t2.a)], keep order:false, stats:pseudo", + " └─TableRowIDScan(Probe) 12487.50 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Warning": null + }, + { + "SQL": "select /*+ hash_join(t1) */ * from t1, t2", + "Plan": [ + "HashJoin 100000000.00 root CARTESIAN inner join", + "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "└─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Warning": [ + "Warning 1815 A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence." + ] + }, + { + "SQL": "select /*+ hash_join(t2) */ * from t1, t2", + "Plan": [ + "HashJoin 100000000.00 root CARTESIAN inner join", + "├─TableReader(Build) 10000.00 root data:TableFullScan", + "│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo", + "└─TableReader(Probe) 10000.00 root data:TableFullScan", + " └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo" + ], + "Warning": [ + "Warning 1815 A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence." + ] + } + ] + }, { "Name": "TestNoIndexJoinHint", "Cases": [ diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go index 31e012d03ad26..372efad7724ef 100644 --- a/sessionctx/variable/session.go +++ b/sessionctx/variable/session.go @@ -895,6 +895,9 @@ type SessionVars struct { // EnableOuterJoinWithJoinReorder enables TiDB to involve the outer join into the join reorder. EnableOuterJoinReorder bool + // DisableHashJoin indicates whether to disable hash join. + DisableHashJoin bool + // OptimizerEnableNAAJ enables TiDB to use null-aware anti join. OptimizerEnableNAAJ bool diff --git a/sessionctx/variable/sysvar.go b/sessionctx/variable/sysvar.go index c095d7e8a3d1c..e6cccb3345f09 100644 --- a/sessionctx/variable/sysvar.go +++ b/sessionctx/variable/sysvar.go @@ -234,6 +234,10 @@ var defaultSysVars = []*SysVar{ s.EnableOuterJoinReorder = TiDBOptOn(val) return nil }}, + {Scope: ScopeGlobal | ScopeSession, Name: TiDBOptEnableHashJoin, Value: BoolToOnOff(DefTiDBOptEnableHashJoin), Type: TypeBool, SetSession: func(s *SessionVars, val string) error { + s.DisableHashJoin = !TiDBOptOn(val) + return nil + }}, {Scope: ScopeGlobal | ScopeSession, Name: TiDBOptimizerEnableNAAJ, Value: BoolToOnOff(DefTiDBEnableNAAJ), Type: TypeBool, SetSession: func(s *SessionVars, val string) error { s.OptimizerEnableNAAJ = TiDBOptOn(val) return nil diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index 086f32a685839..77e73b0a95647 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -505,6 +505,9 @@ const ( // we'll choose a rather time-consuming algorithm to calculate the join order. TiDBOptJoinReorderThreshold = "tidb_opt_join_reorder_threshold" + // TiDBOptEnableHashJoin indicates whether to enable hash join. + TiDBOptEnableHashJoin = "tidb_opt_enable_hash_join" + // TiDBSlowQueryFile indicates which slow query log file for SLOW_QUERY table to parse. TiDBSlowQueryFile = "tidb_slow_query_file" @@ -982,6 +985,7 @@ const ( DefTiDBOptimizerSelectivityLevel = 0 DefTiDBOptimizerEnableNewOFGB = false DefTiDBEnableOuterJoinReorder = true + DefTiDBOptEnableHashJoin = true DefTiDBEnableNAAJ = false DefTiDBAllowBatchCop = 1 DefTiDBAllowMPPExecution = true