Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: variable tidb_opt_enable_hash_join to skip hash join #46575

Merged
merged 17 commits into from
Sep 26, 2023
Merged
2 changes: 1 addition & 1 deletion planner/core/casetest/rule/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ go_test(
],
data = glob(["testdata/**"]),
flaky = True,
shard_count = 23,
shard_count = 24,
deps = [
"//domain",
"//expression",
Expand Down
13 changes: 13 additions & 0 deletions planner/core/casetest/rule/rule_join_reorder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,19 @@ func TestNoHashJoinHint(t *testing.T) {
runJoinReorderTestData(t, tk, "TestNoHashJoinHint")
}

// test the global/session variable tidb_opt_enable_hash_join being set to no
func TestOptEnableHashJoin(t *testing.T) {
store := testkit.CreateMockStore(t)
tk := testkit.NewTestKit(t, store)
tk.MustExec("use test")
tk.MustExec("set tidb_opt_enable_hash_join=off")
tk.MustExec("create table t1(a int, b int, key(a));")
tk.MustExec("create table t2(a int, b int, key(a));")
tk.MustExec("create table t3(a int, b int, key(a));")
tk.MustExec("create table t4(a int, b int, key(a));")
runJoinReorderTestData(t, tk, "TestOptEnableHashJoin")
}

func TestNoMergeJoinHint(t *testing.T) {
store := testkit.CreateMockStore(t)
tk := testkit.NewTestKit(t, store)
Expand Down
15 changes: 15 additions & 0 deletions planner/core/casetest/rule/testdata/join_reorder_suite_in.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,21 @@
"select /*+ leading(t1, t2, t3, t4), hash_join(t1, t2), no_hash_join(t3), hash_join(t4) */ * from t1, t2, t3, t4"
]
},
{
"name": "TestOptEnableHashJoin",
"cases": [
"select * from t1, t2",
"select * from t1, t2 where t1.a=t2.a",
"select * from t1, t2 where t1.b=t2.b",
"select * from t1, t2 where t1.a=t2.a and t1.b=t2.b",
"select * from t1 left join t2 on t1.b=t2.b",
"select * from t1 left join t2 on t1.a=t2.a",
"select * from t1 right join t2 on t1.b=t2.b",
"select * from t1 right join t2 on t1.a=t2.a",
"select /*+ hash_join(t1) */ * from t1, t2",
"select /*+ hash_join(t2) */ * from t1, t2"
]
},
{
"name": "TestNoIndexJoinHint",
"cases": [
Expand Down
145 changes: 143 additions & 2 deletions planner/core/casetest/rule/testdata/join_reorder_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -609,7 +609,7 @@
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": [
"Warning 1815 Some HASH_JOIN and NO_HASH_JOIN hints conflict, NO_HASH_JOIN is ignored"
"Warning 1815 A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence."
]
},
{
Expand All @@ -622,7 +622,7 @@
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": [
"Warning 1815 Some HASH_JOIN and NO_HASH_JOIN hints conflict, NO_HASH_JOIN is ignored"
"Warning 1815 A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence."
]
},
{
Expand Down Expand Up @@ -782,6 +782,147 @@
}
]
},
{
"Name": "TestOptEnableHashJoin",
"Cases": [
{
"SQL": "select * from t1, t2",
"Plan": [
"MergeJoin 100000000.00 root inner join",
"├─TableReader(Build) 10000.00 root data:TableFullScan",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
"└─TableReader(Probe) 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1, t2 where t1.a=t2.a",
"Plan": [
"IndexHashJoin 12487.50 root inner join, inner:IndexLookUp, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)",
"├─TableReader(Build) 9990.00 root data:Selection",
"│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a))",
"│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 12487.50 root ",
" ├─Selection(Build) 12487.50 cop[tikv] not(isnull(test.t2.a))",
" │ └─IndexRangeScan 12500.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo",
" └─TableRowIDScan(Probe) 12487.50 cop[tikv] table:t2 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1, t2 where t1.b=t2.b",
"Plan": [
"MergeJoin 12487.50 root inner join, left key:test.t1.b, right key:test.t2.b",
"├─Sort(Build) 9990.00 root test.t2.b",
"│ └─TableReader 9990.00 root data:Selection",
"│ └─Selection 9990.00 cop[tikv] not(isnull(test.t2.b))",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
"└─Sort(Probe) 9990.00 root test.t1.b",
" └─TableReader 9990.00 root data:Selection",
" └─Selection 9990.00 cop[tikv] not(isnull(test.t1.b))",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1, t2 where t1.a=t2.a and t1.b=t2.b",
"Plan": [
"IndexHashJoin 12475.01 root inner join, inner:IndexLookUp, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a), eq(test.t1.b, test.t2.b)",
"├─TableReader(Build) 9980.01 root data:Selection",
"│ └─Selection 9980.01 cop[tikv] not(isnull(test.t1.a)), not(isnull(test.t1.b))",
"│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 12475.01 root ",
" ├─Selection(Build) 12487.50 cop[tikv] not(isnull(test.t2.a))",
" │ └─IndexRangeScan 12500.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo",
" └─Selection(Probe) 12475.01 cop[tikv] not(isnull(test.t2.b))",
" └─TableRowIDScan 12487.50 cop[tikv] table:t2 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1 left join t2 on t1.b=t2.b",
"Plan": [
"MergeJoin 12487.50 root left outer join, left key:test.t1.b, right key:test.t2.b",
"├─Sort(Build) 9990.00 root test.t2.b",
"│ └─TableReader 9990.00 root data:Selection",
"│ └─Selection 9990.00 cop[tikv] not(isnull(test.t2.b))",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
"└─Sort(Probe) 10000.00 root test.t1.b",
" └─TableReader 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1 left join t2 on t1.a=t2.a",
"Plan": [
"IndexHashJoin 12487.50 root left outer join, inner:IndexLookUp, outer key:test.t1.a, inner key:test.t2.a, equal cond:eq(test.t1.a, test.t2.a)",
"├─TableReader(Build) 10000.00 root data:TableFullScan",
"│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 12487.50 root ",
" ├─Selection(Build) 12487.50 cop[tikv] not(isnull(test.t2.a))",
" │ └─IndexRangeScan 12500.00 cop[tikv] table:t2, index:a(a) range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo",
" └─TableRowIDScan(Probe) 12487.50 cop[tikv] table:t2 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1 right join t2 on t1.b=t2.b",
"Plan": [
"MergeJoin 12487.50 root right outer join, left key:test.t1.b, right key:test.t2.b",
"├─Sort(Build) 9990.00 root test.t1.b",
"│ └─TableReader 9990.00 root data:Selection",
"│ └─Selection 9990.00 cop[tikv] not(isnull(test.t1.b))",
"│ └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo",
"└─Sort(Probe) 10000.00 root test.t2.b",
" └─TableReader 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select * from t1 right join t2 on t1.a=t2.a",
"Plan": [
"IndexHashJoin 12487.50 root right outer join, inner:IndexLookUp, outer key:test.t2.a, inner key:test.t1.a, equal cond:eq(test.t2.a, test.t1.a)",
"├─TableReader(Build) 10000.00 root data:TableFullScan",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
"└─IndexLookUp(Probe) 12487.50 root ",
" ├─Selection(Build) 12487.50 cop[tikv] not(isnull(test.t1.a))",
" │ └─IndexRangeScan 12500.00 cop[tikv] table:t1, index:a(a) range: decided by [eq(test.t1.a, test.t2.a)], keep order:false, stats:pseudo",
" └─TableRowIDScan(Probe) 12487.50 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": null
},
{
"SQL": "select /*+ hash_join(t1) */ * from t1, t2",
"Plan": [
"HashJoin 100000000.00 root CARTESIAN inner join",
"├─TableReader(Build) 10000.00 root data:TableFullScan",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
"└─TableReader(Probe) 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": [
"Warning 1815 A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence."
]
},
{
"SQL": "select /*+ hash_join(t2) */ * from t1, t2",
"Plan": [
"HashJoin 100000000.00 root CARTESIAN inner join",
"├─TableReader(Build) 10000.00 root data:TableFullScan",
"│ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
"└─TableReader(Probe) 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Warning": [
"Warning 1815 A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence."
]
}
]
},

{
"Name": "TestNoIndexJoinHint",
"Cases": [
Expand Down
14 changes: 9 additions & 5 deletions planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ func (p *LogicalJoin) GetMergeJoin(prop *property.PhysicalProperty, schema *expr
// If TiDB_SMJ hint is existed, it should consider enforce merge join,
// because we can't trust lhsChildProperty completely.
if (p.preferJoinType&preferMergeJoin) > 0 ||
(p.preferJoinType&preferNoHashJoin) > 0 { // if hash join is not allowed, generate as many other types of join as possible to avoid 'cant-find-plan' error.
p.shouldSkipHashJoin() { // if hash join is not allowed, generate as many other types of join as possible to avoid 'cant-find-plan' error.
joins = append(joins, p.getEnforcedMergeJoin(prop, schema, statsInfo)...)
}

Expand Down Expand Up @@ -391,6 +391,10 @@ var ForceUseOuterBuild4Test = atomic.NewBool(false)
// TODO: use hint and remove this variable
var ForcedHashLeftJoin4Test = atomic.NewBool(false)

func (p *LogicalJoin) shouldSkipHashJoin() bool {
return (p.preferJoinType&preferNoHashJoin) > 0 || (!p.SCtx().GetSessionVars().EnableHashJoin)
}

func (p *LogicalJoin) getHashJoins(prop *property.PhysicalProperty) (joins []PhysicalPlan, forced bool) {
if !prop.IsSortItemEmpty() { // hash join doesn't promise any orders
return
Expand Down Expand Up @@ -451,12 +455,12 @@ func (p *LogicalJoin) getHashJoins(prop *property.PhysicalProperty) (joins []Phy
}

forced = (p.preferJoinType&preferHashJoin > 0) || forceLeftToBuild || forceRightToBuild
noHashJoin := (p.preferJoinType & preferNoHashJoin) > 0
if !forced && noHashJoin {
if !forced && p.shouldSkipHashJoin() {
return nil, false
} else if forced && noHashJoin {
} else if forced && p.shouldSkipHashJoin() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another problem that needs to solve is to avoid no-plan error.
In the case below, after disabling Hash Join, the optimizer can't find a plan for this query.

mysql> explain select /*+ no_merge_join(t1) */ * from t1, t2 where t1.a=t2.a;
+------------------------------+----------+-----------+---------------+----------------------------------------------+
| id                           | estRows  | task      | access object | operator info                                |
+------------------------------+----------+-----------+---------------+----------------------------------------------+
| HashJoin_8                   | 12487.50 | root      |               | inner join, equal:[eq(test.t1.a, test.t2.a)] |
| ├─TableReader_15(Build)      | 9990.00  | root      |               | data:Selection_14                            |
| │ └─Selection_14             | 9990.00  | cop[tikv] |               | not(isnull(test.t2.a))                       |
| │   └─TableFullScan_13       | 10000.00 | cop[tikv] | table:t2      | keep order:false, stats:pseudo               |
| └─TableReader_12(Probe)      | 9990.00  | root      |               | data:Selection_11                            |
|   └─Selection_11             | 9990.00  | cop[tikv] |               | not(isnull(test.t1.a))                       |
|     └─TableFullScan_10       | 10000.00 | cop[tikv] | table:t1      | keep order:false, stats:pseudo               |
+------------------------------+----------+-----------+---------------+----------------------------------------------+
7 rows in set (0.00 sec)

mysql> set tidb_opt_enable_hash_join=off;
Query OK, 0 rows affected (0.01 sec)

mysql> explain select /*+ no_merge_join(t1) */ * from t1, t2 where t1.a=t2.a;
ERROR 1815 (HY000): Internal : Can't find a proper physical plan for this query

Can we implement this in LogicalJoin.exhaustPhysicalPlans where we can know whether there is any other possible join plans?
Then to implement it in this way:

func (p *LogicalJoin) exhaustPhysicalPlans(...) {
    ...
	if noHashJoin && len(otherJoins) == 0 {
        hashJoins := p.getHashJoins(prop)
        warning("no hash join can not be applied")
        return hashJoins
    } else {
        ...
    }
}

Copy link
Contributor Author

@coderplay coderplay Sep 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This appears to be the behavior we actually want, doesn't it? If a user explicitly disables hash join and also disable merge join, they should expect such consequences. Why should tidb_opt_enable_hash_join compromise here instead of no_merge_join? Letting it fail seems to be the more predictable outcome; otherwise, users might not be aware of this implicit behavior, right?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, then we'd better add this into the user doc later on just like what we've done for hints (https://docs.pingcap.com/tidb/dev/optimizer-hints#using-hints-causes-the-cant-find-a-proper-physical-plan-for-this-query-error).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am creating another PR for the doc.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

p.SCtx().GetSessionVars().StmtCtx.AppendWarning(ErrInternal.GenWithStack(
"Some HASH_JOIN and NO_HASH_JOIN hints conflict, NO_HASH_JOIN is ignored"))
"A conflict between the HASH_JOIN hint and the NO_HASH_JOIN hint, " +
"or the tidb_opt_enable_hash_join system variable, the HASH_JOIN hint will take precedence."))
}
return
}
Expand Down
1 change: 1 addition & 0 deletions planner/core/optimizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ type logicalOptRule interface {
func BuildLogicalPlanForTest(ctx context.Context, sctx sessionctx.Context, node ast.Node, infoSchema infoschema.InfoSchema) (Plan, types.NameSlice, error) {
sctx.GetSessionVars().PlanID.Store(0)
sctx.GetSessionVars().PlanColumnID.Store(0)
sctx.GetSessionVars().EnableHashJoin = true
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Too many CI failures due to the default value of this variable:
image

Can we change the name of the variable from EnableHashJoin to DisableHashJoin and set it as false by default?

  1. for most users, HashJoin should be allowed, so using false as the default value seems safer.
  2. for some tests, the test framework won't use DefEnableHashJoin to initialize sctx.EnableHashJoin, instead it uses the default value of Bool in Golang, which is false; If we use EnableHashJoin, there will be plenty of test failures, changing it to DisableHashJoin can fix this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After discussing with PM, we decided to keep it unchanged as EnableHashJoin.

Copy link
Contributor

@qw4990 qw4990 Sep 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Too many CI failures... For safety, how about using another solution, which is using DisableHashJoin internally, and exposing enable_hash_join to users.

Copy link
Contributor

@qw4990 qw4990 Sep 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Too many CI failures... For safety, how about using another solution, which is using DisableHashJoin internally, and exposing enable_hash_join to users.

@coderplay I've pushed some code to your PR directly (tomorrow is the code freeze deadline of v7.4 ...) by using this approach to solve CI failures.
image

Copy link
Contributor Author

@coderplay coderplay Sep 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for fixing those @qw4990 ! In the future, how can I detect those issues locally before submitting this PR?

builder, _ := NewPlanBuilder().Init(sctx, infoSchema, &utilhint.BlockHintProcessor{})
p, err := builder.Build(ctx, node)
if err != nil {
Expand Down
3 changes: 3 additions & 0 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -1244,6 +1244,9 @@ type SessionVars struct {
// AnalyzeVersion indicates how TiDB collect and use analyzed statistics.
AnalyzeVersion int

// EnableHashjoin indicates whether to enable hash join.
EnableHashJoin bool

// EnableHistoricalStats indicates whether to enable historical statistics.
EnableHistoricalStats bool

Expand Down
4 changes: 4 additions & 0 deletions sessionctx/variable/sysvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -2056,6 +2056,10 @@ var defaultSysVars = []*SysVar{
s.AnalyzeVersion = tidbOptPositiveInt32(val, DefTiDBAnalyzeVersion)
return nil
}},
{Scope: ScopeGlobal | ScopeSession, Name: TiDBOptEnableHashJoin, Value: BoolToOnOff(DefTiDBOptEnableHashJoin), Type: TypeBool, SetSession: func(s *SessionVars, val string) error {
s.EnableHashJoin = TiDBOptOn(val)
return nil
}},
{Scope: ScopeGlobal | ScopeSession, Name: TiDBEnableIndexMergeJoin, Value: BoolToOnOff(DefTiDBEnableIndexMergeJoin), Hidden: true, Type: TypeBool, SetSession: func(s *SessionVars, val string) error {
s.EnableIndexMergeJoin = TiDBOptOn(val)
return nil
Expand Down
4 changes: 4 additions & 0 deletions sessionctx/variable/tidb_vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -906,6 +906,9 @@ const (
// TiDBEnableCheckConstraint indicates whether to enable check constraint feature.
TiDBEnableCheckConstraint = "tidb_enable_check_constraint"

// TiDBOptEnableHashJoin indicates whether to enable hash join.
TiDBOptEnableHashJoin = "tidb_opt_enable_hash_join"

// TiDBOptObjective indicates whether the optimizer should be more stable, predictable or more aggressive.
// Please see comments of SessionVars.OptObjective for details.
TiDBOptObjective = "tidb_opt_objective"
Expand Down Expand Up @@ -1406,6 +1409,7 @@ const (
DefTiDBLockUnchangedKeys = true
DefTiDBEnableCheckConstraint = false
DefTiDBSkipMissingPartitionStats = true
DefTiDBOptEnableHashJoin = true
DefTiDBOptObjective = OptObjectiveModerate
DefTiDBSchemaVersionCacheLimit = 16
)
Expand Down