planner: plan cache supports Batch/PointGet converted from (primary keys) in ((...), ...) (#50000)

qw4990 authored Jan 3, 2024
1 parent 96e870e commit b5a2eb8

Showing 9 changed files with 206 additions and 24 deletions.
3 changes: 2 additions & 1 deletion expression/builtin_convert_charset.go
@@ -322,7 +322,8 @@ func HandleBinaryLiteral(ctx sessionctx.Context, expr Expression, ec *ExprCollat
return expr
}
return BuildToBinaryFunction(ctx, expr)
} else if argChs == charset.CharsetBin && dstChs != charset.CharsetBin {
} else if argChs == charset.CharsetBin && dstChs != charset.CharsetBin &&
expr.GetType().GetType() != mysql.TypeNull {
ft := expr.GetType().Clone()
ft.SetCharset(ec.Charset)
ft.SetCollate(ec.Collation)
11 changes: 11 additions & 0 deletions expression/integration_test.go
@@ -7973,3 +7973,14 @@ func TestIssue41986(t *testing.T) {
// shouldn't report they can't find column error and return the right result.
tk.MustQuery("SELECT GROUP_CONCAT(effective_date order by stlmnt_hour DESC) FROM ( SELECT (COALESCE(pct.clearing_time, 0)/3600000) AS stlmnt_hour ,COALESCE(pct.effective_date, '1970-01-01 08:00:00') AS effective_date FROM poi_clearing_time_topic pct ORDER BY pct.effective_date DESC ) a;").Check(testkit.Rows("2023-08-25 00:00:00"))
}

func TestIssue49526(t *testing.T) {
store := testkit.CreateMockStore(t)
tk := testkit.NewTestKit(t, store)

rows := tk.MustQuery("explain select null as a union all select 'a' as a;").Rows()
for _, r := range rows {
require.NotContains(t, r[4], "from_binary")
}
tk.MustQuery("select null as a union all select 'a' as a;").Sort().Check(testkit.Rows("<nil>", "a"))
}
22 changes: 1 addition & 21 deletions planner/core/find_best_task.go
@@ -1061,7 +1061,7 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty, planCounter

// Batch/PointGet plans may be over-optimized, like `a>=1(?) and a<=1(?)` --> `a=1` --> PointGet(a=1).
// For safety, prevent these plans from the plan cache here.
if !pointGetTask.invalid() && expression.MaybeOverOptimized4PlanCache(ds.ctx, candidate.path.AccessConds) && !ds.isSafePointGetPlan4PlanCache(candidate.path) {
if !pointGetTask.invalid() && expression.MaybeOverOptimized4PlanCache(ds.ctx, candidate.path.AccessConds) && !isSafePointGetPath4PlanCache(ds.ctx, candidate.path) {
ds.ctx.GetSessionVars().StmtCtx.SetSkipPlanCache(errors.New("Batch/PointGet plans may be over-optimized"))
}
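
To make the comment above concrete, here is a minimal sketch, not part of this commit; the package, the test name, and the planner choosing PointGet for the first execution are assumptions, and only the standard testkit helpers already used elsewhere in this patch are relied on. With equal parameters, `a>=? and a<=?` collapses to a single point, and that plan must not be reused once the parameters diverge.

package core_test

import (
	"testing"

	"github.com/pingcap/tidb/testkit"
)

// TestOverOptimizedPointGetSketch (hypothetical) shows the hazard the check
// above guards against: the first execution may be planned as PointGet(a=1),
// while the second needs a range scan, so the first plan must not be served
// from the plan cache.
func TestOverOptimizedPointGetSketch(t *testing.T) {
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")
	tk.MustExec("create table t (a int primary key)")
	tk.MustExec("insert into t values (1), (2), (3)")
	tk.MustExec("prepare st from 'select * from t where a>=? and a<=?'")
	tk.MustExec("set @l=1, @r=1")
	tk.MustQuery("execute st using @l, @r").Check(testkit.Rows("1"))
	tk.MustExec("set @l=1, @r=3")
	// Correct results require re-optimization rather than reuse of the point plan.
	tk.MustQuery("execute st using @l, @r").Sort().Check(testkit.Rows("1", "2", "3"))
}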

@@ -1144,26 +1144,6 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty, planCounter
return
}

func (ds *DataSource) isSafePointGetPlan4PlanCache(path *util.AccessPath) bool {
// PointGet might contain some over-optimized assumptions, like `a>=1 and a<=1` --> `a=1`, but
// these assumptions may be broken after parameters change.

// safe scenario 1: each column corresponds to a single EQ, `a=1 and b=2 and c=3` --> `[1, 2, 3]`
if len(path.Ranges) > 0 && path.Ranges[0].Width() == len(path.AccessConds) {
for _, accessCond := range path.AccessConds {
f, ok := accessCond.(*expression.ScalarFunction)
if !ok {
return false
}
if f.FuncName.L != ast.EQ {
return false
}
}
return true
}
return false
}

func (ds *DataSource) convertToIndexMergeScan(prop *property.PhysicalProperty, candidate *candidatePath, _ *physicalOptimizeOp) (task task, err error) {
if prop.IsFlashProp() || prop.TaskTp == property.CopSingleReadTaskType || !prop.IsSortItemEmpty() {
return invalidTask, nil
9 changes: 9 additions & 0 deletions planner/core/integration_test.go
@@ -1278,6 +1278,15 @@ func TestIssue15110(t *testing.T) {

tk.MustExec("set @@session.tidb_isolation_read_engines = 'tiflash'")
tk.MustExec("explain format = 'brief' SELECT count(*) FROM crm_rd_150m dataset_48 WHERE (CASE WHEN (month(dataset_48.customer_first_date)) <= 30 THEN '新客' ELSE NULL END) IS NOT NULL;")

// for #49616
tk.MustExec(`use test`)
tk.MustExec("set @@session.tidb_isolation_read_engines = 'tikv'")
tk.MustExec(`create table t1 (k int, a int)`)
tk.MustExec(`create table t2 (k int, b int, key(k))`)
tk.MustUseIndex(`select /*+ tidb_inlj(t2, t1) */ *
from t2 left join t1 on t1.k=t2.k
where a>0 or (a=0 and b>0)`, "k")
}

func TestReadFromStorageHint(t *testing.T) {
4 changes: 2 additions & 2 deletions planner/core/plan_cache.go
@@ -438,7 +438,7 @@ func rebuildRange(p Plan) error {
if err != nil {
return err
}
if len(ranges.Ranges) == 0 || len(ranges.AccessConds) != len(x.AccessConditions) {
if len(ranges.Ranges) != 1 || len(ranges.AccessConds) != len(x.AccessConditions) {
return errors.New("failed to rebuild range: the length of the range has changed")
}
for i := range x.IndexValues {
@@ -456,7 +456,7 @@ func rebuildRange(p Plan) error {
if err != nil {
return err
}
if len(ranges) == 0 {
if len(ranges) != 1 {
return errors.New("failed to rebuild range: the length of the range has changed")
}
x.Handle = kv.IntHandle(ranges[0].LowVal[0].GetInt64())
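
The stricter `!= 1` checks above are a second line of defence for cached PointGet plans: after rebuilding ranges from the new parameters, anything other than exactly one point means the cached plan no longer matches. A hypothetical sketch (not part of this commit; package and test name are assumptions) of the situation they reject:

package core_test

import (
	"testing"

	"github.com/pingcap/tidb/testkit"
)

// TestRebuildPointRangeSketch (hypothetical): with @x = @y the IN list collapses
// to a single point, while distinct values rebuild to two ranges, which a cached
// PointGet cannot represent. Either way the results must stay correct.
func TestRebuildPointRangeSketch(t *testing.T) {
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")
	tk.MustExec("create table t (a int primary key)")
	tk.MustExec("insert into t values (1), (2)")
	tk.MustExec("prepare st from 'select * from t where a in (?, ?)'")
	tk.MustExec("set @x=1, @y=1")
	tk.MustQuery("execute st using @x, @y").Check(testkit.Rows("1"))
	tk.MustExec("set @x=1, @y=2")
	tk.MustQuery("execute st using @x, @y").Sort().Check(testkit.Rows("1", "2"))
}
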
44 changes: 44 additions & 0 deletions planner/core/plan_cache_test.go
@@ -544,3 +544,47 @@ func TestIssue42150(t *testing.T) {
tk.MustExec("execute st")
tk.MustQuery("select @@last_plan_from_cache").Check(testkit.Rows("1"))
}

func TestIssue44830(t *testing.T) {
store := testkit.CreateMockStore(t)
tk := testkit.NewTestKit(t, store)
tk.MustExec(`use test`)
tk.MustExec(`set @@tidb_opt_fix_control = "44830:ON"`)
tk.MustExec(`create table t (a int, primary key(a))`)
tk.MustExec(`create table t1 (a int, b int, primary key(a, b))`) // multiple-column primary key
tk.MustExec(`insert into t values (1), (2), (3)`)
tk.MustExec(`insert into t1 values (1, 1), (2, 2), (3, 3)`)
tk.MustExec(`set @a=1, @b=2, @c=3`)

// single-column primary key cases
tk.MustExec(`prepare st from 'select * from t where 1=1 and a in (?, ?, ?)'`)
tk.MustQuery(`execute st using @a, @b, @c`).Sort().Check(testkit.Rows("1", "2", "3"))
tk.MustQuery(`execute st using @a, @b, @c`).Sort().Check(testkit.Rows("1", "2", "3"))
tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("1"))
tk.MustQuery(`execute st using @a, @b, @b`).Sort().Check(testkit.Rows("1", "2"))
tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // range length changed
tk.MustQuery(`execute st using @b, @b, @b`).Sort().Check(testkit.Rows("2"))
tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // range length changed
tk.MustQuery(`execute st using @a, @b, @c`).Sort().Check(testkit.Rows("1", "2", "3"))
tk.MustQuery(`execute st using @a, @b, @b`).Sort().Check(testkit.Rows("1", "2"))
tk.MustQuery(`execute st using @a, @b, @b`).Sort().Check(testkit.Rows("1", "2"))
tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // contain duplicated values in the in-list

// multi-column primary key cases
tk.MustExec(`prepare st from 'select * from t1 where 1=1 and (a, b) in ((?, ?), (?, ?), (?, ?))'`)
tk.MustQuery(`execute st using @a, @a, @b, @b, @c, @c`).Sort().Check(testkit.Rows("1 1", "2 2", "3 3"))
tk.MustQuery(`execute st using @a, @a, @b, @b, @c, @c`).Sort().Check(testkit.Rows("1 1", "2 2", "3 3"))
tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("1"))
tk.MustQuery(`execute st using @a, @a, @b, @b, @b, @b`).Sort().Check(testkit.Rows("1 1", "2 2"))
tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // range length changed
tk.MustQuery(`execute st using @b, @b, @b, @b, @b, @b`).Sort().Check(testkit.Rows("2 2"))
tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // range length changed
tk.MustQuery(`execute st using @b, @b, @b, @b, @c, @c`).Sort().Check(testkit.Rows("2 2", "3 3"))
tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // range length changed
tk.MustQuery(`execute st using @a, @a, @a, @a, @a, @a`).Sort().Check(testkit.Rows("1 1"))
tk.MustQuery(`execute st using @a, @a, @a, @a, @a, @a`).Sort().Check(testkit.Rows("1 1"))
tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // contain duplicated values in the in-list
tk.MustQuery(`execute st using @a, @a, @b, @b, @b, @b`).Sort().Check(testkit.Rows("1 1", "2 2"))
tk.MustQuery(`execute st using @a, @a, @b, @b, @b, @b`).Sort().Check(testkit.Rows("1 1", "2 2"))
tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // contain duplicated values in the in-list
}
89 changes: 89 additions & 0 deletions planner/core/plan_cache_utils.go
@@ -22,11 +22,13 @@ import (
"unsafe"

"github.com/pingcap/errors"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/parser"
"github.com/pingcap/tidb/parser/ast"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/planner/util"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/sessionctx/stmtctx"
"github.com/pingcap/tidb/sessionctx/variable"
@@ -458,3 +460,90 @@ func GetPreparedStmt(stmt *ast.ExecuteStmt, vars *variable.SessionVars) (*PlanCa
}
return nil, ErrStmtNotFound
}

func isSafePointGetPath4PlanCache(sctx sessionctx.Context, path *util.AccessPath) bool {
// PointGet might contain some over-optimized assumptions, like `a>=1 and a<=1` --> `a=1`, but
// these assumptions may be broken after parameters change.

if isSafePointGetPath4PlanCacheScenario1(path) {
return true
}

// TODO: enable this fix control switch by default after more test cases are added.
if sctx != nil && sctx.GetSessionVars() != nil && sctx.GetSessionVars().OptimizerFixControl != nil {
v, ok := sctx.GetSessionVars().OptimizerFixControl[variable.TiDBOptFixControl44830]
if ok && variable.TiDBOptOn(v) && (isSafePointGetPath4PlanCacheScenario2(path) || isSafePointGetPath4PlanCacheScenario3(path)) {
return true
}
}

return false
}

func isSafePointGetPath4PlanCacheScenario1(path *util.AccessPath) bool {
// safe scenario 1: each column corresponds to a single EQ, `a=1 and b=2 and c=3` --> `[1, 2, 3]`
if len(path.Ranges) <= 0 || path.Ranges[0].Width() != len(path.AccessConds) {
return false
}
for _, accessCond := range path.AccessConds {
f, ok := accessCond.(*expression.ScalarFunction)
if !ok || f.FuncName.L != ast.EQ { // column = constant
return false
}
}
return true
}

func isSafePointGetPath4PlanCacheScenario2(path *util.AccessPath) bool {
// safe scenario 2: this Batch or PointGet is simply from a single IN predicate, `key in (...)`
if len(path.Ranges) <= 0 || len(path.AccessConds) != 1 {
return false
}
f, ok := path.AccessConds[0].(*expression.ScalarFunction)
if !ok || f.FuncName.L != ast.In {
return false
}
return len(path.Ranges) == len(f.GetArgs())-1 // no duplicated values in this in-list for safety.
}

func isSafePointGetPath4PlanCacheScenario3(path *util.AccessPath) bool {
// safe scenario 3: this Batch or PointGet is simply from a simple DNF like `key=? or key=? or key=?`
if len(path.Ranges) <= 0 || len(path.AccessConds) != 1 {
return false
}
f, ok := path.AccessConds[0].(*expression.ScalarFunction)
if !ok || f.FuncName.L != ast.LogicOr {
return false
}

dnfExprs := expression.FlattenDNFConditions(f)
if len(path.Ranges) != len(dnfExprs) {
// no duplicated values in this in-list for safety.
// e.g. `k=1 or k=2 or k=1` --> [[1, 1], [2, 2]]
return false
}

for _, expr := range dnfExprs {
f, ok := expr.(*expression.ScalarFunction)
if !ok {
return false
}
switch f.FuncName.L {
case ast.EQ: // (k=1 or k=2) --> [k=1, k=2]
case ast.LogicAnd: // ((k1=1 and k2=1) or (k1=2 and k2=2)) --> [k1=1 and k2=1, k1=2 and k2=2]
cnfExprs := expression.FlattenCNFConditions(f)
if path.Ranges[0].Width() != len(cnfExprs) { // not all key columns are specified
return false
}
for _, expr := range cnfExprs { // k1=1 and k2=1
f, ok := expr.(*expression.ScalarFunction)
if !ok || f.FuncName.L != ast.EQ {
return false
}
}
default:
return false
}
}
return true
}
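
Scenario 3 above targets DNF predicates in which every branch pins the whole key. A hedged sketch (not part of this commit; package and test name are assumptions) of that shape, asserting only result correctness since whether the plan is ultimately cached also depends on the surrounding checks:

package core_test

import (
	"testing"

	"github.com/pingcap/tidb/testkit"
)

// TestDNFBatchPointGetSketch (hypothetical): each OR branch fixes every column
// of the primary key, so the predicate maps to a fixed set of point ranges,
// which is the form isSafePointGetPath4PlanCacheScenario3 recognizes under fix
// control 44830.
func TestDNFBatchPointGetSketch(t *testing.T) {
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")
	tk.MustExec(`set @@tidb_opt_fix_control = "44830:ON"`)
	tk.MustExec("create table t1 (a int, b int, primary key(a, b))")
	tk.MustExec("insert into t1 values (1, 1), (2, 2)")
	tk.MustExec("prepare st from 'select * from t1 where (a=? and b=?) or (a=? and b=?)'")
	tk.MustExec("set @a=1, @b=2")
	tk.MustQuery("execute st using @a, @a, @b, @b").Sort().Check(testkit.Rows("1 1", "2 2"))
	tk.MustQuery("execute st using @a, @a, @b, @b").Sort().Check(testkit.Rows("1 1", "2 2"))
}
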
45 changes: 45 additions & 0 deletions planner/core/rule_predicate_push_down.go
@@ -444,6 +444,10 @@ func isNullRejected(ctx sessionctx.Context, schema *expression.Schema, expr expr
sc.InNullRejectCheck = false
}()
for _, cond := range expression.SplitCNFItems(expr) {
if isNullRejectedSpecially(ctx, schema, cond) {
return true
}

result := expression.EvaluateExprWithNull(ctx, schema, cond)
x, ok := result.(*expression.Constant)
if !ok {
@@ -458,6 +462,47 @@ func isNullRejected(ctx sessionctx.Context, schema *expression.Schema, expr expr
return false
}

// isNullRejectedSpecially handles some null-rejected cases specially, since the current check in
// EvaluateExprWithNull is too strict for some cases, e.g. #49616.
func isNullRejectedSpecially(ctx sessionctx.Context, schema *expression.Schema, expr expression.Expression) bool {
return specialNullRejectedCase1(ctx, schema, expr) // only 1 case now
}

// specialNullRejectedCase1 is mainly for #49616.
// Case1 specially handles `null-rejected OR (null-rejected AND {others})`: no matter what the result
// of `{others}` is (True, False or Null), the whole predicate can never be true, so it is null-rejected.
func specialNullRejectedCase1(ctx sessionctx.Context, schema *expression.Schema, expr expression.Expression) bool {
isFunc := func(e expression.Expression, lowerFuncName string) *expression.ScalarFunction {
f, ok := e.(*expression.ScalarFunction)
if !ok {
return nil
}
if f.FuncName.L == lowerFuncName {
return f
}
return nil
}
orFunc := isFunc(expr, ast.LogicOr)
if orFunc == nil {
return false
}
for i := 0; i < 2; i++ {
andFunc := isFunc(orFunc.GetArgs()[i], ast.LogicAnd)
if andFunc == nil {
continue
}
if !isNullRejected(ctx, schema, orFunc.GetArgs()[1-i]) {
continue // the other side should be null-rejected: null-rejected OR (... AND ...)
}
for _, andItem := range expression.SplitCNFItems(andFunc) {
if isNullRejected(ctx, schema, andItem) {
return true // hit the case in the comment: null-rejected OR (null-rejected AND ...)
}
}
}
return false
}
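
The #49616 predicate `a>0 or (a=0 and b>0)` is a concrete instance of case 1: on a NULL-padded inner-side row both `a>0` and `a=0` are UNKNOWN, so the whole OR can never be TRUE. A small standalone sketch (plain Go, not TiDB code) of that three-valued-logic argument:

package main

import "fmt"

// tri models SQL three-valued logic: TRUE, FALSE, UNKNOWN (NULL).
type tri int

const (
	vFalse tri = iota
	vTrue
	vUnknown
)

func and(x, y tri) tri {
	if x == vFalse || y == vFalse {
		return vFalse
	}
	if x == vTrue && y == vTrue {
		return vTrue
	}
	return vUnknown
}

func or(x, y tri) tri {
	if x == vTrue || y == vTrue {
		return vTrue
	}
	if x == vFalse && y == vFalse {
		return vFalse
	}
	return vUnknown
}

func main() {
	// a is NULL on the NULL-padded outer-join row, so a>0 and a=0 are both UNKNOWN.
	aGT0, aEQ0 := vUnknown, vUnknown
	for _, bGT0 := range []tri{vTrue, vFalse, vUnknown} {
		// `a>0 or (a=0 and b>0)` is never TRUE, hence the predicate is null-rejected.
		fmt.Println(or(aGT0, and(aEQ0, bGT0)) == vTrue) // prints false three times
	}
}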

// PredicatePushDown implements LogicalPlan PredicatePushDown interface.
func (p *LogicalProjection) PredicatePushDown(predicates []expression.Expression, opt *logicalOptimizeOp) (ret []expression.Expression, retPlan LogicalPlan) {
canBePushed := make([]expression.Expression, 0, len(predicates))
3 changes: 3 additions & 0 deletions sessionctx/variable/session.go
@@ -1348,6 +1348,9 @@ var (
TiDBOptFixControl44855 uint64 = 44855
// TiDBOptFixControl46177 controls whether to explore enforced plans for DataSource if it has already found an unenforced plan.
TiDBOptFixControl46177 uint64 = 46177
// TiDBOptFixControl44830 controls whether to allow caching Batch/PointGet plans converted from some complex scenarios.
// See #44830 for more details.
TiDBOptFixControl44830 uint64 = 44830
)

// GetOptimizerFixControlValue returns the specified value of the optimizer fix control.