diff --git a/expression/builtin_convert_charset.go b/expression/builtin_convert_charset.go
index df9623dbe34ff..6d3b353c38ecc 100644
--- a/expression/builtin_convert_charset.go
+++ b/expression/builtin_convert_charset.go
@@ -322,7 +322,8 @@ func HandleBinaryLiteral(ctx sessionctx.Context, expr Expression, ec *ExprCollat
 			return expr
 		}
 		return BuildToBinaryFunction(ctx, expr)
-	} else if argChs == charset.CharsetBin && dstChs != charset.CharsetBin {
+	} else if argChs == charset.CharsetBin && dstChs != charset.CharsetBin &&
+		expr.GetType().GetType() != mysql.TypeNull {
 		ft := expr.GetType().Clone()
 		ft.SetCharset(ec.Charset)
 		ft.SetCollate(ec.Collation)
diff --git a/expression/integration_test.go b/expression/integration_test.go
index 5f4028fd6ffe2..1f6b843e4feab 100644
--- a/expression/integration_test.go
+++ b/expression/integration_test.go
@@ -7973,3 +7973,14 @@ func TestIssue41986(t *testing.T) {
 	// shouldn't report they can't find column error and return the right result.
 	tk.MustQuery("SELECT GROUP_CONCAT(effective_date order by stlmnt_hour DESC) FROM ( SELECT (COALESCE(pct.clearing_time, 0)/3600000) AS stlmnt_hour ,COALESCE(pct.effective_date, '1970-01-01 08:00:00') AS effective_date FROM poi_clearing_time_topic pct ORDER BY pct.effective_date DESC ) a;").Check(testkit.Rows("2023-08-25 00:00:00"))
 }
+
+func TestIssue49526(t *testing.T) {
+	store := testkit.CreateMockStore(t)
+	tk := testkit.NewTestKit(t, store)
+
+	rows := tk.MustQuery("explain select null as a union all select 'a' as a;").Rows()
+	for _, r := range rows {
+		require.NotContains(t, r[4], "from_binary")
+	}
+	tk.MustQuery("select null as a union all select 'a' as a;").Sort().Check(testkit.Rows("<nil>", "a"))
+}
diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go
index 28366dd9f4000..15ea71cfdaf98 100644
--- a/planner/core/find_best_task.go
+++ b/planner/core/find_best_task.go
@@ -1061,7 +1061,7 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty, planCounter
 
 		// Batch/PointGet plans may be over-optimized, like `a>=1(?) and a<=1(?)` --> `a=1` --> PointGet(a=1).
 		// For safety, prevent these plans from the plan cache here.
-		if !pointGetTask.invalid() && expression.MaybeOverOptimized4PlanCache(ds.ctx, candidate.path.AccessConds) && !ds.isSafePointGetPlan4PlanCache(candidate.path) {
+		if !pointGetTask.invalid() && expression.MaybeOverOptimized4PlanCache(ds.ctx, candidate.path.AccessConds) && !isSafePointGetPath4PlanCache(ds.ctx, candidate.path) {
 			ds.ctx.GetSessionVars().StmtCtx.SetSkipPlanCache(errors.New("Batch/PointGet plans may be over-optimized"))
 		}
 
@@ -1144,26 +1144,6 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty, planCounter
 	return
 }
 
-func (ds *DataSource) isSafePointGetPlan4PlanCache(path *util.AccessPath) bool {
-	// PointGet might contain some over-optimized assumptions, like `a>=1 and a<=1` --> `a=1`, but
-	// these assumptions may be broken after parameters change.
-
-	// safe scenario 1: each column corresponds to a single EQ, `a=1 and b=2 and c=3` --> `[1, 2, 3]`
-	if len(path.Ranges) > 0 && path.Ranges[0].Width() == len(path.AccessConds) {
-		for _, accessCond := range path.AccessConds {
-			f, ok := accessCond.(*expression.ScalarFunction)
-			if !ok {
-				return false
-			}
-			if f.FuncName.L != ast.EQ {
-				return false
-			}
-		}
-		return true
-	}
-	return false
-}
-
 func (ds *DataSource) convertToIndexMergeScan(prop *property.PhysicalProperty, candidate *candidatePath, _ *physicalOptimizeOp) (task task, err error) {
 	if prop.IsFlashProp() || prop.TaskTp == property.CopSingleReadTaskType || !prop.IsSortItemEmpty() {
 		return invalidTask, nil
diff --git a/planner/core/integration_test.go b/planner/core/integration_test.go
index c39cbf2722d35..cc440ca20953f 100644
--- a/planner/core/integration_test.go
+++ b/planner/core/integration_test.go
@@ -1278,6 +1278,15 @@ func TestIssue15110(t *testing.T) {
 	tk.MustExec("set @@session.tidb_isolation_read_engines = 'tiflash'")
 	tk.MustExec("explain format = 'brief' SELECT count(*) FROM crm_rd_150m dataset_48 WHERE (CASE WHEN (month(dataset_48.customer_first_date)) <= 30 THEN '新客' ELSE NULL END) IS NOT NULL;")
+
+	// for #49616
+	tk.MustExec(`use test`)
+	tk.MustExec("set @@session.tidb_isolation_read_engines = 'tikv'")
+	tk.MustExec(`create table t1 (k int, a int)`)
+	tk.MustExec(`create table t2 (k int, b int, key(k))`)
+	tk.MustUseIndex(`select /*+ tidb_inlj(t2, t1) */ *
+		from t2 left join t1 on t1.k=t2.k
+		where a>0 or (a=0 and b>0)`, "k")
 }
 
 func TestReadFromStorageHint(t *testing.T) {
diff --git a/planner/core/plan_cache.go b/planner/core/plan_cache.go
index 958f65f44c75a..b84ed21320509 100644
--- a/planner/core/plan_cache.go
+++ b/planner/core/plan_cache.go
@@ -438,7 +438,7 @@ func rebuildRange(p Plan) error {
 		if err != nil {
 			return err
 		}
-		if len(ranges.Ranges) == 0 || len(ranges.AccessConds) != len(x.AccessConditions) {
+		if len(ranges.Ranges) != 1 || len(ranges.AccessConds) != len(x.AccessConditions) {
 			return errors.New("failed to rebuild range: the length of the range has changed")
 		}
 		for i := range x.IndexValues {
@@ -456,7 +456,7 @@ func rebuildRange(p Plan) error {
 		if err != nil {
 			return err
 		}
-		if len(ranges) == 0 {
+		if len(ranges) != 1 {
 			return errors.New("failed to rebuild range: the length of the range has changed")
 		}
 		x.Handle = kv.IntHandle(ranges[0].LowVal[0].GetInt64())
diff --git a/planner/core/plan_cache_test.go b/planner/core/plan_cache_test.go
index 0d73bef965d81..3000b34de657a 100644
--- a/planner/core/plan_cache_test.go
+++ b/planner/core/plan_cache_test.go
@@ -544,3 +544,47 @@ func TestIssue42150(t *testing.T) {
 	tk.MustExec("execute st")
 	tk.MustQuery("select @@last_plan_from_cache").Check(testkit.Rows("1"))
 }
+
+func TestIssue44830(t *testing.T) {
+	store := testkit.CreateMockStore(t)
+	tk := testkit.NewTestKit(t, store)
+	tk.MustExec(`use test`)
+	tk.MustExec(`set @@tidb_opt_fix_control = "44830:ON"`)
+	tk.MustExec(`create table t (a int, primary key(a))`)
+	tk.MustExec(`create table t1 (a int, b int, primary key(a, b))`) // multiple-column primary key
+	tk.MustExec(`insert into t values (1), (2), (3)`)
+	tk.MustExec(`insert into t1 values (1, 1), (2, 2), (3, 3)`)
+	tk.MustExec(`set @a=1, @b=2, @c=3`)
+
+	// single-column primary key cases
+	tk.MustExec(`prepare st from 'select * from t where 1=1 and a in (?, ?, ?)'`)
+	tk.MustQuery(`execute st using @a, @b, @c`).Sort().Check(testkit.Rows("1", "2", "3"))
+	tk.MustQuery(`execute st using @a, @b, @c`).Sort().Check(testkit.Rows("1", "2", "3"))
+	tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("1"))
+	tk.MustQuery(`execute st using @a, @b, @b`).Sort().Check(testkit.Rows("1", "2"))
+	tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // range length changed
+	tk.MustQuery(`execute st using @b, @b, @b`).Sort().Check(testkit.Rows("2"))
+	tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // range length changed
+	tk.MustQuery(`execute st using @a, @b, @c`).Sort().Check(testkit.Rows("1", "2", "3"))
+	tk.MustQuery(`execute st using @a, @b, @b`).Sort().Check(testkit.Rows("1", "2"))
+	tk.MustQuery(`execute st using @a, @b, @b`).Sort().Check(testkit.Rows("1", "2"))
+	tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // contain duplicated values in the in-list
+
+	// multi-column primary key cases
+	tk.MustExec(`prepare st from 'select * from t1 where 1=1 and (a, b) in ((?, ?), (?, ?), (?, ?))'`)
+	tk.MustQuery(`execute st using @a, @a, @b, @b, @c, @c`).Sort().Check(testkit.Rows("1 1", "2 2", "3 3"))
+	tk.MustQuery(`execute st using @a, @a, @b, @b, @c, @c`).Sort().Check(testkit.Rows("1 1", "2 2", "3 3"))
+	tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("1"))
+	tk.MustQuery(`execute st using @a, @a, @b, @b, @b, @b`).Sort().Check(testkit.Rows("1 1", "2 2"))
+	tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // range length changed
+	tk.MustQuery(`execute st using @b, @b, @b, @b, @b, @b`).Sort().Check(testkit.Rows("2 2"))
+	tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // range length changed
+	tk.MustQuery(`execute st using @b, @b, @b, @b, @c, @c`).Sort().Check(testkit.Rows("2 2", "3 3"))
+	tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // range length changed
+	tk.MustQuery(`execute st using @a, @a, @a, @a, @a, @a`).Sort().Check(testkit.Rows("1 1"))
+	tk.MustQuery(`execute st using @a, @a, @a, @a, @a, @a`).Sort().Check(testkit.Rows("1 1"))
+	tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // contain duplicated values in the in-list
+	tk.MustQuery(`execute st using @a, @a, @b, @b, @b, @b`).Sort().Check(testkit.Rows("1 1", "2 2"))
+	tk.MustQuery(`execute st using @a, @a, @b, @b, @b, @b`).Sort().Check(testkit.Rows("1 1", "2 2"))
+	tk.MustQuery(`select @@last_plan_from_cache`).Check(testkit.Rows("0")) // contain duplicated values in the in-list
+}
diff --git a/planner/core/plan_cache_utils.go b/planner/core/plan_cache_utils.go
index ba35b76afc87d..c8fbfa4cc6daf 100644
--- a/planner/core/plan_cache_utils.go
+++ b/planner/core/plan_cache_utils.go
@@ -22,11 +22,13 @@ import (
 	"unsafe"
 
 	"github.com/pingcap/errors"
+	"github.com/pingcap/tidb/expression"
 	"github.com/pingcap/tidb/kv"
 	"github.com/pingcap/tidb/parser"
 	"github.com/pingcap/tidb/parser/ast"
 	"github.com/pingcap/tidb/parser/model"
 	"github.com/pingcap/tidb/parser/mysql"
+	"github.com/pingcap/tidb/planner/util"
 	"github.com/pingcap/tidb/sessionctx"
 	"github.com/pingcap/tidb/sessionctx/stmtctx"
 	"github.com/pingcap/tidb/sessionctx/variable"
@@ -458,3 +460,90 @@ func GetPreparedStmt(stmt *ast.ExecuteStmt, vars *variable.SessionVars) (*PlanCa
 	}
 	return nil, ErrStmtNotFound
 }
+
+func isSafePointGetPath4PlanCache(sctx sessionctx.Context, path *util.AccessPath) bool {
+	// PointGet might contain some over-optimized assumptions, like `a>=1 and a<=1` --> `a=1`, but
+	// these assumptions may be broken after parameters change.
+
+	if isSafePointGetPath4PlanCacheScenario1(path) {
+		return true
+	}
+
+	// TODO: enable this fix control switch by default after more test cases are added.
+	if sctx != nil && sctx.GetSessionVars() != nil && sctx.GetSessionVars().OptimizerFixControl != nil {
+		v, ok := sctx.GetSessionVars().OptimizerFixControl[variable.TiDBOptFixControl44830]
+		if ok && variable.TiDBOptOn(v) && (isSafePointGetPath4PlanCacheScenario2(path) || isSafePointGetPath4PlanCacheScenario3(path)) {
+			return true
+		}
+	}
+
+	return false
+}
+
+func isSafePointGetPath4PlanCacheScenario1(path *util.AccessPath) bool {
+	// safe scenario 1: each column corresponds to a single EQ, `a=1 and b=2 and c=3` --> `[1, 2, 3]`
+	if len(path.Ranges) <= 0 || path.Ranges[0].Width() != len(path.AccessConds) {
+		return false
+	}
+	for _, accessCond := range path.AccessConds {
+		f, ok := accessCond.(*expression.ScalarFunction)
+		if !ok || f.FuncName.L != ast.EQ { // column = constant
+			return false
+		}
+	}
+	return true
+}
+
+func isSafePointGetPath4PlanCacheScenario2(path *util.AccessPath) bool {
+	// safe scenario 2: this Batch or PointGet is simply from a single IN predicate, `key in (...)`
+	if len(path.Ranges) <= 0 || len(path.AccessConds) != 1 {
+		return false
+	}
+	f, ok := path.AccessConds[0].(*expression.ScalarFunction)
+	if !ok || f.FuncName.L != ast.In {
+		return false
+	}
+	return len(path.Ranges) == len(f.GetArgs())-1 // no duplicated values in this in-list for safety.
+}
+
+func isSafePointGetPath4PlanCacheScenario3(path *util.AccessPath) bool {
+	// safe scenario 3: this Batch or PointGet is simply from a simple DNF like `key=? or key=? or key=?`
+	if len(path.Ranges) <= 0 || len(path.AccessConds) != 1 {
+		return false
+	}
+	f, ok := path.AccessConds[0].(*expression.ScalarFunction)
+	if !ok || f.FuncName.L != ast.LogicOr {
+		return false
+	}
+
+	dnfExprs := expression.FlattenDNFConditions(f)
+	if len(path.Ranges) != len(dnfExprs) {
+		// no duplicated values in this DNF for safety.
+		// e.g. `k=1 or k=2 or k=1` --> [[1, 1], [2, 2]]
+		return false
+	}
+
+	for _, expr := range dnfExprs {
+		f, ok := expr.(*expression.ScalarFunction)
+		if !ok {
+			return false
+		}
+		switch f.FuncName.L {
+		case ast.EQ: // (k=1 or k=2) --> [k=1, k=2]
+		case ast.LogicAnd: // ((k1=1 and k2=1) or (k1=2 and k2=2)) --> [k1=1 and k2=1, k1=2 and k2=2]
+			cnfExprs := expression.FlattenCNFConditions(f)
+			if path.Ranges[0].Width() != len(cnfExprs) { // not all key columns are specified
+				return false
+			}
+			for _, expr := range cnfExprs { // k1=1 and k2=1
+				f, ok := expr.(*expression.ScalarFunction)
+				if !ok || f.FuncName.L != ast.EQ {
+					return false
+				}
+			}
+		default:
+			return false
+		}
+	}
+	return true
+}
diff --git a/planner/core/rule_predicate_push_down.go b/planner/core/rule_predicate_push_down.go
index 01cd085d5bd0d..6752ae27abecf 100644
--- a/planner/core/rule_predicate_push_down.go
+++ b/planner/core/rule_predicate_push_down.go
@@ -444,6 +444,10 @@ func isNullRejected(ctx sessionctx.Context, schema *expression.Schema, expr expr
 		sc.InNullRejectCheck = false
 	}()
 	for _, cond := range expression.SplitCNFItems(expr) {
+		if isNullRejectedSpecially(ctx, schema, cond) {
+			return true
+		}
+
 		result := expression.EvaluateExprWithNull(ctx, schema, cond)
 		x, ok := result.(*expression.Constant)
 		if !ok {
@@ -458,6 +462,47 @@ func isNullRejected(ctx sessionctx.Context, schema *expression.Schema, expr expr
 	return false
 }
 
+// isNullRejectedSpecially handles some null-rejected cases specially, since the current logic in
+// EvaluateExprWithNull is too strict for some cases, e.g. #49616.
+func isNullRejectedSpecially(ctx sessionctx.Context, schema *expression.Schema, expr expression.Expression) bool {
+	return specialNullRejectedCase1(ctx, schema, expr) // only 1 case now
+}
+
+// specialNullRejectedCase1 is mainly for #49616.
+// Case1 specially handles `null-rejected OR (null-rejected AND {others})`: no matter what the result
+// of `{others}` is (True, False or Null), this predicate cannot be True, so it is null-rejected.
+func specialNullRejectedCase1(ctx sessionctx.Context, schema *expression.Schema, expr expression.Expression) bool {
+	isFunc := func(e expression.Expression, lowerFuncName string) *expression.ScalarFunction {
+		f, ok := e.(*expression.ScalarFunction)
+		if !ok {
+			return nil
+		}
+		if f.FuncName.L == lowerFuncName {
+			return f
+		}
+		return nil
+	}
+	orFunc := isFunc(expr, ast.LogicOr)
+	if orFunc == nil {
+		return false
+	}
+	for i := 0; i < 2; i++ {
+		andFunc := isFunc(orFunc.GetArgs()[i], ast.LogicAnd)
+		if andFunc == nil {
+			continue
+		}
+		if !isNullRejected(ctx, schema, orFunc.GetArgs()[1-i]) {
+			continue // the other side should be null-rejected: null-rejected OR (... AND ...)
+		}
+		for _, andItem := range expression.SplitCNFItems(andFunc) {
+			if isNullRejected(ctx, schema, andItem) {
+				return true // hit the case in the comment: null-rejected OR (null-rejected AND ...)
+			}
+		}
+	}
+	return false
+}
+
 // PredicatePushDown implements LogicalPlan PredicatePushDown interface.
 func (p *LogicalProjection) PredicatePushDown(predicates []expression.Expression, opt *logicalOptimizeOp) (ret []expression.Expression, retPlan LogicalPlan) {
 	canBePushed := make([]expression.Expression, 0, len(predicates))
diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go
index c60d71d1ba107..cc820f2b34d03 100644
--- a/sessionctx/variable/session.go
+++ b/sessionctx/variable/session.go
@@ -1348,6 +1348,9 @@ var (
 	TiDBOptFixControl44855 uint64 = 44855
 	// TiDBOptFixControl46177 controls whether to explore enforced plans for DataSource if it has already found an unenforced plan.
 	TiDBOptFixControl46177 uint64 = 46177
+	// TiDBOptFixControl44830 controls whether to allow caching Batch/PointGet plans from some complex scenarios.
+	// See #44830 for more details.
+	TiDBOptFixControl44830 uint64 = 44830
 )
 
 // GetOptimizerFixControlValue returns the specified value of the optimizer fix control.
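
Illustrative note (not part of the diff): under the 44830 fix control, a Batch/PointGet coming from a single IN predicate (scenario 2 above) may be cached, and the tightened rebuildRange checks only reuse a cached plan when the new parameters still produce the same number of ranges; otherwise the statement is re-planned, as the comments in TestIssue44830 indicate. A minimal SQL sketch of the single-column case, assuming the prepared-plan cache is enabled in the session:

set @@tidb_opt_fix_control = "44830:ON";
create table t (a int, primary key(a));
insert into t values (1), (2), (3);
set @a=1, @b=2, @c=3;
prepare st from 'select * from t where 1=1 and a in (?, ?, ?)';
execute st using @a, @b, @c;   -- three distinct values -> three ranges
execute st using @a, @b, @c;   -- same shape, can be served from the plan cache
select @@last_plan_from_cache; -- expected 1
execute st using @a, @b, @b;   -- duplicated value -> range length changes, plan is rebuilt
select @@last_plan_from_cache; -- expected 0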
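
Illustrative note (not part of the diff): specialNullRejectedCase1 above recognizes predicates of the shape `null-rejected OR (null-rejected AND {others})`. With the inner-side columns set to NULL, neither branch of the OR can evaluate to TRUE, so the predicate is null-rejected and the outer join in the new TestIssue15110 case can be simplified. A minimal SQL sketch of that scenario, reusing the tables from the test:

create table t1 (k int, a int);
create table t2 (k int, b int, key(k));
-- With every t1 column NULL, `a>0` is NULL and `a=0 and b>0` cannot be TRUE,
-- so the predicate is never TRUE for unmatched rows: it is null-rejected for t1,
-- the LEFT JOIN can be simplified, and the index join can use key k on t2.
explain format = 'brief'
select /*+ tidb_inlj(t2, t1) */ *
from t2 left join t1 on t1.k=t2.k
where a>0 or (a=0 and b>0);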