From 6d337352ea67589ef387ccd290b957a82e5be645 Mon Sep 17 00:00:00 2001 From: Jiarui Li <34512395+Willendless@users.noreply.github.com> Date: Tue, 21 Jun 2022 00:22:36 -0400 Subject: [PATCH] expression: avoid padding 0 when implicitly cast to binary (#35053) close pingcap/tidb#34823 --- ...lation_check_use_collation_disabled.result | 25 +++++++++++++++++++ ...llation_check_use_collation_enabled.result | 25 +++++++++++++++++++ .../t/collation_check_use_collation.test | 23 +++++++++++++++++ expression/builtin_cast.go | 10 ++++++++ planner/core/expression_rewriter.go | 9 +++++++ 5 files changed, 92 insertions(+) diff --git a/cmd/explaintest/r/collation_check_use_collation_disabled.result b/cmd/explaintest/r/collation_check_use_collation_disabled.result index 5b335fba0a59f..06af2890faa8f 100644 --- a/cmd/explaintest/r/collation_check_use_collation_disabled.result +++ b/cmd/explaintest/r/collation_check_use_collation_disabled.result @@ -128,4 +128,29 @@ select col_25 from tbl_2 where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 col_25 select col_25 from tbl_2 use index(primary) where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95); col_25 +drop table if exists t1; +drop table if exists t2; +create table t1(a char(20)); +create table t2(b binary(20), c binary(20)); +insert into t1 value('-1'); +insert into t2 value(0x2D31, 0x67); +insert into t2 value(0x2D31, 0x73); +select a from t1, t2 where t1.a between t2.b and t2.c; +a +select a from t1, t2 where cast(t1.a as binary(20)) between t2.b and t2.c; +a +-1 +-1 +drop table if exists t1; +drop table if exists t2; +create table t1(a char(20)) collate utf8mb4_general_ci; +create table t2(b binary(20), c char(20)) collate utf8mb4_general_ci; +insert into t1 values ('a'); +insert into t2 values (0x0, 'A'); +select * from t1, t2 where t1.a between t2.b and t2.c; +a b c +insert into t1 values ('-1'); +insert into t2 values (0x2d31, ''); +select * from t1, t2 where t1.a in (t2.b, 3); +a b c use test diff --git a/cmd/explaintest/r/collation_check_use_collation_enabled.result b/cmd/explaintest/r/collation_check_use_collation_enabled.result index 7aee634a8f0df..5bf70a6a73a09 100644 --- a/cmd/explaintest/r/collation_check_use_collation_enabled.result +++ b/cmd/explaintest/r/collation_check_use_collation_enabled.result @@ -147,4 +147,29 @@ col_25 select col_25 from tbl_2 use index(primary) where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95); col_25 89 +drop table if exists t1; +drop table if exists t2; +create table t1(a char(20)); +create table t2(b binary(20), c binary(20)); +insert into t1 value('-1'); +insert into t2 value(0x2D31, 0x67); +insert into t2 value(0x2D31, 0x73); +select a from t1, t2 where t1.a between t2.b and t2.c; +a +select a from t1, t2 where cast(t1.a as binary(20)) between t2.b and t2.c; +a +-1 +-1 +drop table if exists t1; +drop table if exists t2; +create table t1(a char(20)) collate utf8mb4_general_ci; +create table t2(b binary(20), c char(20)) collate utf8mb4_general_ci; +insert into t1 values ('a'); +insert into t2 values (0x0, 'A'); +select * from t1, t2 where t1.a between t2.b and t2.c; +a b c +insert into t1 values ('-1'); +insert into t2 values (0x2d31, ''); +select * from t1, t2 where t1.a in (t2.b, 3); +a b c use test diff --git a/cmd/explaintest/t/collation_check_use_collation.test b/cmd/explaintest/t/collation_check_use_collation.test index 04d4642656de9..ebaa37588d153 100644 --- a/cmd/explaintest/t/collation_check_use_collation.test +++ b/cmd/explaintest/t/collation_check_use_collation.test @@ -86,5 +86,28 @@ insert ignore into tbl_2 values ( 5888267793391993829,5371,94.63,-109,5728076076 select col_25 from tbl_2 where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95); select col_25 from tbl_2 use index(primary) where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95); +# check implicit binary collation cast +drop table if exists t1; +drop table if exists t2; +# issue 34823 +create table t1(a char(20)); +create table t2(b binary(20), c binary(20)); +insert into t1 value('-1'); +insert into t2 value(0x2D31, 0x67); +insert into t2 value(0x2D31, 0x73); +select a from t1, t2 where t1.a between t2.b and t2.c; +select a from t1, t2 where cast(t1.a as binary(20)) between t2.b and t2.c; +# binary collation in single side +drop table if exists t1; +drop table if exists t2; +create table t1(a char(20)) collate utf8mb4_general_ci; +create table t2(b binary(20), c char(20)) collate utf8mb4_general_ci; +insert into t1 values ('a'); +insert into t2 values (0x0, 'A'); +select * from t1, t2 where t1.a between t2.b and t2.c; +insert into t1 values ('-1'); +insert into t2 values (0x2d31, ''); +select * from t1, t2 where t1.a in (t2.b, 3); + # cleanup environment use test diff --git a/expression/builtin_cast.go b/expression/builtin_cast.go index d52b1aa8c3c20..c281e2de80302 100644 --- a/expression/builtin_cast.go +++ b/expression/builtin_cast.go @@ -29,6 +29,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/tidb/parser/ast" + "github.com/pingcap/tidb/parser/charset" "github.com/pingcap/tidb/parser/model" "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tidb/parser/terror" @@ -1840,6 +1841,15 @@ func BuildCastCollationFunction(ctx sessionctx.Context, expr Expression, ec *Exp } else { return expr } + } else if ec.Charset == charset.CharsetBin { + // When cast character string to binary string, if we still use fixed length representation, + // then 0 padding will be used, which can affect later execution. + // e.g. https://github.com/pingcap/tidb/issues/34823. + // On the other hand, we can not directly return origin expr back, + // since we need binary collation to do string comparison later. + // e.g. https://github.com/pingcap/tidb/pull/35053#discussion_r894155052 + // Here we use VarString type of cast, i.e `cast(a as binary)`, to avoid this problem. + tp.SetType(mysql.TypeVarString) } tp.SetCharset(ec.Charset) tp.SetCollate(ec.Collation) diff --git a/planner/core/expression_rewriter.go b/planner/core/expression_rewriter.go index d96a161afa40c..253fb98dd2a9d 100644 --- a/planner/core/expression_rewriter.go +++ b/planner/core/expression_rewriter.go @@ -1557,6 +1557,7 @@ func (er *expressionRewriter) castCollationForIn(colLen int, elemCnt int, stkLen return } for i := stkLen - elemCnt; i < stkLen; i++ { + // todo: consider refining the code and reusing expression.BuildCollationFunction here if er.ctxStack[i].GetType().EvalType() == types.ETString { rowFunc, ok := er.ctxStack[i].(*expression.ScalarFunction) if ok && rowFunc.FuncName.String() == ast.RowFunc { @@ -1573,6 +1574,14 @@ func (er *expressionRewriter) castCollationForIn(colLen int, elemCnt int, stkLen } else { continue } + } else if coll.Charset == charset.CharsetBin { + // When cast character string to binary string, if we still use fixed length representation, + // then 0 padding will be used, which can affect later execution. + // e.g. https://github.com/pingcap/tidb/pull/35053#pullrequestreview-1008757770 gives an unexpected case. + // On the other hand, we can not directly return origin expr back, + // since we need binary collation to do string comparison later. + // Here we use VarString type of cast, i.e `cast(a as binary)`, to avoid this problem. + tp.SetType(mysql.TypeVarString) } tp.SetCharset(coll.Charset) tp.SetCollate(coll.Collation)