Skip to content

Commit

Permalink
expression: avoid padding 0 when implicitly cast to binary (#35053)
Browse files Browse the repository at this point in the history
close #34823
  • Loading branch information
Willendless authored and pull[bot] committed Sep 20, 2024
1 parent 4ddae7f commit 6d33735
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 0 deletions.
25 changes: 25 additions & 0 deletions cmd/explaintest/r/collation_check_use_collation_disabled.result
Original file line number Diff line number Diff line change
Expand Up @@ -128,4 +128,29 @@ select col_25 from tbl_2 where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27
col_25
select col_25 from tbl_2 use index(primary) where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95);
col_25
drop table if exists t1;
drop table if exists t2;
create table t1(a char(20));
create table t2(b binary(20), c binary(20));
insert into t1 value('-1');
insert into t2 value(0x2D31, 0x67);
insert into t2 value(0x2D31, 0x73);
select a from t1, t2 where t1.a between t2.b and t2.c;
a
select a from t1, t2 where cast(t1.a as binary(20)) between t2.b and t2.c;
a
-1
-1
drop table if exists t1;
drop table if exists t2;
create table t1(a char(20)) collate utf8mb4_general_ci;
create table t2(b binary(20), c char(20)) collate utf8mb4_general_ci;
insert into t1 values ('a');
insert into t2 values (0x0, 'A');
select * from t1, t2 where t1.a between t2.b and t2.c;
a b c
insert into t1 values ('-1');
insert into t2 values (0x2d31, '');
select * from t1, t2 where t1.a in (t2.b, 3);
a b c
use test
25 changes: 25 additions & 0 deletions cmd/explaintest/r/collation_check_use_collation_enabled.result
Original file line number Diff line number Diff line change
Expand Up @@ -147,4 +147,29 @@ col_25
select col_25 from tbl_2 use index(primary) where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95);
col_25
89
drop table if exists t1;
drop table if exists t2;
create table t1(a char(20));
create table t2(b binary(20), c binary(20));
insert into t1 value('-1');
insert into t2 value(0x2D31, 0x67);
insert into t2 value(0x2D31, 0x73);
select a from t1, t2 where t1.a between t2.b and t2.c;
a
select a from t1, t2 where cast(t1.a as binary(20)) between t2.b and t2.c;
a
-1
-1
drop table if exists t1;
drop table if exists t2;
create table t1(a char(20)) collate utf8mb4_general_ci;
create table t2(b binary(20), c char(20)) collate utf8mb4_general_ci;
insert into t1 values ('a');
insert into t2 values (0x0, 'A');
select * from t1, t2 where t1.a between t2.b and t2.c;
a b c
insert into t1 values ('-1');
insert into t2 values (0x2d31, '');
select * from t1, t2 where t1.a in (t2.b, 3);
a b c
use test
23 changes: 23 additions & 0 deletions cmd/explaintest/t/collation_check_use_collation.test
Original file line number Diff line number Diff line change
Expand Up @@ -86,5 +86,28 @@ insert ignore into tbl_2 values ( 5888267793391993829,5371,94.63,-109,5728076076
select col_25 from tbl_2 where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95);
select col_25 from tbl_2 use index(primary) where ( tbl_2.col_27 > 'nSWYrpTH' or not( tbl_2.col_27 between 'CsWIuxlSjU' and 'SfwoyjUEzgg' ) ) and ( tbl_2.col_23 <= -95);

# check implicit binary collation cast
drop table if exists t1;
drop table if exists t2;
# issue 34823
create table t1(a char(20));
create table t2(b binary(20), c binary(20));
insert into t1 value('-1');
insert into t2 value(0x2D31, 0x67);
insert into t2 value(0x2D31, 0x73);
select a from t1, t2 where t1.a between t2.b and t2.c;
select a from t1, t2 where cast(t1.a as binary(20)) between t2.b and t2.c;
# binary collation in single side
drop table if exists t1;
drop table if exists t2;
create table t1(a char(20)) collate utf8mb4_general_ci;
create table t2(b binary(20), c char(20)) collate utf8mb4_general_ci;
insert into t1 values ('a');
insert into t2 values (0x0, 'A');
select * from t1, t2 where t1.a between t2.b and t2.c;
insert into t1 values ('-1');
insert into t2 values (0x2d31, '');
select * from t1, t2 where t1.a in (t2.b, 3);

# cleanup environment
use test
10 changes: 10 additions & 0 deletions expression/builtin_cast.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (

"github.com/pingcap/errors"
"github.com/pingcap/tidb/parser/ast"
"github.com/pingcap/tidb/parser/charset"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/parser/terror"
Expand Down Expand Up @@ -1840,6 +1841,15 @@ func BuildCastCollationFunction(ctx sessionctx.Context, expr Expression, ec *Exp
} else {
return expr
}
} else if ec.Charset == charset.CharsetBin {
// When cast character string to binary string, if we still use fixed length representation,
// then 0 padding will be used, which can affect later execution.
// e.g. https://github.com/pingcap/tidb/issues/34823.
// On the other hand, we can not directly return origin expr back,
// since we need binary collation to do string comparison later.
// e.g. https://github.com/pingcap/tidb/pull/35053#discussion_r894155052
// Here we use VarString type of cast, i.e `cast(a as binary)`, to avoid this problem.
tp.SetType(mysql.TypeVarString)
}
tp.SetCharset(ec.Charset)
tp.SetCollate(ec.Collation)
Expand Down
9 changes: 9 additions & 0 deletions planner/core/expression_rewriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -1557,6 +1557,7 @@ func (er *expressionRewriter) castCollationForIn(colLen int, elemCnt int, stkLen
return
}
for i := stkLen - elemCnt; i < stkLen; i++ {
// todo: consider refining the code and reusing expression.BuildCollationFunction here
if er.ctxStack[i].GetType().EvalType() == types.ETString {
rowFunc, ok := er.ctxStack[i].(*expression.ScalarFunction)
if ok && rowFunc.FuncName.String() == ast.RowFunc {
Expand All @@ -1573,6 +1574,14 @@ func (er *expressionRewriter) castCollationForIn(colLen int, elemCnt int, stkLen
} else {
continue
}
} else if coll.Charset == charset.CharsetBin {
// When cast character string to binary string, if we still use fixed length representation,
// then 0 padding will be used, which can affect later execution.
// e.g. https://github.com/pingcap/tidb/pull/35053#pullrequestreview-1008757770 gives an unexpected case.
// On the other hand, we can not directly return origin expr back,
// since we need binary collation to do string comparison later.
// Here we use VarString type of cast, i.e `cast(a as binary)`, to avoid this problem.
tp.SetType(mysql.TypeVarString)
}
tp.SetCharset(coll.Charset)
tp.SetCollate(coll.Collation)
Expand Down

0 comments on commit 6d33735

Please sign in to comment.