diff --git a/expression/distsql_builtin.go b/expression/distsql_builtin.go
index 796f3d5e560f8..12fe77ae90fa5 100644
--- a/expression/distsql_builtin.go
+++ b/expression/distsql_builtin.go
@@ -24,7 +24,6 @@ import (
 	"github.com/pingcap/tidb/sessionctx/stmtctx"
 	"github.com/pingcap/tidb/types"
 	"github.com/pingcap/tidb/util/codec"
-	"github.com/pingcap/tidb/util/collate"
 	"github.com/pingcap/tidb/util/mock"
 	"github.com/pingcap/tipb/go-tipb"
 )
@@ -37,7 +36,7 @@ func PbTypeToFieldType(tp *tipb.FieldType) *types.FieldType {
 		Flen:    int(tp.Flen),
 		Decimal: int(tp.Decimal),
 		Charset: tp.Charset,
-		Collate: mysql.Collations[uint8(tp.Collate)],
+		Collate: protoToCollation(tp.Collate),
 	}
 }
 
@@ -1172,7 +1171,7 @@ func convertUint(val []byte) (*Constant, error) {
 
 func convertString(val []byte, tp *tipb.FieldType) (*Constant, error) {
 	var d types.Datum
-	d.SetBytesAsString(val, collate.CollationID2Name(tp.Collate), uint32(tp.Flen))
+	d.SetBytesAsString(val, protoToCollation(tp.Collate), uint32(tp.Flen))
 	return &Constant{Value: d, RetType: types.NewFieldType(mysql.TypeVarString)}, nil
 }
 
diff --git a/expression/distsql_builtin_test.go b/expression/distsql_builtin_test.go
index 2053278fabe53..148420940f171 100644
--- a/expression/distsql_builtin_test.go
+++ b/expression/distsql_builtin_test.go
@@ -24,15 +24,77 @@ import (
 	"github.com/pingcap/tidb/types/json"
 	"github.com/pingcap/tidb/util/chunk"
 	"github.com/pingcap/tidb/util/codec"
+	"github.com/pingcap/tidb/util/collate"
 	"github.com/pingcap/tipb/go-tipb"
 )
 
 var _ = Suite(&testEvalSuite{})
+var _ = SerialSuites(&testEvalSerialSuite{})
 
 type testEvalSuite struct {
 	colID int64
 }
 
+// testEvalSerialSuite groups the tests that toggle the new-collation switch through
+// collate.SetNewCollationEnabledForTest; it is registered with SerialSuites.
+type testEvalSerialSuite struct {
+}
+
+// TestPBToExprWithNewCollation checks the collation of string constants built by PBToExpr.
+// The first pass expects toPBFieldType to emit the plain collation ID (this presumes new
+// collations are disabled on entry); the second pass enables them and expects the negated ID.
+// In both passes the decoded constant must report the expected collation name.
+func (s *testEvalSerialSuite) TestPBToExprWithNewCollation(c *C) {
+	sc := new(stmtctx.StatementContext)
+	fieldTps := make([]*types.FieldType, 1)
+
+	cases := []struct {
+		name    string
+		expName string
+		id      int32
+		pbID    int32
+	}{
+		{"utf8_general_ci", "utf8_general_ci", 33, 33},
+		{"UTF8MB4_BIN", "utf8mb4_bin", 46, 46},
+		{"utf8mb4_bin", "utf8mb4_bin", 46, 46},
+		{"utf8mb4_general_ci", "utf8mb4_general_ci", 45, 45},
+		{"", "utf8mb4_bin", 46, 46},
+		{"some_error_collation", "utf8mb4_bin", 46, 46},
+	}
+
+	for _, cs := range cases {
+		ft := types.NewFieldType(mysql.TypeString)
+		ft.Collate = cs.name
+		expr := new(tipb.Expr)
+		expr.Tp = tipb.ExprType_String
+		expr.FieldType = toPBFieldType(ft)
+		c.Assert(expr.FieldType.Collate, Equals, cs.pbID)
+
+		e, err := PBToExpr(expr, fieldTps, sc)
+		c.Assert(err, IsNil)
+		cons, ok := e.(*Constant)
+		c.Assert(ok, IsTrue)
+		c.Assert(cons.Value.Collation(), Equals, cs.expName)
+	}
+	collate.SetNewCollationEnabledForTest(true)
+	defer collate.SetNewCollationEnabledForTest(false)
+
+	for _, cs := range cases {
+		ft := types.NewFieldType(mysql.TypeString)
+		ft.Collate = cs.name
+		expr := new(tipb.Expr)
+		expr.Tp = tipb.ExprType_String
+		expr.FieldType = toPBFieldType(ft)
+		c.Assert(expr.FieldType.Collate, Equals, -cs.pbID)
+
+		e, err := PBToExpr(expr, fieldTps, sc)
+		c.Assert(err, IsNil)
+		cons, ok := e.(*Constant)
+		c.Assert(ok, IsTrue)
+		c.Assert(cons.Value.Collation(), Equals, cs.expName)
+	}
+}
+
 func (s *testEvalSuite) SetUpSuite(c *C) {
 	s.colID = 0
 }
diff --git a/expression/expr_to_pb.go b/expression/expr_to_pb.go
index d52eedc4b08ea..7f47a630f4704 100644
--- a/expression/expr_to_pb.go
+++ b/expression/expr_to_pb.go
@@ -28,6 +28,7 @@ import (
 	"github.com/pingcap/tidb/types"
 	"github.com/pingcap/tidb/util/chunk"
 	"github.com/pingcap/tidb/util/codec"
+	"github.com/pingcap/tidb/util/collate"
 	"github.com/pingcap/tidb/util/logutil"
 	"github.com/pingcap/tipb/go-tipb"
 	"go.uber.org/zap"
@@ -200,18 +201,39 @@ func FieldTypeFromPB(ft *tipb.FieldType) *types.FieldType {
 }
 
+// collationToProto converts a collation name into the collation ID sent in protobuf field
+// types, passing the ID through collate.RewriteNewCollationIDIfNeeded. A name that is not in
+// mysql.CollationNames falls back to the (rewritten) default collation ID and logs a warning.
 func collationToProto(c string) int32 {
-	v, ok := mysql.CollationNames[c]
-	if ok {
-		return int32(v)
+	if v, ok := mysql.CollationNames[c]; ok {
+		return collate.RewriteNewCollationIDIfNeeded(int32(v))
 	}
-	return int32(mysql.DefaultCollationID)
+	v := collate.RewriteNewCollationIDIfNeeded(int32(mysql.DefaultCollationID))
+	logutil.BgLogger().Warn(
+		"Unable to get collation ID by name, use ID of the default collation instead",
+		zap.String("name", c),
+		zap.Int32("default collation ID", v),
+		zap.String("default collation", mysql.DefaultCollationName),
+	)
+	return v
 }
 
+// protoToCollation converts a collation ID received in a protobuf field type back into a
+// collation name, undoing the rewrite via collate.RestoreCollationIDIfNeeded. An ID that does
+// not resolve to a known collation falls back to mysql.DefaultCollationName and logs a warning.
 func protoToCollation(c int32) string {
-	v, ok := mysql.Collations[uint8(c)]
-	if ok {
-		return v
+	// mysql.Collations is indexed with a uint8 key: range-check before converting so that an
+	// out-of-range ID falls back to the default instead of wrapping to an unrelated collation.
+	if id := collate.RestoreCollationIDIfNeeded(c); id >= 0 && id <= 255 {
+		if v, ok := mysql.Collations[uint8(id)]; ok {
+			return v
+		}
 	}
+	logutil.BgLogger().Warn(
+		"Unable to get collation name from ID, use name of the default collation instead",
+		zap.Int32("id", c),
+		zap.Int("default collation ID", mysql.DefaultCollationID),
+		zap.String("default collation", mysql.DefaultCollationName),
+	)
 	return mysql.DefaultCollationName
 }
 
diff --git a/expression/expr_to_pb_test.go b/expression/expr_to_pb_test.go
index 3374a99cd82e0..9458cd2f1abce 100644
--- a/expression/expr_to_pb_test.go
+++ b/expression/expr_to_pb_test.go
@@ -27,6 +27,7 @@ import (
 	"github.com/pingcap/parser/mysql"
 	"github.com/pingcap/tidb/sessionctx/stmtctx"
 	"github.com/pingcap/tidb/types"
+	"github.com/pingcap/tidb/util/collate"
 	"github.com/pingcap/tidb/util/mock"
 	"github.com/pingcap/tipb/go-tipb"
 )
@@ -673,3 +674,53 @@ func (s *testEvaluatorSerialSuites) TestMetadata(c *C) {
 	c.Assert(err, IsNil)
 	c.Assert(metadata.InUnion, Equals, true)
 }
+
+// columnCollation sets the collation of c's return type to coll and returns the same column,
+// so it can wrap a genColumn call inline.
+func columnCollation(c *Column, coll string) *Column {
+	c.RetType.Collate = coll
+	return c
+}
+
+// TestNewCollationsEnabled checks that, with new collations enabled, the collation IDs of
+// columns converted to protobuf are negated (see the "collate" fields in the expected JSON).
+func (s *testEvaluatorSerialSuites) TestNewCollationsEnabled(c *C) {
+	collate.SetNewCollationEnabledForTest(true)
+	defer collate.SetNewCollationEnabledForTest(false)
+	var colExprs []Expression
+	sc := new(stmtctx.StatementContext)
+	client := new(mock.Client)
+	dg := new(dataGen4Expr2PbTest)
+
+	colExprs = colExprs[:0]
+	colExprs = append(colExprs, dg.genColumn(mysql.TypeVarchar, 1))
+	colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarchar, 2), "some_invalid_collation"))
+	colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarString, 3), "utf8mb4_general_ci"))
+	colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeString, 4), "utf8mb4_0900_ai_ci"))
+	colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarchar, 5), "utf8_bin"))
+	pbExpr, pushed, _ := ExpressionsToPB(sc, colExprs, client)
+	c.Assert(len(pushed), Equals, len(colExprs))
+	js, err := json.Marshal(pbExpr)
+	c.Assert(err, IsNil)
+	c.Assert(string(js), Equals, "{\"tp\":10000,\"children\":[{\"tp\":10000,\"children\":[{\"tp\":10000,\"children\":[{\"tp\":10000,\"children\":[{\"tp\":201,\"val\":\"gAAAAAAAAAE=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-46,\"charset\":\"\"}},{\"tp\":201,\"val\":\"gAAAAAAAAAI=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-46,\"charset\":\"\"}}],\"sig\":3101,\"field_type\":{\"tp\":8,\"flag\":128,\"flen\":1,\"decimal\":0,\"collate\":-63,\"charset\":\"binary\"}},{\"tp\":201,\"val\":\"gAAAAAAAAAM=\",\"sig\":0,\"field_type\":{\"tp\":253,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-45,\"charset\":\"\"}}],\"sig\":3101,\"field_type\":{\"tp\":8,\"flag\":128,\"flen\":1,\"decimal\":0,\"collate\":-63,\"charset\":\"binary\"}},{\"tp\":201,\"val\":\"gAAAAAAAAAQ=\",\"sig\":0,\"field_type\":{\"tp\":254,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-255,\"charset\":\"\"}}],\"sig\":3101,\"field_type\":{\"tp\":8,\"flag\":128,\"flen\":1,\"decimal\":0,\"collate\":-63,\"charset\":\"binary\"}},{\"tp\":201,\"val\":\"gAAAAAAAAAU=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-83,\"charset\":\"\"}}],\"sig\":3101,\"field_type\":{\"tp\":8,\"flag\":128,\"flen\":1,\"decimal\":0,\"collate\":-63,\"charset\":\"binary\"}}")
+	pbExprs := ExpressionsToPBList(sc, colExprs, client)
+	jsons := []string{
+		"{\"tp\":201,\"val\":\"gAAAAAAAAAE=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-46,\"charset\":\"\"}}",
+		"{\"tp\":201,\"val\":\"gAAAAAAAAAI=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-46,\"charset\":\"\"}}",
+		"{\"tp\":201,\"val\":\"gAAAAAAAAAM=\",\"sig\":0,\"field_type\":{\"tp\":253,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-45,\"charset\":\"\"}}",
+		"{\"tp\":201,\"val\":\"gAAAAAAAAAQ=\",\"sig\":0,\"field_type\":{\"tp\":254,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-255,\"charset\":\"\"}}",
+		"{\"tp\":201,\"val\":\"gAAAAAAAAAU=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-83,\"charset\":\"\"}}",
+	}
+	for i, pbExpr := range pbExprs {
+		c.Assert(pbExpr, NotNil)
+		js, err = json.Marshal(pbExpr)
+		c.Assert(err, IsNil)
+		c.Assert(string(js), Equals, jsons[i], Commentf("%v\n", i))
+	}
+
+	item := columnCollation(dg.genColumn(mysql.TypeDouble, 0), "utf8mb4_0900_ai_ci")
+	pbByItem := GroupByItemToPB(sc, client, item)
+	js, err = json.Marshal(pbByItem)
+	c.Assert(err, IsNil)
+	c.Assert(string(js), Equals, "{\"expr\":{\"tp\":201,\"val\":\"gAAAAAAAAAA=\",\"sig\":0,\"field_type\":{\"tp\":5,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-255,\"charset\":\"\"}},\"desc\":false}")
+}
diff --git a/util/collate/collate.go b/util/collate/collate.go
index a99d800b94561..f1280f99eaf7a 100644
--- a/util/collate/collate.go
+++ b/util/collate/collate.go
@@ -18,6 +18,8 @@ import (
 	"sync"
 
 	"github.com/pingcap/parser/mysql"
+	"github.com/pingcap/tidb/util/logutil"
+	"go.uber.org/zap"
 )
 
 var (
@@ -90,6 +92,37 @@ func NewCollationEnabled() bool {
 	return newCollationEnabled
 }
 
+// RewriteNewCollationIDIfNeeded rewrites a collation id if the new collations are enabled.
+// When new collations are enabled, we turn the collation id to negative so that the other
+// components of the cluster (for example, TiKV) are able to be aware of it without any change to
+// the protocol definition. An id that is already negative is logged and returned unchanged.
+// When new collations are not enabled, collation id remains the same.
+func RewriteNewCollationIDIfNeeded(id int32) int32 {
+	if newCollationEnabled {
+		if id < 0 {
+			logutil.BgLogger().Warn("Unexpected negative collation ID for rewrite.", zap.Int32("ID", id))
+		} else {
+			return -id
+		}
+	}
+	return id
+}
+
+// RestoreCollationIDIfNeeded restores a collation id if the new collations are enabled.
+// It is the inverse of RewriteNewCollationIDIfNeeded: when new collations are enabled, a
+// non-positive id is negated back, while an unexpected positive id is logged and returned as is.
+// When new collations are not enabled, collation id remains the same.
+func RestoreCollationIDIfNeeded(id int32) int32 {
+	if newCollationEnabled {
+		if id > 0 {
+			logutil.BgLogger().Warn("Unexpected positive collation ID for restore.", zap.Int32("ID", id))
+		} else {
+			return -id
+		}
+	}
+	return id
+}
+
 // GetCollator get the collator according to collate, it will return the binary collator if the corresponding collator doesn't exist.
 func GetCollator(collate string) Collator {
 	ctor, ok := collatorMap[collate]
diff --git a/util/collate/collate_test.go b/util/collate/collate_test.go
index 5746cec9ce324..7a3658b5c8657 100644
--- a/util/collate/collate_test.go
+++ b/util/collate/collate_test.go
@@ -143,6 +143,20 @@ func (s *testCollateSuite) TestSetNewCollateEnabled(c *C) {
 	c.Assert(NewCollationEnabled(), Equals, true)
 }
 
+func (s *testCollateSuite) TestRewriteAndRestoreCollationID(c *C) {
+	SetNewCollationEnabledForTest(true)
+	c.Assert(RewriteNewCollationIDIfNeeded(5), Equals, int32(-5))
+	c.Assert(RewriteNewCollationIDIfNeeded(-5), Equals, int32(-5))
+	c.Assert(RestoreCollationIDIfNeeded(-5), Equals, int32(5))
+	c.Assert(RestoreCollationIDIfNeeded(5), Equals, int32(5))
+
+	SetNewCollationEnabledForTest(false)
+	c.Assert(RewriteNewCollationIDIfNeeded(5), Equals, int32(5))
+	c.Assert(RewriteNewCollationIDIfNeeded(-5), Equals, int32(-5))
+	c.Assert(RestoreCollationIDIfNeeded(5), Equals, int32(5))
+	c.Assert(RestoreCollationIDIfNeeded(-5), Equals, int32(-5))
+}
+
 func (s *testCollateSuite) TestGetCollator(c *C) {
 	defer testleak.AfterTest(c)()
 	SetNewCollationEnabledForTest(true)