Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

expression: set collation id to negative in tipb if new collations are enabled #14883

Merged
merged 5 commits into from
Feb 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions expression/distsql_builtin.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import (
"github.com/pingcap/tidb/sessionctx/stmtctx"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/codec"
"github.com/pingcap/tidb/util/collate"
"github.com/pingcap/tidb/util/mock"
"github.com/pingcap/tipb/go-tipb"
)
Expand All @@ -37,7 +36,7 @@ func PbTypeToFieldType(tp *tipb.FieldType) *types.FieldType {
Flen: int(tp.Flen),
Decimal: int(tp.Decimal),
Charset: tp.Charset,
Collate: mysql.Collations[uint8(tp.Collate)],
Collate: protoToCollation(tp.Collate),
}
}

Expand Down Expand Up @@ -1172,7 +1171,7 @@ func convertUint(val []byte) (*Constant, error) {

// convertString wraps the raw bytes in val into a string Constant.
// The collation name is resolved from tp.Collate through protoToCollation, so it
// goes through the same ID-restoring and default-fallback path as every other
// collation ID read from tipb in this package; tp.Flen is passed as the length
// argument. The returned Constant always has a TypeVarString return type and the
// error is always nil.
func convertString(val []byte, tp *tipb.FieldType) (*Constant, error) {
	var d types.Datum
	d.SetBytesAsString(val, protoToCollation(tp.Collate), uint32(tp.Flen))
	return &Constant{Value: d, RetType: types.NewFieldType(mysql.TypeVarString)}, nil
}

Expand Down
56 changes: 56 additions & 0 deletions expression/distsql_builtin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,71 @@ import (
"github.com/pingcap/tidb/types/json"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/codec"
"github.com/pingcap/tidb/util/collate"
"github.com/pingcap/tipb/go-tipb"
)

// Register both suites with the test framework. testEvalSerialSuite is registered
// through SerialSuites — presumably so that its tests, which flip the new-collation
// switch, never overlap with other tests (TODO confirm against the check package).
var _ = Suite(&testEvalSuite{})
var _ = SerialSuites(&testEvalSerialSuite{})

// testEvalSuite is the receiver for the regular evaluation tests in this file.
type testEvalSuite struct {
	// colID is set to 0 by SetUpSuite.
	colID int64
}

// testEvalSerialSuite is a stateless receiver for tests such as
// TestPBToExprWithNewCollation that toggle the new-collation switch through
// collate.SetNewCollationEnabledForTest.
type testEvalSerialSuite struct {
}

// TestPBToExprWithNewCollation converts string field types to their protobuf form
// and back through PBToExpr, first with new collations disabled and then enabled.
// In both modes the resulting constant must report the expected collation name;
// the protobuf collation ID is expected to be pbID when disabled and -pbID when
// enabled.
func (s *testEvalSerialSuite) TestPBToExprWithNewCollation(c *C) {
	stmtCtx := new(stmtctx.StatementContext)
	colTypes := make([]*types.FieldType, 1)

	tests := []struct {
		name    string
		expName string
		id      int32
		pbID    int32
	}{
		{"utf8_general_ci", "utf8_general_ci", 33, 33},
		{"UTF8MB4_BIN", "utf8mb4_bin", 46, 46},
		{"utf8mb4_bin", "utf8mb4_bin", 46, 46},
		{"utf8mb4_general_ci", "utf8mb4_general_ci", 45, 45},
		{"", "utf8mb4_bin", 46, 46},
		{"some_error_collation", "utf8mb4_bin", 46, 46},
	}

	// roundTrip runs every case once; sign is 1 when the protobuf ID is expected
	// as is, and -1 when it is expected to be negated.
	roundTrip := func(sign int32) {
		for _, tt := range tests {
			fieldType := types.NewFieldType(mysql.TypeString)
			fieldType.Collate = tt.name
			pb := &tipb.Expr{
				Tp:        tipb.ExprType_String,
				FieldType: toPBFieldType(fieldType),
			}
			c.Assert(pb.FieldType.Collate, Equals, sign*tt.pbID)

			converted, err := PBToExpr(pb, colTypes, stmtCtx)
			c.Assert(err, IsNil)
			constant, isConst := converted.(*Constant)
			c.Assert(isConst, IsTrue)
			c.Assert(constant.Value.Collation(), Equals, tt.expName)
		}
	}

	// New collations disabled: IDs travel unchanged.
	roundTrip(1)

	// New collations enabled: IDs travel negated; switch back off on exit.
	collate.SetNewCollationEnabledForTest(true)
	defer collate.SetNewCollationEnabledForTest(false)
	roundTrip(-1)
}

// SetUpSuite initializes the suite's colID to 0.
func (s *testEvalSuite) SetUpSuite(c *C) {
	s.colID = 0
}
Expand Down
23 changes: 18 additions & 5 deletions expression/expr_to_pb.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/codec"
"github.com/pingcap/tidb/util/collate"
"github.com/pingcap/tidb/util/logutil"
"github.com/pingcap/tipb/go-tipb"
"go.uber.org/zap"
Expand Down Expand Up @@ -200,18 +201,30 @@ func FieldTypeFromPB(ft *tipb.FieldType) *types.FieldType {
}

func collationToProto(c string) int32 {
v, ok := mysql.CollationNames[c]
if ok {
return int32(v)
if v, ok := mysql.CollationNames[c]; ok {
return collate.RewriteNewCollationIDIfNeeded(int32(v))
}
return int32(mysql.DefaultCollationID)
v := collate.RewriteNewCollationIDIfNeeded(int32(mysql.DefaultCollationID))
logutil.BgLogger().Warn(
"Unable to get collation ID by name, use ID of the default collation instead",
zap.String("name", c),
zap.Int32("default collation ID", v),
zap.String("default collation", mysql.DefaultCollationName),
)
return v
}

func protoToCollation(c int32) string {
v, ok := mysql.Collations[uint8(c)]
v, ok := mysql.Collations[uint8(collate.RestoreCollationIDIfNeeded(c))]
if ok {
return v
}
logutil.BgLogger().Warn(
"Unable to get collation name from ID, use name of the default collation instead",
zap.Int32("id", c),
zap.Int("default collation ID", mysql.DefaultCollationID),
zap.String("default collation", mysql.DefaultCollationName),
)
return mysql.DefaultCollationName
}

Expand Down
47 changes: 47 additions & 0 deletions expression/expr_to_pb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/pingcap/parser/mysql"
"github.com/pingcap/tidb/sessionctx/stmtctx"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/collate"
"github.com/pingcap/tidb/util/mock"
"github.com/pingcap/tipb/go-tipb"
)
Expand Down Expand Up @@ -673,3 +674,49 @@ func (s *testEvaluatorSerialSuites) TestMetadata(c *C) {
c.Assert(err, IsNil)
c.Assert(metadata.InUnion, Equals, true)
}

// columnCollation sets the collation of c's return type to coll and returns the
// same column, so the call can be nested inside an append or another call.
func columnCollation(c *Column, coll string) *Column {
	c.RetType.Collate = coll
	return c
}

// TestNewCollationsEnabled checks the protobuf form of column expressions while
// new collations are enabled: every collation ID in the marshalled JSON is
// expected to be negative. The column without an explicit collation and the one
// with an unknown collation name are both expected to carry -46; the remaining
// columns carry the negated ID of the collation they were given.
func (s *testEvaluatorSerialSuites) TestNewCollationsEnabled(c *C) {
	collate.SetNewCollationEnabledForTest(true)
	defer collate.SetNewCollationEnabledForTest(false)
	sc := new(stmtctx.StatementContext)
	client := new(mock.Client)
	dg := new(dataGen4Expr2PbTest)

	colExprs := []Expression{
		dg.genColumn(mysql.TypeVarchar, 1),
		columnCollation(dg.genColumn(mysql.TypeVarchar, 2), "some_invalid_collation"),
		columnCollation(dg.genColumn(mysql.TypeVarString, 3), "utf8mb4_general_ci"),
		columnCollation(dg.genColumn(mysql.TypeString, 4), "utf8mb4_0900_ai_ci"),
		columnCollation(dg.genColumn(mysql.TypeVarchar, 5), "utf8_bin"),
	}

	// All expressions combined into a single protobuf expression.
	pbExpr, pushed, _ := ExpressionsToPB(sc, colExprs, client)
	c.Assert(len(pushed), Equals, len(colExprs))
	js, err := json.Marshal(pbExpr)
	c.Assert(err, IsNil)
	c.Assert(string(js), Equals, "{\"tp\":10000,\"children\":[{\"tp\":10000,\"children\":[{\"tp\":10000,\"children\":[{\"tp\":10000,\"children\":[{\"tp\":201,\"val\":\"gAAAAAAAAAE=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-46,\"charset\":\"\"}},{\"tp\":201,\"val\":\"gAAAAAAAAAI=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-46,\"charset\":\"\"}}],\"sig\":3101,\"field_type\":{\"tp\":8,\"flag\":128,\"flen\":1,\"decimal\":0,\"collate\":-63,\"charset\":\"binary\"}},{\"tp\":201,\"val\":\"gAAAAAAAAAM=\",\"sig\":0,\"field_type\":{\"tp\":253,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-45,\"charset\":\"\"}}],\"sig\":3101,\"field_type\":{\"tp\":8,\"flag\":128,\"flen\":1,\"decimal\":0,\"collate\":-63,\"charset\":\"binary\"}},{\"tp\":201,\"val\":\"gAAAAAAAAAQ=\",\"sig\":0,\"field_type\":{\"tp\":254,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-255,\"charset\":\"\"}}],\"sig\":3101,\"field_type\":{\"tp\":8,\"flag\":128,\"flen\":1,\"decimal\":0,\"collate\":-63,\"charset\":\"binary\"}},{\"tp\":201,\"val\":\"gAAAAAAAAAU=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-83,\"charset\":\"\"}}],\"sig\":3101,\"field_type\":{\"tp\":8,\"flag\":128,\"flen\":1,\"decimal\":0,\"collate\":-63,\"charset\":\"binary\"}}")

	// Each expression converted on its own.
	pbExprs := ExpressionsToPBList(sc, colExprs, client)
	jsons := []string{
		"{\"tp\":201,\"val\":\"gAAAAAAAAAE=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-46,\"charset\":\"\"}}",
		"{\"tp\":201,\"val\":\"gAAAAAAAAAI=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-46,\"charset\":\"\"}}",
		"{\"tp\":201,\"val\":\"gAAAAAAAAAM=\",\"sig\":0,\"field_type\":{\"tp\":253,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-45,\"charset\":\"\"}}",
		"{\"tp\":201,\"val\":\"gAAAAAAAAAQ=\",\"sig\":0,\"field_type\":{\"tp\":254,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-255,\"charset\":\"\"}}",
		"{\"tp\":201,\"val\":\"gAAAAAAAAAU=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-83,\"charset\":\"\"}}",
	}
	// Without this check a short result would silently skip expectations and a
	// long one would index past the end of jsons.
	c.Assert(len(pbExprs), Equals, len(jsons))
	for i, colPB := range pbExprs {
		// Assert the element itself: checking the slice here would be vacuous,
		// because the loop body only runs when the slice is non-empty.
		c.Assert(colPB, NotNil)
		js, err = json.Marshal(colPB)
		c.Assert(err, IsNil)
		c.Assert(string(js), Equals, jsons[i], Commentf("%v\n", i))
	}

	// Group-by items go through the same collation ID handling.
	item := columnCollation(dg.genColumn(mysql.TypeDouble, 0), "utf8mb4_0900_ai_ci")
	pbByItem := GroupByItemToPB(sc, client, item)
	js, err = json.Marshal(pbByItem)
	c.Assert(err, IsNil)
	c.Assert(string(js), Equals, "{\"expr\":{\"tp\":201,\"val\":\"gAAAAAAAAAA=\",\"sig\":0,\"field_type\":{\"tp\":5,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-255,\"charset\":\"\"}},\"desc\":false}")
}
30 changes: 30 additions & 0 deletions util/collate/collate.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import (
"sync"

"github.com/pingcap/parser/mysql"
"github.com/pingcap/tidb/util/logutil"
"go.uber.org/zap"
)

var (
Expand Down Expand Up @@ -90,6 +92,34 @@ func NewCollationEnabled() bool {
return newCollationEnabled
}

// RewriteNewCollationIDIfNeeded rewrites a collation ID when new collations are enabled.
// With new collations enabled, a non-negative ID is negated so that the other components
// of the cluster (for example, TiKV) can become aware of the setting without any change
// to the protocol definition; an ID that is already negative is logged as unexpected and
// returned unchanged.
// With new collations disabled, the ID is returned unchanged.
func RewriteNewCollationIDIfNeeded(id int32) int32 {
	if !newCollationEnabled {
		return id
	}
	if id >= 0 {
		return -id
	}
	logutil.BgLogger().Warn("Unexpected negative collation ID for rewrite.", zap.Int32("ID", id))
	return id
}

// RestoreCollationIDIfNeeded restores a collation ID when new collations are enabled.
// With new collations enabled, a non-positive ID is negated back to its original value;
// a positive ID is logged as unexpected and returned unchanged.
// With new collations disabled, the ID is returned unchanged.
func RestoreCollationIDIfNeeded(id int32) int32 {
	if !newCollationEnabled {
		return id
	}
	if id <= 0 {
		return -id
	}
	logutil.BgLogger().Warn("Unexpected positive collation ID for restore.", zap.Int32("ID", id))
	return id
}

// GetCollator gets the collator according to collate; it returns the binary collator if the corresponding collator doesn't exist.
func GetCollator(collate string) Collator {
ctor, ok := collatorMap[collate]
Expand Down
14 changes: 14 additions & 0 deletions util/collate/collate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,20 @@ func (s *testCollateSuite) TestSetNewCollateEnabled(c *C) {
c.Assert(NewCollationEnabled(), Equals, true)
}

// TestRewriteAndRestoreCollationID checks RewriteNewCollationIDIfNeeded and
// RestoreCollationIDIfNeeded in both modes: with new collations enabled the sign
// is flipped in the expected direction only, and with them disabled every ID is
// returned unchanged.
func (s *testCollateSuite) TestRewriteAndRestoreCollationID(c *C) {
	type check struct {
		convert func(int32) int32
		in      int32
		want    int32
	}
	modes := []struct {
		enabled bool
		checks  []check
	}{
		{true, []check{
			{RewriteNewCollationIDIfNeeded, 5, -5},
			{RewriteNewCollationIDIfNeeded, -5, -5},
			{RestoreCollationIDIfNeeded, -5, 5},
			{RestoreCollationIDIfNeeded, 5, 5},
		}},
		{false, []check{
			{RewriteNewCollationIDIfNeeded, 5, 5},
			{RewriteNewCollationIDIfNeeded, -5, -5},
			{RestoreCollationIDIfNeeded, 5, 5},
			{RestoreCollationIDIfNeeded, -5, -5},
		}},
	}

	for _, mode := range modes {
		SetNewCollationEnabledForTest(mode.enabled)
		for _, ck := range mode.checks {
			c.Assert(ck.convert(ck.in), Equals, ck.want)
		}
	}
}

func (s *testCollateSuite) TestGetCollator(c *C) {
defer testleak.AfterTest(c)()
SetNewCollationEnabledForTest(true)
Expand Down