Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

types: convert to new charset before inserting to blob or json column #31031

Merged
merged 33 commits into from
Dec 30, 2021
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
8957427
*: handle binary literal in ConvertTo
tangenta Dec 22, 2021
c498431
move string encoding to ConvertTo
tangenta Dec 26, 2021
a9d5e40
Merge remote-tracking branch 'upstream/master' into handle-bin
tangenta Dec 26, 2021
28feb98
types: rename from/to binary string convert function
tangenta Dec 26, 2021
fe83773
update errors.toml
tangenta Dec 27, 2021
5fec742
Merge remote-tracking branch 'upstream/master' into handle-bin
tangenta Dec 27, 2021
950353b
types: add tests for binary handling
tangenta Dec 27, 2021
56e1ad3
extract find encoding according to statement context
tangenta Dec 27, 2021
e80da98
address comment
tangenta Dec 28, 2021
dc70c0e
Merge remote-tracking branch 'upstream/master' into handle-bin
tangenta Dec 28, 2021
7cf31bd
fix integration test TestChangeFromBitToStringInvalidUtf8ErrMsg
tangenta Dec 28, 2021
f602ff6
util: ignore invalid string constructed by like expression
tangenta Dec 29, 2021
aea0725
Merge remote-tracking branch 'upstream/master' into handle-bin
tangenta Dec 29, 2021
a968c6f
add comment
tangenta Dec 29, 2021
35106fd
Merge remote-tracking branch 'upstream/master' into handle-bin
tangenta Dec 29, 2021
178557a
fix typo
tangenta Dec 29, 2021
fc852db
Merge branch 'master' into handle-bin
tangenta Dec 30, 2021
e6a5bc9
fix TestConvertToBinaryString
tangenta Dec 30, 2021
3d7574a
charset: use OpReplaceNoErr if possible
tangenta Dec 30, 2021
a3cc79d
Merge branch 'master' into handle-bin
ti-chi-bot Dec 30, 2021
b677fe1
Merge branch 'master' into handle-bin
ti-chi-bot Dec 30, 2021
0c00650
fix build
tangenta Dec 30, 2021
f7cbf37
Merge branch 'master' into handle-bin
ti-chi-bot Dec 30, 2021
6b2304f
Merge branch 'master' into handle-bin
ti-chi-bot Dec 30, 2021
19a841e
Merge branch 'master' into handle-bin
ti-chi-bot Dec 30, 2021
2f83619
Merge branch 'master' into handle-bin
ti-chi-bot Dec 30, 2021
4bc51f8
Merge branch 'master' into handle-bin
ti-chi-bot Dec 30, 2021
f4eae87
Merge branch 'master' into handle-bin
ti-chi-bot Dec 30, 2021
fa4be5e
Merge branch 'master' into handle-bin
ti-chi-bot Dec 30, 2021
5b5c0ab
Merge branch 'master' into handle-bin
ti-chi-bot Dec 30, 2021
958d55a
Merge branch 'master' into handle-bin
ti-chi-bot Dec 30, 2021
d96bcde
skip utf8 encode before insert to blob type
tangenta Dec 30, 2021
cf1109e
Merge branch 'master' into handle-bin
ti-chi-bot Dec 30, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions cmd/explaintest/r/new_character_set.result
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,13 @@ a hex(a)
中文 E4B8ADE69687
涓?枃 E6B6933FE69E83
set @@character_set_client = 'utf8mb4';
set names gbk;
drop table if exists t;
create table t (b blob, d json);
insert into t values ('你好', '{"测试": "你好"}');
select b, d from t;
b d
你好 {"测试": "你好"}
select hex(b), hex(d) from t;
hex(b) hex(d)
E4BDA0E5A5BD 7B22B2E2CAD4223A2022C4E3BAC3227D
1 change: 1 addition & 0 deletions cmd/explaintest/r/new_character_set_builtin.result
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
set names utf8mb4;
set @@sql_mode = '';
drop table if exists t;
create table t (a char(20) charset utf8mb4, b char(20) charset gbk, c binary(20));
Expand Down
2 changes: 1 addition & 1 deletion cmd/explaintest/r/new_character_set_invalid.result
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ insert into t values ('中文', 'asdf', '字符集');
insert into t values ('À', 'ø', '😂');
Error 1366: Incorrect string value '\xC3\x80' for column 'a'
insert into t values ('中文À中文', 'asdføfdsa', '字符集😂字符集');
Error 1366: Incorrect string value '\xC3\x80\xE4\xB8\xAD\xE6...' for column 'a'
Error 1366: Incorrect string value '\xC3\x80' for column 'a'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not the same as MySQL.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is OK to have this difference?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Em... Not sure. Seems we had a PR to fix it before. #25087

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR does not introduce the garbled code as described in #25087. It extracts the error message from ErrInvalidCharacterString (which should be well-formed).

The only difference is the number of invalid bytes displayed.

insert into t values (0x4040ffff, 0x4040ffff, 0x4040ffff);
Error 1366: Incorrect string value '\xFF\xFF' for column 'a'
select * from t;
Expand Down
7 changes: 7 additions & 0 deletions cmd/explaintest/t/new_character_set.test
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,10 @@ prepare p1 from "insert into t values ('中文');";
execute p1;
select a, hex(a) from t;
set @@character_set_client = 'utf8mb4';

set names gbk;
drop table if exists t;
create table t (b blob, d json);
insert into t values ('你好', '{"测试": "你好"}');
select b, d from t;
select hex(b), hex(d) from t;
1 change: 1 addition & 0 deletions cmd/explaintest/t/new_character_set_builtin.test
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
set names utf8mb4;
set @@sql_mode = '';
-- test for builtin function hex(), length(), ascii(), octet_length()
drop table if exists t;
Expand Down
1 change: 1 addition & 0 deletions errno/errcode.go
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,7 @@ const (
ErrUnresolvedHintName = 3128
ErrInvalidJSONText = 3140
ErrInvalidJSONPath = 3143
ErrInvalidJSONCharset = 3144
ErrInvalidTypeForJSON = 3146
ErrInvalidJSONPathWildcard = 3149
ErrInvalidJSONContainsPathType = 3150
Expand Down
1 change: 1 addition & 0 deletions errno/errname.go
Original file line number Diff line number Diff line change
Expand Up @@ -844,6 +844,7 @@ var MySQLErrName = map[uint16]*mysql.ErrMessage{
ErrInvalidJSONData: mysql.Message("Invalid JSON data provided to function %s: %s", nil),
ErrInvalidJSONText: mysql.Message("Invalid JSON text: %-.192s", []int{0}),
ErrInvalidJSONPath: mysql.Message("Invalid JSON path expression %s.", nil),
ErrInvalidJSONCharset: mysql.Message("Cannot create a JSON value from a string with CHARACTER SET '%s'.", nil),
ErrInvalidTypeForJSON: mysql.Message("Invalid data type for JSON data in argument %d to function %s; a JSON string or JSON type is required.", nil),
ErrInvalidJSONPathWildcard: mysql.Message("In this situation, path expressions may not contain the * and ** tokens.", nil),
ErrInvalidJSONContainsPathType: mysql.Message("The second argument can only be either 'one' or 'all'.", nil),
Expand Down
5 changes: 5 additions & 0 deletions errors.toml
Original file line number Diff line number Diff line change
Expand Up @@ -1006,6 +1006,11 @@ error = '''
Invalid JSON path expression %s.
'''

["json:3144"]
error = '''
Cannot create a JSON value from a string with CHARACTER SET '%s'.
'''

["json:3149"]
error = '''
In this situation, path expressions may not contain the * and ** tokens.
Expand Down
3 changes: 3 additions & 0 deletions executor/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -1831,6 +1831,9 @@ func ResetContextOfStmt(ctx sessionctx.Context, s ast.StmtNode) (err error) {
sc.IgnoreZeroInDate = true
sc.AllowInvalidDate = vars.SQLMode.HasAllowInvalidDatesMode()
}
sc.SkipUTF8Check = vars.SkipUTF8Check
sc.SkipASCIICheck = vars.SkipASCIICheck
sc.SkipUTF8MB4Check = !globalConfig.CheckMb4ValueInUTF8
vars.PreparedParams = vars.PreparedParams[:0]
if priority := mysql.PriorityEnum(atomic.LoadInt32(&variable.ForcePriority)); priority != mysql.NoPriority {
sc.Priority = priority
Expand Down
2 changes: 1 addition & 1 deletion expression/builtin_string.go
Original file line number Diff line number Diff line change
Expand Up @@ -1151,7 +1151,7 @@ func (b *builtinConvertSig) evalString(row chunk.Row) (string, bool, error) {
}
enc := charset.FindEncoding(resultTp.Charset)
if !enc.IsValid(hack.Slice(expr)) {
replace, _ := enc.Transform(nil, hack.Slice(expr), charset.OpReplace)
replace, _ := enc.Transform(nil, hack.Slice(expr), charset.OpReplaceNoErr)
return string(replace), false, nil
}
return expr, false, nil
Expand Down
2 changes: 1 addition & 1 deletion expression/builtin_string_vec.go
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,7 @@ func (b *builtinConvertSig) vecEvalString(input *chunk.Chunk, result *chunk.Colu
}
exprI := expr.GetBytes(i)
if !enc.IsValid(exprI) {
encBuf, _ = enc.Transform(encBuf, exprI, charset.OpReplace)
encBuf, _ = enc.Transform(encBuf, exprI, charset.OpReplaceNoErr)
result.AppendBytes(encBuf)
} else {
result.AppendBytes(exprI)
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ require (
github.com/phayes/freeport v0.0.0-20180830031419-95f893ade6f2
github.com/pingcap/badger v1.5.1-0.20210831093107-2f6cb8008145
github.com/pingcap/check v0.0.0-20200212061837-5e12011dc712
github.com/pingcap/errors v0.11.5-0.20211009033009-93128226aaa3
github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c
github.com/pingcap/failpoint v0.0.0-20210316064728-7acb0f0a3dfd
github.com/pingcap/fn v0.0.0-20200306044125-d5540d389059
github.com/pingcap/kvproto v0.0.0-20211122024046-03abd340988f
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -568,8 +568,8 @@ github.com/pingcap/errors v0.11.5-0.20190809092503-95897b64e011/go.mod h1:Oi8TUi
github.com/pingcap/errors v0.11.5-0.20200917111840-a15ef68f753d/go.mod h1:g4vx//d6VakjJ0mk7iLBlKA8LFavV/sAVINT/1PFxeQ=
github.com/pingcap/errors v0.11.5-0.20201126102027-b0a155152ca3/go.mod h1:G7x87le1poQzLB/TqvTJI2ILrSgobnq4Ut7luOwvfvI=
github.com/pingcap/errors v0.11.5-0.20210425183316-da1aaba5fb63/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg=
github.com/pingcap/errors v0.11.5-0.20211009033009-93128226aaa3 h1:8l9lu9RjWkI/VeqrP+Fn3tvZNPu5GYP0rYLLN5Q46go=
github.com/pingcap/errors v0.11.5-0.20211009033009-93128226aaa3/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg=
github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTmyFqUwr+jcCvpVkK7sumiz+ko5H9eq4=
github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg=
github.com/pingcap/failpoint v0.0.0-20191029060244-12f4ac2fd11d/go.mod h1:DNS3Qg7bEDhU6EXNHF+XSv/PGznQaMJ5FWvctpm6pQI=
github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce/go.mod h1:w4PEZ5y16LeofeeGwdgZB4ddv9bLyDuIX+ljstgKZyk=
github.com/pingcap/failpoint v0.0.0-20210316064728-7acb0f0a3dfd h1:I8IeI8MNiZVKnwuXhcIIzz6pREcOSbq18Q31KYIzFVM=
Expand Down
3 changes: 2 additions & 1 deletion parser/charset/encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ const (
)

const (
OpReplace = opFromUTF8 | opTruncateReplace | opCollectFrom | opSkipError
OpReplaceNoErr = opFromUTF8 | opTruncateReplace | opCollectFrom | opSkipError
OpReplace = opFromUTF8 | opTruncateReplace | opCollectFrom
OpEncode = opFromUTF8 | opTruncateTrim | opCollectTo
OpEncodeNoErr = OpEncode | opSkipError
OpEncodeReplace = opFromUTF8 | opTruncateReplace | opCollectTo
Expand Down
5 changes: 3 additions & 2 deletions parser/charset/encoding_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ import (
"golang.org/x/text/transform"
)

var errInvalidCharacterString = terror.ClassParser.NewStd(mysql.ErrInvalidCharacterString)
// ErrInvalidCharacterString returns when the string is invalid in the specific charset.
var ErrInvalidCharacterString = terror.ClassParser.NewStd(mysql.ErrInvalidCharacterString)

// encodingBase defines some generic functions.
type encodingBase struct {
Expand Down Expand Up @@ -115,7 +116,7 @@ func beginWithReplacementChar(dst []byte) bool {
// generateEncodingErr generates an invalid string in charset error.
func generateEncodingErr(name string, invalidBytes []byte) error {
arg := fmt.Sprintf("%X", invalidBytes)
return errInvalidCharacterString.FastGenByArgs(name, arg)
return ErrInvalidCharacterString.FastGenByArgs(name, arg)
}

// HackSlice converts string to slice without copy.
Expand Down
3 changes: 2 additions & 1 deletion parser/charset/encoding_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ func TestEncoding(t *testing.T) {
}{
{"一二三", "涓?簩涓?", false}, // MySQL reports '涓?簩涓'.
{"一二三123", "涓?簩涓?23", false},
{"测试", "娴嬭瘯", true},
{"案1案2", "妗?妗?", false},
{"焊䏷菡釬", "鐒婁彿鑿¢嚞", true},
{"鞍杏以伊位依", "闉嶆潖浠ヤ紛浣嶄緷", true},
Expand Down Expand Up @@ -134,7 +135,7 @@ func TestEncodingValidate(t *testing.T) {
}
strBytes := []byte(tc.str)
require.Equal(t, tc.ok, enc.IsValid(strBytes), msg)
replace, _ := enc.Transform(nil, strBytes, charset.OpReplace)
replace, _ := enc.Transform(nil, strBytes, charset.OpReplaceNoErr)
require.Equal(t, tc.expected, string(replace), msg)
}
}
3 changes: 3 additions & 0 deletions sessionctx/stmtctx/stmtctx.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ type StatementContext struct {
IgnoreNoPartition bool
SkipPlanCache bool
IgnoreExplainIDSuffix bool
SkipUTF8Check bool
SkipASCIICheck bool
SkipUTF8MB4Check bool
// If the select statement was like 'select * from t as of timestamp ...' or in a stale read transaction
// or is affected by the tidb_read_staleness session variable, then the statement will be marked as isStaleness
// in stmtCtx
Expand Down
97 changes: 26 additions & 71 deletions table/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@ import (
"strconv"
"strings"
"time"
"unicode"

"github.com/pingcap/tidb/config"
"github.com/pingcap/errors"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/parser"
"github.com/pingcap/tidb/parser/ast"
Expand Down Expand Up @@ -170,26 +169,29 @@ func truncateTrailingSpaces(v *types.Datum) {
v.SetString(str, v.Collation())
}

func handleWrongCharsetValue(ctx sessionctx.Context, col *model.ColumnInfo, str []byte, i int) error {
sc := ctx.GetSessionVars().StmtCtx
var strval strings.Builder
for j := 0; j < 6; j++ {
if len(str) > (i + j) {
if str[i+j] > unicode.MaxASCII {
fmt.Fprintf(&strval, "\\x%X", str[i+j])
} else {
strval.WriteRune(rune(str[i+j]))
}
}
// convertToIncorrectStringErr converts ErrInvalidCharacterString to ErrTruncatedWrongValueForField.
// The first argument is the invalid character in bytes.
func convertToIncorrectStringErr(err error, colName string) error {
inErr, ok := errors.Cause(err).(*errors.Error)
if !ok {
return err
}
if len(str) > i+6 {
strval.WriteString(`...`)
args := inErr.Args()
if len(args) != 2 {
return err
}
// TODO: Add 'at row %d'
err := ErrTruncatedWrongValueForField.FastGen("Incorrect string value '%s' for column '%s'", strval.String(), col.Name)
logutil.BgLogger().Error("incorrect string value", zap.Uint64("conn", ctx.GetSessionVars().ConnectionID), zap.Error(err))
err = sc.HandleTruncate(err)
return err
invalidStrHex, ok := args[1].(string)
if !ok {
return err
}
var res strings.Builder
for i := 0; i < len(invalidStrHex); i++ {
if i%2 == 0 {
res.WriteString("\\x")
}
res.WriteByte(invalidStrHex[i])
}
return ErrTruncatedWrongValueForField.FastGen("Incorrect string value '%s' for column '%s'", res.String(), colName)
}

// handleZeroDatetime handles Timestamp/Datetime/Date zero date and invalid dates.
Expand Down Expand Up @@ -314,6 +316,10 @@ func CastValue(ctx sessionctx.Context, val types.Datum, col *model.ColumnInfo, r
if innCasted, exit, innErr := handleZeroDatetime(ctx, col, casted, str, types.ErrWrongValue.Equal(err)); exit {
return innCasted, innErr
}
} else if err != nil && charset.ErrInvalidCharacterString.Equal(err) {
err = convertToIncorrectStringErr(err, col.Name.O)
logutil.BgLogger().Error("incorrect string value",
zap.Uint64("conn", ctx.GetSessionVars().ConnectionID), zap.Error(err))
}

err = sc.HandleTruncate(err)
Expand All @@ -327,60 +333,9 @@ func CastValue(ctx sessionctx.Context, val types.Datum, col *model.ColumnInfo, r
if col.Tp == mysql.TypeString && !types.IsBinaryStr(&col.FieldType) {
truncateTrailingSpaces(&casted)
}

err = validateStringDatum(ctx, &val, &casted, col)
if forceIgnoreTruncate {
err = nil
}
return casted, err
}

func validateStringDatum(ctx sessionctx.Context, origin, casted *types.Datum, col *model.ColumnInfo) error {
// Only strings are need to validate.
if !types.IsString(col.Tp) {
return nil
}
fromBinary := origin.Kind() == types.KindBinaryLiteral ||
(origin.Kind() == types.KindString && origin.Collation() == charset.CollationBin)
toBinary := types.IsTypeBlob(col.Tp) || col.Charset == charset.CharsetBin
if fromBinary && toBinary {
return nil
}
enc := charset.FindEncoding(col.Charset)
// Skip utf8 check if possible.
if enc.Tp() == charset.EncodingTpUTF8 && ctx.GetSessionVars().SkipUTF8Check {
return nil
}
// Skip ascii check if possible.
if enc.Tp() == charset.EncodingTpASCII && ctx.GetSessionVars().SkipASCIICheck {
return nil
}
if col.Charset == charset.CharsetUTF8 && config.GetGlobalConfig().CheckMb4ValueInUTF8 {
// Use a strict mode implementation. 4 bytes characters are invalid.
enc = charset.EncodingUTF8MB3StrictImpl
}
if fromBinary {
src := casted.GetBytes()
encBytes, err := enc.Transform(nil, src, charset.OpDecode)
if err != nil {
casted.SetBytesAsString(encBytes, col.Collate, 0)
nSrc := charset.CountValidBytesDecode(enc, src)
return handleWrongCharsetValue(ctx, col, src, nSrc)
}
casted.SetBytesAsString(encBytes, col.Collate, 0)
return nil
}
// Check if the string is valid in the given column charset.
str := casted.GetBytes()
if !enc.IsValid(str) {
replace, _ := enc.Transform(nil, str, charset.OpReplace)
casted.SetBytesAsString(replace, col.Collate, 0)
nSrc := charset.CountValidBytes(enc, str)
return handleWrongCharsetValue(ctx, col, str, nSrc)
}
return nil
}

// ColDesc describes column information like MySQL desc and show columns do.
type ColDesc struct {
Field string
Expand Down
77 changes: 77 additions & 0 deletions types/convert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,83 @@ func TestConvertToString(t *testing.T) {
}
}

// TestConvertToStringWithCheck verifies that Datum.ConvertTo validates a string
// against the target column charset, and that the SkipUTF8Check, SkipASCIICheck
// and SkipUTF8MB4Check flags on the StatementContext each bypass that validation.
func TestConvertToStringWithCheck(t *testing.T) {
// Valid UTF-8 made only of 3-byte characters.
nhUTF8 := "你好"
// Valid UTF-8 that ends with a 4-byte character (an emoji).
nhUTF8MB4 := "你好👋"
// Valid prefix followed by a stray 0x81 byte, which makes the string invalid UTF-8.
nhUTF8Invalid := "你好" + string([]byte{0x81})
// SC is a local alias that keeps the table rows below short.
type SC = *stmtctx.StatementContext
tests := []struct {
input string
// outputChs is the charset assigned to the target field type.
outputChs string
// setStmtCtx configures the skip flags on a fresh StatementContext.
setStmtCtx func(ctx *stmtctx.StatementContext)
// output is the expected converted string; an empty value means the
// conversion is expected to fail with ErrInvalidCharacterString.
output string
}{
{nhUTF8, "utf8mb4", func(s SC) { s.SkipUTF8Check = false }, nhUTF8},
{nhUTF8MB4, "utf8mb4", func(s SC) { s.SkipUTF8Check = false }, nhUTF8MB4},
{nhUTF8, "utf8mb4", func(s SC) { s.SkipUTF8Check = true }, nhUTF8},
{nhUTF8MB4, "utf8mb4", func(s SC) { s.SkipUTF8Check = true }, nhUTF8MB4},
{nhUTF8Invalid, "utf8mb4", func(s SC) { s.SkipUTF8Check = true }, nhUTF8Invalid},
{nhUTF8Invalid, "utf8mb4", func(s SC) { s.SkipUTF8Check = false }, ""},
{nhUTF8Invalid, "ascii", func(s SC) { s.SkipASCIICheck = false }, ""},
{nhUTF8Invalid, "ascii", func(s SC) { s.SkipASCIICheck = true }, nhUTF8Invalid},
{nhUTF8MB4, "utf8", func(s SC) { s.SkipUTF8MB4Check = false }, ""},
{nhUTF8MB4, "utf8", func(s SC) { s.SkipUTF8MB4Check = true }, nhUTF8MB4},
}
for _, tt := range tests {
// Target column: VARCHAR(255) in the charset under test.
ft := NewFieldType(mysql.TypeVarchar)
ft.Flen = 255
ft.Charset = tt.outputChs
inputDatum := NewStringDatum(tt.input)
// Each case starts from a zero-valued context, so only the flag set by
// tt.setStmtCtx differs from the defaults.
sc := new(stmtctx.StatementContext)
tt.setStmtCtx(sc)
outputDatum, err := inputDatum.ConvertTo(sc, ft)
if len(tt.output) == 0 {
// Empty expected output is the sentinel for "must be rejected".
require.True(t, charset.ErrInvalidCharacterString.Equal(err), tt)
} else {
require.NoError(t, err, tt)
require.Equal(t, tt.output, outputDatum.GetString(), tt)
}
}
}

// TestConvertToBinaryString verifies how Datum.ConvertTo handles string datums
// depending on the input collation and the target charset, including
// conversions to and from the binary charset.
func TestConvertToBinaryString(t *testing.T) {
nhUTF8 := "你好"
nhGBK := string([]byte{0xC4, 0xE3, 0xBA, 0xC3}) // "你好" in GBK
// Each invalid variant appends a stray 0x81 byte to an otherwise valid string.
nhUTF8Invalid := "你好" + string([]byte{0x81})
nhGBKInvalid := nhGBK + string([]byte{0x81})
tests := []struct {
input string
// inputCollate is the collation attached to the input datum.
inputCollate string
// outputCharset is the charset assigned to the target field type.
outputCharset string
// output is the expected converted string; an empty value means the
// conversion is expected to fail with ErrInvalidCharacterString.
output string
}{
{nhUTF8, "utf8_bin", "utf8", nhUTF8},
{nhUTF8, "utf8mb4_bin", "utf8mb4", nhUTF8},
{nhUTF8, "gbk_bin", "utf8", nhUTF8},
{nhUTF8, "gbk_bin", "gbk", nhUTF8},
{nhUTF8, "binary", "utf8mb4", nhUTF8},
// Binary input holding GBK bytes is expected to come out as UTF-8 when the target is gbk.
{nhGBK, "binary", "gbk", nhUTF8},
{nhUTF8, "utf8_bin", "binary", nhUTF8},
// A gbk-collated input converted to binary is expected to yield the GBK bytes.
{nhUTF8, "gbk_bin", "binary", nhGBK},
{nhUTF8Invalid, "utf8_bin", "utf8", ""},
{nhGBKInvalid, "gbk_bin", "gbk", ""},
}
for _, tt := range tests {
// Target column: VARCHAR(255) in the charset under test.
ft := NewFieldType(mysql.TypeVarchar)
ft.Flen = 255
ft.Charset = tt.outputCharset
inputDatum := NewCollationStringDatum(tt.input, tt.inputCollate)
// A zero-valued context leaves every Skip*Check flag false.
sc := new(stmtctx.StatementContext)
outputDatum, err := inputDatum.ConvertTo(sc, ft)
if len(tt.output) == 0 {
// Empty expected output is the sentinel for "must be rejected".
require.True(t, charset.ErrInvalidCharacterString.Equal(err), tt)
} else {
require.NoError(t, err, tt)
require.Equal(t, tt.output, outputDatum.GetString(), tt)
}
}
}

func testStrToInt(t *testing.T, str string, expect int64, truncateAsErr bool, expectErr error) {
sc := new(stmtctx.StatementContext)
sc.IgnoreTruncate = !truncateAsErr
Expand Down
Loading