Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ddl: check column data when execute alter table/column charset #34534

Closed
wants to merge 18 commits into from
62 changes: 62 additions & 0 deletions ddl/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"github.com/pingcap/tidb/meta/autoid"
"github.com/pingcap/tidb/metrics"
"github.com/pingcap/tidb/parser/ast"
"github.com/pingcap/tidb/parser/charset"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/parser/terror"
Expand Down Expand Up @@ -1556,6 +1557,53 @@ func updateChangingObjState(changingCol *model.ColumnInfo, changingIdxs []*model
}
}

func needCheckLatin1Convert(oldCol, newCol *model.ColumnInfo) bool {
if oldCol.GetCharset() != newCol.GetCharset() {
return oldCol.GetCharset() == charset.CharsetLatin1
}
return false
Defined2014 marked this conversation as resolved.
Show resolved Hide resolved
}

func (w *worker) checkLatin1Convert(schema, t model.CIStr, oldCols []*model.ColumnInfo, newCols []*model.ColumnInfo) (err error) {
sctx, err := w.sessPool.get()
if err != nil {
return errors.Trace(err)
}
sctx.GetSessionVars().StmtCtx.TruncateAsWarning = false
sctx.GetSessionVars().StmtCtx.IgnoreTruncate = false
defer w.sessPool.put(sctx)

var buf strings.Builder
buf.WriteString("select * from %n.%n where ")
tangenta marked this conversation as resolved.
Show resolved Hide resolved
paramsList := make([]interface{}, 0, 2+3*len(oldCols))
paramsList = append(paramsList, schema.L, t.L)
for i, col := range oldCols {
if i == 0 {
buf.WriteString("convert(%n using %n) != %n")
paramsList = append(paramsList, col.Name.L, newCols[i].GetCharset(), col.Name.L)
} else {
buf.WriteString(" or convert(%n using %n) != %n")
paramsList = append(paramsList, col.Name.L, newCols[i].GetCharset(), col.Name.L)
}
}
buf.WriteString(" limit 1")
rows, _, err := sctx.(sqlexec.RestrictedSQLExecutor).ExecRestrictedSQL(w.ddlJobCtx, nil, buf.String(), paramsList...)
if err != nil {
return errors.Trace(err)
}
rowCount := len(rows)
if rowCount != 0 {
row := rows[0]
for i, col := range oldCols {
_, err = table.CastValue(sctx, row.GetDatum(col.Offset, &col.FieldType), newCols[i], false, false)
if err != nil {
return errors.Trace(err)
}
}
}
return nil
}

// doModifyColumn updates the column information and reorders all columns. It does not support modifying column data.
func (w *worker) doModifyColumn(
d *ddlCtx, t *meta.Meta, job *model.Job, dbInfo *model.DBInfo, tblInfo *model.TableInfo,
Expand Down Expand Up @@ -1588,6 +1636,20 @@ func (w *worker) doModifyColumn(
}
}

if needCheckLatin1Convert(oldCol, newCol) {
err := w.checkLatin1Convert(dbInfo.Name, tblInfo.Name, []*model.ColumnInfo{oldCol}, []*model.ColumnInfo{newCol})
if err != nil {
if job.ReorgMeta.SQLMode.HasStrictMode() {
job.State = model.JobStateRollingback
return ver, errors.Trace(err)
} else {
warn := errors.Cause(err).(*terror.Error)
job.ReorgMeta.Warnings[warn.ID()] = warn
job.ReorgMeta.WarningsCount[warn.ID()] = 1
}
}
}

if err := adjustTableInfoAfterModifyColumn(tblInfo, newCol, oldCol, pos); err != nil {
job.State = model.JobStateRollingback
return ver, errors.Trace(err)
Expand Down
44 changes: 44 additions & 0 deletions ddl/db_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"time"

"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/config"
"github.com/pingcap/tidb/ddl"
"github.com/pingcap/tidb/domain"
Expand Down Expand Up @@ -909,6 +910,49 @@ func TestChangingTableCharset(t *testing.T) {
}
}

func TestModifyInvalidColumnData(t *testing.T) {
store, clean := testkit.CreateMockStore(t)
defer clean()
tk := testkit.NewTestKit(t, store)

tk.MustExec("use test")

require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/parser/charset/latin1EnableInvalidCharacter", "return(true)"))

tk.MustExec("drop table if exists t")
tk.MustExec("create table t (a varchar(20)) charset = latin1")
tk.MustExec("insert into t values (0x90)")
tk.MustGetErrCode("alter table t convert to charset utf8", errno.ErrTruncatedWrongValueForField)
tk.MustGetErrCode("alter table t modify column a varchar(20) charset utf8", errno.ErrTruncatedWrongValueForField)
tk.MustGetErrCode("alter table t modify column a varchar(19) charset utf8", errno.ErrTruncatedWrongValueForField)

tk.MustExec("drop table if exists t")
tk.MustExec("create table t (a varchar(20), b varchar(20), c varchar(20)) charset = latin1")
tk.MustExec("insert into t values (0x3F, 0x3F, 0x90)")
tk.MustGetErrCode("alter table t convert to charset utf8", errno.ErrTruncatedWrongValueForField)

tk.MustExec("set sql_mode=''")
tk.MustExec("drop table if exists t")
tk.MustExec("create table t (a varchar(20)) charset = latin1")
tk.MustExec("insert into t values (0x90)")
tk.MustExec("alter table t convert to charset utf8")
zimulala marked this conversation as resolved.
Show resolved Hide resolved
tk.MustQuery("show warnings;").Check(testkit.Rows("Warning 1366 Incorrect string value '\\x90' for column 'a'"))
tk.MustQuery("select hex(a) from t").Check(testkit.Rows("90"))
tk.MustQuery("show create table t;").Check(testkit.Rows(
"t CREATE TABLE `t` (\n `a` varchar(20) DEFAULT NULL\n) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin"))
Defined2014 marked this conversation as resolved.
Show resolved Hide resolved

tk.MustExec("drop table if exists t")
tk.MustExec("create table t (a varchar(20), b varchar(20)) charset = latin1")
tk.MustExec("insert into t values (0x90, 0x90)")
tk.MustExec("alter table t modify column a varchar(19) charset utf8")
tk.MustExec("alter table t modify column b varchar(20) charset utf8")
// change varchar(20) to varchar(19) will do reorg which uses '?' instead of invalid characters
tk.MustQuery("select hex(a), hex(b) from t").Check(testkit.Rows("3F 90"))

tk.MustExec("set sql_mode=default")
require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/parser/charset/latin1EnableInvalidCharacter"))
}

func TestModifyColumnOption(t *testing.T) {
store, clean := testkit.CreateMockStore(t)
defer clean()
Expand Down
9 changes: 8 additions & 1 deletion ddl/ddl_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -4975,14 +4975,21 @@ func (d *ddl) AlterTableCharsetAndCollate(ctx sessionctx.Context, ident ast.Iden
return nil
}

tzName, tzOffset := ddlutil.GetTimeZone(ctx)
job := &model.Job{
SchemaID: schema.ID,
TableID: tb.Meta().ID,
SchemaName: schema.Name.L,
TableName: tb.Meta().Name.L,
Type: model.ActionModifyTableCharsetAndCollate,
BinlogInfo: &model.HistoryInfo{},
Args: []interface{}{toCharset, toCollate, needsOverwriteCols},
ReorgMeta: &model.DDLReorgMeta{
SQLMode: ctx.GetSessionVars().SQLMode,
Warnings: make(map[errors.ErrorID]*terror.Error),
WarningsCount: make(map[errors.ErrorID]int64),
Location: &model.TimeZoneLocation{Name: tzName, Offset: tzOffset},
},
Args: []interface{}{toCharset, toCollate, needsOverwriteCols},
}
err = d.DoDDLJob(ctx, job)
err = d.callHookOnChanged(err)
Expand Down
2 changes: 1 addition & 1 deletion ddl/ddl_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -909,7 +909,7 @@ func (w *worker) runDDLJob(d *ddlCtx, t *meta.Meta, job *model.Job) (ver int64,
case model.ActionAddTablePartition:
ver, err = w.onAddTablePartition(d, t, job)
case model.ActionModifyTableCharsetAndCollate:
ver, err = onModifyTableCharsetAndCollate(d, t, job)
ver, err = w.onModifyTableCharsetAndCollate(d, t, job)
case model.ActionRecoverTable:
ver, err = w.onRecoverTable(d, t, job)
case model.ActionLockTable:
Expand Down
33 changes: 29 additions & 4 deletions ddl/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"github.com/pingcap/tidb/parser/ast"
"github.com/pingcap/tidb/parser/charset"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/terror"
field_types "github.com/pingcap/tidb/parser/types"
"github.com/pingcap/tidb/table"
"github.com/pingcap/tidb/table/tables"
Expand Down Expand Up @@ -1015,7 +1016,7 @@ func onModifyTableComment(d *ddlCtx, t *meta.Meta, job *model.Job) (ver int64, _
return ver, nil
}

func onModifyTableCharsetAndCollate(d *ddlCtx, t *meta.Meta, job *model.Job) (ver int64, _ error) {
func (w *worker) onModifyTableCharsetAndCollate(d *ddlCtx, t *meta.Meta, job *model.Job) (ver int64, _ error) {
var toCharset, toCollate string
var needsOverwriteCols bool
if err := job.DecodeArgs(&toCharset, &toCollate, &needsOverwriteCols); err != nil {
Expand All @@ -1040,11 +1041,32 @@ func onModifyTableCharsetAndCollate(d *ddlCtx, t *meta.Meta, job *model.Job) (ve
return ver, errors.Trace(err)
}

tblInfo.Charset = toCharset
tblInfo.Collate = toCollate

if needsOverwriteCols {
oldCols := make([]*model.ColumnInfo, 0, len(tblInfo.Columns))
newCols := make([]*model.ColumnInfo, 0, len(tblInfo.Columns))
// update column charset.
for _, col := range tblInfo.Columns {
newCol := col.Clone()
newCol.SetCharset(toCharset)
newCol.SetCollate(toCollate)
if field_types.HasCharset(&col.FieldType) && needCheckLatin1Convert(col, newCol) {
oldCols = append(oldCols, col)
newCols = append(newCols, newCol)
}
}
if len(oldCols) != 0 {
err := w.checkLatin1Convert(dbInfo.Name, tblInfo.Name, oldCols, newCols)
if err != nil {
if job.ReorgMeta.SQLMode.HasStrictMode() {
job.State = model.JobStateCancelled
return ver, err
} else {
warn := errors.Cause(err).(*terror.Error)
job.ReorgMeta.Warnings[warn.ID()] = warn
job.ReorgMeta.WarningsCount[warn.ID()] = 1
}
}
Defined2014 marked this conversation as resolved.
Show resolved Hide resolved
}
for _, col := range tblInfo.Columns {
if field_types.HasCharset(&col.FieldType) {
col.SetCharset(toCharset)
Expand All @@ -1056,6 +1078,9 @@ func onModifyTableCharsetAndCollate(d *ddlCtx, t *meta.Meta, job *model.Job) (ve
}
}

tblInfo.Charset = toCharset
tblInfo.Collate = toCollate

ver, err = updateVersionAndTableInfo(d, t, job, tblInfo, true)
if err != nil {
return ver, errors.Trace(err)
Expand Down
10 changes: 10 additions & 0 deletions parser/charset/encoding_latin1.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
package charset

import (
"bytes"
Defined2014 marked this conversation as resolved.
Show resolved Hide resolved

"github.com/pingcap/failpoint"
"golang.org/x/text/encoding"
)

Expand All @@ -34,3 +37,10 @@ type encodingLatin1 struct {
func (e *encodingLatin1) Name() string {
return CharsetLatin1
}

func (e *encodingLatin1) Transform(dest *bytes.Buffer, src []byte, op Op) ([]byte, error) {
failpoint.Inject("latin1EnableInvalidCharacter", func() {
failpoint.Return(src, nil)
})
return e.encodingUTF8.Transform(dest, src, op)
}
1 change: 1 addition & 0 deletions parser/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ require (
github.com/cznic/strutil v0.0.0-20171016134553-529a34b1c186
github.com/go-sql-driver/mysql v1.6.0
github.com/pingcap/errors v0.11.5-0.20210425183316-da1aaba5fb63
github.com/pingcap/failpoint v0.0.0-20220423142525-ae43b7f4e5c3
github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7
github.com/stretchr/testify v1.7.0
go.uber.org/goleak v1.1.10
Expand Down
11 changes: 9 additions & 2 deletions parser/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,21 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
github.com/pingcap/errors v0.11.5-0.20210425183316-da1aaba5fb63 h1:+FZIDR/D97YOPik4N4lPDaUcLDF/EQPogxtlHB2ZZRM=
github.com/pingcap/errors v0.11.5-0.20210425183316-da1aaba5fb63/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg=
github.com/pingcap/failpoint v0.0.0-20220423142525-ae43b7f4e5c3 h1:kJolJWbyadVeL8RKBlqmXQR7FRKPsIeU85TUYyhbhiQ=
github.com/pingcap/failpoint v0.0.0-20220423142525-ae43b7f4e5c3/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew=
github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7 h1:k2BbABz9+TNpYRwsCCFS8pEEnFVOdbgEjL/kTlLuzZQ=
github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM=
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6OkFY5QxjkYwrChwuRruF69c169dPK26NUlk=
github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
Expand Down Expand Up @@ -64,11 +69,13 @@ golang.org/x/tools v0.0.0-20191108193012-7d206e10da11 h1:Yq9t9jnGoR+dBuitxdo9l6Q
golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8=
gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
Expand Down