Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dumpling: fix default collation with upstream when dump database and table #30292

Merged
merged 26 commits into from
Dec 4, 2021
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
542c5f7
commit-message: fix dumpling default collation
WizardXiao Nov 30, 2021
3a61454
Merge branch 'master' of https://github.com/WizardXiao/tidb into fix-…
WizardXiao Nov 30, 2021
fac5b7a
commit-message: add ut
WizardXiao Dec 1, 2021
b189f0c
Merge branch 'master' of https://github.com/WizardXiao/tidb into fix-…
WizardXiao Dec 1, 2021
c1851c3
commit-message: add collation by show SHOW CHARACTER SET
WizardXiao Dec 2, 2021
cc1ca99
Merge branch 'master' of https://github.com/WizardXiao/tidb into fix-…
WizardXiao Dec 2, 2021
9ce96d9
Merge branch 'master' of https://github.com/WizardXiao/tidb into fix-…
WizardXiao Dec 2, 2021
afbb651
Merge branch 'master' into fix-diff-default-collation
Ehco1996 Dec 3, 2021
fa1aab5
Merge branch 'master' of https://github.com/WizardXiao/tidb into fix-…
WizardXiao Dec 3, 2021
136e06d
commit-message: adjust test
WizardXiao Dec 3, 2021
db755e5
Merge branch 'fix-diff-default-collation' of https://github.com/Wizar…
WizardXiao Dec 3, 2021
98480b5
commit-message: update the xx
WizardXiao Dec 3, 2021
7bcb3a4
commit-message: move table ajust into has schema
WizardXiao Dec 3, 2021
bd862ea
Merge branch 'master' of https://github.com/pingcap/tidb into fix-dif…
WizardXiao Dec 3, 2021
c03bc43
commit-message: update dumpling inegration test conf
WizardXiao Dec 3, 2021
7f94d8d
commit-message: fix rows close
WizardXiao Dec 3, 2021
f0e5b5b
Merge branch 'master' of https://github.com/pingcap/tidb into fix-dif…
WizardXiao Dec 3, 2021
57b326d
commit-message: fix integration conf
WizardXiao Dec 3, 2021
dfc5fdf
commit-message: fix log level
WizardXiao Dec 3, 2021
b901498
commit-message: fix log
WizardXiao Dec 3, 2021
8d27963
commit-message: delete blank line
WizardXiao Dec 3, 2021
8c5ab0c
commit-message: fix integration test about quote
WizardXiao Dec 3, 2021
bfe0c13
trigger test
Ehco1996 Dec 4, 2021
5cce586
commit-message: fix integration test in mysql 5.7.35
WizardXiao Dec 4, 2021
24d77c5
Merge branch 'fix-diff-default-collation' of https://github.com/Wizar…
WizardXiao Dec 4, 2021
38e8247
trigger test
Ehco1996 Dec 4, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 116 additions & 9 deletions dumpling/export/dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import (
"database/sql"
"encoding/hex"
"fmt"
"github.com/go-sql-driver/mysql"
"math/big"
"sort"
"strconv"
Expand All @@ -17,7 +16,7 @@ import (
"time"

// import mysql driver
_ "github.com/go-sql-driver/mysql"
"github.com/go-sql-driver/mysql"
"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
pclog "github.com/pingcap/log"
Expand All @@ -31,6 +30,9 @@ import (
"github.com/pingcap/tidb/dumpling/cli"
tcontext "github.com/pingcap/tidb/dumpling/context"
"github.com/pingcap/tidb/dumpling/log"
"github.com/pingcap/tidb/parser"
"github.com/pingcap/tidb/parser/ast"
"github.com/pingcap/tidb/parser/format"
"github.com/pingcap/tidb/store/helper"
"github.com/pingcap/tidb/tablecodec"
"github.com/pingcap/tidb/util/codec"
Expand All @@ -49,9 +51,10 @@ type Dumper struct {
extStore storage.ExternalStorage
dbHandle *sql.DB

tidbPDClientForGC pd.Client
selectTiDBTableRegionFunc func(tctx *tcontext.Context, conn *sql.Conn, meta TableMeta) (pkFields []string, pkVals [][]string, err error)
totalTables int64
tidbPDClientForGC pd.Client
selectTiDBTableRegionFunc func(tctx *tcontext.Context, conn *sql.Conn, meta TableMeta) (pkFields []string, pkVals [][]string, err error)
totalTables int64
charsetAndDefaultCollationMap map[string]string
}

// NewDumper returns a new Dumper
Expand Down Expand Up @@ -151,6 +154,12 @@ func (d *Dumper) Dump() (dumpErr error) {
tctx.L().Info("get global metadata failed", zap.Error(err))
}

//init charset and default collation map
d.charsetAndDefaultCollationMap, err = GetCharsetAndDefaultCollation(tctx.Context, metaConn)
if err != nil {
return err
}

// for other consistencies, we should get table list after consistency is set up and GlobalMetaData is cached
if conf.Consistency != consistencyTypeLock {
if err = prepareTableListToDump(tctx, conf, metaConn); err != nil {
Expand Down Expand Up @@ -320,7 +329,7 @@ func (d *Dumper) dumpDatabases(tctx *tcontext.Context, metaConn *sql.Conn, taskC
for _, policy := range policyNames {
createPolicySQL, err := ShowCreatePlacementPolicy(metaConn, policy)
if err != nil {
return err
return errors.Trace(err)
}
wrappedCreatePolicySQL := fmt.Sprintf("/*T![placement] %s */", createPolicySQL)
task := NewTaskPolicyMeta(policy, wrappedCreatePolicySQL)
Expand All @@ -331,11 +340,17 @@ func (d *Dumper) dumpDatabases(tctx *tcontext.Context, metaConn *sql.Conn, taskC
}
}

parser1 := parser.New()
for dbName, tables := range allTables {
if !conf.NoSchemas {
createDatabaseSQL, err := ShowCreateDatabase(metaConn, dbName)
if err != nil {
return err
return errors.Trace(err)
}
// adjust db collation
createDatabaseSQL, err = adjustDatabaseCollation(tctx, parser1, createDatabaseSQL, d.charsetAndDefaultCollationMap)
if err != nil {
return errors.Trace(err)
}
task := NewTaskDatabaseMeta(dbName, createDatabaseSQL)
ctxDone := d.sendTaskToChan(tctx, task, taskChan)
Expand All @@ -349,7 +364,7 @@ func (d *Dumper) dumpDatabases(tctx *tcontext.Context, metaConn *sql.Conn, taskC
zap.String("table", table.Name))
meta, err := dumpTableMeta(conf, metaConn, dbName, table)
if err != nil {
return err
return errors.Trace(err)
}

if !conf.NoSchemas {
Expand All @@ -360,6 +375,12 @@ func (d *Dumper) dumpDatabases(tctx *tcontext.Context, metaConn *sql.Conn, taskC
return tctx.Err()
}
} else {
// adjust table collation
newCreateSQL, err := adjustTableCollation(tctx, parser1, meta.ShowCreateTable(), d.charsetAndDefaultCollationMap)
if err != nil {
return errors.Trace(err)
}
meta.(*tableMeta).showCreateTable = newCreateSQL
task := NewTaskTableMeta(dbName, table.Name, meta.ShowCreateTable())
ctxDone := d.sendTaskToChan(tctx, task, taskChan)
if ctxDone {
Expand All @@ -370,7 +391,7 @@ func (d *Dumper) dumpDatabases(tctx *tcontext.Context, metaConn *sql.Conn, taskC
if table.Type == TableTypeBase {
err = d.dumpTableData(tctx, metaConn, meta, taskChan)
if err != nil {
return err
return errors.Trace(err)
}
}
}
Expand All @@ -379,6 +400,92 @@ func (d *Dumper) dumpDatabases(tctx *tcontext.Context, metaConn *sql.Conn, taskC
return nil
}

// adjustDatabaseCollation adjusts db collation and return new create sql and collation
func adjustDatabaseCollation(tctx *tcontext.Context, parser *parser.Parser, originSQL string, charsetAndDefaultCollationMap map[string]string) (string, error) {
stmt, err := parser.ParseOneStmt(originSQL, "", "")
if err != nil {
tctx.L().Warn("parse create database error, maybe tidb parser doesn't support it", zap.String("originSQL", originSQL), log.ShortError(err))
return originSQL, nil
}
createStmt, ok := stmt.(*ast.CreateDatabaseStmt)
if !ok {
return originSQL, nil
}
var charset string
for _, createOption := range createStmt.Options {
// already have 'Collation'
if createOption.Tp == ast.DatabaseOptionCollate {
return originSQL, nil
}
if createOption.Tp == ast.DatabaseOptionCharset {
charset = createOption.Value
}
}
// get db collation
collation, ok := charsetAndDefaultCollationMap[strings.ToLower(charset)]
WizardXiao marked this conversation as resolved.
Show resolved Hide resolved
if !ok {
tctx.L().Error("not found database charset default collation.", zap.String("originSQL", originSQL), zap.String("charset", strings.ToLower(charset)))
WizardXiao marked this conversation as resolved.
Show resolved Hide resolved
return originSQL, nil
}
// add collation
WizardXiao marked this conversation as resolved.
Show resolved Hide resolved
createStmt.Options = append(createStmt.Options, &ast.DatabaseOption{Tp: ast.DatabaseOptionCollate, Value: collation})
// rewrite sql
var b []byte
bf := bytes.NewBuffer(b)
err = createStmt.Restore(&format.RestoreCtx{
Flags: format.DefaultRestoreFlags | format.RestoreTiDBSpecialComment,
In: bf,
})
if err != nil {
return "", err
WizardXiao marked this conversation as resolved.
Show resolved Hide resolved
}
return bf.String(), nil
}

// adjustTableCollation adjusts table collation
func adjustTableCollation(tctx *tcontext.Context, parser *parser.Parser, originSQL string, charsetAndDefaultCollationMap map[string]string) (string, error) {
stmt, err := parser.ParseOneStmt(originSQL, "", "")
if err != nil {
tctx.L().Warn("parse create table error, maybe tidb parser doesn't support it", zap.String("originSQL", originSQL), log.ShortError(err))
return originSQL, nil
}
createStmt, ok := stmt.(*ast.CreateTableStmt)
if !ok {
return originSQL, nil
}
var charset string
for _, createOption := range createStmt.Options {
// already have 'Collation'
if createOption.Tp == ast.TableOptionCollate {
return originSQL, nil
}
if createOption.Tp == ast.TableOptionCharset {
charset = createOption.StrValue
}
}

// get db collation
collation, ok := charsetAndDefaultCollationMap[strings.ToLower(charset)]
if !ok {
tctx.L().Error("not found table charset default collation.", zap.String("originSQL", originSQL), zap.String("charset", strings.ToLower(charset)))
WizardXiao marked this conversation as resolved.
Show resolved Hide resolved
return originSQL, nil
}

// add collation
createStmt.Options = append(createStmt.Options, &ast.TableOption{Tp: ast.TableOptionCollate, StrValue: collation})
// rewrite sql
var b []byte
bf := bytes.NewBuffer(b)
err = createStmt.Restore(&format.RestoreCtx{
Flags: format.DefaultRestoreFlags | format.RestoreTiDBSpecialComment,
In: bf,
})
if err != nil {
return "", err
}
return bf.String(), nil
}

func (d *Dumper) dumpTableData(tctx *tcontext.Context, conn *sql.Conn, meta TableMeta, taskChan chan<- Task) error {
conf := d.conf
if conf.NoData {
Expand Down
54 changes: 54 additions & 0 deletions dumpling/export/dump_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (

"github.com/pingcap/tidb/br/pkg/version"
tcontext "github.com/pingcap/tidb/dumpling/context"
"github.com/pingcap/tidb/parser"
)

func TestDumpBlock(t *testing.T) {
Expand All @@ -29,6 +30,9 @@ func TestDumpBlock(t *testing.T) {
mock.ExpectQuery(fmt.Sprintf("SHOW CREATE DATABASE `%s`", escapeString(database))).
WillReturnRows(sqlmock.NewRows([]string{"Database", "Create Database"}).
AddRow("test", "CREATE DATABASE `test` /*!40100 DEFAULT CHARACTER SET utf8mb4 */"))
mock.ExpectQuery(fmt.Sprintf("SELECT DEFAULT_COLLATION_NAME FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = '%s'", escapeString(database))).
WillReturnRows(sqlmock.NewRows([]string{"DEFAULT_COLLATION_NAME"}).
AddRow("utf8mb4_bin"))

tctx, cancel := tcontext.Background().WithLogger(appLogger).WithCancel()
defer cancel()
Expand Down Expand Up @@ -128,3 +132,53 @@ func TestGetListTableTypeByConf(t *testing.T) {
require.Equalf(t, x.expected, getListTableTypeByConf(conf), "server info: %s, consistency: %s", x.serverInfo, x.consistency)
}
}

// TestAdjustDatabaseCollation checks that adjustDatabaseCollation leaves a
// statement with an explicit collation untouched and appends the mapped
// default collation to a statement that only declares a character set.
func TestAdjustDatabaseCollation(t *testing.T) {
	t.Parallel()

	tctx, cancel := tcontext.Background().WithLogger(appLogger).WithCancel()
	defer cancel()

	sqlParser := parser.New()
	defaultCollations := map[string]string{"utf8mb4": "utf8mb4_general_ci"}

	cases := []struct {
		origin   string
		expected string
	}{
		{
			// Collation already present: the SQL must come back verbatim.
			origin:   "create database `test` CHARACTER SET=utf8mb4 COLLATE=utf8mb4_general_ci",
			expected: "create database `test` CHARACTER SET=utf8mb4 COLLATE=utf8mb4_general_ci",
		},
		{
			// Only a charset: the default collation is appended and the SQL is restored.
			origin:   "create database `test` CHARACTER SET=utf8mb4",
			expected: "CREATE DATABASE `test` CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci",
		},
	}

	for _, tc := range cases {
		got, err := adjustDatabaseCollation(tctx, sqlParser, tc.origin, defaultCollations)
		require.NoError(t, err)
		require.Equal(t, tc.expected, got)
	}
}

// TestAdjustTableCollation checks that adjustTableCollation leaves a statement
// with an explicit collation untouched and appends the mapped default
// collation to a statement that only declares a character set.
func TestAdjustTableCollation(t *testing.T) {
	t.Parallel()

	tctx, cancel := tcontext.Background().WithLogger(appLogger).WithCancel()
	defer cancel()

	sqlParser := parser.New()
	defaultCollations := map[string]string{"utf8mb4": "utf8mb4_general_ci"}

	cases := []struct {
		origin   string
		expected string
	}{
		{
			// Collation already present: the SQL must come back verbatim.
			origin:   "create table `test`.`t1` (id int) CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci",
			expected: "create table `test`.`t1` (id int) CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci",
		},
		{
			// Only a charset: the default collation is appended and the SQL is restored.
			origin:   "create table `test`.`t1` (id int) CHARSET=utf8mb4",
			expected: "CREATE TABLE `test`.`t1` (`id` INT) DEFAULT CHARACTER SET = UTF8MB4 DEFAULT COLLATE = UTF8MB4_GENERAL_CI",
		},
	}

	for _, tc := range cases {
		got, err := adjustTableCollation(tctx, sqlParser, tc.origin, defaultCollations)
		require.NoError(t, err)
		require.Equal(t, tc.expected, got)
	}
}
40 changes: 40 additions & 0 deletions dumpling/export/sql.go
Original file line number Diff line number Diff line change
Expand Up @@ -1387,3 +1387,43 @@ func GetRegionInfos(db *sql.Conn) (*helper.RegionsInfo, error) {
})
return regionsInfo, err
}

// GetCharsetAndDefaultCollation gets charset and default collation map.
//
// It runs `SHOW CHARACTER SET` on db and returns a map from the lower-cased
// charset name to that charset's default collation. Any failure while
// querying, scanning, iterating or closing the result set is returned
// annotated with the query text, and the returned map is nil in that case.
func GetCharsetAndDefaultCollation(ctx context.Context, db *sql.Conn) (map[string]string, error) {
	charsetAndDefaultCollation := make(map[string]string)
	query := "SHOW CHARACTER SET"

	// Show an example.
	/*
		mysql> SHOW CHARACTER SET;
		+----------+---------------------------------+---------------------+--------+
		| Charset  | Description                     | Default collation   | Maxlen |
		+----------+---------------------------------+---------------------+--------+
		| armscii8 | ARMSCII-8 Armenian              | armscii8_general_ci |      1 |
		| ascii    | US ASCII                        | ascii_general_ci    |      1 |
		| big5     | Big5 Traditional Chinese        | big5_chinese_ci     |      2 |
		| binary   | Binary pseudo charset           | binary              |      1 |
		| cp1250   | Windows Central European        | cp1250_general_ci   |      1 |
		| cp1251   | Windows Cyrillic                | cp1251_general_ci   |      1 |
		+----------+---------------------------------+---------------------+--------+
	*/

	rows, err := db.QueryContext(ctx, query)
	if err != nil {
		return nil, errors.Annotatef(err, "sql: %s", query)
	}
	defer rows.Close()

	for rows.Next() {
		var charset, description, collation string
		var maxlen int
		// Assign to the outer err so the annotated value is the actual scan
		// failure; annotating a nil error yields nil and would hide it.
		if err = rows.Scan(&charset, &description, &collation, &maxlen); err != nil {
			return nil, errors.Annotatef(err, "sql: %s", query)
		}
		charsetAndDefaultCollation[strings.ToLower(charset)] = collation
	}
	// rows.Next returning false may mean the iteration aborted, not that the
	// result set was exhausted; surface that instead of returning a partial map.
	if err = rows.Err(); err != nil {
		return nil, errors.Annotatef(err, "sql: %s", query)
	}
	if err = rows.Close(); err != nil {
		return nil, errors.Annotatef(err, "sql: %s", query)
	}
	return charsetAndDefaultCollation, nil
}
25 changes: 24 additions & 1 deletion dumpling/export/sql_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"encoding/csv"
"encoding/json"
"fmt"
"github.com/go-sql-driver/mysql"
"io"
"os"
"path"
Expand All @@ -18,6 +17,8 @@ import (
"strings"
"testing"

"github.com/go-sql-driver/mysql"

"github.com/DATA-DOG/go-sqlmock"
"github.com/coreos/go-semver/semver"
"github.com/pingcap/errors"
Expand Down Expand Up @@ -1746,6 +1747,28 @@ func TestPickupPossibleField(t *testing.T) {
}
}

// TestGetCharsetAndDefaultCollation feeds a mocked `SHOW CHARACTER SET` result
// to GetCharsetAndDefaultCollation and checks the charset -> default collation
// entries it returns.
func TestGetCharsetAndDefaultCollation(t *testing.T) {
	t.Parallel()

	db, mock, err := sqlmock.New()
	require.NoError(t, err)
	defer func() {
		require.NoError(t, db.Close())
	}()

	ctx := context.Background()
	conn, err := db.Conn(ctx)
	require.NoError(t, err)

	// Mocked result set with the same four columns the query is scanned into.
	columns := []string{"Charset", "Description", "Default collation", "Maxlen"}
	resultRows := sqlmock.NewRows(columns).
		AddRow("utf8mb4", "UTF-8 Unicode", "utf8mb4_0900_ai_ci", 4).
		AddRow("latin1", "cp1252 West European", "latin1_swedish_ci", 1)
	mock.ExpectQuery("SHOW CHARACTER SET").WillReturnRows(resultRows)

	got, err := GetCharsetAndDefaultCollation(ctx, conn)
	require.NoError(t, err)

	expectations := []struct {
		charset   string
		collation string
	}{
		{charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci"},
		{charset: "latin1", collation: "latin1_swedish_ci"},
	}
	for _, want := range expectations {
		require.Equal(t, want.collation, got[want.charset])
	}
}

func makeVersion(major, minor, patch int64, preRelease string) *semver.Version {
return &semver.Version{
Major: major,
Expand Down