From c67648ff2f8e2573d12afba09f17d8f8fecb42f7 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 28 Jan 2021 17:14:19 +0200 Subject: [PATCH 01/72] Towards VReplication based Online DDL Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/schema/online_ddl.go | 8 +++++--- go/vt/schema/online_ddl_test.go | 7 +++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/go/vt/schema/online_ddl.go b/go/vt/schema/online_ddl.go index 39b5bb08134..197348e1e87 100644 --- a/go/vt/schema/online_ddl.go +++ b/go/vt/schema/online_ddl.go @@ -32,7 +32,7 @@ var ( migrationBasePath = "schema-migration" onlineDdlUUIDRegexp = regexp.MustCompile(`^[0-f]{8}_[0-f]{4}_[0-f]{4}_[0-f]{4}_[0-f]{12}$`) strategyParserRegexp = regexp.MustCompile(`^([\S]+)\s+(.*)$`) - onlineDDLGeneratedTableNameRegexp = regexp.MustCompile(`^_[0-f]{8}_[0-f]{4}_[0-f]{4}_[0-f]{4}_[0-f]{12}_([0-9]{14})_(gho|ghc|del|new)$`) + onlineDDLGeneratedTableNameRegexp = regexp.MustCompile(`^_[0-f]{8}_[0-f]{4}_[0-f]{4}_[0-f]{4}_[0-f]{12}_([0-9]{14})_(gho|ghc|del|new|vrepl)$`) ptOSCGeneratedTableNameRegexp = regexp.MustCompile(`^_.*_old$`) ) @@ -82,6 +82,8 @@ type DDLStrategy string const ( // DDLStrategyDirect means not an online-ddl migration. 
Just a normal MySQL ALTER TABLE DDLStrategyDirect DDLStrategy = "direct" + // DDLStrategyOnline requests vreplication to run the migration + DDLStrategyOnline DDLStrategy = "online" // DDLStrategyGhost requests gh-ost to run the migration DDLStrategyGhost DDLStrategy = "gh-ost" // DDLStrategyPTOSC requests pt-online-schema-change to run the migration @@ -92,7 +94,7 @@ const ( // A strategy is direct if it's not explciitly one of the online DDL strategies func (s DDLStrategy) IsDirect() bool { switch s { - case DDLStrategyGhost, DDLStrategyPTOSC: + case DDLStrategyOnline, DDLStrategyGhost, DDLStrategyPTOSC: return false } return true @@ -125,7 +127,7 @@ func ParseDDLStrategy(strategyVariable string) (strategy DDLStrategy, options st switch strategy = DDLStrategy(strategyName); strategy { case "": // backwards compatiblity and to handle unspecified values return DDLStrategyDirect, options, nil - case DDLStrategyGhost, DDLStrategyPTOSC, DDLStrategyDirect: + case DDLStrategyOnline, DDLStrategyGhost, DDLStrategyPTOSC, DDLStrategyDirect: return strategy, options, nil default: return DDLStrategyDirect, options, fmt.Errorf("Unknown online DDL strategy: '%v'", strategy) diff --git a/go/vt/schema/online_ddl_test.go b/go/vt/schema/online_ddl_test.go index 28f28b9e6ad..4ea6b6e79b4 100644 --- a/go/vt/schema/online_ddl_test.go +++ b/go/vt/schema/online_ddl_test.go @@ -31,6 +31,7 @@ func TestCreateUUID(t *testing.T) { func TestIsDirect(t *testing.T) { assert.True(t, DDLStrategyDirect.IsDirect()) + assert.False(t, DDLStrategyOnline.IsDirect()) assert.False(t, DDLStrategyGhost.IsDirect()) assert.False(t, DDLStrategyPTOSC.IsDirect()) assert.True(t, DDLStrategy("").IsDirect()) @@ -50,6 +51,10 @@ func TestParseDDLStrategy(t *testing.T) { strategyVariable: "direct", strategy: DDLStrategyDirect, }, + { + strategyVariable: "online", + strategy: DDLStrategyOnline, + }, { strategyVariable: "gh-ost", strategy: DDLStrategyGhost, @@ -151,6 +156,7 @@ func TestIsOnlineDDLTableName(t *testing.T) 
{ "_4e5dcf80_354b_11eb_82cd_f875a4d24e90_20201203114014_ghc", "_4e5dcf80_354b_11eb_82cd_f875a4d24e90_20201203114014_del", "_4e5dcf80_354b_11eb_82cd_f875a4d24e90_20201203114013_new", + "_84371a37_6153_11eb_9917_f875a4d24e90_20210128122816_vrepl", "_table_old", "__table_old", } @@ -164,6 +170,7 @@ func TestIsOnlineDDLTableName(t *testing.T) { "_table_gho", "_table_ghc", "_table_del", + "_table_vrepl", "table_old", } for _, tableName := range irrelevantNames { From 56cb0d6672ee4e67e0b5ead7d1f19b7a8b106c94 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 28 Jan 2021 17:15:50 +0200 Subject: [PATCH 02/72] testing for DDLStrategyOnline Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/schemamanager/tablet_executor_test.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/go/vt/schemamanager/tablet_executor_test.go b/go/vt/schemamanager/tablet_executor_test.go index 954d1247a90..ab6c1f16e83 100644 --- a/go/vt/schemamanager/tablet_executor_test.go +++ b/go/vt/schemamanager/tablet_executor_test.go @@ -239,6 +239,12 @@ func TestIsOnlineSchemaDDL(t *testing.T) { isOnlineDDL: true, strategy: schema.DDLStrategyGhost, }, + { + query: "ALTER TABLE t ADD COLUMN i INT", + ddlStrategy: "online", + isOnlineDDL: true, + strategy: schema.DDLStrategyOnline, + }, { query: "ALTER TABLE t ADD COLUMN i INT", ddlStrategy: "", @@ -257,6 +263,11 @@ func TestIsOnlineSchemaDDL(t *testing.T) { strategy: schema.DDLStrategyGhost, options: "--max-load=Threads_running=100", }, + { + query: "TRUNCATE TABLE t", + ddlStrategy: "online", + isOnlineDDL: false, + }, { query: "TRUNCATE TABLE t", ddlStrategy: "gh-ost", From 379d7ec1863b58516004af1442e7cc93d18068e9 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Sun, 31 Jan 2021 08:38:56 +0200 Subject: [PATCH 03/72] import from gh-ost and iterating Signed-off-by: Shlomi Noach 
<2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/vrepl/encoding.go | 23 ++ go/vt/vttablet/onlineddl/vrepl/parser.go | 228 ++++++++++++ go/vt/vttablet/onlineddl/vrepl/parser_test.go | 319 +++++++++++++++++ go/vt/vttablet/onlineddl/vrepl/types.go | 332 ++++++++++++++++++ go/vt/vttablet/onlineddl/vrepl/types_test.go | 100 ++++++ 5 files changed, 1002 insertions(+) create mode 100644 go/vt/vttablet/onlineddl/vrepl/encoding.go create mode 100644 go/vt/vttablet/onlineddl/vrepl/parser.go create mode 100644 go/vt/vttablet/onlineddl/vrepl/parser_test.go create mode 100644 go/vt/vttablet/onlineddl/vrepl/types.go create mode 100644 go/vt/vttablet/onlineddl/vrepl/types_test.go diff --git a/go/vt/vttablet/onlineddl/vrepl/encoding.go b/go/vt/vttablet/onlineddl/vrepl/encoding.go new file mode 100644 index 00000000000..713c6925878 --- /dev/null +++ b/go/vt/vttablet/onlineddl/vrepl/encoding.go @@ -0,0 +1,23 @@ +/* + Copyright 2016 GitHub Inc. + See https://github.com/github/gh-ost/blob/master/LICENSE +*/ + +package vrepl + +import ( + "golang.org/x/text/encoding" + "golang.org/x/text/encoding/charmap" + "golang.org/x/text/encoding/simplifiedchinese" +) + +type charsetEncoding map[string]encoding.Encoding + +var charsetEncodingMap charsetEncoding + +func init() { + charsetEncodingMap = make(map[string]encoding.Encoding) + // Begin mappings + charsetEncodingMap["latin1"] = charmap.Windows1252 + charsetEncodingMap["gbk"] = simplifiedchinese.GBK +} diff --git a/go/vt/vttablet/onlineddl/vrepl/parser.go b/go/vt/vttablet/onlineddl/vrepl/parser.go new file mode 100644 index 00000000000..57689d64498 --- /dev/null +++ b/go/vt/vttablet/onlineddl/vrepl/parser.go @@ -0,0 +1,228 @@ +/* + Copyright 2016 GitHub Inc. 
+ See https://github.com/github/gh-ost/blob/master/LICENSE +*/ + +package vrepl + +import ( + "regexp" + "strconv" + "strings" +) + +var ( + sanitizeQuotesRegexp = regexp.MustCompile("('[^']*')") + renameColumnRegexp = regexp.MustCompile(`(?i)\bchange\s+(column\s+|)([\S]+)\s+([\S]+)\s+`) + dropColumnRegexp = regexp.MustCompile(`(?i)\bdrop\s+(column\s+|)([\S]+)$`) + renameTableRegexp = regexp.MustCompile(`(?i)\brename\s+(to|as)\s+`) + autoIncrementRegexp = regexp.MustCompile(`(?i)\bauto_increment[\s]*=[\s]*([0-9]+)`) + alterTableExplicitSchemaTableRegexps = []*regexp.Regexp{ + // ALTER TABLE `scm`.`tbl` something + regexp.MustCompile(`(?i)\balter\s+table\s+` + "`" + `([^` + "`" + `]+)` + "`" + `[.]` + "`" + `([^` + "`" + `]+)` + "`" + `\s+(.*$)`), + // ALTER TABLE `scm`.tbl something + regexp.MustCompile(`(?i)\balter\s+table\s+` + "`" + `([^` + "`" + `]+)` + "`" + `[.]([\S]+)\s+(.*$)`), + // ALTER TABLE scm.`tbl` something + regexp.MustCompile(`(?i)\balter\s+table\s+([\S]+)[.]` + "`" + `([^` + "`" + `]+)` + "`" + `\s+(.*$)`), + // ALTER TABLE scm.tbl something + regexp.MustCompile(`(?i)\balter\s+table\s+([\S]+)[.]([\S]+)\s+(.*$)`), + } + alterTableExplicitTableRegexps = []*regexp.Regexp{ + // ALTER TABLE `tbl` something + regexp.MustCompile(`(?i)\balter\s+table\s+` + "`" + `([^` + "`" + `]+)` + "`" + `\s+(.*$)`), + // ALTER TABLE tbl something + regexp.MustCompile(`(?i)\balter\s+table\s+([\S]+)\s+(.*$)`), + } +) + +// AlterTableParser is a parser tool for ALTER TABLE statements +// This is imported from gh-ost. In the future, we should replace that with Vitess parsing. 
+type AlterTableParser struct { + columnRenameMap map[string]string + droppedColumns map[string]bool + isRenameTable bool + isAutoIncrementDefined bool + + alterStatementOptions string + alterTokens []string + + explicitSchema string + explicitTable string +} + +// NewAlterTableParser creates a new parser +func NewAlterTableParser() *AlterTableParser { + return &AlterTableParser{ + columnRenameMap: make(map[string]string), + droppedColumns: make(map[string]bool), + } +} + +// NewParserFromAlterStatement creates a new parser with a ALTER TABLE statement +func NewParserFromAlterStatement(alterStatement string) *AlterTableParser { + parser := NewAlterTableParser() + parser.ParseAlterStatement(alterStatement) + return parser +} + +// tokenizeAlterStatement +func (p *AlterTableParser) tokenizeAlterStatement(alterStatement string) (tokens []string, err error) { + terminatingQuote := rune(0) + f := func(c rune) bool { + switch { + case c == terminatingQuote: + terminatingQuote = rune(0) + return false + case terminatingQuote != rune(0): + return false + case c == '\'': + terminatingQuote = c + return false + case c == '(': + terminatingQuote = ')' + return false + default: + return c == ',' + } + } + + tokens = strings.FieldsFunc(alterStatement, f) + for i := range tokens { + tokens[i] = strings.TrimSpace(tokens[i]) + } + return tokens, nil +} + +func (p *AlterTableParser) sanitizeQuotesFromAlterStatement(alterStatement string) (strippedStatement string) { + strippedStatement = alterStatement + strippedStatement = sanitizeQuotesRegexp.ReplaceAllString(strippedStatement, "''") + return strippedStatement +} + +// parseAlterToken parses a single ALTER option (e.g. 
a DROP COLUMN) +func (p *AlterTableParser) parseAlterToken(alterToken string) (err error) { + { + // rename + allStringSubmatch := renameColumnRegexp.FindAllStringSubmatch(alterToken, -1) + for _, submatch := range allStringSubmatch { + if unquoted, err := strconv.Unquote(submatch[2]); err == nil { + submatch[2] = unquoted + } + if unquoted, err := strconv.Unquote(submatch[3]); err == nil { + submatch[3] = unquoted + } + p.columnRenameMap[submatch[2]] = submatch[3] + } + } + { + // drop + allStringSubmatch := dropColumnRegexp.FindAllStringSubmatch(alterToken, -1) + for _, submatch := range allStringSubmatch { + if unquoted, err := strconv.Unquote(submatch[2]); err == nil { + submatch[2] = unquoted + } + p.droppedColumns[submatch[2]] = true + } + } + { + // rename table + if renameTableRegexp.MatchString(alterToken) { + p.isRenameTable = true + } + } + { + // auto_increment + if autoIncrementRegexp.MatchString(alterToken) { + p.isAutoIncrementDefined = true + } + } + return nil +} + +// ParseAlterStatement is the main function of th eparser, and parses an ALTER TABLE statement +func (p *AlterTableParser) ParseAlterStatement(alterStatement string) (err error) { + p.alterStatementOptions = alterStatement + for _, alterTableRegexp := range alterTableExplicitSchemaTableRegexps { + if submatch := alterTableRegexp.FindStringSubmatch(p.alterStatementOptions); len(submatch) > 0 { + p.explicitSchema = submatch[1] + p.explicitTable = submatch[2] + p.alterStatementOptions = submatch[3] + break + } + } + for _, alterTableRegexp := range alterTableExplicitTableRegexps { + if submatch := alterTableRegexp.FindStringSubmatch(p.alterStatementOptions); len(submatch) > 0 { + p.explicitTable = submatch[1] + p.alterStatementOptions = submatch[2] + break + } + } + alterTokens, _ := p.tokenizeAlterStatement(p.alterStatementOptions) + for _, alterToken := range alterTokens { + alterToken = p.sanitizeQuotesFromAlterStatement(alterToken) + p.parseAlterToken(alterToken) + p.alterTokens = 
append(p.alterTokens, alterToken) + } + return nil +} + +// GetNonTrivialRenames gets a list of renamed column +func (p *AlterTableParser) GetNonTrivialRenames() map[string]string { + result := make(map[string]string) + for column, renamed := range p.columnRenameMap { + if column != renamed { + result[column] = renamed + } + } + return result +} + +// HasNonTrivialRenames is true when columns have been renamed +func (p *AlterTableParser) HasNonTrivialRenames() bool { + return len(p.GetNonTrivialRenames()) > 0 +} + +// DroppedColumnsMap returns list of dropped columns +func (p *AlterTableParser) DroppedColumnsMap() map[string]bool { + return p.droppedColumns +} + +// IsRenameTable returns true when the ALTER TABLE statement inclusdes renaming the table +func (p *AlterTableParser) IsRenameTable() bool { + return p.isRenameTable +} + +// IsAutoIncrementDefined returns true when alter options include an explicit AUTO_INCREMENT value +func (p *AlterTableParser) IsAutoIncrementDefined() bool { + return p.isAutoIncrementDefined +} + +// GetExplicitSchema returns the explciit schema, if defined +func (p *AlterTableParser) GetExplicitSchema() string { + return p.explicitSchema +} + +// HasExplicitSchema returns true when the ALTER TABLE statement includes the schema qualifier +func (p *AlterTableParser) HasExplicitSchema() bool { + return p.GetExplicitSchema() != "" +} + +// GetExplicitTable return the table name +func (p *AlterTableParser) GetExplicitTable() string { + return p.explicitTable +} + +// HasExplicitTable checks if the ALTER TABLE statement has an explicit table name +func (p *AlterTableParser) HasExplicitTable() bool { + return p.GetExplicitTable() != "" +} + +// GetAlterStatementOptions returns the options section in the ALTER TABLE statement +func (p *AlterTableParser) GetAlterStatementOptions() string { + return p.alterStatementOptions +} + +// ColumnRenameMap returns the renamed column mapping +func (p *AlterTableParser) ColumnRenameMap() map[string]string 
{ + return p.columnRenameMap +} diff --git a/go/vt/vttablet/onlineddl/vrepl/parser_test.go b/go/vt/vttablet/onlineddl/vrepl/parser_test.go new file mode 100644 index 00000000000..457ad062e6a --- /dev/null +++ b/go/vt/vttablet/onlineddl/vrepl/parser_test.go @@ -0,0 +1,319 @@ +/* + Copyright 2016 GitHub Inc. + See https://github.com/github/gh-ost/blob/master/LICENSE +*/ + +package vrepl + +import ( + "reflect" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParseAlterStatement(t *testing.T) { + statement := "add column t int, engine=innodb" + parser := NewAlterTableParser() + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.Equal(t, parser.alterStatementOptions, statement) + assert.False(t, parser.HasNonTrivialRenames()) + assert.False(t, parser.IsAutoIncrementDefined()) +} + +func TestParseAlterStatementTrivialRename(t *testing.T) { + statement := "add column t int, change ts ts timestamp, engine=innodb" + parser := NewAlterTableParser() + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.Equal(t, parser.alterStatementOptions, statement) + assert.False(t, parser.HasNonTrivialRenames()) + assert.False(t, parser.IsAutoIncrementDefined()) + assert.Equal(t, len(parser.columnRenameMap), 1) + assert.Equal(t, parser.columnRenameMap["ts"], "ts") +} + +func TestParseAlterStatementWithAutoIncrement(t *testing.T) { + + statements := []string{ + "auto_increment=7", + "auto_increment = 7", + "AUTO_INCREMENT = 71", + "add column t int, change ts ts timestamp, auto_increment=7 engine=innodb", + "add column t int, change ts ts timestamp, auto_increment =7 engine=innodb", + "add column t int, change ts ts timestamp, AUTO_INCREMENT = 7 engine=innodb", + "add column t int, change ts ts timestamp, engine=innodb auto_increment=73425", + } + for _, statement := range statements { + parser := NewAlterTableParser() + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.Equal(t, 
parser.alterStatementOptions, statement) + assert.True(t, parser.IsAutoIncrementDefined()) + } +} + +func TestParseAlterStatementTrivialRenames(t *testing.T) { + statement := "add column t int, change ts ts timestamp, CHANGE f `f` float, engine=innodb" + parser := NewAlterTableParser() + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.Equal(t, parser.alterStatementOptions, statement) + assert.False(t, parser.HasNonTrivialRenames()) + assert.False(t, parser.IsAutoIncrementDefined()) + assert.Equal(t, len(parser.columnRenameMap), 2) + assert.Equal(t, parser.columnRenameMap["ts"], "ts") + assert.Equal(t, parser.columnRenameMap["f"], "f") +} + +func TestParseAlterStatementNonTrivial(t *testing.T) { + statements := []string{ + `add column b bigint, change f fl float, change i count int, engine=innodb`, + "add column b bigint, change column `f` fl float, change `i` `count` int, engine=innodb", + "add column b bigint, change column `f` fl float, change `i` `count` int, change ts ts timestamp, engine=innodb", + `change + f fl float, + CHANGE COLUMN i + count int, engine=innodb`, + } + + for _, statement := range statements { + parser := NewAlterTableParser() + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.False(t, parser.IsAutoIncrementDefined()) + assert.Equal(t, parser.alterStatementOptions, statement) + renames := parser.GetNonTrivialRenames() + assert.Equal(t, len(renames), 2) + assert.Equal(t, renames["i"], "count") + assert.Equal(t, renames["f"], "fl") + } +} + +func TestTokenizeAlterStatement(t *testing.T) { + parser := NewAlterTableParser() + { + alterStatement := "add column t int" + tokens, _ := parser.tokenizeAlterStatement(alterStatement) + assert.True(t, reflect.DeepEqual(tokens, []string{"add column t int"})) + } + { + alterStatement := "add column t int, change column i int" + tokens, _ := parser.tokenizeAlterStatement(alterStatement) + assert.True(t, reflect.DeepEqual(tokens, []string{"add 
column t int", "change column i int"})) + } + { + alterStatement := "add column t int, change column i int 'some comment'" + tokens, _ := parser.tokenizeAlterStatement(alterStatement) + assert.True(t, reflect.DeepEqual(tokens, []string{"add column t int", "change column i int 'some comment'"})) + } + { + alterStatement := "add column t int, change column i int 'some comment, with comma'" + tokens, _ := parser.tokenizeAlterStatement(alterStatement) + assert.True(t, reflect.DeepEqual(tokens, []string{"add column t int", "change column i int 'some comment, with comma'"})) + } + { + alterStatement := "add column t int, add column d decimal(10,2)" + tokens, _ := parser.tokenizeAlterStatement(alterStatement) + assert.True(t, reflect.DeepEqual(tokens, []string{"add column t int", "add column d decimal(10,2)"})) + } + { + alterStatement := "add column t int, add column e enum('a','b','c')" + tokens, _ := parser.tokenizeAlterStatement(alterStatement) + assert.True(t, reflect.DeepEqual(tokens, []string{"add column t int", "add column e enum('a','b','c')"})) + } + { + alterStatement := "add column t int(11), add column e enum('a','b','c')" + tokens, _ := parser.tokenizeAlterStatement(alterStatement) + assert.True(t, reflect.DeepEqual(tokens, []string{"add column t int(11)", "add column e enum('a','b','c')"})) + } +} + +func TestSanitizeQuotesFromAlterStatement(t *testing.T) { + parser := NewAlterTableParser() + { + alterStatement := "add column e enum('a','b','c')" + strippedStatement := parser.sanitizeQuotesFromAlterStatement(alterStatement) + assert.Equal(t, strippedStatement, "add column e enum('','','')") + } + { + alterStatement := "change column i int 'some comment, with comma'" + strippedStatement := parser.sanitizeQuotesFromAlterStatement(alterStatement) + assert.Equal(t, strippedStatement, "change column i int ''") + } +} + +func TestParseAlterStatementDroppedColumns(t *testing.T) { + + { + parser := NewAlterTableParser() + statement := "drop column b" + err := 
parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.Equal(t, len(parser.droppedColumns), 1) + assert.True(t, parser.droppedColumns["b"]) + } + { + parser := NewAlterTableParser() + statement := "drop column b, drop key c_idx, drop column `d`" + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.Equal(t, parser.alterStatementOptions, statement) + assert.Equal(t, len(parser.droppedColumns), 2) + assert.True(t, parser.droppedColumns["b"]) + assert.True(t, parser.droppedColumns["d"]) + } + { + parser := NewAlterTableParser() + statement := "drop column b, drop key c_idx, drop column `d`, drop `e`, drop primary key, drop foreign key fk_1" + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.Equal(t, len(parser.droppedColumns), 3) + assert.True(t, parser.droppedColumns["b"]) + assert.True(t, parser.droppedColumns["d"]) + assert.True(t, parser.droppedColumns["e"]) + } + { + parser := NewAlterTableParser() + statement := "drop column b, drop bad statement, add column i int" + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.Equal(t, len(parser.droppedColumns), 1) + assert.True(t, parser.droppedColumns["b"]) + } +} + +func TestParseAlterStatementRenameTable(t *testing.T) { + + { + parser := NewAlterTableParser() + statement := "drop column b" + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.False(t, parser.isRenameTable) + } + { + parser := NewAlterTableParser() + statement := "rename as something_else" + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.True(t, parser.isRenameTable) + } + { + parser := NewAlterTableParser() + statement := "drop column b, rename as something_else" + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.Equal(t, parser.alterStatementOptions, statement) + assert.True(t, parser.isRenameTable) + } + { + parser := NewAlterTableParser() + statement := 
"engine=innodb rename as something_else" + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.True(t, parser.isRenameTable) + } + { + parser := NewAlterTableParser() + statement := "rename as something_else, engine=innodb" + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.True(t, parser.isRenameTable) + } +} + +func TestParseAlterStatementExplicitTable(t *testing.T) { + + { + parser := NewAlterTableParser() + statement := "drop column b" + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.Equal(t, parser.explicitSchema, "") + assert.Equal(t, parser.explicitTable, "") + assert.Equal(t, parser.alterStatementOptions, "drop column b") + assert.True(t, reflect.DeepEqual(parser.alterTokens, []string{"drop column b"})) + } + { + parser := NewAlterTableParser() + statement := "alter table tbl drop column b" + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.Equal(t, parser.explicitSchema, "") + assert.Equal(t, parser.explicitTable, "tbl") + assert.Equal(t, parser.alterStatementOptions, "drop column b") + assert.True(t, reflect.DeepEqual(parser.alterTokens, []string{"drop column b"})) + } + { + parser := NewAlterTableParser() + statement := "alter table `tbl` drop column b" + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.Equal(t, parser.explicitSchema, "") + assert.Equal(t, parser.explicitTable, "tbl") + assert.Equal(t, parser.alterStatementOptions, "drop column b") + assert.True(t, reflect.DeepEqual(parser.alterTokens, []string{"drop column b"})) + } + { + parser := NewAlterTableParser() + statement := "alter table `scm with spaces`.`tbl` drop column b" + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.Equal(t, parser.explicitSchema, "scm with spaces") + assert.Equal(t, parser.explicitTable, "tbl") + assert.Equal(t, parser.alterStatementOptions, "drop column b") + assert.True(t, 
reflect.DeepEqual(parser.alterTokens, []string{"drop column b"})) + } + { + parser := NewAlterTableParser() + statement := "alter table `scm`.`tbl with spaces` drop column b" + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.Equal(t, parser.explicitSchema, "scm") + assert.Equal(t, parser.explicitTable, "tbl with spaces") + assert.Equal(t, parser.alterStatementOptions, "drop column b") + assert.True(t, reflect.DeepEqual(parser.alterTokens, []string{"drop column b"})) + } + { + parser := NewAlterTableParser() + statement := "alter table `scm`.tbl drop column b" + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.Equal(t, parser.explicitSchema, "scm") + assert.Equal(t, parser.explicitTable, "tbl") + assert.Equal(t, parser.alterStatementOptions, "drop column b") + assert.True(t, reflect.DeepEqual(parser.alterTokens, []string{"drop column b"})) + } + { + parser := NewAlterTableParser() + statement := "alter table scm.`tbl` drop column b" + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.Equal(t, parser.explicitSchema, "scm") + assert.Equal(t, parser.explicitTable, "tbl") + assert.Equal(t, parser.alterStatementOptions, "drop column b") + assert.True(t, reflect.DeepEqual(parser.alterTokens, []string{"drop column b"})) + } + { + parser := NewAlterTableParser() + statement := "alter table scm.tbl drop column b" + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.Equal(t, parser.explicitSchema, "scm") + assert.Equal(t, parser.explicitTable, "tbl") + assert.Equal(t, parser.alterStatementOptions, "drop column b") + assert.True(t, reflect.DeepEqual(parser.alterTokens, []string{"drop column b"})) + } + { + parser := NewAlterTableParser() + statement := "alter table scm.tbl drop column b, add index idx(i)" + err := parser.ParseAlterStatement(statement) + assert.NoError(t, err) + assert.Equal(t, parser.explicitSchema, "scm") + assert.Equal(t, 
parser.explicitTable, "tbl") + assert.Equal(t, parser.alterStatementOptions, "drop column b, add index idx(i)") + assert.True(t, reflect.DeepEqual(parser.alterTokens, []string{"drop column b", "add index idx(i)"})) + } +} diff --git a/go/vt/vttablet/onlineddl/vrepl/types.go b/go/vt/vttablet/onlineddl/vrepl/types.go new file mode 100644 index 00000000000..23660555baa --- /dev/null +++ b/go/vt/vttablet/onlineddl/vrepl/types.go @@ -0,0 +1,332 @@ +/* + Original copyright by GitHub as follows. Additions by the Vitess authors as follows. +*/ +/* + Copyright 2016 GitHub Inc. + See https://github.com/github/gh-ost/blob/master/LICENSE +*/ +/* +Copyright 2021 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package vrepl + +import ( + "fmt" + "reflect" + "strconv" + "strings" +) + +// ColumnType enumerates some important column types +type ColumnType int + +const ( + UnknownColumnType ColumnType = iota + TimestampColumnType + DateTimeColumnType + EnumColumnType + MediumIntColumnType + JSONColumnType + FloatColumnType +) + +const maxMediumintUnsigned int32 = 16777215 + +// TimezoneConversion indicates how to convert a timezone value +type TimezoneConversion struct { + ToTimezone string +} + +// Column represents a table column +type Column struct { + Name string + IsUnsigned bool + Charset string + Type ColumnType + timezoneConversion *TimezoneConversion +} + +func (c *Column) convertArg(arg interface{}) interface{} { + if s, ok := arg.(string); ok { + // string, charset conversion + if encoding, ok := charsetEncodingMap[c.Charset]; ok { + arg, _ = encoding.NewDecoder().String(s) + } + return arg + } + + if c.IsUnsigned { + if i, ok := arg.(int8); ok { + return uint8(i) + } + if i, ok := arg.(int16); ok { + return uint16(i) + } + if i, ok := arg.(int32); ok { + if c.Type == MediumIntColumnType { + // problem with mediumint is that it's a 3-byte type. There is no compatible golang type to match that. 
+ // So to convert from negative to positive we'd need to convert the value manually + if i >= 0 { + return i + } + return uint32(maxMediumintUnsigned + i + 1) + } + return uint32(i) + } + if i, ok := arg.(int64); ok { + return strconv.FormatUint(uint64(i), 10) + } + if i, ok := arg.(int); ok { + return uint(i) + } + } + return arg +} + +// NewColumns creates a new column array from non empty names +func NewColumns(names []string) []Column { + result := []Column{} + for _, name := range names { + if name == "" { + continue + } + result = append(result, Column{Name: name}) + } + return result +} + +// ParseColumns creates a new column array fby parsing comma delimited names list +func ParseColumns(names string) []Column { + namesArray := strings.Split(names, ",") + return NewColumns(namesArray) +} + +// ColumnsMap maps a column name onto its ordinal position +type ColumnsMap map[string]int + +// NewEmptyColumnsMap creates an empty map +func NewEmptyColumnsMap() ColumnsMap { + columnsMap := make(map[string]int) + return ColumnsMap(columnsMap) +} + +// NewColumnsMap creates a column map based on ordered list of columns +func NewColumnsMap(orderedColumns []Column) ColumnsMap { + columnsMap := NewEmptyColumnsMap() + for i, column := range orderedColumns { + columnsMap[column.Name] = i + } + return columnsMap +} + +// ColumnList makes for a named list of columns +type ColumnList struct { + columns []Column + Ordinals ColumnsMap +} + +// NewColumnList creates an object given ordered list of column names +func NewColumnList(names []string) *ColumnList { + result := &ColumnList{ + columns: NewColumns(names), + } + result.Ordinals = NewColumnsMap(result.columns) + return result +} + +// ParseColumnList parses a comma delimited list of column names +func ParseColumnList(names string) *ColumnList { + result := &ColumnList{ + columns: ParseColumns(names), + } + result.Ordinals = NewColumnsMap(result.columns) + return result +} + +// Columns returns the list of columns +func (l 
*ColumnList) Columns() []Column { + return l.columns +} + +// Names returns list of column names +func (l *ColumnList) Names() []string { + names := make([]string, len(l.columns)) + for i := range l.columns { + names[i] = l.columns[i].Name + } + return names +} + +// GetColumn gets a column by name +func (l *ColumnList) GetColumn(columnName string) *Column { + if ordinal, ok := l.Ordinals[columnName]; ok { + return &l.columns[ordinal] + } + return nil +} + +// SetUnsigned toggles on the unsigned property +func (l *ColumnList) SetUnsigned(columnName string) { + l.GetColumn(columnName).IsUnsigned = true +} + +// IsUnsigned returns true when the column is an unsigned numeral +func (l *ColumnList) IsUnsigned(columnName string) bool { + return l.GetColumn(columnName).IsUnsigned +} + +// SetCharset sets the charset property +func (l *ColumnList) SetCharset(columnName string, charset string) { + l.GetColumn(columnName).Charset = charset +} + +// GetCharset returns the hcarset property +func (l *ColumnList) GetCharset(columnName string) string { + return l.GetColumn(columnName).Charset +} + +// SetColumnType sets the type of the column (for interesting types) +func (l *ColumnList) SetColumnType(columnName string, columnType ColumnType) { + l.GetColumn(columnName).Type = columnType +} + +// GetColumnType gets type of column, for interesting types +func (l *ColumnList) GetColumnType(columnName string) ColumnType { + return l.GetColumn(columnName).Type +} + +// SetConvertDatetimeToTimestamp sets the timezone conversion +func (l *ColumnList) SetConvertDatetimeToTimestamp(columnName string, toTimezone string) { + l.GetColumn(columnName).timezoneConversion = &TimezoneConversion{ToTimezone: toTimezone} +} + +// HasTimezoneConversion sees if there's timezone conversion defined (only applicable to temporal values) +func (l *ColumnList) HasTimezoneConversion(columnName string) bool { + return l.GetColumn(columnName).timezoneConversion != nil +} + +// String returns a comma separated 
list of column names +func (l *ColumnList) String() string { + return strings.Join(l.Names(), ",") +} + +// Equals checks for complete (deep) identities of columns, in order. +func (l *ColumnList) Equals(other *ColumnList) bool { + return reflect.DeepEqual(l.Columns, other.Columns) +} + +// EqualsByNames chcks if the names in this list equals the names of another list, in order. Type is ignored. +func (l *ColumnList) EqualsByNames(other *ColumnList) bool { + return reflect.DeepEqual(l.Names(), other.Names()) +} + +// IsSubsetOf returns 'true' when column names of this list are a subset of +// another list, in arbitrary order (order agnostic) +func (l *ColumnList) IsSubsetOf(other *ColumnList) bool { + for _, column := range l.columns { + if _, exists := other.Ordinals[column.Name]; !exists { + return false + } + } + return true +} + +// Len returns the length of this list +func (l *ColumnList) Len() int { + return len(l.columns) +} + +// UniqueKey is the combination of a key's name and columns +type UniqueKey struct { + Name string + Columns ColumnList + HasNullable bool + IsAutoIncrement bool +} + +// IsPrimary checks if this unique key is primary +func (k *UniqueKey) IsPrimary() bool { + return k.Name == "PRIMARY" +} + +// Len returns the length of this list +func (k *UniqueKey) Len() int { + return k.Columns.Len() +} + +// String returns a visual representation of this key +func (k *UniqueKey) String() string { + description := k.Name + if k.IsAutoIncrement { + description = fmt.Sprintf("%s (auto_increment)", description) + } + return fmt.Sprintf("%s: %s; has nullable: %+v", description, k.Columns.Names(), k.HasNullable) +} + +// ColumnValues contains table data values for columns +type ColumnValues struct { + abstractValues []interface{} + ValuesPointers []interface{} +} + +// NewColumnValues initiates a values list +func NewColumnValues(length int) *ColumnValues { + result := &ColumnValues{ + abstractValues: make([]interface{}, length), + ValuesPointers: 
make([]interface{}, length), + } + for i := 0; i < length; i++ { + result.ValuesPointers[i] = &result.abstractValues[i] + } + + return result +} + +// ToColumnValues converts abstrcat values into a ColumnValues list +func ToColumnValues(abstractValues []interface{}) *ColumnValues { + result := &ColumnValues{ + abstractValues: abstractValues, + ValuesPointers: make([]interface{}, len(abstractValues)), + } + for i := 0; i < len(abstractValues); i++ { + result.ValuesPointers[i] = &result.abstractValues[i] + } + + return result +} + +// AbstractValues gets the abtracts values of this list +func (v *ColumnValues) AbstractValues() []interface{} { + return v.abstractValues +} + +// StringColumn return the string value of a column +func (v *ColumnValues) StringColumn(index int) string { + val := v.AbstractValues()[index] + if ints, ok := val.([]uint8); ok { + return string(ints) + } + return fmt.Sprintf("%+v", val) +} + +// String returns a visual representation of this list +func (v *ColumnValues) String() string { + stringValues := []string{} + for i := range v.AbstractValues() { + stringValues = append(stringValues, v.StringColumn(i)) + } + return strings.Join(stringValues, ",") +} diff --git a/go/vt/vttablet/onlineddl/vrepl/types_test.go b/go/vt/vttablet/onlineddl/vrepl/types_test.go new file mode 100644 index 00000000000..25663f9e918 --- /dev/null +++ b/go/vt/vttablet/onlineddl/vrepl/types_test.go @@ -0,0 +1,100 @@ +/* + Copyright 2016 GitHub Inc. 
+ See https://github.com/github/gh-ost/blob/master/LICENSE +*/ + +package vrepl + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParseColumnList(t *testing.T) { + names := "id,category,max_len" + + columnList := ParseColumnList(names) + assert.Equal(t, columnList.Len(), 3) + assert.Equal(t, columnList.Names(), []string{"id", "category", "max_len"}) + assert.Equal(t, columnList.Ordinals["id"], 0) + assert.Equal(t, columnList.Ordinals["category"], 1) + assert.Equal(t, columnList.Ordinals["max_len"], 2) +} + +func TestGetColumn(t *testing.T) { + names := "id,category,max_len" + columnList := ParseColumnList(names) + { + column := columnList.GetColumn("category") + assert.NotNil(t, column) + assert.Equal(t, column.Name, "category") + } + { + column := columnList.GetColumn("no_such_column") + assert.True(t, column == nil) + } +} + +func TestIsSubsetOf(t *testing.T) { + tt := []struct { + columns1 *ColumnList + columns2 *ColumnList + expectSubset bool + }{ + { + columns1: ParseColumnList(""), + columns2: ParseColumnList("a,b,c"), + expectSubset: true, + }, + { + columns1: ParseColumnList("a,b,c"), + columns2: ParseColumnList("a,b,c"), + expectSubset: true, + }, + { + columns1: ParseColumnList("a,c"), + columns2: ParseColumnList("a,b,c"), + expectSubset: true, + }, + { + columns1: ParseColumnList("b,c"), + columns2: ParseColumnList("a,b,c"), + expectSubset: true, + }, + { + columns1: ParseColumnList("b"), + columns2: ParseColumnList("a,b,c"), + expectSubset: true, + }, + { + columns1: ParseColumnList(""), + columns2: ParseColumnList("a,b,c"), + expectSubset: true, + }, + { + columns1: ParseColumnList("a,d"), + columns2: ParseColumnList("a,b,c"), + expectSubset: false, + }, + { + columns1: ParseColumnList("a,b,c"), + columns2: ParseColumnList("a,b"), + expectSubset: false, + }, + { + columns1: ParseColumnList("a,b,c"), + columns2: ParseColumnList(""), + expectSubset: false, + }, + } + for _, tc := range tt { + name := 
fmt.Sprintf("%v:%v", tc.columns1.Names(), tc.columns2.Names()) + t.Run(name, func(t *testing.T) { + isSubset := tc.columns1.IsSubsetOf(tc.columns2) + assert.Equal(t, tc.expectSubset, isSubset) + }, + ) + } +} From 52d3e8584b5afdc871b76bbe3a1602595a4dc4bb Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Sun, 31 Jan 2021 08:41:26 +0200 Subject: [PATCH 04/72] towards vreplication based online DDL Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 67 +++++++ go/vt/vttablet/onlineddl/ghost.go | 14 +- go/vt/vttablet/onlineddl/schema.go | 65 ++++++- go/vt/vttablet/onlineddl/vrepl.go | 262 +++++++++++++++++++++++++++ 4 files changed, 393 insertions(+), 15 deletions(-) create mode 100644 go/vt/vttablet/onlineddl/vrepl.go diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 4f815585027..43476c394ab 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -402,6 +402,62 @@ func (e *Executor) executeDirectly(ctx context.Context, onlineDDL *schema.Online return nil } +// ExecuteWithVReplication sets up the grounds for a vreplication schema migration +func (e *Executor) ExecuteWithVReplication(ctx context.Context, onlineDDL *schema.OnlineDDL) error { + e.migrationMutex.Lock() + defer e.migrationMutex.Unlock() + + if atomic.LoadInt64(&e.migrationRunning) > 0 { + return ErrExecutorMigrationAlreadyRunning + } + + if e.tabletTypeFunc() != topodatapb.TabletType_MASTER { + return ErrExecutorNotWritableTablet + } + + conn, err := dbconnpool.NewDBConnection(ctx, e.env.Config().DB.DbaWithDB()) + if err != nil { + return err + } + defer conn.Close() + + atomic.StoreInt64(&e.migrationRunning, 1) + e.lastMigrationUUID = onlineDDL.UUID + if err := e.onSchemaMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusRunning, false, progressPctStarted); err != nil { + return err + } + vreplTableName 
:= fmt.Sprintf("_%s_%s_vrepl", onlineDDL.UUID, ReadableTimestamp()) + + if err := e.updateArtifacts(ctx, onlineDDL.UUID, vreplTableName); err != nil { + return err + } + + { + parsed := sqlparser.BuildParsedQuery(sqlCreateTableLike, vreplTableName, onlineDDL.Table) + if _, err := conn.ExecuteFetch(parsed.Query, 0, false); err != nil { + return err + } + } + // Temporary hack (2020-08-11) + // Because sqlparser does not do full blown ALTER TABLE parsing, + // and because we don't want gh-ost to know about WITH_GHOST and WITH_PT syntax, + // we resort to regexp-based parsing of the query. + // TODO(shlomi): generate _alter options_ via sqlparser when it full supports ALTER TABLE syntax. + _, _, alterOptions := schema.ParseAlterTableOptions(onlineDDL.SQL) + { + parsed := sqlparser.BuildParsedQuery(sqlAlterTableOptions, vreplTableName, alterOptions) + // Apply ALTER TABLE to materialized table + if _, err := conn.ExecuteFetch(parsed.Query, 0, false); err != nil { + return err + } + } + v := NewVRepl(e.dbName, onlineDDL.Table, vreplTableName) + if err := v.analyzeAlter(ctx, alterOptions); err != nil { + return err + } + return nil +} + // ExecuteWithGhost validates and runs a gh-ost process. 
// Validation included testing the backend MySQL server and the gh-ost binary itself // Execution runs first a dry run, then an actual migration @@ -870,6 +926,11 @@ func (e *Executor) terminateMigration(ctx context.Context, onlineDDL *schema.Onl } } switch onlineDDL.Strategy { + case schema.DDLStrategyOnline: + // TODO(shlomi): remove vreplication + _ = e.updateMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusFailed) + atomic.StoreInt64(&e.migrationRunning, 0) + case schema.DDLStrategyPTOSC: // see if pt-osc is running (could have been executed by this vttablet or one that crashed in the past) if running, pid, _ := e.isPTOSCMigrationRunning(ctx, onlineDDL.UUID); running { @@ -1065,6 +1126,12 @@ func (e *Executor) executeMigration(ctx context.Context, onlineDDL *schema.Onlin }() case sqlparser.AlterDDLAction: switch onlineDDL.Strategy { + case schema.DDLStrategyOnline: + go func() { + if err := e.ExecuteWithVReplication(ctx, onlineDDL); err != nil { + failMigration(err) + } + }() case schema.DDLStrategyGhost: go func() { if err := e.ExecuteWithGhost(ctx, onlineDDL); err != nil { diff --git a/go/vt/vttablet/onlineddl/ghost.go b/go/vt/vttablet/onlineddl/ghost.go index 40234c87f9b..5dd19a81a3f 100644 --- a/go/vt/vttablet/onlineddl/ghost.go +++ b/go/vt/vttablet/onlineddl/ghost.go @@ -1,5 +1,5 @@ /* -Copyright 2019 The Vitess Authors. +Copyright 2020 The Vitess Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,18 +14,6 @@ See the License for the specific language governing permissions and limitations under the License. */ -// Package heartbeat contains a writer and reader of heartbeats for a master-replica cluster. -// This is similar to Percona's pt-heartbeat, and is meant to supplement the information -// returned from SHOW SLAVE STATUS. In some circumstances, lag returned from SHOW SLAVE STATUS -// is incorrect and is at best only at 1 second resolution. 
The heartbeat package directly -// tests replication by writing a record with a timestamp on the master, and comparing that -// timestamp after reading it on the replica. This happens at the interval defined by heartbeat_interval. -// Note: the lag reported will be affected by clock drift, so it is recommended to run ntpd or similar. -// -// The data collected by the heartbeat package is made available in /debug/vars in counters prefixed by Heartbeat*. -// It's additionally used as a source for healthchecks and will impact the serving state of a tablet, if enabled. -// The heartbeat interval is purposefully kept distinct from the health check interval because lag measurement -// requires more frequent polling that the healthcheck typically is configured for. package onlineddl import ( diff --git a/go/vt/vttablet/onlineddl/schema.go b/go/vt/vttablet/onlineddl/schema.go index 322b5cf8b8c..df745e8c6af 100644 --- a/go/vt/vttablet/onlineddl/schema.go +++ b/go/vt/vttablet/onlineddl/schema.go @@ -216,8 +216,69 @@ const ( AND ACTION_TIMING='AFTER' AND LEFT(TRIGGER_NAME, 7)='pt_osc_' ` - sqlDropTrigger = "DROP TRIGGER IF EXISTS `%a`.`%a`" - sqlShowTablesLike = "SHOW TABLES LIKE '%a'" + sqlDropTrigger = "DROP TRIGGER IF EXISTS `%a`.`%a`" + sqlShowTablesLike = "SHOW TABLES LIKE '%a'" + sqlCreateTableLike = "CREATE TABLE `%a` LIKE `%a`" + sqlAlterTableOptions = "ALTER TABLE `%a` %s" + sqlShowColumnsFrom = "SHOW COLUMNS FROM `%a`" + // TODO(shlomi): consider removing: + // sqlGetUniqueKeys = ` + // SELECT + // COLUMNS.TABLE_SCHEMA, + // COLUMNS.TABLE_NAME, + // COLUMNS.COLUMN_NAME, + // UNIQUES.INDEX_NAME, + // UNIQUES.COLUMN_NAMES, + // UNIQUES.COUNT_COLUMN_IN_INDEX, + // COLUMNS.DATA_TYPE, + // COLUMNS.CHARACTER_SET_NAME, + // LOCATE('auto_increment', EXTRA) > 0 as is_auto_increment, + // has_nullable + // FROM INFORMATION_SCHEMA.COLUMNS INNER JOIN ( + // SELECT + // TABLE_SCHEMA, + // TABLE_NAME, + // INDEX_NAME, + // COUNT(*) AS COUNT_COLUMN_IN_INDEX, + // 
GROUP_CONCAT(COLUMN_NAME ORDER BY SEQ_IN_INDEX ASC) AS COLUMN_NAMES, + // SUBSTRING_INDEX(GROUP_CONCAT(COLUMN_NAME ORDER BY SEQ_IN_INDEX ASC), ',', 1) AS FIRST_COLUMN_NAME, + // SUM(NULLABLE='YES') > 0 AS has_nullable + // FROM INFORMATION_SCHEMA.STATISTICS + // WHERE + // NON_UNIQUE=0 + // AND TABLE_SCHEMA = %a + // AND TABLE_NAME = %a + // GROUP BY TABLE_SCHEMA, TABLE_NAME, INDEX_NAME + // ) AS UNIQUES + // ON ( + // COLUMNS.COLUMN_NAME = UNIQUES.FIRST_COLUMN_NAME + // ) + // WHERE + // COLUMNS.TABLE_SCHEMA = %a + // AND COLUMNS.TABLE_NAME = %a + // ORDER BY + // COLUMNS.TABLE_SCHEMA, COLUMNS.TABLE_NAME, + // CASE UNIQUES.INDEX_NAME + // WHEN 'PRIMARY' THEN 0 + // ELSE 1 + // END, + // CASE has_nullable + // WHEN 0 THEN 0 + // ELSE 1 + // END, + // CASE IFNULL(CHARACTER_SET_NAME, '') + // WHEN '' THEN 0 + // ELSE 1 + // END, + // CASE DATA_TYPE + // WHEN 'tinyint' THEN 0 + // WHEN 'smallint' THEN 1 + // WHEN 'int' THEN 2 + // WHEN 'bigint' THEN 3 + // ELSE 100 + // END, + // COUNT_COLUMN_IN_INDEX + // ` ) const ( diff --git a/go/vt/vttablet/onlineddl/vrepl.go b/go/vt/vttablet/onlineddl/vrepl.go new file mode 100644 index 00000000000..583c63228ac --- /dev/null +++ b/go/vt/vttablet/onlineddl/vrepl.go @@ -0,0 +1,262 @@ +/* + Original copyright by GitHub as follows. Additions by the Vitess authors as follows. +*/ +/* + Copyright 2016 GitHub Inc. + See https://github.com/github/gh-ost/blob/master/LICENSE +*/ +/* +Copyright 2021 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package onlineddl + +import ( + "context" + "fmt" + "math" + "strings" + + "vitess.io/vitess/go/sqltypes" + "vitess.io/vitess/go/vt/dbconnpool" + "vitess.io/vitess/go/vt/sqlparser" + "vitess.io/vitess/go/vt/vttablet/onlineddl/vrepl" +) + +// VRepl is an online DDL helper for VReplication based migrations (ddl_strategy="online") +type VRepl struct { + dbName string + sourceTable string + targetTable string + + sharedPKColumns *vrepl.ColumnList + + sourceSharedColumns *vrepl.ColumnList + targetSharedColumns *vrepl.ColumnList + sharedColumnsMap map[string]string + + parser *vrepl.AlterTableParser +} + +// NewVRepl creates a VReplication handler for Online DDL +func NewVRepl(dbName, sourceTable, targetTable string) *VRepl { + return &VRepl{ + dbName: dbName, + sourceTable: sourceTable, + targetTable: targetTable, + parser: vrepl.NewAlterTableParser(), + } +} + +// getCandidateUniqueKeys investigates a table and returns the list of unique keys +// candidate for chunking +func (v *VRepl) getCandidateUniqueKeys(ctx context.Context, conn *dbconnpool.DBConnection, tableName string) (uniqueKeys [](*vrepl.UniqueKey), err error) { + + query, err := sqlparser.ParseAndBind(sqlShowColumnsFrom, + sqltypes.StringBindVariable(v.dbName), + sqltypes.StringBindVariable(tableName), + sqltypes.StringBindVariable(v.dbName), + sqltypes.StringBindVariable(tableName), + ) + if err != nil { + return uniqueKeys, err + } + + rs, err := conn.ExecuteFetch(query, math.MaxInt64, true) + if err != nil { + return nil, err + } + for _, row := range rs.Named().Rows { + uniqueKey := &vrepl.UniqueKey{ + Name: row.AsString("INDEX_NAME", ""), + Columns: *vrepl.ParseColumnList(row.AsString("COLUMN_NAMES", "")), + HasNullable: row.AsBool("has_nullable", false), + IsAutoIncrement: row.AsBool("is_auto_increment", false), + } + uniqueKeys = append(uniqueKeys, uniqueKey) + } + return uniqueKeys, nil +} + 
+// readTableColumns reads column list from given table +func (v *VRepl) readTableColumns(ctx context.Context, conn *dbconnpool.DBConnection, tableName string) (columns *vrepl.ColumnList, virtualColumns *vrepl.ColumnList, pkColumns *vrepl.ColumnList, err error) { + parsed := sqlparser.BuildParsedQuery(sqlShowColumnsFrom, tableName) + rs, err := conn.ExecuteFetch(parsed.Query, math.MaxInt64, true) + if err != nil { + return nil, nil, nil, err + } + columnNames := []string{} + virtualColumnNames := []string{} + pkColumnNames := []string{} + for _, row := range rs.Named().Rows { + columnName := row.AsString("Field", "") + columnNames = append(columnNames, columnName) + + extra := row.AsString("Extra", "") + if strings.Contains(extra, " GENERATED") { + virtualColumnNames = append(virtualColumnNames, columnName) + } + + key := row.AsString("Key", "") + if key == "PRI" { + pkColumnNames = append(pkColumnNames, columnName) + } + } + if len(columnNames) == 0 { + return nil, nil, nil, fmt.Errorf("Found 0 columns on `%s`", tableName) + } + return vrepl.NewColumnList(columnNames), vrepl.NewColumnList(virtualColumnNames), vrepl.NewColumnList(pkColumnNames), nil +} + +// getSharedColumns returns the intersection of two lists of columns in same order as the first list +func (v *VRepl) getSharedColumns(sourceColumns, targetColumns *vrepl.ColumnList, sourceVirtualColumns, targetVirtualColumns *vrepl.ColumnList, columnRenameMap map[string]string) ( + sourceSharedColumns *vrepl.ColumnList, targetSharedColumns *vrepl.ColumnList, sharedColumnsMap map[string]string, +) { + sharedColumnNames := []string{} + for _, sourceColumn := range sourceColumns.Names() { + isSharedColumn := false + for _, targetColumn := range targetColumns.Names() { + if strings.EqualFold(sourceColumn, targetColumn) { + // both tables have this column. Good start. 
+ isSharedColumn = true + break + } + if strings.EqualFold(columnRenameMap[sourceColumn], targetColumn) { + // column in source is renamed in target + isSharedColumn = true + break + } + } + for _, virtualColumn := range sourceVirtualColumns.Names() { + // virtual/generated columns on source are silently skipped + if strings.EqualFold(sourceColumn, virtualColumn) { + isSharedColumn = false + } + } + for _, virtualColumn := range targetVirtualColumns.Names() { + // virtual/generated columns on target are silently skipped + if strings.EqualFold(sourceColumn, virtualColumn) { + isSharedColumn = false + } + } + if isSharedColumn { + sharedColumnNames = append(sharedColumnNames, sourceColumn) + } + } + mappedSharedColumnNames := []string{} + sharedColumnsMap = map[string]string{} + for _, columnName := range sharedColumnNames { + if mapped, ok := columnRenameMap[columnName]; ok { + sharedColumnsMap[columnName] = mapped + } else { + sharedColumnsMap[columnName] = columnName + } + } + for _, columnName := range sharedColumnNames { + mappedSharedColumnNames = append(mappedSharedColumnNames, sharedColumnsMap[columnName]) + } + return vrepl.NewColumnList(sharedColumnNames), vrepl.NewColumnList(mappedSharedColumnNames), sharedColumnsMap +} + +// getSharedPKColumns returns the intersection of PRIMARY KEY columns (taking renaming into consideration) etween source and target tables +func (v *VRepl) getSharedPKColumns(sourcePKColumns, targetPKColumns *vrepl.ColumnList, columnRenameMap map[string]string) ( + sharedPKColumns *vrepl.ColumnList, +) { + sharedColumnNames := []string{} + for _, sourceColumn := range sourcePKColumns.Names() { + isSharedColumn := false + for _, targetColumn := range targetPKColumns.Names() { + if strings.EqualFold(sourceColumn, targetColumn) { + // both tables have this column. Good start. 
+ isSharedColumn = true + break + } + if strings.EqualFold(columnRenameMap[sourceColumn], targetColumn) { + // column in source is renamed in target + isSharedColumn = true + break + } + } + if isSharedColumn { + sharedColumnNames = append(sharedColumnNames, sourceColumn) + } + } + return vrepl.NewColumnList(sharedColumnNames) +} + +// getSharedUniqueKeys returns the intersection of two given unique keys, +// testing by list of columns +func (v *VRepl) getSharedUniqueKeys(sourceUniqueKeys, targetUniqueKeys [](*vrepl.UniqueKey)) (uniqueKeys [](*vrepl.UniqueKey), err error) { + // We actually do NOT rely on key name, just on the set of columns. This is because maybe + // the ALTER is on the name itself... + for _, sourceUniqueKey := range sourceUniqueKeys { + for _, targetUniqueKey := range targetUniqueKeys { + if sourceUniqueKey.Columns.EqualsByNames(&targetUniqueKey.Columns) { + uniqueKeys = append(uniqueKeys, sourceUniqueKey) + } + } + } + return uniqueKeys, nil +} + +func (v *VRepl) analyzeAlter(ctx context.Context, alterOptions string) error { + if err := v.parser.ParseAlterStatement(alterOptions); err != nil { + return err + } + if v.parser.IsRenameTable() { + return fmt.Errorf("Renaming the table is not aupported in ALTER TABLE: %s", alterOptions) + } + return nil +} + +func (v *VRepl) analyzeTables(ctx context.Context, conn *dbconnpool.DBConnection) error { + // columns: + sourceColumns, sourceVirtualColumns, sourcePKColumns, err := v.readTableColumns(ctx, conn, v.sourceTable) + if err != nil { + return err + } + targetColumns, targetVirtualColumns, targetPKColumns, err := v.readTableColumns(ctx, conn, v.targetTable) + if err != nil { + return err + } + v.sourceSharedColumns, v.targetSharedColumns, v.sharedColumnsMap = v.getSharedColumns(sourceColumns, targetColumns, sourceVirtualColumns, targetVirtualColumns, v.parser.ColumnRenameMap()) + + v.sharedPKColumns = v.getSharedPKColumns(sourcePKColumns, targetPKColumns, v.parser.ColumnRenameMap()) + if 
v.sharedPKColumns.Len() == 0 { + // TODO(shlomi): need to carefully examine what happens when we extend/reduce a PRIMARY KEY + // is a column subset OK? + return fmt.Errorf("Found no shared PRIMARY KEY columns between `%s` and `%s`", v.sourceTable, v.targetTable) + } + + // // unique keys: + // sourceUniqueKeys, err := v.getCandidateUniqueKeys(ctx, conn, v.sourceTable) + // if err != nil { + // return err + // } + // targetUniqueKeys, err := v.getCandidateUniqueKeys(ctx, conn, v.targetTable) + // if err != nil { + // return err + // } + // sharedUniqueKeys, err := v.getSharedUniqueKeys(sourceUniqueKeys, targetUniqueKeys) + // if err != nil { + // return err + // } + // if len(sharedUniqueKeys) == 0 { + // // TODO(shlomi): need to carefully examine what happens when we extend/reduce a PRIMARY KEY + // // is a column subset OK? + // return fmt.Errorf("Found no shared unique keys between `%s` and `%s`", v.sourceTable, v.targetTable) + // } + return nil +} From 4f3fdc4da0a3ceba5d016a4347ea32b44422e7d7 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Sun, 31 Jan 2021 10:08:23 +0200 Subject: [PATCH 05/72] cleanup Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/vrepl/types.go | 55 ------------------------- 1 file changed, 55 deletions(-) diff --git a/go/vt/vttablet/onlineddl/vrepl/types.go b/go/vt/vttablet/onlineddl/vrepl/types.go index 23660555baa..fe7bcbcbd29 100644 --- a/go/vt/vttablet/onlineddl/vrepl/types.go +++ b/go/vt/vttablet/onlineddl/vrepl/types.go @@ -275,58 +275,3 @@ func (k *UniqueKey) String() string { } return fmt.Sprintf("%s: %s; has nullable: %+v", description, k.Columns.Names(), k.HasNullable) } - -// ColumnValues contains table data values for columns -type ColumnValues struct { - abstractValues []interface{} - ValuesPointers []interface{} -} - -// NewColumnValues initiates a values list -func NewColumnValues(length int) *ColumnValues { - result := 
&ColumnValues{ - abstractValues: make([]interface{}, length), - ValuesPointers: make([]interface{}, length), - } - for i := 0; i < length; i++ { - result.ValuesPointers[i] = &result.abstractValues[i] - } - - return result -} - -// ToColumnValues converts abstrcat values into a ColumnValues list -func ToColumnValues(abstractValues []interface{}) *ColumnValues { - result := &ColumnValues{ - abstractValues: abstractValues, - ValuesPointers: make([]interface{}, len(abstractValues)), - } - for i := 0; i < len(abstractValues); i++ { - result.ValuesPointers[i] = &result.abstractValues[i] - } - - return result -} - -// AbstractValues gets the abtracts values of this list -func (v *ColumnValues) AbstractValues() []interface{} { - return v.abstractValues -} - -// StringColumn return the string value of a column -func (v *ColumnValues) StringColumn(index int) string { - val := v.AbstractValues()[index] - if ints, ok := val.([]uint8); ok { - return string(ints) - } - return fmt.Sprintf("%+v", val) -} - -// String returns a visual representation of this list -func (v *ColumnValues) String() string { - stringValues := []string{} - for i := range v.AbstractValues() { - stringValues = append(stringValues, v.StringColumn(i)) - } - return strings.Join(stringValues, ",") -} From df0905c69e2c867ea40bfbd39425d460d35d3915 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Sun, 31 Jan 2021 10:08:47 +0200 Subject: [PATCH 06/72] call generalized 'analyze' Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 2 +- go/vt/vttablet/onlineddl/vrepl.go | 51 +++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 43476c394ab..c6fb9eb4e16 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -452,7 +452,7 @@ func (e *Executor) 
ExecuteWithVReplication(ctx context.Context, onlineDDL *schem } } v := NewVRepl(e.dbName, onlineDDL.Table, vreplTableName) - if err := v.analyzeAlter(ctx, alterOptions); err != nil { + if err := v.analyze(ctx, conn, alterOptions); err != nil { return err } return nil diff --git a/go/vt/vttablet/onlineddl/vrepl.go b/go/vt/vttablet/onlineddl/vrepl.go index 583c63228ac..d8ba224d416 100644 --- a/go/vt/vttablet/onlineddl/vrepl.go +++ b/go/vt/vttablet/onlineddl/vrepl.go @@ -27,6 +27,7 @@ import ( "context" "fmt" "math" + "strconv" "strings" "vitess.io/vitess/go/sqltypes" @@ -47,6 +48,8 @@ type VRepl struct { targetSharedColumns *vrepl.ColumnList sharedColumnsMap map[string]string + filterQuery string + parser *vrepl.AlterTableParser } @@ -155,7 +158,6 @@ func (v *VRepl) getSharedColumns(sourceColumns, targetColumns *vrepl.ColumnList, sharedColumnNames = append(sharedColumnNames, sourceColumn) } } - mappedSharedColumnNames := []string{} sharedColumnsMap = map[string]string{} for _, columnName := range sharedColumnNames { if mapped, ok := columnRenameMap[columnName]; ok { @@ -164,6 +166,7 @@ func (v *VRepl) getSharedColumns(sourceColumns, targetColumns *vrepl.ColumnList, sharedColumnsMap[columnName] = columnName } } + mappedSharedColumnNames := []string{} for _, columnName := range sharedColumnNames { mappedSharedColumnNames = append(mappedSharedColumnNames, sharedColumnsMap[columnName]) } @@ -260,3 +263,49 @@ func (v *VRepl) analyzeTables(ctx context.Context, conn *dbconnpool.DBConnection // } return nil } +func (v *VRepl) generateFilterQuery(ctx context.Context) error { + if v.sourceSharedColumns.Len() == 0 { + return fmt.Errorf("Empty column list") + } + var sb strings.Builder + + sb.WriteString("select ") + for i, name := range v.sourceSharedColumns.Names() { + targetName := v.sharedColumnsMap[name] + + if i > 0 { + sb.WriteString(", ") + } + sb.WriteString(escapeName(name)) + sb.WriteString(" as ") + sb.WriteString(escapeName(targetName)) + } + sb.WriteString(" from 
") + sb.WriteString(v.sourceTable) + + v.filterQuery = sb.String() + return nil +} + +func (v *VRepl) analyze(ctx context.Context, conn *dbconnpool.DBConnection, alterOptions string) error { + if err := v.analyzeAlter(ctx, alterOptions); err != nil { + return err + } + if err := v.analyzeTables(ctx, conn); err != nil { + return err + } + if err := v.generateFilterQuery(ctx); err != nil { + return err + } + return nil +} + +// escapeName will escape a db/table/column/... name by wrapping with backticks. +// It is not fool proof. I'm just trying to do the right thing here, not solving +// SQL injection issues, which should be irrelevant for this tool. +func escapeName(name string) string { + if unquoted, err := strconv.Unquote(name); err == nil { + name = unquoted + } + return fmt.Sprintf("`%s`", name) +} From 29895c6c37a43270e51ce248f23efc06176a41c1 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Sun, 31 Jan 2021 12:55:45 +0200 Subject: [PATCH 07/72] kick schema screation as soon as Open() Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/tabletmanager/vreplication/engine.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/go/vt/vttablet/tabletmanager/vreplication/engine.go b/go/vt/vttablet/tabletmanager/vreplication/engine.go index 2c7939b705b..aa5a16cc20a 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/engine.go +++ b/go/vt/vttablet/tabletmanager/vreplication/engine.go @@ -59,7 +59,9 @@ const ( vrepl_id int, table_name varbinary(128), lastpk varbinary(2000), - primary key (vrepl_id, table_name))` + primary key (vrepl_id, table_name))` + + warmUpQuery = "select 1 from _vt.vreplication limit 1" ) var withDDL *withddl.WithDDL @@ -192,6 +194,9 @@ func (vre *Engine) Open(ctx context.Context) { vre.cancelRetry = cancel go vre.retry(ctx, err) } + + // Ensure the schema exists + go vre.Exec(warmUpQuery) } func (vre *Engine) openLocked(ctx 
context.Context) error { From c37f29a63e8faf418e0cc2aeb8aaa122fac032ab Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Sun, 31 Jan 2021 12:56:19 +0200 Subject: [PATCH 08/72] generate vreplication insert and start statements Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 19 +++++++++++++- go/vt/vttablet/onlineddl/schema.go | 1 + go/vt/vttablet/onlineddl/vrepl.go | 39 +++++++++++++++++++++++++++- 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index c6fb9eb4e16..3a49cf80ab9 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -451,10 +451,27 @@ func (e *Executor) ExecuteWithVReplication(ctx context.Context, onlineDDL *schem return err } } - v := NewVRepl(e.dbName, onlineDDL.Table, vreplTableName) + v := NewVRepl(onlineDDL.UUID, e.keyspace, e.shard, e.dbName, onlineDDL.Table, vreplTableName) if err := v.analyze(ctx, conn, alterOptions); err != nil { return err } + { + insert, err := v.generateInsert(ctx) + if err != nil { + return err + } + if _, err := e.execQuery(ctx, insert); err != nil { + return err + } + start, err := v.generateStartStatement(ctx) + if err != nil { + return err + } + if _, err := e.execQuery(ctx, start); err != nil { + return err + } + fmt.Printf("============== INSERT: %s\n", insert) + } return nil } diff --git a/go/vt/vttablet/onlineddl/schema.go b/go/vt/vttablet/onlineddl/schema.go index df745e8c6af..86724f0fda1 100644 --- a/go/vt/vttablet/onlineddl/schema.go +++ b/go/vt/vttablet/onlineddl/schema.go @@ -221,6 +221,7 @@ const ( sqlCreateTableLike = "CREATE TABLE `%a` LIKE `%a`" sqlAlterTableOptions = "ALTER TABLE `%a` %s" sqlShowColumnsFrom = "SHOW COLUMNS FROM `%a`" + sqlStartVReplStream = "update _vt.vreplication set state='Running' where db_name=%a and workflow=%a" // TODO(shlomi): 
consider removing: // sqlGetUniqueKeys = ` // SELECT diff --git a/go/vt/vttablet/onlineddl/vrepl.go b/go/vt/vttablet/onlineddl/vrepl.go index d8ba224d416..176688ed91a 100644 --- a/go/vt/vttablet/onlineddl/vrepl.go +++ b/go/vt/vttablet/onlineddl/vrepl.go @@ -31,13 +31,19 @@ import ( "strings" "vitess.io/vitess/go/sqltypes" + "vitess.io/vitess/go/vt/binlog/binlogplayer" "vitess.io/vitess/go/vt/dbconnpool" + binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" "vitess.io/vitess/go/vt/sqlparser" "vitess.io/vitess/go/vt/vttablet/onlineddl/vrepl" + "vitess.io/vitess/go/vt/vttablet/tabletmanager/vreplication" ) // VRepl is an online DDL helper for VReplication based migrations (ddl_strategy="online") type VRepl struct { + workflow string + keyspace string + shard string dbName string sourceTable string targetTable string @@ -54,8 +60,11 @@ type VRepl struct { } // NewVRepl creates a VReplication handler for Online DDL -func NewVRepl(dbName, sourceTable, targetTable string) *VRepl { +func NewVRepl(workflow, keyspace, shard, dbName, sourceTable, targetTable string) *VRepl { return &VRepl{ + workflow: workflow, + keyspace: keyspace, + shard: shard, dbName: dbName, sourceTable: sourceTable, targetTable: targetTable, @@ -300,6 +309,34 @@ func (v *VRepl) analyze(ctx context.Context, conn *dbconnpool.DBConnection, alte return nil } +// generateInsert generates the INSERT INTO _vt.replication stataement that creates the vreplication workflow +func (v *VRepl) generateInsert(ctx context.Context) (string, error) { + ig := vreplication.NewInsertGenerator(binlogplayer.BlpStopped, v.dbName) + + bls := &binlogdatapb.BinlogSource{ + Keyspace: v.keyspace, + Shard: v.shard, + Filter: &binlogdatapb.Filter{}, + StopAfterCopy: false, + } + rule := &binlogdatapb.Rule{ + Match: v.targetTable, + Filter: v.filterQuery, + } + bls.Filter.Rules = append(bls.Filter.Rules, rule) + ig.AddRow(v.workflow, bls, "", "", "MASTER") + + return ig.String(), nil +} + +// generateStartStatement Generates the 
statement to start VReplication running on the workflow +func (v *VRepl) generateStartStatement(ctx context.Context) (string, error) { + return sqlparser.ParseAndBind(sqlStartVReplStream, + sqltypes.StringBindVariable(v.dbName), + sqltypes.StringBindVariable(v.workflow), + ) +} + // escapeName will escape a db/table/column/... name by wrapping with backticks. // It is not fool proof. I'm just trying to do the right thing here, not solving // SQL injection issues, which should be irrelevant for this tool. From b8409d6c483897e9cc74699f852767be272c578f Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Sun, 31 Jan 2021 14:04:32 +0200 Subject: [PATCH 09/72] communicating to VREngine through gRPC Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 17 ++++++++++++----- go/vt/vttablet/onlineddl/vrepl.go | 4 ++-- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 3a49cf80ab9..f1a8f904a4d 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -50,6 +50,7 @@ import ( "vitess.io/vitess/go/vt/vterrors" "vitess.io/vitess/go/vt/vttablet/tabletserver/connpool" "vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv" + "vitess.io/vitess/go/vt/vttablet/tmclient" "vitess.io/vitess/go/vt/vttablet/vexec" querypb "vitess.io/vitess/go/vt/proto/query" @@ -456,21 +457,27 @@ func (e *Executor) ExecuteWithVReplication(ctx context.Context, onlineDDL *schem return err } { - insert, err := v.generateInsert(ctx) + // We need to talk to tabletmanager's VREngine. But we're on TabletServer. While we live in the same + // process as VREngine, it is actually simpler to get hold of it via gRPC, just like wrangler does. 
+ tmClient := tmclient.NewTabletManagerClient() + tablet, err := e.ts.GetTablet(ctx, e.tabletAlias) if err != nil { return err } - if _, err := e.execQuery(ctx, insert); err != nil { + insertVReplicationQuery, err := v.generateInsertStatement(ctx) + if err != nil { + return err + } + if _, err := tmClient.VReplicationExec(ctx, tablet.Tablet, insertVReplicationQuery); err != nil { return err } - start, err := v.generateStartStatement(ctx) + startVReplicationQuery, err := v.generateStartStatement(ctx) if err != nil { return err } - if _, err := e.execQuery(ctx, start); err != nil { + if _, err := tmClient.VReplicationExec(ctx, tablet.Tablet, startVReplicationQuery); err != nil { return err } - fmt.Printf("============== INSERT: %s\n", insert) } return nil } diff --git a/go/vt/vttablet/onlineddl/vrepl.go b/go/vt/vttablet/onlineddl/vrepl.go index 176688ed91a..265e8e76d5d 100644 --- a/go/vt/vttablet/onlineddl/vrepl.go +++ b/go/vt/vttablet/onlineddl/vrepl.go @@ -309,8 +309,8 @@ func (v *VRepl) analyze(ctx context.Context, conn *dbconnpool.DBConnection, alte return nil } -// generateInsert generates the INSERT INTO _vt.replication stataement that creates the vreplication workflow -func (v *VRepl) generateInsert(ctx context.Context) (string, error) { +// generateInsertStatement generates the INSERT INTO _vt.replication stataement that creates the vreplication workflow +func (v *VRepl) generateInsertStatement(ctx context.Context) (string, error) { ig := vreplication.NewInsertGenerator(binlogplayer.BlpStopped, v.dbName) bls := &binlogdatapb.BinlogSource{ From 56b7a5998a1d32481e81121b196ba8ce51f381cd Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Sun, 31 Jan 2021 14:53:07 +0200 Subject: [PATCH 10/72] refactor parsing alterOptions; in the future this will go through sqlparser Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 31 ++++++++++++---------------- 1 
file changed, 13 insertions(+), 18 deletions(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index f1a8f904a4d..3149115fb51 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -369,6 +369,16 @@ func (e *Executor) tableExists(ctx context.Context, tableName string) (bool, err return (row != nil), nil } +func (e *Executor) parseAlterOptions(ctx context.Context, onlineDDL *schema.OnlineDDL) string { + // Temporary hack (2020-08-11) + // Because sqlparser does not do full blown ALTER TABLE parsing, + // and because we don't want gh-ost to know about WITH_GHOST and WITH_PT syntax, + // we resort to regexp-based parsing of the query. + // TODO(shlomi): generate _alter options_ via sqlparser when it full supports ALTER TABLE syntax. + _, _, alterOptions := schema.ParseAlterTableOptions(onlineDDL.SQL) + return alterOptions +} + // executeDirectly runs a DDL query directly on the backend MySQL server func (e *Executor) executeDirectly(ctx context.Context, onlineDDL *schema.OnlineDDL, acceptableMySQLErrorCodes ...int) error { e.migrationMutex.Lock() @@ -439,12 +449,7 @@ func (e *Executor) ExecuteWithVReplication(ctx context.Context, onlineDDL *schem return err } } - // Temporary hack (2020-08-11) - // Because sqlparser does not do full blown ALTER TABLE parsing, - // and because we don't want gh-ost to know about WITH_GHOST and WITH_PT syntax, - // we resort to regexp-based parsing of the query. - // TODO(shlomi): generate _alter options_ via sqlparser when it full supports ALTER TABLE syntax. 
- _, _, alterOptions := schema.ParseAlterTableOptions(onlineDDL.SQL) + alterOptions := e.parseAlterOptions(ctx, onlineDDL) { parsed := sqlparser.BuildParsedQuery(sqlAlterTableOptions, vreplTableName, alterOptions) // Apply ALTER TABLE to materialized table @@ -593,12 +598,7 @@ curl -s 'http://localhost:%d/schema-migration/report-status?uuid=%s&status=%s&dr } runGhost := func(execute bool) error { - // Temporary hack (2020-08-11) - // Because sqlparser does not do full blown ALTER TABLE parsing, - // and because we don't want gh-ost to know about WITH_GHOST and WITH_PT syntax, - // we resort to regexp-based parsing of the query. - // TODO(shlomi): generate _alter options_ via sqlparser when it full supports ALTER TABLE syntax. - _, _, alterOptions := schema.ParseAlterTableOptions(onlineDDL.SQL) + alterOptions := e.parseAlterOptions(ctx, onlineDDL) forceTableNames := fmt.Sprintf("%s_%s", onlineDDL.UUID, ReadableTimestamp()) if err := e.updateArtifacts(ctx, onlineDDL.UUID, @@ -803,12 +803,7 @@ export MYSQL_PWD return err } - // Temporary hack (2020-08-11) - // Because sqlparser does not do full blown ALTER TABLE parsing, - // and because pt-online-schema-change requires only the table options part of the ALTER TABLE statement, - // we resort to regexp-based parsing of the query. - // TODO(shlomi): generate _alter options_ via sqlparser when it full supports ALTER TABLE syntax. - _, _, alterOptions := schema.ParseAlterTableOptions(onlineDDL.SQL) + alterOptions := e.parseAlterOptions(ctx, onlineDDL) // The following sleep() is temporary and artificial. 
Because we create a new user for this // migration, and because we throttle by replicas, we need to wait for the replicas to be From da96194ac156ac054860ac27aa4d6e40c7e51a15 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Sun, 31 Jan 2021 16:56:00 +0200 Subject: [PATCH 11/72] simplified queries in onlineddl Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 37 ++++++-------- go/vt/vttablet/onlineddl/schema.go | 76 +++++++++++++--------------- 2 files changed, 52 insertions(+), 61 deletions(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 3149115fb51..a98f3071e7a 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -903,7 +903,7 @@ export MYSQL_PWD func (e *Executor) readMigration(ctx context.Context, uuid string) (onlineDDL *schema.OnlineDDL, err error) { - parsed := sqlparser.BuildParsedQuery(sqlSelectMigration, "_vt", ":migration_uuid") + parsed := sqlparser.BuildParsedQuery(sqlSelectMigration, ":migration_uuid") bindVars := map[string]*querypb.BindVariable{ "migration_uuid": sqltypes.StringBindVariable(uuid), } @@ -1030,8 +1030,7 @@ func (e *Executor) cancelMigrations(ctx context.Context, uuids []string) (err er // cancelPendingMigrations cancels all pending migrations (that are expected to run or are running) // for this keyspace func (e *Executor) cancelPendingMigrations(ctx context.Context) (result *sqltypes.Result, err error) { - parsed := sqlparser.BuildParsedQuery(sqlSelectPendingMigrations, "_vt") - r, err := e.execQuery(ctx, parsed.Query) + r, err := e.execQuery(ctx, sqlSelectPendingMigrations) if err != nil { return result, err } @@ -1065,8 +1064,7 @@ func (e *Executor) scheduleNextMigration(ctx context.Context) error { } { - parsed := sqlparser.BuildParsedQuery(sqlSelectCountReadyMigrations, "_vt") - r, err := e.execQuery(ctx, parsed.Query) + r, 
err := e.execQuery(ctx, sqlSelectCountReadyMigrations) if err != nil { return err } @@ -1083,8 +1081,7 @@ func (e *Executor) scheduleNextMigration(ctx context.Context) error { } } // Cool, seems like no migration is ready. Let's try and make a single 'queued' migration 'ready' - parsed := sqlparser.BuildParsedQuery(sqlScheduleSingleMigration, "_vt") - _, err := e.execQuery(ctx, parsed.Query) + _, err := e.execQuery(ctx, sqlScheduleSingleMigration) return err } @@ -1180,8 +1177,7 @@ func (e *Executor) runNextMigration(ctx context.Context) error { return ErrExecutorMigrationAlreadyRunning } - parsed := sqlparser.BuildParsedQuery(sqlSelectReadyMigration, "_vt") - r, err := e.execQuery(ctx, parsed.Query) + r, err := e.execQuery(ctx, sqlSelectReadyMigration) if err != nil { return err } @@ -1279,7 +1275,7 @@ func (e *Executor) reviewRunningMigrations(ctx context.Context) (countRunnning i e.migrationMutex.Lock() defer e.migrationMutex.Unlock() - parsed := sqlparser.BuildParsedQuery(sqlSelectRunningMigrations, "_vt", ":strategy") + parsed := sqlparser.BuildParsedQuery(sqlSelectRunningMigrations, ":strategy") bindVars := map[string]*querypb.BindVariable{ "strategy": sqltypes.StringBindVariable(string(schema.DDLStrategyPTOSC)), } @@ -1317,7 +1313,7 @@ func (e *Executor) reviewStaleMigrations(ctx context.Context) error { e.migrationMutex.Lock() defer e.migrationMutex.Unlock() - parsed := sqlparser.BuildParsedQuery(sqlSelectStaleMigrations, "_vt", ":minutes") + parsed := sqlparser.BuildParsedQuery(sqlSelectStaleMigrations, ":minutes") bindVars := map[string]*querypb.BindVariable{ "minutes": sqltypes.Int64BindVariable(staleMigrationMinutes), } @@ -1392,8 +1388,7 @@ func (e *Executor) gcArtifacts(ctx context.Context) error { e.migrationMutex.Lock() defer e.migrationMutex.Unlock() - parsed := sqlparser.BuildParsedQuery(sqlSelectUncollectedArtifacts, "_vt") - r, err := e.execQuery(ctx, parsed.Query) + r, err := e.execQuery(ctx, sqlSelectUncollectedArtifacts) if err != nil { 
return err } @@ -1468,7 +1463,7 @@ func (e *Executor) onMigrationCheckTick() { } func (e *Executor) updateMigrationStartedTimestamp(ctx context.Context, uuid string) error { - parsed := sqlparser.BuildParsedQuery(sqlUpdateMigrationStartedTimestamp, "_vt", + parsed := sqlparser.BuildParsedQuery(sqlUpdateMigrationStartedTimestamp, ":migration_uuid", ) bindVars := map[string]*querypb.BindVariable{ @@ -1483,7 +1478,7 @@ func (e *Executor) updateMigrationStartedTimestamp(ctx context.Context, uuid str } func (e *Executor) updateMigrationTimestamp(ctx context.Context, timestampColumn string, uuid string) error { - parsed := sqlparser.BuildParsedQuery(sqlUpdateMigrationTimestamp, "_vt", timestampColumn, + parsed := sqlparser.BuildParsedQuery(sqlUpdateMigrationTimestamp, timestampColumn, ":migration_uuid", ) bindVars := map[string]*querypb.BindVariable{ @@ -1499,7 +1494,7 @@ func (e *Executor) updateMigrationTimestamp(ctx context.Context, timestampColumn func (e *Executor) updateMigrationLogPath(ctx context.Context, uuid string, hostname, path string) error { logPath := fmt.Sprintf("%s:%s", hostname, path) - parsed := sqlparser.BuildParsedQuery(sqlUpdateMigrationLogPath, "_vt", + parsed := sqlparser.BuildParsedQuery(sqlUpdateMigrationLogPath, ":log_path", ":migration_uuid", ) @@ -1517,7 +1512,7 @@ func (e *Executor) updateMigrationLogPath(ctx context.Context, uuid string, host func (e *Executor) updateArtifacts(ctx context.Context, uuid string, artifacts ...string) error { bindArtifacts := strings.Join(artifacts, ",") - parsed := sqlparser.BuildParsedQuery(sqlUpdateArtifacts, "_vt", + parsed := sqlparser.BuildParsedQuery(sqlUpdateArtifacts, ":artifacts", ":migration_uuid", ) @@ -1535,7 +1530,7 @@ func (e *Executor) updateArtifacts(ctx context.Context, uuid string, artifacts . 
// updateTabletFailure marks a given migration as "tablet_failed" func (e *Executor) updateTabletFailure(ctx context.Context, uuid string) error { - parsed := sqlparser.BuildParsedQuery(sqlUpdateTabletFailure, "_vt", + parsed := sqlparser.BuildParsedQuery(sqlUpdateTabletFailure, ":migration_uuid", ) bindVars := map[string]*querypb.BindVariable{ @@ -1550,7 +1545,7 @@ func (e *Executor) updateTabletFailure(ctx context.Context, uuid string) error { } func (e *Executor) updateMigrationStatus(ctx context.Context, uuid string, status schema.OnlineDDLStatus) error { - parsed := sqlparser.BuildParsedQuery(sqlUpdateMigrationStatus, "_vt", + parsed := sqlparser.BuildParsedQuery(sqlUpdateMigrationStatus, ":migration_status", ":migration_uuid", ) @@ -1573,7 +1568,7 @@ func (e *Executor) updateMigrationProgress(ctx context.Context, uuid string, pro // In both cases there's nothing to update return nil } - parsed := sqlparser.BuildParsedQuery(sqlUpdateMigrationProgress, "_vt", + parsed := sqlparser.BuildParsedQuery(sqlUpdateMigrationProgress, ":migration_progress", ":migration_uuid", ) @@ -1592,7 +1587,7 @@ func (e *Executor) updateMigrationProgress(ctx context.Context, uuid string, pro func (e *Executor) retryMigration(ctx context.Context, whereExpr string) (result *sqltypes.Result, err error) { e.migrationMutex.Lock() defer e.migrationMutex.Unlock() - parsed := sqlparser.BuildParsedQuery(sqlRetryMigration, "_vt", ":tablet", whereExpr) + parsed := sqlparser.BuildParsedQuery(sqlRetryMigration, ":tablet", whereExpr) bindVars := map[string]*querypb.BindVariable{ "tablet": sqltypes.StringBindVariable(e.TabletAliasString()), } diff --git a/go/vt/vttablet/onlineddl/schema.go b/go/vt/vttablet/onlineddl/schema.go index 86724f0fda1..5300900cac1 100644 --- a/go/vt/vttablet/onlineddl/schema.go +++ b/go/vt/vttablet/onlineddl/schema.go @@ -16,15 +16,11 @@ limitations under the License. 
package onlineddl -import ( - "fmt" -) - const ( // SchemaMigrationsTableName is used by VExec interceptor to call the correct handler SchemaMigrationsTableName = "schema_migrations" - sqlCreateSidecarDB = "create database if not exists %s" - sqlCreateSchemaMigrationsTable = `CREATE TABLE IF NOT EXISTS %s.schema_migrations ( + sqlCreateSidecarDB = "create database if not exists _vt" + sqlCreateSchemaMigrationsTable = `CREATE TABLE IF NOT EXISTS _vt.schema_migrations ( id bigint(20) unsigned NOT NULL AUTO_INCREMENT, migration_uuid varchar(64) NOT NULL, keyspace varchar(256) NOT NULL, @@ -50,16 +46,16 @@ const ( KEY status_idx (migration_status, liveness_timestamp), KEY cleanup_status_idx (cleanup_timestamp, migration_status) ) engine=InnoDB DEFAULT CHARSET=utf8mb4` - alterSchemaMigrationsTableRetries = "ALTER TABLE %s.schema_migrations add column retries int unsigned NOT NULL DEFAULT 0" - alterSchemaMigrationsTableTablet = "ALTER TABLE %s.schema_migrations add column tablet varchar(128) NOT NULL DEFAULT ''" - alterSchemaMigrationsTableArtifacts = "ALTER TABLE %s.schema_migrations modify artifacts TEXT NOT NULL" - alterSchemaMigrationsTableTabletFailure = "ALTER TABLE %s.schema_migrations add column tablet_failure tinyint unsigned NOT NULL DEFAULT 0" - alterSchemaMigrationsTableTabletFailureIndex = "ALTER TABLE %s.schema_migrations add KEY tablet_failure_idx (tablet_failure, migration_status, retries)" - alterSchemaMigrationsTableProgress = "ALTER TABLE %s.schema_migrations add column progress float NOT NULL DEFAULT 0" - alterSchemaMigrationsTableContext = "ALTER TABLE %s.schema_migrations add column migration_context varchar(1024) NOT NULL DEFAULT ''" - alterSchemaMigrationsTableDDLAction = "ALTER TABLE %s.schema_migrations add column ddl_action varchar(16) NOT NULL DEFAULT ''" + alterSchemaMigrationsTableRetries = "ALTER TABLE _vt.schema_migrations add column retries int unsigned NOT NULL DEFAULT 0" + alterSchemaMigrationsTableTablet = "ALTER TABLE 
_vt.schema_migrations add column tablet varchar(128) NOT NULL DEFAULT ''" + alterSchemaMigrationsTableArtifacts = "ALTER TABLE _vt.schema_migrations modify artifacts TEXT NOT NULL" + alterSchemaMigrationsTableTabletFailure = "ALTER TABLE _vt.schema_migrations add column tablet_failure tinyint unsigned NOT NULL DEFAULT 0" + alterSchemaMigrationsTableTabletFailureIndex = "ALTER TABLE _vt.schema_migrations add KEY tablet_failure_idx (tablet_failure, migration_status, retries)" + alterSchemaMigrationsTableProgress = "ALTER TABLE _vt.schema_migrations add column progress float NOT NULL DEFAULT 0" + alterSchemaMigrationsTableContext = "ALTER TABLE _vt.schema_migrations add column migration_context varchar(1024) NOT NULL DEFAULT ''" + alterSchemaMigrationsTableDDLAction = "ALTER TABLE _vt.schema_migrations add column ddl_action varchar(16) NOT NULL DEFAULT ''" - sqlScheduleSingleMigration = `UPDATE %s.schema_migrations + sqlScheduleSingleMigration = `UPDATE _vt.schema_migrations SET migration_status='ready', ready_timestamp=NOW() @@ -69,42 +65,42 @@ const ( requested_timestamp ASC LIMIT 1 ` - sqlUpdateMigrationStatus = `UPDATE %s.schema_migrations + sqlUpdateMigrationStatus = `UPDATE _vt.schema_migrations SET migration_status=%a WHERE migration_uuid=%a ` - sqlUpdateMigrationProgress = `UPDATE %s.schema_migrations + sqlUpdateMigrationProgress = `UPDATE _vt.schema_migrations SET progress=%a WHERE migration_uuid=%a ` - sqlUpdateMigrationStartedTimestamp = `UPDATE %s.schema_migrations + sqlUpdateMigrationStartedTimestamp = `UPDATE _vt.schema_migrations SET started_timestamp=IFNULL(started_timestamp, NOW()) WHERE migration_uuid=%a ` - sqlUpdateMigrationTimestamp = `UPDATE %s.schema_migrations + sqlUpdateMigrationTimestamp = `UPDATE _vt.schema_migrations SET %s=NOW() WHERE migration_uuid=%a ` - sqlUpdateMigrationLogPath = `UPDATE %s.schema_migrations + sqlUpdateMigrationLogPath = `UPDATE _vt.schema_migrations SET log_path=%a WHERE migration_uuid=%a ` - sqlUpdateArtifacts = 
`UPDATE %s.schema_migrations + sqlUpdateArtifacts = `UPDATE _vt.schema_migrations SET artifacts=concat(%a, ',', artifacts) WHERE migration_uuid=%a ` - sqlUpdateTabletFailure = `UPDATE %s.schema_migrations + sqlUpdateTabletFailure = `UPDATE _vt.schema_migrations SET tablet_failure=1 WHERE migration_uuid=%a ` - sqlRetryMigration = `UPDATE %s.schema_migrations + sqlRetryMigration = `UPDATE _vt.schema_migrations SET migration_status='queued', tablet=%a, @@ -127,34 +123,34 @@ const ( ` sqlSelectRunningMigrations = `SELECT migration_uuid - FROM %s.schema_migrations + FROM _vt.schema_migrations WHERE migration_status='running' AND strategy=%a ` sqlSelectCountReadyMigrations = `SELECT count(*) as count_ready - FROM %s.schema_migrations + FROM _vt.schema_migrations WHERE migration_status='ready' ` sqlSelectStaleMigrations = `SELECT migration_uuid - FROM %s.schema_migrations + FROM _vt.schema_migrations WHERE migration_status='running' AND liveness_timestamp < NOW() - INTERVAL %a MINUTE ` sqlSelectPendingMigrations = `SELECT migration_uuid - FROM %s.schema_migrations + FROM _vt.schema_migrations WHERE migration_status IN ('queued', 'ready', 'running') ` sqlSelectUncollectedArtifacts = `SELECT migration_uuid, artifacts - FROM %s.schema_migrations + FROM _vt.schema_migrations WHERE migration_status IN ('complete', 'failed') AND cleanup_timestamp IS NULL @@ -178,7 +174,7 @@ const ( log_path, retries, tablet - FROM %s.schema_migrations + FROM _vt.schema_migrations WHERE migration_uuid=%a ` @@ -201,7 +197,7 @@ const ( log_path, retries, tablet - FROM %s.schema_migrations + FROM _vt.schema_migrations WHERE migration_status='ready' LIMIT 1 @@ -301,14 +297,14 @@ var ( ) var applyDDL = []string{ - fmt.Sprintf(sqlCreateSidecarDB, "_vt"), - fmt.Sprintf(sqlCreateSchemaMigrationsTable, "_vt"), - fmt.Sprintf(alterSchemaMigrationsTableRetries, "_vt"), - fmt.Sprintf(alterSchemaMigrationsTableTablet, "_vt"), - fmt.Sprintf(alterSchemaMigrationsTableArtifacts, "_vt"), - 
fmt.Sprintf(alterSchemaMigrationsTableTabletFailure, "_vt"), - fmt.Sprintf(alterSchemaMigrationsTableTabletFailureIndex, "_vt"), - fmt.Sprintf(alterSchemaMigrationsTableProgress, "_vt"), - fmt.Sprintf(alterSchemaMigrationsTableContext, "_vt"), - fmt.Sprintf(alterSchemaMigrationsTableDDLAction, "_vt"), + sqlCreateSidecarDB, + sqlCreateSchemaMigrationsTable, + alterSchemaMigrationsTableRetries, + alterSchemaMigrationsTableTablet, + alterSchemaMigrationsTableArtifacts, + alterSchemaMigrationsTableTabletFailure, + alterSchemaMigrationsTableTabletFailureIndex, + alterSchemaMigrationsTableProgress, + alterSchemaMigrationsTableContext, + alterSchemaMigrationsTableDDLAction, } From d6d8eedcf1f1e40d6c99507909262b97da795b15 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Mon, 1 Feb 2021 07:23:58 +0200 Subject: [PATCH 12/72] init schema in InitDBConfig() Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/tabletmanager/vreplication/engine.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/go/vt/vttablet/tabletmanager/vreplication/engine.go b/go/vt/vttablet/tabletmanager/vreplication/engine.go index aa5a16cc20a..76d26b7f869 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/engine.go +++ b/go/vt/vttablet/tabletmanager/vreplication/engine.go @@ -151,6 +151,9 @@ func (vre *Engine) InitDBConfig(dbcfgs *dbconfigs.DBConfigs) { return binlogplayer.NewDBClient(dbcfgs.FilteredWithDB()) } vre.dbName = dbcfgs.DBName + + // Ensure the schema is created as early as possible + go vre.Exec(warmUpQuery) } // NewTestEngine creates a new Engine for testing. 
@@ -194,9 +197,6 @@ func (vre *Engine) Open(ctx context.Context) {
 		vre.cancelRetry = cancel
 		go vre.retry(ctx, err)
 	}
-
-	// Ensure the schema exists
-	go vre.Exec(warmUpQuery)
 }
 
 func (vre *Engine) openLocked(ctx context.Context) error {

From c6f758d45247d90f3bf4c5f15c744b9b2e8b887c Mon Sep 17 00:00:00 2001
From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>
Date: Mon, 1 Feb 2021 07:25:32 +0200
Subject: [PATCH 13/72] mutex protection

Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>
---
 go/vt/vttablet/tabletmanager/vreplication/engine.go | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/go/vt/vttablet/tabletmanager/vreplication/engine.go b/go/vt/vttablet/tabletmanager/vreplication/engine.go
index 76d26b7f869..f01dc7b5ec2 100644
--- a/go/vt/vttablet/tabletmanager/vreplication/engine.go
+++ b/go/vt/vttablet/tabletmanager/vreplication/engine.go
@@ -153,7 +153,11 @@ func (vre *Engine) InitDBConfig(dbcfgs *dbconfigs.DBConfigs) {
 	vre.dbName = dbcfgs.DBName
 
 	// Ensure the schema is created as early as possible
-	go vre.Exec(warmUpQuery)
+	go func() {
+		vre.mu.Lock()
+		defer vre.mu.Unlock()
+		vre.Exec(warmUpQuery)
+	}()
 }
 
 // NewTestEngine creates a new Engine for testing.
From 54428fe01e7b8d38798773004065bd5388e6d228 Mon Sep 17 00:00:00 2001
From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>
Date: Mon, 1 Feb 2021 07:28:16 +0200
Subject: [PATCH 14/72] whoops; mutex was already there

Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>
---
 go/vt/vttablet/tabletmanager/vreplication/engine.go | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/go/vt/vttablet/tabletmanager/vreplication/engine.go b/go/vt/vttablet/tabletmanager/vreplication/engine.go
index f01dc7b5ec2..76d26b7f869 100644
--- a/go/vt/vttablet/tabletmanager/vreplication/engine.go
+++ b/go/vt/vttablet/tabletmanager/vreplication/engine.go
@@ -153,11 +153,7 @@ func (vre *Engine) InitDBConfig(dbcfgs *dbconfigs.DBConfigs) {
 	vre.dbName = dbcfgs.DBName
 
 	// Ensure the schema is created as early as possible
-	go func() {
-		vre.mu.Lock()
-		defer vre.mu.Unlock()
-		vre.Exec(warmUpQuery)
-	}()
+	go vre.Exec(warmUpQuery)
 }
 
 // NewTestEngine creates a new Engine for testing.

From 67844484a09cfdfa305f708f2f569e6e13577015 Mon Sep 17 00:00:00 2001
From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>
Date: Mon, 1 Feb 2021 09:32:46 +0200
Subject: [PATCH 15/72] fix check type.
Backport of https://github.com/vitessio/vitess/pull/7422 Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/tabletmanager/vreplication/engine.go | 2 +- go/vt/vttablet/tabletserver/gc/tablegc.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/go/vt/vttablet/tabletmanager/vreplication/engine.go b/go/vt/vttablet/tabletmanager/vreplication/engine.go index 76d26b7f869..90c3767ea78 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/engine.go +++ b/go/vt/vttablet/tabletmanager/vreplication/engine.go @@ -136,7 +136,7 @@ func NewEngine(config *tabletenv.TabletConfig, ts *topo.Server, cell string, mys mysqld: mysqld, journaler: make(map[string]*journalEvent), ec: newExternalConnector(config.ExternalConnections), - throttlerClient: throttle.NewBackgroundClient(lagThrottler, throttlerAppName, throttle.ThrottleCheckSelf), + throttlerClient: throttle.NewBackgroundClient(lagThrottler, throttlerAppName, throttle.ThrottleCheckPrimaryWrite), } return vre } diff --git a/go/vt/vttablet/tabletserver/gc/tablegc.go b/go/vt/vttablet/tabletserver/gc/tablegc.go index d66fc9912ce..105623a4bc3 100644 --- a/go/vt/vttablet/tabletserver/gc/tablegc.go +++ b/go/vt/vttablet/tabletserver/gc/tablegc.go @@ -122,7 +122,7 @@ type GCStatus struct { // NewTableGC creates a table collector func NewTableGC(env tabletenv.Env, ts *topo.Server, tabletTypeFunc func() topodatapb.TabletType, lagThrottler *throttle.Throttler) *TableGC { collector := &TableGC{ - throttlerClient: throttle.NewBackgroundClient(lagThrottler, throttlerAppName, throttle.ThrottleCheckSelf), + throttlerClient: throttle.NewBackgroundClient(lagThrottler, throttlerAppName, throttle.ThrottleCheckPrimaryWrite), isPrimary: 0, isOpen: 0, From 5a620da29a11dbf6f7432568c0721a6b643152e4 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Mon, 1 Feb 2021 11:25:44 +0200 Subject: [PATCH 16/72] engine's early schema creation did not work as 
expected. Not important on this branch's radar

Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>
---
 go/vt/vttablet/tabletmanager/vreplication/engine.go | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/go/vt/vttablet/tabletmanager/vreplication/engine.go b/go/vt/vttablet/tabletmanager/vreplication/engine.go
index 90c3767ea78..086dbecb1f3 100644
--- a/go/vt/vttablet/tabletmanager/vreplication/engine.go
+++ b/go/vt/vttablet/tabletmanager/vreplication/engine.go
@@ -60,8 +60,6 @@ const (
  table_name varbinary(128),
  lastpk varbinary(2000),
  primary key (vrepl_id, table_name))`
-
-	warmUpQuery = "select 1 from _vt.vreplication limit 1"
 )
 
 var withDDL *withddl.WithDDL
@@ -151,9 +149,6 @@ func (vre *Engine) InitDBConfig(dbcfgs *dbconfigs.DBConfigs) {
 		return binlogplayer.NewDBClient(dbcfgs.FilteredWithDB())
 	}
 	vre.dbName = dbcfgs.DBName
-
-	// Ensure the schema is created as early as possible
-	go vre.Exec(warmUpQuery)
 }
 
 // NewTestEngine creates a new Engine for testing.
From 82a0aa01707991a600cd4c81f43054533e665a25 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Mon, 1 Feb 2021 15:39:33 +0200 Subject: [PATCH 17/72] check vreplication liveness, cancel vreplication migration Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 140 ++++++++++++++++-- go/vt/vttablet/onlineddl/schema.go | 20 ++- .../tabletmanager/vreplication/vplayer.go | 2 - 3 files changed, 143 insertions(+), 19 deletions(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index a98f3071e7a..6f8ddbc0453 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -413,6 +413,40 @@ func (e *Executor) executeDirectly(ctx context.Context, onlineDDL *schema.Online return nil } +// terminateVReplMigration stops vreplication, then removes the _vt.vreplication entry for the given migration +func (e *Executor) terminateVReplMigration(ctx context.Context, onlineDDL *schema.OnlineDDL) error { + tmClient := tmclient.NewTabletManagerClient() + tablet, err := e.ts.GetTablet(ctx, e.tabletAlias) + if err != nil { + return err + } + { + query, err := sqlparser.ParseAndBind(sqlStopVReplStream, + sqltypes.StringBindVariable(e.dbName), + sqltypes.StringBindVariable(onlineDDL.UUID), + ) + if err != nil { + return err + } + // silently skip error; stopping the stream is just a graceful act; later deleting it is more important + _, _ = tmClient.VReplicationExec(ctx, tablet.Tablet, query) + } + { + query, err := sqlparser.ParseAndBind(sqlDeleteVReplStream, + sqltypes.StringBindVariable(e.dbName), + sqltypes.StringBindVariable(onlineDDL.UUID), + ) + if err != nil { + return err + } + // silently skip error; stopping the stream is just a graceful act; later deleting it is more important + if _, err := tmClient.VReplicationExec(ctx, tablet.Tablet, query); err != nil { + return err + } + } + return nil +} 
+ // ExecuteWithVReplication sets up the grounds for a vreplication schema migration func (e *Executor) ExecuteWithVReplication(ctx context.Context, onlineDDL *schema.OnlineDDL) error { e.migrationMutex.Lock() @@ -946,7 +980,9 @@ func (e *Executor) terminateMigration(ctx context.Context, onlineDDL *schema.Onl } switch onlineDDL.Strategy { case schema.DDLStrategyOnline: - // TODO(shlomi): remove vreplication + if err := e.terminateVReplMigration(ctx, onlineDDL); err != nil { + return foundRunning, fmt.Errorf("Error cancelling migration, vreplication exec error: %+v", err) + } _ = e.updateMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusFailed) atomic.StoreInt64(&e.migrationRunning, 0) @@ -1269,30 +1305,106 @@ func (e *Executor) dropPTOSCMigrationTriggers(ctx context.Context, onlineDDL *sc return err } +// isVReplMigrationReadyToCutOver sees if the vreplication migration has completed the row copy +// and is up to date with the binlogs. +func (e *Executor) isVReplMigrationReadyToCutOver(ctx context.Context, uuid string) (isRunning bool, err error) { + // fmt.Printf("============== isVReplMigrationRunning \n") + // query, err := sqlparser.ParseAndBind(sqlReadVReplStream, + // sqltypes.StringBindVariable(uuid), + // ) + // if err != nil { + // return false, err + // } + // fmt.Printf("============== isVReplMigrationRunning query=%v\n", query) + // r, err := e.execQuery(ctx, query) + // if err != nil { + // return false, err + // } + // timeNow := time.Now() + // for _, row := range r.Named().Rows { + // timeUpdated := row.AsInt64("time_updated", 0) + // fmt.Printf("============== isVReplMigrationRunning timeUpdated=%v\n", timeUpdated) + // durationDiff := timeNow.Sub(time.Unix(timeUpdated, 0)) + // if durationDiff < 0 { + // durationDiff = -durationDiff + // } + // fmt.Printf("============== isVReplMigrationRunning durationDiff=%v\n", durationDiff) + // if durationDiff < time.Minute { + // isRunning = true + // } + // } + // fmt.Printf("============== 
isVReplMigrationRunning isRunning=%v\n", isRunning) + return isRunning, nil +} + +// isVReplMigrationRunning sees if there is a VReplication migration actively running +func (e *Executor) isVReplMigrationRunning(ctx context.Context, uuid string) (isRunning bool, err error) { + fmt.Printf("============== isVReplMigrationRunning \n") + query, err := sqlparser.ParseAndBind(sqlReadVReplStream, + sqltypes.StringBindVariable(uuid), + ) + if err != nil { + return false, err + } + fmt.Printf("============== isVReplMigrationRunning query=%v\n", query) + r, err := e.execQuery(ctx, query) + if err != nil { + return false, err + } + named := r.Named() + if len(named.Rows) == 0 { + // no vreplication entry + return false, nil + } + row := named.Row() + if row == nil { + // multiple workflows under same name: + return false, vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "multiple workflows found for UUID: %+v", uuid) + } + message := row.AsString("message", "") + if strings.Contains(strings.ToLower(message), "error") { + return false, nil + } + return true, nil +} + // reviewRunningMigrations iterates migrations in 'running' state (there really should just be one that is // actually running). 
func (e *Executor) reviewRunningMigrations(ctx context.Context) (countRunnning int, runningNotByThisProcess []string, err error) { e.migrationMutex.Lock() defer e.migrationMutex.Unlock() - parsed := sqlparser.BuildParsedQuery(sqlSelectRunningMigrations, ":strategy") - bindVars := map[string]*querypb.BindVariable{ - "strategy": sqltypes.StringBindVariable(string(schema.DDLStrategyPTOSC)), - } - bound, err := parsed.GenerateQuery(bindVars, nil) - if err != nil { - return countRunnning, runningNotByThisProcess, err - } - r, err := e.execQuery(ctx, bound) + r, err := e.execQuery(ctx, sqlSelectRunningMigrations) if err != nil { return countRunnning, runningNotByThisProcess, err } for _, row := range r.Named().Rows { uuid := row["migration_uuid"].ToString() - // Since pt-osc doesn't have a "liveness" plugin entry point, we do it externally: - // if the process is alive, we update the `liveness_timestamp` for this migration. - if running, _, _ := e.isPTOSCMigrationRunning(ctx, uuid); running { - _ = e.updateMigrationTimestamp(ctx, "liveness_timestamp", uuid) + strategy := schema.DDLStrategy(row["strategy"].ToString()) + switch strategy { + case schema.DDLStrategyPTOSC: + { + // Since pt-osc doesn't have a "liveness" plugin entry point, we do it externally: + // if the process is alive, we update the `liveness_timestamp` for this migration. 
+ running, _, err := e.isPTOSCMigrationRunning(ctx, uuid) + if err != nil { + return countRunnning, runningNotByThisProcess, err + } + if running { + _ = e.updateMigrationTimestamp(ctx, "liveness_timestamp", uuid) + } + } + case schema.DDLStrategyOnline: + { + // We check the _vt.vreplication table + running, err := e.isVReplMigrationRunning(ctx, uuid) + if err != nil { + return countRunnning, runningNotByThisProcess, err + } + if running { + _ = e.updateMigrationTimestamp(ctx, "liveness_timestamp", uuid) + } + } } countRunnning++ diff --git a/go/vt/vttablet/onlineddl/schema.go b/go/vt/vttablet/onlineddl/schema.go index 5300900cac1..c366314cf98 100644 --- a/go/vt/vttablet/onlineddl/schema.go +++ b/go/vt/vttablet/onlineddl/schema.go @@ -122,11 +122,11 @@ const ( AND retries=0 ` sqlSelectRunningMigrations = `SELECT - migration_uuid + migration_uuid, + strategy FROM _vt.schema_migrations WHERE migration_status='running' - AND strategy=%a ` sqlSelectCountReadyMigrations = `SELECT count(*) as count_ready @@ -217,7 +217,21 @@ const ( sqlCreateTableLike = "CREATE TABLE `%a` LIKE `%a`" sqlAlterTableOptions = "ALTER TABLE `%a` %s" sqlShowColumnsFrom = "SHOW COLUMNS FROM `%a`" - sqlStartVReplStream = "update _vt.vreplication set state='Running' where db_name=%a and workflow=%a" + sqlStartVReplStream = "UPDATE _vt.vreplication set state='Running' where db_name=%a and workflow=%a" + sqlStopVReplStream = "UPDATE _vt.vreplication set state='Stopped' where db_name=%a and workflow=%a" + sqlDeleteVReplStream = "DELETE FROM _vt.vreplication where db_name=%a and workflow=%a" + sqlReadVReplStream = `SELECT + workflow, + pos, + time_updated, + transaction_timestamp, + state, + message + FROM _vt.vreplication + WHERE + workflow=%a + + ` // TODO(shlomi): consider removing: // sqlGetUniqueKeys = ` // SELECT diff --git a/go/vt/vttablet/tabletmanager/vreplication/vplayer.go b/go/vt/vttablet/tabletmanager/vreplication/vplayer.go index 444676fe668..77f688d9d31 100644 --- 
a/go/vt/vttablet/tabletmanager/vreplication/vplayer.go +++ b/go/vt/vttablet/tabletmanager/vreplication/vplayer.go @@ -259,8 +259,6 @@ func (vp *vplayer) recordHeartbeat() (err error) { return nil } -// applyEvents is the main thread that applies the events. It has the following use - // applyEvents is the main thread that applies the events. It has the following use // cases to take into account: // * Normal transaction that has row mutations. In this case, the transaction From f825e56336d2e971d14516eeae9edab4912cf249 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Tue, 2 Feb 2021 07:54:02 +0200 Subject: [PATCH 18/72] check if vreplication migration is ready to cut-over Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 153 ++++++++++++++++++--------- go/vt/vttablet/onlineddl/schema.go | 7 ++ 2 files changed, 112 insertions(+), 48 deletions(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 6f8ddbc0453..baffe8d8d13 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -40,6 +40,7 @@ import ( "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/textutil" "vitess.io/vitess/go/timer" + "vitess.io/vitess/go/vt/binlog/binlogplayer" "vitess.io/vitess/go/vt/dbconnpool" "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/schema" @@ -62,9 +63,9 @@ import ( var ( // ErrExecutorNotWritableTablet is generated when executor is asked to run gh-ost on a read-only server - ErrExecutorNotWritableTablet = errors.New("Cannot run gh-ost migration on non-writable tablet") + ErrExecutorNotWritableTablet = errors.New("Cannot run migration on non-writable tablet") // ErrExecutorMigrationAlreadyRunning is generated when an attempt is made to run an operation that conflicts with a running migration - ErrExecutorMigrationAlreadyRunning = errors.New("Cannot run gh-ost migration since a 
migration is already running") + ErrExecutorMigrationAlreadyRunning = errors.New("Cannot run migration since a migration is already running") // ErrMigrationNotFound is returned by readMigration when given UUI cannot be found ErrMigrationNotFound = errors.New("Migration not found") ) @@ -110,6 +111,7 @@ const ( progressPctFull float64 = 100.0 gcHoldHours = 72 databasePoolSize = 3 + cutOverThreshold = 3 * time.Second ) var ( @@ -1305,53 +1307,100 @@ func (e *Executor) dropPTOSCMigrationTriggers(ctx context.Context, onlineDDL *sc return err } -// isVReplMigrationReadyToCutOver sees if the vreplication migration has completed the row copy -// and is up to date with the binlogs. -func (e *Executor) isVReplMigrationReadyToCutOver(ctx context.Context, uuid string) (isRunning bool, err error) { - // fmt.Printf("============== isVReplMigrationRunning \n") - // query, err := sqlparser.ParseAndBind(sqlReadVReplStream, - // sqltypes.StringBindVariable(uuid), - // ) - // if err != nil { - // return false, err - // } - // fmt.Printf("============== isVReplMigrationRunning query=%v\n", query) - // r, err := e.execQuery(ctx, query) - // if err != nil { - // return false, err - // } - // timeNow := time.Now() - // for _, row := range r.Named().Rows { - // timeUpdated := row.AsInt64("time_updated", 0) - // fmt.Printf("============== isVReplMigrationRunning timeUpdated=%v\n", timeUpdated) - // durationDiff := timeNow.Sub(time.Unix(timeUpdated, 0)) - // if durationDiff < 0 { - // durationDiff = -durationDiff - // } - // fmt.Printf("============== isVReplMigrationRunning durationDiff=%v\n", durationDiff) - // if durationDiff < time.Minute { - // isRunning = true - // } - // } - // fmt.Printf("============== isVReplMigrationRunning isRunning=%v\n", isRunning) - return isRunning, nil -} - -// isVReplMigrationRunning sees if there is a VReplication migration actively running -func (e *Executor) isVReplMigrationRunning(ctx context.Context, uuid string) (isRunning bool, err error) { - 
fmt.Printf("============== isVReplMigrationRunning \n") +// readVReplStream reads _vt.vreplication entries for given workflow +func (e *Executor) readVReplStream(ctx context.Context, uuid string) (*sqltypes.NamedResult, error) { query, err := sqlparser.ParseAndBind(sqlReadVReplStream, sqltypes.StringBindVariable(uuid), ) if err != nil { - return false, err + return nil, err } - fmt.Printf("============== isVReplMigrationRunning query=%v\n", query) r, err := e.execQuery(ctx, query) + if err != nil { + return nil, err + } + return r.Named(), nil +} + +// isVReplMigrationReadyToCutOver sees if the vreplication migration has completed the row copy +// and is up to date with the binlogs. +func (e *Executor) isVReplMigrationReadyToCutOver(ctx context.Context, uuid string) (isReady bool, err error) { + named, err := e.readVReplStream(ctx, uuid) + if err != nil { + return false, err + } + if len(named.Rows) == 0 { + // no vreplication entry + return false, nil + } + row := named.Row() + if row == nil { + // multiple workflows under same name: + return false, vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "multiple workflows found for UUID: %+v", uuid) + } + + // Check all the cases where migration is still running: + { + // when ready to cut-over, pos must have some value + pos := row.AsString("pos", "") + if pos == "" { + return false, nil + } + } + { + // Both time_updated and transaction_timestamp must be in close priximity to each + // other and to the time now, otherwise that means we're lagging and it's not a good time + // to cut-over + durationDiff := func(t1, t2 time.Time) time.Duration { + diff := t1.Sub(t2) + if diff < 0 { + diff = -diff + } + return diff + } + timeUpdated := time.Unix(row.AsInt64("time_updated", 0), 0) + if durationDiff(time.Now(), timeUpdated) > cutOverThreshold { + return false, nil + } + transactionTimestamp := time.Unix(row.AsInt64("transaction_timestamp", 0), 0) + if durationDiff(transactionTimestamp, timeUpdated) > cutOverThreshold { + return 
false, nil + } + } + { + // copy_state must have no entries for this vreplication id: if entries are + // present that means copy is still in progress + id := row.AsInt64("id", 0) + + query, err := sqlparser.ParseAndBind(sqlReadCountCopyState, + sqltypes.Int64BindVariable(id), + ) + if err != nil { + return false, err + } + r, err := e.execQuery(ctx, query) + if err != nil { + return false, err + } + csRow := r.Named().Row() + if csRow == nil { + return false, nil + } + count := csRow.AsInt64("cnt", 0) + if count > 0 { + // Still copying + return false, nil + } + } + return true, nil +} + +// isVReplMigrationRunning sees if there is a VReplication migration actively running +func (e *Executor) isVReplMigrationRunning(ctx context.Context, uuid string) (isRunning bool, err error) { + named, err := e.readVReplStream(ctx, uuid) if err != nil { return false, err } - named := r.Named() if len(named.Rows) == 0 { // no vreplication entry return false, nil @@ -1365,7 +1414,12 @@ func (e *Executor) isVReplMigrationRunning(ctx context.Context, uuid string) (is if strings.Contains(strings.ToLower(message), "error") { return false, nil } - return true, nil + state := row.AsString("state", "") + switch state { + case binlogplayer.VReplicationInit, binlogplayer.VReplicationCopying, binlogplayer.BlpRunning: + return true, nil + } + return false, nil } // reviewRunningMigrations iterates migrations in 'running' state (there really should just be one that is @@ -1382,22 +1436,25 @@ func (e *Executor) reviewRunningMigrations(ctx context.Context) (countRunnning i uuid := row["migration_uuid"].ToString() strategy := schema.DDLStrategy(row["strategy"].ToString()) switch strategy { - case schema.DDLStrategyPTOSC: + case schema.DDLStrategyOnline: { - // Since pt-osc doesn't have a "liveness" plugin entry point, we do it externally: - // if the process is alive, we update the `liveness_timestamp` for this migration. 
- running, _, err := e.isPTOSCMigrationRunning(ctx, uuid) + // We check the _vt.vreplication table + running, err := e.isVReplMigrationRunning(ctx, uuid) if err != nil { return countRunnning, runningNotByThisProcess, err } if running { _ = e.updateMigrationTimestamp(ctx, "liveness_timestamp", uuid) } + + isReady, err := e.isVReplMigrationReadyToCutOver(ctx, uuid) + fmt.Printf("========== isVReplMigrationReadyToCutOver: %v, %v, %v\n", uuid, isReady, err) } - case schema.DDLStrategyOnline: + case schema.DDLStrategyPTOSC: { - // We check the _vt.vreplication table - running, err := e.isVReplMigrationRunning(ctx, uuid) + // Since pt-osc doesn't have a "liveness" plugin entry point, we do it externally: + // if the process is alive, we update the `liveness_timestamp` for this migration. + running, _, err := e.isPTOSCMigrationRunning(ctx, uuid) if err != nil { return countRunnning, runningNotByThisProcess, err } diff --git a/go/vt/vttablet/onlineddl/schema.go b/go/vt/vttablet/onlineddl/schema.go index c366314cf98..9f8e8fd87f6 100644 --- a/go/vt/vttablet/onlineddl/schema.go +++ b/go/vt/vttablet/onlineddl/schema.go @@ -221,6 +221,7 @@ const ( sqlStopVReplStream = "UPDATE _vt.vreplication set state='Stopped' where db_name=%a and workflow=%a" sqlDeleteVReplStream = "DELETE FROM _vt.vreplication where db_name=%a and workflow=%a" sqlReadVReplStream = `SELECT + id, workflow, pos, time_updated, @@ -232,6 +233,12 @@ const ( workflow=%a ` + sqlReadCountCopyState = `SELECT + count(*) as cnt + FROM + _vt.copy_state + WHERE vrepl_id=%a + ` // TODO(shlomi): consider removing: // sqlGetUniqueKeys = ` // SELECT From 17816404000cb8865c90b8d9bcd6fcd9a9020085 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Tue, 2 Feb 2021 07:54:35 +0200 Subject: [PATCH 19/72] added index on 'workflow' column in vreplication Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/binlog/binlogplayer/binlog_player.go | 1 + 1 file 
changed, 1 insertion(+) diff --git a/go/vt/binlog/binlogplayer/binlog_player.go b/go/vt/binlog/binlogplayer/binlog_player.go index 55a460e0205..2282a7c0c4c 100644 --- a/go/vt/binlog/binlogplayer/binlog_player.go +++ b/go/vt/binlog/binlogplayer/binlog_player.go @@ -551,6 +551,7 @@ func CreateVReplicationTable() []string { var AlterVReplicationTable = []string{ "ALTER TABLE _vt.vreplication ADD COLUMN db_name VARBINARY(255) NOT NULL", "ALTER TABLE _vt.vreplication MODIFY source BLOB NOT NULL", + "ALTER TABLE _vt.vreplication ADD KEY workflow_idx (workflow(128))", } // VRSettings contains the settings of a vreplication table. From 24b3899ec3af953f5d0b0948ce24cdf42f07be0c Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 3 Feb 2021 13:43:18 +0200 Subject: [PATCH 20/72] refactor schemaMigrationsTableName outside vexec and into go/vt/schema Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/schema/online_ddl.go | 4 ++++ go/vt/wrangler/vexec.go | 5 ++--- go/vt/wrangler/vexec_plan.go | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/go/vt/schema/online_ddl.go b/go/vt/schema/online_ddl.go index 197348e1e87..4ee42565367 100644 --- a/go/vt/schema/online_ddl.go +++ b/go/vt/schema/online_ddl.go @@ -36,6 +36,10 @@ var ( ptOSCGeneratedTableNameRegexp = regexp.MustCompile(`^_.*_old$`) ) +const ( + SchemaMigrationsTableName = "schema_migrations" +) + // MigrationBasePath is the root for all schema migration entries func MigrationBasePath() string { return migrationBasePath diff --git a/go/vt/wrangler/vexec.go b/go/vt/wrangler/vexec.go index 5b7259800a5..bdba82ee111 100644 --- a/go/vt/wrangler/vexec.go +++ b/go/vt/wrangler/vexec.go @@ -40,9 +40,8 @@ import ( ) const ( - vexecTableQualifier = "_vt" - vreplicationTableName = "vreplication" - schemaMigrationsTableName = "schema_migrations" + vexecTableQualifier = "_vt" + vreplicationTableName = "vreplication" ) // vexec is the 
construct by which we run a query against backend shards. vexec is created by user-facing diff --git a/go/vt/wrangler/vexec_plan.go b/go/vt/wrangler/vexec_plan.go index 4beb5be4fd9..8c51f9e8e7b 100644 --- a/go/vt/wrangler/vexec_plan.go +++ b/go/vt/wrangler/vexec_plan.go @@ -24,6 +24,7 @@ import ( "vitess.io/vitess/go/vt/log" querypb "vitess.io/vitess/go/vt/proto/query" topodatapb "vitess.io/vitess/go/vt/proto/topodata" + "vitess.io/vitess/go/vt/schema" "vitess.io/vitess/go/vt/sqlparser" "github.com/olekukonko/tablewriter" @@ -190,7 +191,7 @@ func qualifiedTableName(tableName string) string { // getPlanner returns a specific planner appropriate for the queried table func (vx *vexec) getPlanner(ctx context.Context) error { switch vx.tableName { - case qualifiedTableName(schemaMigrationsTableName): + case qualifiedTableName(schema.SchemaMigrationsTableName): vx.planner = newSchemaMigrationsPlanner(vx) case qualifiedTableName(vreplicationTableName): vx.planner = newVReplicationPlanner(vx) From 5693c13e8c370bcf1db1cc6fc7fab0e83f9d1ed7 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 3 Feb 2021 13:44:47 +0200 Subject: [PATCH 21/72] new interface for VExec executors on tablet Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/vexec/executor.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 go/vt/vttablet/vexec/executor.go diff --git a/go/vt/vttablet/vexec/executor.go b/go/vt/vttablet/vexec/executor.go new file mode 100644 index 00000000000..27c4590432a --- /dev/null +++ b/go/vt/vttablet/vexec/executor.go @@ -0,0 +1,12 @@ +package vexec + +import ( + "context" + + querypb "vitess.io/vitess/go/vt/proto/query" +) + +// Executor should be implemented by any tablet-side structs which accept VExec commands +type Executor interface { + VExec(ctx context.Context, vx *TabletVExec) (qr *querypb.QueryResult, err error) +} From 
5d1cad0c7d6de939eca2637dd38daaf3c91e2f58 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 3 Feb 2021 13:46:01 +0200 Subject: [PATCH 22/72] use new vexec.Executor interface Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/tabletmanager/rpc_vexec.go | 4 ++-- go/vt/vttablet/tabletserver/controller.go | 4 ++-- go/vt/vttablet/tabletserver/tabletserver.go | 3 ++- go/vt/vttablet/tabletservermock/controller.go | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/go/vt/vttablet/tabletmanager/rpc_vexec.go b/go/vt/vttablet/tabletmanager/rpc_vexec.go index 6897159b2fb..8783230c696 100644 --- a/go/vt/vttablet/tabletmanager/rpc_vexec.go +++ b/go/vt/vttablet/tabletmanager/rpc_vexec.go @@ -20,7 +20,7 @@ import ( "fmt" querypb "vitess.io/vitess/go/vt/proto/query" - "vitess.io/vitess/go/vt/vttablet/onlineddl" + "vitess.io/vitess/go/vt/schema" "vitess.io/vitess/go/vt/vttablet/vexec" "context" @@ -33,7 +33,7 @@ func (tm *TabletManager) VExec(ctx context.Context, query, workflow, keyspace st return nil, err } switch vx.TableName { - case fmt.Sprintf("%s.%s", vexec.TableQualifier, onlineddl.SchemaMigrationsTableName): + case fmt.Sprintf("%s.%s", vexec.TableQualifier, schema.SchemaMigrationsTableName): return tm.QueryServiceControl.OnlineDDLExecutor().VExec(ctx, vx) default: return nil, fmt.Errorf("table not supported by vexec: %v", vx.TableName) diff --git a/go/vt/vttablet/tabletserver/controller.go b/go/vt/vttablet/tabletserver/controller.go index 7590c8db82e..c95a2a1646d 100644 --- a/go/vt/vttablet/tabletserver/controller.go +++ b/go/vt/vttablet/tabletserver/controller.go @@ -22,11 +22,11 @@ import ( "vitess.io/vitess/go/vt/dbconfigs" "vitess.io/vitess/go/vt/mysqlctl" "vitess.io/vitess/go/vt/topo" - "vitess.io/vitess/go/vt/vttablet/onlineddl" "vitess.io/vitess/go/vt/vttablet/queryservice" "vitess.io/vitess/go/vt/vttablet/tabletserver/rules" 
"vitess.io/vitess/go/vt/vttablet/tabletserver/schema" "vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv" + "vitess.io/vitess/go/vt/vttablet/vexec" "time" @@ -83,7 +83,7 @@ type Controller interface { QueryService() queryservice.QueryService // OnlineDDLExecutor the online DDL executor used by this Controller - OnlineDDLExecutor() *onlineddl.Executor + OnlineDDLExecutor() vexec.Executor // SchemaEngine returns the SchemaEngine object used by this Controller SchemaEngine() *schema.Engine diff --git a/go/vt/vttablet/tabletserver/tabletserver.go b/go/vt/vttablet/tabletserver/tabletserver.go index aa4c8929d71..de91362ba20 100644 --- a/go/vt/vttablet/tabletserver/tabletserver.go +++ b/go/vt/vttablet/tabletserver/tabletserver.go @@ -66,6 +66,7 @@ import ( "vitess.io/vitess/go/vt/vttablet/tabletserver/txserializer" "vitess.io/vitess/go/vt/vttablet/tabletserver/txthrottler" "vitess.io/vitess/go/vt/vttablet/tabletserver/vstreamer" + "vitess.io/vitess/go/vt/vttablet/vexec" ) // logPoolFull is for throttling transaction / query pool full messages in the log. @@ -412,7 +413,7 @@ func (tsv *TabletServer) QueryService() queryservice.QueryService { } // OnlineDDLExecutor returns the onlineddl.Executor part of TabletServer. 
-func (tsv *TabletServer) OnlineDDLExecutor() *onlineddl.Executor { +func (tsv *TabletServer) OnlineDDLExecutor() vexec.Executor { return tsv.onlineDDLExecutor } diff --git a/go/vt/vttablet/tabletservermock/controller.go b/go/vt/vttablet/tabletservermock/controller.go index 1e826cca50b..a0d5b716cd2 100644 --- a/go/vt/vttablet/tabletservermock/controller.go +++ b/go/vt/vttablet/tabletservermock/controller.go @@ -28,11 +28,11 @@ import ( "vitess.io/vitess/go/vt/mysqlctl" "vitess.io/vitess/go/vt/servenv" "vitess.io/vitess/go/vt/topo" - "vitess.io/vitess/go/vt/vttablet/onlineddl" "vitess.io/vitess/go/vt/vttablet/queryservice" "vitess.io/vitess/go/vt/vttablet/tabletserver/rules" "vitess.io/vitess/go/vt/vttablet/tabletserver/schema" "vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv" + "vitess.io/vitess/go/vt/vttablet/vexec" querypb "vitess.io/vitess/go/vt/proto/query" topodatapb "vitess.io/vitess/go/vt/proto/topodata" @@ -176,7 +176,7 @@ func (tqsc *Controller) ReloadSchema(ctx context.Context) error { } // OnlineDDLExecutor is part of the tabletserver.Controller interface -func (tqsc *Controller) OnlineDDLExecutor() *onlineddl.Executor { +func (tqsc *Controller) OnlineDDLExecutor() vexec.Executor { return nil } From 691096a923de63d770c16abbf825e2b9efea2153 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 3 Feb 2021 13:48:37 +0200 Subject: [PATCH 23/72] VReplStream struct Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/vrepl.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/go/vt/vttablet/onlineddl/vrepl.go b/go/vt/vttablet/onlineddl/vrepl.go index 265e8e76d5d..9a7d9fe04c4 100644 --- a/go/vt/vttablet/onlineddl/vrepl.go +++ b/go/vt/vttablet/onlineddl/vrepl.go @@ -39,6 +39,19 @@ import ( "vitess.io/vitess/go/vt/vttablet/tabletmanager/vreplication" ) +// VReplStream represents a row in _vt.vreplication table +type VReplStream struct { + id 
int64 + workflow string + source string + pos string + timeUpdated int64 + transactionTimestamp int64 + state string + message string + bls *binlogdatapb.BinlogSource +} + // VRepl is an online DDL helper for VReplication based migrations (ddl_strategy="online") type VRepl struct { workflow string From c6487c18cbc816a680352d88cd180510d8a96142 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 3 Feb 2021 13:52:00 +0200 Subject: [PATCH 24/72] refactor SchemaMigrationsTableName to go/vt/schema. Generate table swap query Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/schema.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/go/vt/vttablet/onlineddl/schema.go b/go/vt/vttablet/onlineddl/schema.go index 9f8e8fd87f6..5424acf5f6d 100644 --- a/go/vt/vttablet/onlineddl/schema.go +++ b/go/vt/vttablet/onlineddl/schema.go @@ -18,7 +18,6 @@ package onlineddl const ( // SchemaMigrationsTableName is used by VExec interceptor to call the correct handler - SchemaMigrationsTableName = "schema_migrations" sqlCreateSidecarDB = "create database if not exists _vt" sqlCreateSchemaMigrationsTable = `CREATE TABLE IF NOT EXISTS _vt.schema_migrations ( id bigint(20) unsigned NOT NULL AUTO_INCREMENT, @@ -223,6 +222,7 @@ const ( sqlReadVReplStream = `SELECT id, workflow, + source, pos, time_updated, transaction_timestamp, @@ -239,6 +239,7 @@ const ( _vt.copy_state WHERE vrepl_id=%a ` + sqlSwapTables = "RENAME TABLE `%a` TO `%a`, `%a` TO `%a`, `%a` TO `%a`" // TODO(shlomi): consider removing: // sqlGetUniqueKeys = ` // SELECT From 9c2dd6c7b163eff89b1d4e299a7b4ae89b0347ad Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 3 Feb 2021 13:52:55 +0200 Subject: [PATCH 25/72] check if vreplication DDL is ready for cutover; cut-over by stopping writes to table, waiting for updated pos, renaming tables, releasing table, 
releasing locks Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 213 ++++++++++++++++++++------- 1 file changed, 162 insertions(+), 51 deletions(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index baffe8d8d13..4fc79e43b32 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -43,6 +43,7 @@ import ( "vitess.io/vitess/go/vt/binlog/binlogplayer" "vitess.io/vitess/go/vt/dbconnpool" "vitess.io/vitess/go/vt/log" + binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" "vitess.io/vitess/go/vt/schema" "vitess.io/vitess/go/vt/servenv" "vitess.io/vitess/go/vt/sqlparser" @@ -58,6 +59,7 @@ import ( topodatapb "vitess.io/vitess/go/vt/proto/topodata" vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" + "github.com/golang/protobuf/proto" "github.com/google/shlex" ) @@ -449,6 +451,110 @@ func (e *Executor) terminateVReplMigration(ctx context.Context, onlineDDL *schem return nil } +// cutOverVReplMigration stops vreplication, then removes the _vt.vreplication entry for the given migration +func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream) error { + // sanity checks: + if s == nil { + return vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "No vreplicatoin stream migration %s", s.workflow) + } + if s.bls.Filter == nil { + return vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "No binlog source filter for migration %s", s.workflow) + } + if len(s.bls.Filter.Rules) != 1 { + return vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "Cannot detect filter rules for migration/vreplication %+v", s.workflow) + } + vreplTable := s.bls.Filter.Rules[0].Match + + // get topology client & entities: + tmClient := tmclient.NewTabletManagerClient() + tablet, err := e.ts.GetTablet(ctx, e.tabletAlias) + if err != nil { + return err + } + shardInfo, err := e.ts.GetShard(ctx, e.keyspace, e.shard) + if err != nil { + return err + } + + // information about 
source tablet + onlineDDL, err := e.readMigration(ctx, s.workflow) + if err != nil { + return err + } + // come up with temporary name for swap table + swapTable, err := schema.CreateUUID() + if err != nil { + return err + } + swapTable = strings.Replace(swapTable, "-", "", -1) + swapTable = fmt.Sprintf("_swap_%s", swapTable) + + // Preparation is complete. We proceed to cut-over. + + // lock keyspace: + { + lctx, unlockKeyspace, err := e.ts.LockKeyspace(ctx, e.keyspace, "OnlineDDLCutOver") + if err != nil { + return err + } + // lctx has the lock info, needed for UpdateShardFields + ctx = lctx + defer unlockKeyspace(&err) + } + toggleWrites := func(allowWrites bool) error { + if _, err := e.ts.UpdateShardFields(ctx, e.keyspace, shardInfo.ShardName(), func(si *topo.ShardInfo) error { + err := si.UpdateSourceBlacklistedTables(ctx, topodatapb.TabletType_MASTER, nil, allowWrites, []string{onlineDDL.Table}) + return err + }); err != nil { + return err + } + if err := tmClient.RefreshState(ctx, tablet.Tablet); err != nil { + return err + } + return nil + } + // stop writes on source: + if err := toggleWrites(false); err != nil { + return err + } + defer toggleWrites(true) + + // Writes are now disabled on table. Read up-to-date vreplication info, specifically to get latest (and fixed) pos: + s, err = e.readVReplStream(ctx, s.workflow, false) + if err != nil { + return err + } + + // Wait for target to reach the up-to-date pos + if err := tmClient.VReplicationWaitForPos(ctx, tablet.Tablet, int(s.id), s.pos); err != nil { + return err + } + // Target is now in sync with source! 
+ + // Stop vreplication + if _, err := tmClient.VReplicationExec(ctx, tablet.Tablet, binlogplayer.StopVReplication(uint32(s.id), "stopped for online DDL cutover")); err != nil { + return err + } + + // rename tables atomically (remember, writes on source tables are stopped) + { + parsed := sqlparser.BuildParsedQuery(sqlSwapTables, + onlineDDL.Table, swapTable, + vreplTable, onlineDDL.Table, + swapTable, vreplTable, + ) + if _, err = e.execQuery(ctx, parsed.Query); err != nil { + return err + } + } + + // Tables are now swapped! Migration is successful + _ = e.onSchemaMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusComplete, false, progressPctFull) + atomic.StoreInt64(&e.migrationRunning, 0) + + return nil +} + // ExecuteWithVReplication sets up the grounds for a vreplication schema migration func (e *Executor) ExecuteWithVReplication(ctx context.Context, onlineDDL *schema.OnlineDDL) error { e.migrationMutex.Lock() @@ -505,6 +611,7 @@ func (e *Executor) ExecuteWithVReplication(ctx context.Context, onlineDDL *schem if err != nil { return err } + // create vreplication entry insertVReplicationQuery, err := v.generateInsertStatement(ctx) if err != nil { return err @@ -512,6 +619,7 @@ func (e *Executor) ExecuteWithVReplication(ctx context.Context, onlineDDL *schem if _, err := tmClient.VReplicationExec(ctx, tablet.Tablet, insertVReplicationQuery); err != nil { return err } + // start stream! 
startVReplicationQuery, err := v.generateStartStatement(ctx) if err != nil { return err @@ -1308,7 +1416,7 @@ func (e *Executor) dropPTOSCMigrationTriggers(ctx context.Context, onlineDDL *sc } // readVReplStream reads _vt.vreplication entries for given workflow -func (e *Executor) readVReplStream(ctx context.Context, uuid string) (*sqltypes.NamedResult, error) { +func (e *Executor) readVReplStream(ctx context.Context, uuid string, okIfMissing bool) (*VReplStream, error) { query, err := sqlparser.ParseAndBind(sqlReadVReplStream, sqltypes.StringBindVariable(uuid), ) @@ -1319,32 +1427,38 @@ func (e *Executor) readVReplStream(ctx context.Context, uuid string) (*sqltypes. if err != nil { return nil, err } - return r.Named(), nil -} - -// isVReplMigrationReadyToCutOver sees if the vreplication migration has completed the row copy -// and is up to date with the binlogs. -func (e *Executor) isVReplMigrationReadyToCutOver(ctx context.Context, uuid string) (isReady bool, err error) { - named, err := e.readVReplStream(ctx, uuid) - if err != nil { - return false, err + if len(r.Rows) == 0 && okIfMissing { + return nil, nil } - if len(named.Rows) == 0 { - // no vreplication entry - return false, nil - } - row := named.Row() + row := r.Named().Row() if row == nil { - // multiple workflows under same name: - return false, vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "multiple workflows found for UUID: %+v", uuid) + return nil, vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "Cannot find unique workflow for UUID: %+v", uuid) + } + s := &VReplStream{ + id: row.AsInt64("id", 0), + workflow: row.AsString("workflow", ""), + source: row.AsString("source", ""), + pos: row.AsString("pos", ""), + timeUpdated: row.AsInt64("time_updated", 0), + transactionTimestamp: row.AsInt64("transaction_timestamp", 0), + state: row.AsString("state", ""), + message: row.AsString("message", ""), + bls: &binlogdatapb.BinlogSource{}, + } + if err := proto.UnmarshalText(s.source, s.bls); err != nil { + return nil, err } + 
return s, nil +} +// isVReplMigrationReadyToCutOver sees if the vreplication migration has completed the row copy +// and is up to date with the binlogs. +func (e *Executor) isVReplMigrationReadyToCutOver(ctx context.Context, s *VReplStream) (isReady bool, err error) { // Check all the cases where migration is still running: { // when ready to cut-over, pos must have some value - pos := row.AsString("pos", "") - if pos == "" { - return false, nil + if s.pos == "" { + return false, err } } { @@ -1358,22 +1472,20 @@ func (e *Executor) isVReplMigrationReadyToCutOver(ctx context.Context, uuid stri } return diff } - timeUpdated := time.Unix(row.AsInt64("time_updated", 0), 0) + timeUpdated := time.Unix(s.timeUpdated, 0) if durationDiff(time.Now(), timeUpdated) > cutOverThreshold { - return false, nil + return false, err } - transactionTimestamp := time.Unix(row.AsInt64("transaction_timestamp", 0), 0) + transactionTimestamp := time.Unix(s.transactionTimestamp, 0) if durationDiff(transactionTimestamp, timeUpdated) > cutOverThreshold { - return false, nil + return false, err } } { // copy_state must have no entries for this vreplication id: if entries are // present that means copy is still in progress - id := row.AsInt64("id", 0) - query, err := sqlparser.ParseAndBind(sqlReadCountCopyState, - sqltypes.Int64BindVariable(id), + sqltypes.Int64BindVariable(s.id), ) if err != nil { return false, err @@ -1384,42 +1496,34 @@ func (e *Executor) isVReplMigrationReadyToCutOver(ctx context.Context, uuid stri } csRow := r.Named().Row() if csRow == nil { - return false, nil + return false, err } count := csRow.AsInt64("cnt", 0) if count > 0 { // Still copying - return false, nil + return false, err } } return true, nil } // isVReplMigrationRunning sees if there is a VReplication migration actively running -func (e *Executor) isVReplMigrationRunning(ctx context.Context, uuid string) (isRunning bool, err error) { - named, err := e.readVReplStream(ctx, uuid) +func (e *Executor) 
isVReplMigrationRunning(ctx context.Context, uuid string) (isRunning bool, s *VReplStream, err error) { + s, err = e.readVReplStream(ctx, uuid, true) if err != nil { - return false, err - } - if len(named.Rows) == 0 { - // no vreplication entry - return false, nil + return false, s, err } - row := named.Row() - if row == nil { - // multiple workflows under same name: - return false, vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "multiple workflows found for UUID: %+v", uuid) + if s == nil { + return false, s, nil } - message := row.AsString("message", "") - if strings.Contains(strings.ToLower(message), "error") { - return false, nil + if strings.Contains(strings.ToLower(s.message), "error") { + return false, s, nil } - state := row.AsString("state", "") - switch state { + switch s.state { case binlogplayer.VReplicationInit, binlogplayer.VReplicationCopying, binlogplayer.BlpRunning: - return true, nil + return true, s, nil } - return false, nil + return false, s, nil } // reviewRunningMigrations iterates migrations in 'running' state (there really should just be one that is @@ -1439,16 +1543,22 @@ func (e *Executor) reviewRunningMigrations(ctx context.Context) (countRunnning i case schema.DDLStrategyOnline: { // We check the _vt.vreplication table - running, err := e.isVReplMigrationRunning(ctx, uuid) + running, s, err := e.isVReplMigrationRunning(ctx, uuid) if err != nil { return countRunnning, runningNotByThisProcess, err } if running { _ = e.updateMigrationTimestamp(ctx, "liveness_timestamp", uuid) + isReady, err := e.isVReplMigrationReadyToCutOver(ctx, s) + if err != nil { + return countRunnning, runningNotByThisProcess, err + } + if isReady { + if err := e.cutOverVReplMigration(ctx, s); err != nil { + return countRunnning, runningNotByThisProcess, err + } + } } - - isReady, err := e.isVReplMigrationReadyToCutOver(ctx, uuid) - fmt.Printf("========== isVReplMigrationReadyToCutOver: %v, %v, %v\n", uuid, isReady, err) } case schema.DDLStrategyPTOSC: { @@ -1826,6 +1936,7 @@ 
func (e *Executor) OnSchemaMigrationStatus(ctx context.Context, uuidParam, statu } // VExec is called by a VExec invocation +// Implements vitess.io/vitess/go/vt/vttablet/vexec.Executor interface func (e *Executor) VExec(ctx context.Context, vx *vexec.TabletVExec) (qr *querypb.QueryResult, err error) { response := func(result *sqltypes.Result, err error) (*querypb.QueryResult, error) { if err != nil { From 3ba8845ca6547cf7562fbc1e1758bd684d84a9d7 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 3 Feb 2021 14:03:04 +0200 Subject: [PATCH 26/72] WithDDL: force apply the schema on first use Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/withddl/withddl.go | 47 +++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/go/vt/withddl/withddl.go b/go/vt/withddl/withddl.go index 92526d5055d..6bda404713b 100644 --- a/go/vt/withddl/withddl.go +++ b/go/vt/withddl/withddl.go @@ -22,6 +22,7 @@ package withddl import ( "context" "fmt" + "sync" "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/sqltypes" @@ -36,6 +37,8 @@ import ( // to the desired state and retry. type WithDDL struct { ddls []string + + initApply sync.Once } // New creates a new WithDDL. @@ -45,6 +48,29 @@ func New(ddls []string) *WithDDL { } } +// applyDDLs applies DDLs and ignores any schema error +func (wd *WithDDL) applyDDLs(ctx context.Context, f interface{}) error { + exec, err := wd.unify(ctx, f) + if err != nil { + return err + } + + log.Infof("Updating schema") + for _, applyQuery := range wd.ddls { + _, merr := exec(applyQuery) + if merr == nil { + continue + } + if mysql.IsSchemaApplyError(merr) { + continue + } + log.Warningf("DDL apply %v failed: %v", applyQuery, merr) + // Return the original error. + return err + } + return nil +} + // Exec executes the query using the supplied function. // If there are any schema errors, it applies the DDLs and retries. 
// Funcs can be any of these types: @@ -57,6 +83,14 @@ func (wd *WithDDL) Exec(ctx context.Context, query string, f interface{}) (*sqlt if err != nil { return nil, err } + + // On the first time this ever gets called, just go ahead and brute force the schema. + // this ensures even "soft" changes, like adding an index, are applied. + wd.initApply.Do(func() { + wd.applyDDLs(ctx, f) + }) + + // Attempt to run queries: qr, err := exec(query) if err == nil { return qr, nil @@ -65,19 +99,12 @@ func (wd *WithDDL) Exec(ctx context.Context, query string, f interface{}) (*sqlt return nil, err } + // Got here? Means we hit a schema error log.Infof("Updating schema for %v and retrying: %v", sqlparser.TruncateForUI(err.Error()), err) - for _, applyQuery := range wd.ddls { - _, merr := exec(applyQuery) - if merr == nil { - continue - } - if mysql.IsSchemaApplyError(merr) { - continue - } - log.Warningf("DDL apply %v failed: %v", applyQuery, merr) - // Return the original error. + if err := wd.applyDDLs(ctx, f); err != nil { return nil, err } + // Try the query again return exec(query) } From 6ce7564fde6cf13d08c6c3c139d7cf303ffde338 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 3 Feb 2021 14:08:03 +0200 Subject: [PATCH 27/72] smaller index Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/binlog/binlogplayer/binlog_player.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/vt/binlog/binlogplayer/binlog_player.go b/go/vt/binlog/binlogplayer/binlog_player.go index 2282a7c0c4c..8b19df408f7 100644 --- a/go/vt/binlog/binlogplayer/binlog_player.go +++ b/go/vt/binlog/binlogplayer/binlog_player.go @@ -551,7 +551,7 @@ func CreateVReplicationTable() []string { var AlterVReplicationTable = []string{ "ALTER TABLE _vt.vreplication ADD COLUMN db_name VARBINARY(255) NOT NULL", "ALTER TABLE _vt.vreplication MODIFY source BLOB NOT NULL", - "ALTER TABLE _vt.vreplication ADD KEY 
workflow_idx (workflow(128))", + "ALTER TABLE _vt.vreplication ADD KEY workflow_idx (workflow(64))", } // VRSettings contains the settings of a vreplication table. From b770b0b05c8a5fee7f618bca3ec1c8b2dbfe13ba Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 3 Feb 2021 14:18:11 +0200 Subject: [PATCH 28/72] reuse exec function Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/withddl/withddl.go | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/go/vt/withddl/withddl.go b/go/vt/withddl/withddl.go index 6bda404713b..15095a94463 100644 --- a/go/vt/withddl/withddl.go +++ b/go/vt/withddl/withddl.go @@ -49,22 +49,17 @@ func New(ddls []string) *WithDDL { } // applyDDLs applies DDLs and ignores any schema error -func (wd *WithDDL) applyDDLs(ctx context.Context, f interface{}) error { - exec, err := wd.unify(ctx, f) - if err != nil { - return err - } - +func (wd *WithDDL) applyDDLs(ctx context.Context, exec func(query string) (*sqltypes.Result, error)) error { log.Infof("Updating schema") for _, applyQuery := range wd.ddls { - _, merr := exec(applyQuery) - if merr == nil { + _, err := exec(applyQuery) + if err == nil { continue } - if mysql.IsSchemaApplyError(merr) { + if mysql.IsSchemaApplyError(err) { continue } - log.Warningf("DDL apply %v failed: %v", applyQuery, merr) + log.Warningf("DDL apply %v failed: %v", applyQuery, err) // Return the original error. return err } @@ -87,7 +82,7 @@ func (wd *WithDDL) Exec(ctx context.Context, query string, f interface{}) (*sqlt // On the first time this ever gets called, just go ahead and brute force the schema. // this ensures even "soft" changes, like adding an index, are applied. wd.initApply.Do(func() { - wd.applyDDLs(ctx, f) + wd.applyDDLs(ctx, exec) }) // Attempt to run queries: @@ -101,7 +96,7 @@ func (wd *WithDDL) Exec(ctx context.Context, query string, f interface{}) (*sqlt // Got here? 
Means we hit a schema error log.Infof("Updating schema for %v and retrying: %v", sqlparser.TruncateForUI(err.Error()), err) - if err := wd.applyDDLs(ctx, f); err != nil { + if err := wd.applyDDLs(ctx, exec); err != nil { return nil, err } // Try the query again From 50e2468129580a67745176cddc052a53345edfb4 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 3 Feb 2021 14:18:30 +0200 Subject: [PATCH 29/72] cleanup Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/schema.go | 58 ------------------------------ go/vt/vttablet/onlineddl/vrepl.go | 18 ---------- 2 files changed, 76 deletions(-) diff --git a/go/vt/vttablet/onlineddl/schema.go b/go/vt/vttablet/onlineddl/schema.go index 5424acf5f6d..7bed9f12d1f 100644 --- a/go/vt/vttablet/onlineddl/schema.go +++ b/go/vt/vttablet/onlineddl/schema.go @@ -240,64 +240,6 @@ const ( WHERE vrepl_id=%a ` sqlSwapTables = "RENAME TABLE `%a` TO `%a`, `%a` TO `%a`, `%a` TO `%a`" - // TODO(shlomi): consider removing: - // sqlGetUniqueKeys = ` - // SELECT - // COLUMNS.TABLE_SCHEMA, - // COLUMNS.TABLE_NAME, - // COLUMNS.COLUMN_NAME, - // UNIQUES.INDEX_NAME, - // UNIQUES.COLUMN_NAMES, - // UNIQUES.COUNT_COLUMN_IN_INDEX, - // COLUMNS.DATA_TYPE, - // COLUMNS.CHARACTER_SET_NAME, - // LOCATE('auto_increment', EXTRA) > 0 as is_auto_increment, - // has_nullable - // FROM INFORMATION_SCHEMA.COLUMNS INNER JOIN ( - // SELECT - // TABLE_SCHEMA, - // TABLE_NAME, - // INDEX_NAME, - // COUNT(*) AS COUNT_COLUMN_IN_INDEX, - // GROUP_CONCAT(COLUMN_NAME ORDER BY SEQ_IN_INDEX ASC) AS COLUMN_NAMES, - // SUBSTRING_INDEX(GROUP_CONCAT(COLUMN_NAME ORDER BY SEQ_IN_INDEX ASC), ',', 1) AS FIRST_COLUMN_NAME, - // SUM(NULLABLE='YES') > 0 AS has_nullable - // FROM INFORMATION_SCHEMA.STATISTICS - // WHERE - // NON_UNIQUE=0 - // AND TABLE_SCHEMA = %a - // AND TABLE_NAME = %a - // GROUP BY TABLE_SCHEMA, TABLE_NAME, INDEX_NAME - // ) AS UNIQUES - // ON ( - // 
COLUMNS.COLUMN_NAME = UNIQUES.FIRST_COLUMN_NAME - // ) - // WHERE - // COLUMNS.TABLE_SCHEMA = %a - // AND COLUMNS.TABLE_NAME = %a - // ORDER BY - // COLUMNS.TABLE_SCHEMA, COLUMNS.TABLE_NAME, - // CASE UNIQUES.INDEX_NAME - // WHEN 'PRIMARY' THEN 0 - // ELSE 1 - // END, - // CASE has_nullable - // WHEN 0 THEN 0 - // ELSE 1 - // END, - // CASE IFNULL(CHARACTER_SET_NAME, '') - // WHEN '' THEN 0 - // ELSE 1 - // END, - // CASE DATA_TYPE - // WHEN 'tinyint' THEN 0 - // WHEN 'smallint' THEN 1 - // WHEN 'int' THEN 2 - // WHEN 'bigint' THEN 3 - // ELSE 100 - // END, - // COUNT_COLUMN_IN_INDEX - // ` ) const ( diff --git a/go/vt/vttablet/onlineddl/vrepl.go b/go/vt/vttablet/onlineddl/vrepl.go index 9a7d9fe04c4..b0a275eecaa 100644 --- a/go/vt/vttablet/onlineddl/vrepl.go +++ b/go/vt/vttablet/onlineddl/vrepl.go @@ -265,24 +265,6 @@ func (v *VRepl) analyzeTables(ctx context.Context, conn *dbconnpool.DBConnection return fmt.Errorf("Found no shared PRIMARY KEY columns between `%s` and `%s`", v.sourceTable, v.targetTable) } - // // unique keys: - // sourceUniqueKeys, err := v.getCandidateUniqueKeys(ctx, conn, v.sourceTable) - // if err != nil { - // return err - // } - // targetUniqueKeys, err := v.getCandidateUniqueKeys(ctx, conn, v.targetTable) - // if err != nil { - // return err - // } - // sharedUniqueKeys, err := v.getSharedUniqueKeys(sourceUniqueKeys, targetUniqueKeys) - // if err != nil { - // return err - // } - // if len(sharedUniqueKeys) == 0 { - // // TODO(shlomi): need to carefully examine what happens when we extend/reduce a PRIMARY KEY - // // is a column subset OK? 
- // return fmt.Errorf("Found no shared unique keys between `%s` and `%s`", v.sourceTable, v.targetTable) - // } return nil } func (v *VRepl) generateFilterQuery(ctx context.Context) error { From 065c677f83248de9b8a7c25ab33eeddb8a19679f Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 3 Feb 2021 14:30:27 +0200 Subject: [PATCH 30/72] with_ddl: fix test Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/tabletserver/repltracker/writer_test.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/go/vt/vttablet/tabletserver/repltracker/writer_test.go b/go/vt/vttablet/tabletserver/repltracker/writer_test.go index a8b1820f8fd..a9de595926a 100644 --- a/go/vt/vttablet/tabletserver/repltracker/writer_test.go +++ b/go/vt/vttablet/tabletserver/repltracker/writer_test.go @@ -24,7 +24,6 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/mysql/fakesqldb" "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/vt/dbconfigs" @@ -49,11 +48,6 @@ func TestCreateSchema(t *testing.T) { db.OrderMatters() upsert := fmt.Sprintf("INSERT INTO %s.heartbeat (ts, tabletUid, keyspaceShard) VALUES (%d, %d, '%s') ON DUPLICATE KEY UPDATE ts=VALUES(ts), tabletUid=VALUES(tabletUid)", "_vt", now.UnixNano(), tw.tabletAlias.Uid, tw.keyspaceShard) - failInsert := fakesqldb.ExpectedExecuteFetch{ - Query: upsert, - Error: mysql.NewSQLError(mysql.ERBadDb, "", "bad db error"), - } - db.AddExpectedExecuteFetch(failInsert) db.AddExpectedQuery(fmt.Sprintf(sqlCreateSidecarDB, "_vt"), nil) db.AddExpectedQuery(fmt.Sprintf(sqlCreateHeartbeatTable, "_vt"), nil) db.AddExpectedQuery(upsert, nil) From 215678f7d2adb8a2616f879532c29cdad6988b36 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 3 Feb 2021 15:18:47 +0200 Subject: [PATCH 31/72] fix with_ddl_test Signed-off-by: Shlomi Noach 
<2607934+shlomi-noach@users.noreply.github.com> --- go/vt/withddl/withddl_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/vt/withddl/withddl_test.go b/go/vt/withddl/withddl_test.go index 1c2569a9c3c..f016f4027f9 100644 --- a/go/vt/withddl/withddl_test.go +++ b/go/vt/withddl/withddl_test.go @@ -144,7 +144,7 @@ func TestExec(t *testing.T) { "invalid sql", }, query: "insert into a values(1)", - err: "doesn't exist", + err: "error in your SQL syntax", }} withdb := connParams From ce425fd36056e540eb7e267cee00590d5b7c78d0 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 3 Feb 2021 16:14:59 +0200 Subject: [PATCH 32/72] rename Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/withddl/withddl.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go/vt/withddl/withddl.go b/go/vt/withddl/withddl.go index 15095a94463..2b966daaece 100644 --- a/go/vt/withddl/withddl.go +++ b/go/vt/withddl/withddl.go @@ -38,7 +38,7 @@ import ( type WithDDL struct { ddls []string - initApply sync.Once + applyOnce sync.Once } // New creates a new WithDDL. @@ -81,7 +81,7 @@ func (wd *WithDDL) Exec(ctx context.Context, query string, f interface{}) (*sqlt // On the first time this ever gets called, just go ahead and brute force the schema. // this ensures even "soft" changes, like adding an index, are applied. 
- wd.initApply.Do(func() { + wd.applyOnce.Do(func() { wd.applyDDLs(ctx, exec) }) From bad3a928ea39b6885f2dcf3294107c753ee3602f Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 3 Feb 2021 17:01:13 +0200 Subject: [PATCH 33/72] support withDDLs() Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/withddl/withddl.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/go/vt/withddl/withddl.go b/go/vt/withddl/withddl.go index 2b966daaece..99b4cb606f3 100644 --- a/go/vt/withddl/withddl.go +++ b/go/vt/withddl/withddl.go @@ -48,6 +48,11 @@ func New(ddls []string) *WithDDL { } } +// DDLs returns the ddl statements used by this WithDDL +func (wd *WithDDL) DDLs() []string { + return wd.ddls +} + // applyDDLs applies DDLs and ignores any schema error func (wd *WithDDL) applyDDLs(ctx context.Context, exec func(query string) (*sqltypes.Result, error)) error { log.Infof("Updating schema") From 7666035824df95ab55a0efaf27a83a5232505bb8 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 3 Feb 2021 17:01:46 +0200 Subject: [PATCH 34/72] fixed engine_test: expect withDDL DDLs Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- .../tabletmanager/vreplication/engine_test.go | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/go/vt/vttablet/tabletmanager/vreplication/engine_test.go b/go/vt/vttablet/tabletmanager/vreplication/engine_test.go index 3c945557660..daa42c31be2 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/engine_test.go +++ b/go/vt/vttablet/tabletmanager/vreplication/engine_test.go @@ -154,6 +154,9 @@ func TestEngineExec(t *testing.T) { vre.Open(context.Background()) defer vre.Close() + for _, ddl := range withDDL.DDLs() { + dbClient.ExpectRequest(ddl, &sqltypes.Result{}, nil) + } dbClient.ExpectRequest("use _vt", &sqltypes.Result{}, nil) dbClient.ExpectRequest("insert into 
_vt.vreplication values(null)", &sqltypes.Result{InsertID: 1}, nil) dbClient.ExpectRequest("select * from _vt.vreplication where id = 1", sqltypes.MakeTestResult( @@ -492,13 +495,9 @@ func TestCreateDBAndTable(t *testing.T) { expectDDLs := func() { t.Helper() - dbClient.ExpectRequest("CREATE DATABASE IF NOT EXISTS _vt", &sqltypes.Result{}, nil) - dbClient.ExpectRequest("DROP TABLE IF EXISTS _vt.blp_checkpoint", &sqltypes.Result{}, nil) - dbClient.ExpectRequestRE("CREATE TABLE IF NOT EXISTS _vt.vreplication.*", &sqltypes.Result{}, nil) - dbClient.ExpectRequestRE("ALTER TABLE _vt.vreplication ADD COLUMN db_name.*", &sqltypes.Result{}, nil) - dbClient.ExpectRequestRE("ALTER TABLE _vt.vreplication MODIFY source.*", &sqltypes.Result{}, nil) - dbClient.ExpectRequestRE("create table if not exists _vt.resharding_journal.*", &sqltypes.Result{}, nil) - dbClient.ExpectRequestRE("create table if not exists _vt.copy_state.*", &sqltypes.Result{}, nil) + for _, ddl := range withDDL.DDLs() { + dbClient.ExpectRequest(ddl, &sqltypes.Result{}, nil) + } } expectDDLs() dbClient.ExpectRequest("use _vt", &sqltypes.Result{}, nil) From f19a80c92211b939d25e88404c774f3d7994b582 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 4 Feb 2021 08:24:32 +0200 Subject: [PATCH 35/72] reverted with_ddl changes -- extracted to a different branch. This seems to be more complex than I hoped for due to multiple tests/expectations affected. 
It's a matter of time investment Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- .../tabletmanager/vreplication/engine_test.go | 14 +++--- .../tabletserver/repltracker/writer_test.go | 6 +++ go/vt/withddl/withddl.go | 47 ++++--------------- go/vt/withddl/withddl_test.go | 2 +- 4 files changed, 25 insertions(+), 44 deletions(-) diff --git a/go/vt/vttablet/tabletmanager/vreplication/engine_test.go b/go/vt/vttablet/tabletmanager/vreplication/engine_test.go index daa42c31be2..9567623a051 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/engine_test.go +++ b/go/vt/vttablet/tabletmanager/vreplication/engine_test.go @@ -154,9 +154,6 @@ func TestEngineExec(t *testing.T) { vre.Open(context.Background()) defer vre.Close() - for _, ddl := range withDDL.DDLs() { - dbClient.ExpectRequest(ddl, &sqltypes.Result{}, nil) - } dbClient.ExpectRequest("use _vt", &sqltypes.Result{}, nil) dbClient.ExpectRequest("insert into _vt.vreplication values(null)", &sqltypes.Result{InsertID: 1}, nil) dbClient.ExpectRequest("select * from _vt.vreplication where id = 1", sqltypes.MakeTestResult( @@ -495,9 +492,14 @@ func TestCreateDBAndTable(t *testing.T) { expectDDLs := func() { t.Helper() - for _, ddl := range withDDL.DDLs() { - dbClient.ExpectRequest(ddl, &sqltypes.Result{}, nil) - } + dbClient.ExpectRequest("CREATE DATABASE IF NOT EXISTS _vt", &sqltypes.Result{}, nil) + dbClient.ExpectRequest("DROP TABLE IF EXISTS _vt.blp_checkpoint", &sqltypes.Result{}, nil) + dbClient.ExpectRequestRE("CREATE TABLE IF NOT EXISTS _vt.vreplication.*", &sqltypes.Result{}, nil) + dbClient.ExpectRequestRE("ALTER TABLE _vt.vreplication ADD COLUMN db_name.*", &sqltypes.Result{}, nil) + dbClient.ExpectRequestRE("ALTER TABLE _vt.vreplication MODIFY source.*", &sqltypes.Result{}, nil) + dbClient.ExpectRequestRE("ALTER TABLE _vt.vreplication ADD KEY.*", &sqltypes.Result{}, nil) + dbClient.ExpectRequestRE("create table if not exists _vt.resharding_journal.*", &sqltypes.Result{}, nil) + 
dbClient.ExpectRequestRE("create table if not exists _vt.copy_state.*", &sqltypes.Result{}, nil) } expectDDLs() dbClient.ExpectRequest("use _vt", &sqltypes.Result{}, nil) diff --git a/go/vt/vttablet/tabletserver/repltracker/writer_test.go b/go/vt/vttablet/tabletserver/repltracker/writer_test.go index a9de595926a..a8b1820f8fd 100644 --- a/go/vt/vttablet/tabletserver/repltracker/writer_test.go +++ b/go/vt/vttablet/tabletserver/repltracker/writer_test.go @@ -24,6 +24,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/mysql/fakesqldb" "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/vt/dbconfigs" @@ -48,6 +49,11 @@ func TestCreateSchema(t *testing.T) { db.OrderMatters() upsert := fmt.Sprintf("INSERT INTO %s.heartbeat (ts, tabletUid, keyspaceShard) VALUES (%d, %d, '%s') ON DUPLICATE KEY UPDATE ts=VALUES(ts), tabletUid=VALUES(tabletUid)", "_vt", now.UnixNano(), tw.tabletAlias.Uid, tw.keyspaceShard) + failInsert := fakesqldb.ExpectedExecuteFetch{ + Query: upsert, + Error: mysql.NewSQLError(mysql.ERBadDb, "", "bad db error"), + } + db.AddExpectedExecuteFetch(failInsert) db.AddExpectedQuery(fmt.Sprintf(sqlCreateSidecarDB, "_vt"), nil) db.AddExpectedQuery(fmt.Sprintf(sqlCreateHeartbeatTable, "_vt"), nil) db.AddExpectedQuery(upsert, nil) diff --git a/go/vt/withddl/withddl.go b/go/vt/withddl/withddl.go index 99b4cb606f3..92526d5055d 100644 --- a/go/vt/withddl/withddl.go +++ b/go/vt/withddl/withddl.go @@ -22,7 +22,6 @@ package withddl import ( "context" "fmt" - "sync" "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/sqltypes" @@ -37,8 +36,6 @@ import ( // to the desired state and retry. type WithDDL struct { ddls []string - - applyOnce sync.Once } // New creates a new WithDDL. 
@@ -48,29 +45,6 @@ func New(ddls []string) *WithDDL { } } -// DDLs returns the ddl statements used by this WithDDL -func (wd *WithDDL) DDLs() []string { - return wd.ddls -} - -// applyDDLs applies DDLs and ignores any schema error -func (wd *WithDDL) applyDDLs(ctx context.Context, exec func(query string) (*sqltypes.Result, error)) error { - log.Infof("Updating schema") - for _, applyQuery := range wd.ddls { - _, err := exec(applyQuery) - if err == nil { - continue - } - if mysql.IsSchemaApplyError(err) { - continue - } - log.Warningf("DDL apply %v failed: %v", applyQuery, err) - // Return the original error. - return err - } - return nil -} - // Exec executes the query using the supplied function. // If there are any schema errors, it applies the DDLs and retries. // Funcs can be any of these types: @@ -83,14 +57,6 @@ func (wd *WithDDL) Exec(ctx context.Context, query string, f interface{}) (*sqlt if err != nil { return nil, err } - - // On the first time this ever gets called, just go ahead and brute force the schema. - // this ensures even "soft" changes, like adding an index, are applied. - wd.applyOnce.Do(func() { - wd.applyDDLs(ctx, exec) - }) - - // Attempt to run queries: qr, err := exec(query) if err == nil { return qr, nil @@ -99,12 +65,19 @@ func (wd *WithDDL) Exec(ctx context.Context, query string, f interface{}) (*sqlt return nil, err } - // Got here? Means we hit a schema error log.Infof("Updating schema for %v and retrying: %v", sqlparser.TruncateForUI(err.Error()), err) - if err := wd.applyDDLs(ctx, exec); err != nil { + for _, applyQuery := range wd.ddls { + _, merr := exec(applyQuery) + if merr == nil { + continue + } + if mysql.IsSchemaApplyError(merr) { + continue + } + log.Warningf("DDL apply %v failed: %v", applyQuery, merr) + // Return the original error. 
return nil, err } - // Try the query again return exec(query) } diff --git a/go/vt/withddl/withddl_test.go b/go/vt/withddl/withddl_test.go index f016f4027f9..1c2569a9c3c 100644 --- a/go/vt/withddl/withddl_test.go +++ b/go/vt/withddl/withddl_test.go @@ -144,7 +144,7 @@ func TestExec(t *testing.T) { "invalid sql", }, query: "insert into a values(1)", - err: "error in your SQL syntax", + err: "doesn't exist", }} withdb := connParams From 58ba93b045ad85add4efcaf931fa874a6d9669cd Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 4 Feb 2021 08:39:37 +0200 Subject: [PATCH 36/72] tabs to spaces Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/tabletmanager/vreplication/engine.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/vt/vttablet/tabletmanager/vreplication/engine.go b/go/vt/vttablet/tabletmanager/vreplication/engine.go index 086dbecb1f3..d301915f4ca 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/engine.go +++ b/go/vt/vttablet/tabletmanager/vreplication/engine.go @@ -59,7 +59,7 @@ const ( vrepl_id int, table_name varbinary(128), lastpk varbinary(2000), - primary key (vrepl_id, table_name))` + primary key (vrepl_id, table_name))` ) var withDDL *withddl.WithDDL From e8548ab9fac00acbe251067e2ff5cb414c03fbf4 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 4 Feb 2021 11:34:06 +0200 Subject: [PATCH 37/72] minor refactor in VRepl, bls is a member Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/vrepl.go | 37 ++++++++++++++++++------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/go/vt/vttablet/onlineddl/vrepl.go b/go/vt/vttablet/onlineddl/vrepl.go index b0a275eecaa..1d6f2fc10cb 100644 --- a/go/vt/vttablet/onlineddl/vrepl.go +++ b/go/vt/vttablet/onlineddl/vrepl.go @@ -68,6 +68,7 @@ type VRepl struct { 
sharedColumnsMap map[string]string filterQuery string + bls *binlogdatapb.BinlogSource parser *vrepl.AlterTableParser } @@ -267,12 +268,14 @@ func (v *VRepl) analyzeTables(ctx context.Context, conn *dbconnpool.DBConnection return nil } + +// generateFilterQuery creates a SELECT query used by vreplication as a filter. It SELECTs all +// non-generated columns between source & target tables, and takes care of column renames. func (v *VRepl) generateFilterQuery(ctx context.Context) error { if v.sourceSharedColumns.Len() == 0 { return fmt.Errorf("Empty column list") } var sb strings.Builder - sb.WriteString("select ") for i, name := range v.sourceSharedColumns.Names() { targetName := v.sharedColumnsMap[name] @@ -285,12 +288,27 @@ func (v *VRepl) generateFilterQuery(ctx context.Context) error { sb.WriteString(escapeName(targetName)) } sb.WriteString(" from ") - sb.WriteString(v.sourceTable) + sb.WriteString(escapeName(v.sourceTable)) v.filterQuery = sb.String() return nil } +func (v *VRepl) analyzeBinlogSource(ctx context.Context) { + bls := &binlogdatapb.BinlogSource{ + Keyspace: v.keyspace, + Shard: v.shard, + Filter: &binlogdatapb.Filter{}, + StopAfterCopy: false, + } + rule := &binlogdatapb.Rule{ + Match: v.targetTable, + Filter: v.filterQuery, + } + bls.Filter.Rules = append(bls.Filter.Rules, rule) + v.bls = bls +} + func (v *VRepl) analyze(ctx context.Context, conn *dbconnpool.DBConnection, alterOptions string) error { if err := v.analyzeAlter(ctx, alterOptions); err != nil { return err @@ -301,25 +319,14 @@ func (v *VRepl) analyze(ctx context.Context, conn *dbconnpool.DBConnection, alte if err := v.generateFilterQuery(ctx); err != nil { return err } + v.analyzeBinlogSource(ctx) return nil } // generateInsertStatement generates the INSERT INTO _vt.replication stataement that creates the vreplication workflow func (v *VRepl) generateInsertStatement(ctx context.Context) (string, error) { ig := vreplication.NewInsertGenerator(binlogplayer.BlpStopped, v.dbName) - - bls 
:= &binlogdatapb.BinlogSource{ - Keyspace: v.keyspace, - Shard: v.shard, - Filter: &binlogdatapb.Filter{}, - StopAfterCopy: false, - } - rule := &binlogdatapb.Rule{ - Match: v.targetTable, - Filter: v.filterQuery, - } - bls.Filter.Rules = append(bls.Filter.Rules, rule) - ig.AddRow(v.workflow, bls, "", "", "MASTER") + ig.AddRow(v.workflow, v.bls, "", "", "MASTER") return ig.String(), nil } From 2c44de6ee92f557f41fd767134f09108eae983b1 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 4 Feb 2021 13:42:29 +0200 Subject: [PATCH 38/72] handling vreplication migrations executed from a previous tablet Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 114 +++++++++++++++++---------- 1 file changed, 72 insertions(+), 42 deletions(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 4fc79e43b32..5a0c07b930e 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -134,11 +134,13 @@ type Executor struct { shard string dbName string - initMutex sync.Mutex - migrationMutex sync.Mutex - migrationRunning int64 - lastMigrationUUID string - tickReentranceFlag int64 + initMutex sync.Mutex + migrationMutex sync.Mutex + vreplMigrationRunning int64 + ghostMigrationRunning int64 + ptoscMigrationRunning int64 + lastMigrationUUID string + tickReentranceFlag int64 ticks *timer.Timer isOpen bool @@ -269,6 +271,20 @@ func (e *Executor) triggerNextCheckInterval() { e.ticks.TriggerAfter(migrationNextCheckInterval) } +// isAnyMigrationRunning sees if there's any migration running right now +func (e *Executor) isAnyMigrationRunning() bool { + if atomic.LoadInt64(&e.vreplMigrationRunning) > 0 { + return true + } + if atomic.LoadInt64(&e.ghostMigrationRunning) > 0 { + return true + } + if atomic.LoadInt64(&e.ptoscMigrationRunning) > 0 { + return true + } + return false +} + func (e *Executor) 
ghostPanicFlagFileName(uuid string) string { return path.Join(os.TempDir(), fmt.Sprintf("ghost.%s.panic.flag", uuid)) } @@ -418,7 +434,7 @@ func (e *Executor) executeDirectly(ctx context.Context, onlineDDL *schema.Online } // terminateVReplMigration stops vreplication, then removes the _vt.vreplication entry for the given migration -func (e *Executor) terminateVReplMigration(ctx context.Context, onlineDDL *schema.OnlineDDL) error { +func (e *Executor) terminateVReplMigration(ctx context.Context, uuid string) error { tmClient := tmclient.NewTabletManagerClient() tablet, err := e.ts.GetTablet(ctx, e.tabletAlias) if err != nil { @@ -427,7 +443,7 @@ func (e *Executor) terminateVReplMigration(ctx context.Context, onlineDDL *schem { query, err := sqlparser.ParseAndBind(sqlStopVReplStream, sqltypes.StringBindVariable(e.dbName), - sqltypes.StringBindVariable(onlineDDL.UUID), + sqltypes.StringBindVariable(uuid), ) if err != nil { return err @@ -438,7 +454,7 @@ func (e *Executor) terminateVReplMigration(ctx context.Context, onlineDDL *schem { query, err := sqlparser.ParseAndBind(sqlDeleteVReplStream, sqltypes.StringBindVariable(e.dbName), - sqltypes.StringBindVariable(onlineDDL.UUID), + sqltypes.StringBindVariable(uuid), ) if err != nil { return err @@ -550,8 +566,6 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream) er // Tables are now swapped! 
Migration is successful _ = e.onSchemaMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusComplete, false, progressPctFull) - atomic.StoreInt64(&e.migrationRunning, 0) - return nil } @@ -560,7 +574,10 @@ func (e *Executor) ExecuteWithVReplication(ctx context.Context, onlineDDL *schem e.migrationMutex.Lock() defer e.migrationMutex.Unlock() - if atomic.LoadInt64(&e.migrationRunning) > 0 { + // make sure there's no vreplication workflow running under same name + _ = e.terminateVReplMigration(ctx, onlineDDL.UUID) + + if e.isAnyMigrationRunning() { return ErrExecutorMigrationAlreadyRunning } @@ -574,7 +591,7 @@ func (e *Executor) ExecuteWithVReplication(ctx context.Context, onlineDDL *schem } defer conn.Close() - atomic.StoreInt64(&e.migrationRunning, 1) + atomic.StoreInt64(&e.vreplMigrationRunning, 1) e.lastMigrationUUID = onlineDDL.UUID if err := e.onSchemaMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusRunning, false, progressPctStarted); err != nil { return err @@ -638,7 +655,7 @@ func (e *Executor) ExecuteWithGhost(ctx context.Context, onlineDDL *schema.Onlin e.migrationMutex.Lock() defer e.migrationMutex.Unlock() - if atomic.LoadInt64(&e.migrationRunning) > 0 { + if e.isAnyMigrationRunning() { return ErrExecutorMigrationAlreadyRunning } @@ -784,11 +801,11 @@ curl -s 'http://localhost:%d/schema-migration/report-status?uuid=%s&status=%s&dr return err } - atomic.StoreInt64(&e.migrationRunning, 1) + atomic.StoreInt64(&e.ghostMigrationRunning, 1) e.lastMigrationUUID = onlineDDL.UUID go func() error { - defer atomic.StoreInt64(&e.migrationRunning, 0) + defer atomic.StoreInt64(&e.ghostMigrationRunning, 0) defer e.dropOnlineDDLUser(ctx) defer e.gcArtifacts(ctx) @@ -826,7 +843,7 @@ func (e *Executor) ExecuteWithPTOSC(ctx context.Context, onlineDDL *schema.Onlin e.migrationMutex.Lock() defer e.migrationMutex.Unlock() - if atomic.LoadInt64(&e.migrationRunning) > 0 { + if e.isAnyMigrationRunning() { return ErrExecutorMigrationAlreadyRunning } @@ -1007,11 
+1024,11 @@ export MYSQL_PWD return err } - atomic.StoreInt64(&e.migrationRunning, 1) + atomic.StoreInt64(&e.ptoscMigrationRunning, 1) e.lastMigrationUUID = onlineDDL.UUID go func() error { - defer atomic.StoreInt64(&e.migrationRunning, 0) + defer atomic.StoreInt64(&e.ptoscMigrationRunning, 0) defer e.dropOnlineDDLUser(ctx) defer e.gcArtifacts(ctx) @@ -1081,21 +1098,14 @@ func (e *Executor) readMigration(ctx context.Context, uuid string) (onlineDDL *s // terminateMigration attempts to interrupt and hard-stop a running migration func (e *Executor) terminateMigration(ctx context.Context, onlineDDL *schema.OnlineDDL, lastMigrationUUID string) (foundRunning bool, err error) { - if atomic.LoadInt64(&e.migrationRunning) > 0 { - // double check: is the running migration the very same one we wish to cancel? - if onlineDDL.UUID == lastMigrationUUID { - // assuming all goes well in next steps, we can already report that there has indeed been a migration - foundRunning = true - } - } switch onlineDDL.Strategy { case schema.DDLStrategyOnline: - if err := e.terminateVReplMigration(ctx, onlineDDL); err != nil { + // migration could have started by a different tablet. 
We need to actively verify if it is running + foundRunning, _, _ = e.isVReplMigrationRunning(ctx, onlineDDL.UUID) + if err := e.terminateVReplMigration(ctx, onlineDDL.UUID); err != nil { return foundRunning, fmt.Errorf("Error cancelling migration, vreplication exec error: %+v", err) } _ = e.updateMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusFailed) - atomic.StoreInt64(&e.migrationRunning, 0) - case schema.DDLStrategyPTOSC: // see if pt-osc is running (could have been executed by this vttablet or one that crashed in the past) if running, pid, _ := e.isPTOSCMigrationRunning(ctx, onlineDDL.UUID); running { @@ -1116,6 +1126,13 @@ func (e *Executor) terminateMigration(ctx context.Context, onlineDDL *schema.Onl } } case schema.DDLStrategyGhost: + if atomic.LoadInt64(&e.ghostMigrationRunning) > 0 { + // double check: is the running migration the very same one we wish to cancel? + if onlineDDL.UUID == lastMigrationUUID { + // assuming all goes well in next steps, we can already report that there has indeed been a migration + foundRunning = true + } + } // gh-ost migrations are easy to kill: just touch their specific panic flag files. We trust // gh-ost to terminate. No need to KILL it. And there's no trigger cleanup. 
if err := e.createGhostPanicFlagFile(onlineDDL.UUID); err != nil { @@ -1205,7 +1222,7 @@ func (e *Executor) scheduleNextMigration(ctx context.Context) error { e.migrationMutex.Lock() defer e.migrationMutex.Unlock() - if atomic.LoadInt64(&e.migrationRunning) > 0 { + if e.isAnyMigrationRunning() { return ErrExecutorMigrationAlreadyRunning } @@ -1319,7 +1336,7 @@ func (e *Executor) runNextMigration(ctx context.Context) error { e.migrationMutex.Lock() defer e.migrationMutex.Unlock() - if atomic.LoadInt64(&e.migrationRunning) > 0 { + if e.isAnyMigrationRunning() { return ErrExecutorMigrationAlreadyRunning } @@ -1526,16 +1543,18 @@ func (e *Executor) isVReplMigrationRunning(ctx context.Context, uuid string) (is return false, s, nil } -// reviewRunningMigrations iterates migrations in 'running' state (there really should just be one that is -// actually running). -func (e *Executor) reviewRunningMigrations(ctx context.Context) (countRunnning int, runningNotByThisProcess []string, err error) { +// reviewRunningMigrations iterates migrations in 'running' state. Normally there's only one running, which was +// spawned by this tablet; but vreplication migrations could also resume from failure. 
+func (e *Executor) reviewRunningMigrations(ctx context.Context) (countRunnning int, cancellable []string, err error) { e.migrationMutex.Lock() defer e.migrationMutex.Unlock() r, err := e.execQuery(ctx, sqlSelectRunningMigrations) if err != nil { - return countRunnning, runningNotByThisProcess, err + return countRunnning, cancellable, err } + // we identify running vreplication migrations in this function + atomic.StoreInt64(&e.vreplMigrationRunning, 0) for _, row := range r.Named().Rows { uuid := row["migration_uuid"].ToString() strategy := schema.DDLStrategy(row["strategy"].ToString()) @@ -1545,17 +1564,21 @@ func (e *Executor) reviewRunningMigrations(ctx context.Context) (countRunnning i // We check the _vt.vreplication table running, s, err := e.isVReplMigrationRunning(ctx, uuid) if err != nil { - return countRunnning, runningNotByThisProcess, err + return countRunnning, cancellable, err } if running { + // This VRepl migration may have started from outside this tablet, so + // vreplMigrationRunning could be zero. Whatever the case is, we're under + // migrationMutex lock and it's now safe to ensure vreplMigrationRunning is 1 + atomic.StoreInt64(&e.vreplMigrationRunning, 1) _ = e.updateMigrationTimestamp(ctx, "liveness_timestamp", uuid) isReady, err := e.isVReplMigrationReadyToCutOver(ctx, s) if err != nil { - return countRunnning, runningNotByThisProcess, err + return countRunnning, cancellable, err } if isReady { if err := e.cutOverVReplMigration(ctx, s); err != nil { - return countRunnning, runningNotByThisProcess, err + return countRunnning, cancellable, err } } } @@ -1566,11 +1589,18 @@ func (e *Executor) reviewRunningMigrations(ctx context.Context) (countRunnning i // if the process is alive, we update the `liveness_timestamp` for this migration. 
running, _, err := e.isPTOSCMigrationRunning(ctx, uuid) if err != nil { - return countRunnning, runningNotByThisProcess, err + return countRunnning, cancellable, err } if running { _ = e.updateMigrationTimestamp(ctx, "liveness_timestamp", uuid) } + if uuid != e.lastMigrationUUID { + // This executor can only spawn one migration at a time. And that + // migration is identified by e.lastMigrationUUID. + // If we find a _running_ migration that does not have this UUID, it _must_ + // mean the migration was started by a former vttablet (ie vttablet crashed and restarted) + cancellable = append(cancellable, uuid) + } } } countRunnning++ @@ -1580,10 +1610,10 @@ func (e *Executor) reviewRunningMigrations(ctx context.Context) (countRunnning i // migration is identified by e.lastMigrationUUID. // If we find a _running_ migration that does not have this UUID, it _must_ // mean the migration was started by a former vttablet (ie vttablet crashed and restarted) - runningNotByThisProcess = append(runningNotByThisProcess, uuid) + cancellable = append(cancellable, uuid) } } - return countRunnning, runningNotByThisProcess, err + return countRunnning, cancellable, err } // reviewStaleMigrations marks as 'failed' migrations whose status is 'running' but which have @@ -1728,9 +1758,9 @@ func (e *Executor) onMigrationCheckTick() { if err := e.runNextMigration(ctx); err != nil { log.Error(err) } - if _, runningNotByThisProcess, err := e.reviewRunningMigrations(ctx); err != nil { + if _, cancellable, err := e.reviewRunningMigrations(ctx); err != nil { log.Error(err) - } else if err := e.cancelMigrations(ctx, runningNotByThisProcess); err != nil { + } else if err := e.cancelMigrations(ctx, cancellable); err != nil { log.Error(err) } if err := e.reviewStaleMigrations(ctx); err != nil { From e0dbc6d4fd3281d56ae0ae313b405d470abd6834 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 4 Feb 2021 14:01:57 +0200 Subject: [PATCH 39/72] renaming 
test 26 as onlineddl_ghost Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- .github/workflows/cluster_endtoend_26.yml | 6 +++--- .../onlineddl_test.go | 0 test/config.json | 13 +++++++++++-- 3 files changed, 14 insertions(+), 5 deletions(-) rename go/test/endtoend/{onlineddl => onlineddl_ghost}/onlineddl_test.go (100%) diff --git a/.github/workflows/cluster_endtoend_26.yml b/.github/workflows/cluster_endtoend_26.yml index a4b33e82742..ab819956ddf 100644 --- a/.github/workflows/cluster_endtoend_26.yml +++ b/.github/workflows/cluster_endtoend_26.yml @@ -1,9 +1,9 @@ -name: Cluster (26) +name: Cluster (onlineddl_ghost) on: [push, pull_request] jobs: build: - name: Run endtoend tests on Cluster (26) + name: Run endtoend tests on Cluster (onlineddl_ghost) runs-on: ubuntu-latest steps: @@ -35,4 +35,4 @@ jobs: timeout-minutes: 30 run: | source build.env - eatmydata -- go run test.go -docker=false -print-log -follow -shard 26 + eatmydata -- go run test.go -docker=false -print-log -follow -shard onlineddl_ghost diff --git a/go/test/endtoend/onlineddl/onlineddl_test.go b/go/test/endtoend/onlineddl_ghost/onlineddl_test.go similarity index 100% rename from go/test/endtoend/onlineddl/onlineddl_test.go rename to go/test/endtoend/onlineddl_ghost/onlineddl_test.go diff --git a/test/config.json b/test/config.json index c740f048fca..1538e197253 100644 --- a/test/config.json +++ b/test/config.json @@ -266,12 +266,21 @@ "site_test" ] }, - "onlineddl": { + "onlineddl_ghost": { "File": "unused.go", "Args": ["vitess.io/vitess/go/test/endtoend/onlineddl"], "Command": [], "Manual": false, - "Shard": "26", + "Shard": "onlineddl_ghost", + "RetryMax": 0, + "Tags": [] + }, + "onlineddl_vrepl": { + "File": "unused.go", + "Args": ["vitess.io/vitess/go/test/endtoend/onlineddl"], + "Command": [], + "Manual": false, + "Shard": "onlineddl_vrepl", "RetryMax": 0, "Tags": [] }, From 245f900166af694f4817d890b70f7fbe21a2be4d Mon Sep 17 00:00:00 2001 From: Shlomi Noach 
<2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 4 Feb 2021 14:03:20 +0200 Subject: [PATCH 40/72] renamed workflow Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- ...uster_endtoend_26.yml => cluster_endtoend_onlineddl_ghost.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{cluster_endtoend_26.yml => cluster_endtoend_onlineddl_ghost.yml} (100%) diff --git a/.github/workflows/cluster_endtoend_26.yml b/.github/workflows/cluster_endtoend_onlineddl_ghost.yml similarity index 100% rename from .github/workflows/cluster_endtoend_26.yml rename to .github/workflows/cluster_endtoend_onlineddl_ghost.yml From a7b277c6eb2867f3839b75d04b2bfae771af1216 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 4 Feb 2021 14:05:03 +0200 Subject: [PATCH 41/72] fixed path for onlineddl_ghost test Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- test/config.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/config.json b/test/config.json index 1538e197253..b143bb21ee4 100644 --- a/test/config.json +++ b/test/config.json @@ -268,7 +268,7 @@ }, "onlineddl_ghost": { "File": "unused.go", - "Args": ["vitess.io/vitess/go/test/endtoend/onlineddl"], + "Args": ["vitess.io/vitess/go/test/endtoend/onlineddl_ghost"], "Command": [], "Manual": false, "Shard": "onlineddl_ghost", @@ -277,7 +277,7 @@ }, "onlineddl_vrepl": { "File": "unused.go", - "Args": ["vitess.io/vitess/go/test/endtoend/onlineddl"], + "Args": ["vitess.io/vitess/go/test/endtoend/onlineddl_vrepl"], "Command": [], "Manual": false, "Shard": "onlineddl_vrepl", From 30fe3064b991d1a2d9cbab78dd4b225d1ba08cd0 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 4 Feb 2021 14:05:29 +0200 Subject: [PATCH 42/72] adding endtoend onlineddl_vrepl test workflow Signed-off-by: Shlomi Noach 
<2607934+shlomi-noach@users.noreply.github.com> --- .../cluster_endtoend_onlineddl_vrepl.yml | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 .github/workflows/cluster_endtoend_onlineddl_vrepl.yml diff --git a/.github/workflows/cluster_endtoend_onlineddl_vrepl.yml b/.github/workflows/cluster_endtoend_onlineddl_vrepl.yml new file mode 100644 index 00000000000..5cbe31e6161 --- /dev/null +++ b/.github/workflows/cluster_endtoend_onlineddl_vrepl.yml @@ -0,0 +1,38 @@ +name: Cluster (onlineddl_vrepl) +on: [push, pull_request] +jobs: + + build: + name: Run endtoend tests on Cluster (onlineddl_vrepl) + runs-on: ubuntu-latest + + steps: + - name: Set up Go + uses: actions/setup-go@v1 + with: + go-version: 1.15 + + - name: Check out code + uses: actions/checkout@v2 + + - name: Get dependencies + run: | + sudo apt-get update + sudo apt-get install -y mysql-server mysql-client make unzip g++ etcd curl git wget eatmydata + sudo service mysql stop + sudo service etcd stop + sudo ln -s /etc/apparmor.d/usr.sbin.mysqld /etc/apparmor.d/disable/ + sudo apparmor_parser -R /etc/apparmor.d/usr.sbin.mysqld + go mod download + + wget https://repo.percona.com/apt/percona-release_latest.$(lsb_release -sc)_all.deb + sudo apt-get install -y gnupg2 + sudo dpkg -i percona-release_latest.$(lsb_release -sc)_all.deb + sudo apt-get update + sudo apt-get install percona-xtrabackup-24 + + - name: Run cluster endtoend test + timeout-minutes: 30 + run: | + source build.env + eatmydata -- go run test.go -docker=false -print-log -follow -shard onlineddl_vrepl From 0ff54232b7d7df6aa746b94f1465b048cbc61fe9 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 4 Feb 2021 14:15:21 +0200 Subject: [PATCH 43/72] renamed test file Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- .../{onlineddl_test.go => onlineddl_ghost_test.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename 
go/test/endtoend/onlineddl_ghost/{onlineddl_test.go => onlineddl_ghost_test.go} (100%) diff --git a/go/test/endtoend/onlineddl_ghost/onlineddl_test.go b/go/test/endtoend/onlineddl_ghost/onlineddl_ghost_test.go similarity index 100% rename from go/test/endtoend/onlineddl_ghost/onlineddl_test.go rename to go/test/endtoend/onlineddl_ghost/onlineddl_ghost_test.go From 81d0a1c384756874f58eb5783752bae85a453825 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 4 Feb 2021 14:16:13 +0200 Subject: [PATCH 44/72] clarifying where online DDL tests are Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 2 +- go/vt/vttablet/onlineddl/executor_test.go | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 go/vt/vttablet/onlineddl/executor_test.go diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 5a0c07b930e..1950e86b714 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -15,7 +15,7 @@ limitations under the License. */ /* -Functionality of this Executor is tested in go/test/endtoend/onlineddl/onlineddl_test.go +Functionality of this Executor is tested in go/test/endtoend/onlineddl_ghost/... and go/test/endtoend/onlineddl_vrepl/... */ package onlineddl diff --git a/go/vt/vttablet/onlineddl/executor_test.go b/go/vt/vttablet/onlineddl/executor_test.go new file mode 100644 index 00000000000..17448c8c554 --- /dev/null +++ b/go/vt/vttablet/onlineddl/executor_test.go @@ -0,0 +1,21 @@ +/* +Copyright 2021 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +/* +Functionality of this Executor is tested in go/test/endtoend/onlineddl_ghost/... and go/test/endtoend/onlineddl_vrepl/... +*/ + +package onlineddl From 8d6705eb5ee503020160295b319e86351cc3009e Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 4 Feb 2021 14:30:35 +0200 Subject: [PATCH 45/72] renamed a couple functions Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/test/endtoend/vreplication/vreplication_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/go/test/endtoend/vreplication/vreplication_test.go b/go/test/endtoend/vreplication/vreplication_test.go index 5cf1078344e..722af7cb9c5 100644 --- a/go/test/endtoend/vreplication/vreplication_test.go +++ b/go/test/endtoend/vreplication/vreplication_test.go @@ -64,11 +64,11 @@ func throttleResponse(tablet *cluster.VttabletProcess, path string) (resp *http. 
return resp, respBody, err } -func throttleStreamer(tablet *cluster.VttabletProcess, app string) (*http.Response, string, error) { +func throttleApp(tablet *cluster.VttabletProcess, app string) (*http.Response, string, error) { return throttleResponse(tablet, fmt.Sprintf("throttler/throttle-app?app=%s&duration=1h", app)) } -func unthrottleStreamer(tablet *cluster.VttabletProcess, app string) (*http.Response, string, error) { +func unthrottleApp(tablet *cluster.VttabletProcess, app string) (*http.Response, string, error) { return throttleResponse(tablet, fmt.Sprintf("throttler/unthrottle-app?app=%s", app)) } @@ -604,7 +604,7 @@ func materializeProduct(t *testing.T) { t.Run("throttle-app-product", func(t *testing.T) { // Now, throttle the streamer on source tablets, insert some rows for _, tab := range productTablets { - _, body, err := throttleStreamer(tab, sourceThrottlerAppName) + _, body, err := throttleApp(tab, sourceThrottlerAppName) assert.NoError(t, err) assert.Contains(t, body, sourceThrottlerAppName) } @@ -633,7 +633,7 @@ func materializeProduct(t *testing.T) { t.Run("unthrottle-app-product", func(t *testing.T) { // unthrottle on source tablets, and expect the rows to show up for _, tab := range productTablets { - _, body, err := unthrottleStreamer(tab, sourceThrottlerAppName) + _, body, err := unthrottleApp(tab, sourceThrottlerAppName) assert.NoError(t, err) assert.Contains(t, body, sourceThrottlerAppName) } @@ -654,7 +654,7 @@ func materializeProduct(t *testing.T) { t.Run("throttle-app-customer", func(t *testing.T) { // Now, throttle the streamer on source tablets, insert some rows for _, tab := range customerTablets { - _, body, err := throttleStreamer(tab, targetThrottlerAppName) + _, body, err := throttleApp(tab, targetThrottlerAppName) assert.NoError(t, err) assert.Contains(t, body, targetThrottlerAppName) } @@ -683,7 +683,7 @@ func materializeProduct(t *testing.T) { t.Run("unthrottle-app-customer", func(t *testing.T) { // unthrottle on source 
tablets, and expect the rows to show up for _, tab := range customerTablets { - _, body, err := unthrottleStreamer(tab, targetThrottlerAppName) + _, body, err := unthrottleApp(tab, targetThrottlerAppName) assert.NoError(t, err) assert.Contains(t, body, targetThrottlerAppName) } From 3d41289864a0147e8cccd41508a68c3c09fd2622 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 4 Feb 2021 14:30:50 +0200 Subject: [PATCH 46/72] adding onlineddl_vrepl tests Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- .../onlineddl_vrepl/onlineddl_vrepl_test.go | 445 ++++++++++++++++++ 1 file changed, 445 insertions(+) create mode 100644 go/test/endtoend/onlineddl_vrepl/onlineddl_vrepl_test.go diff --git a/go/test/endtoend/onlineddl_vrepl/onlineddl_vrepl_test.go b/go/test/endtoend/onlineddl_vrepl/onlineddl_vrepl_test.go new file mode 100644 index 00000000000..b6f4c4fa6fc --- /dev/null +++ b/go/test/endtoend/onlineddl_vrepl/onlineddl_vrepl_test.go @@ -0,0 +1,445 @@ +/* +Copyright 2019 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package onlineddl + +import ( + "context" + "flag" + "fmt" + "io/ioutil" + "net/http" + "os" + "path" + "regexp" + "strings" + "sync" + "testing" + "time" + + "vitess.io/vitess/go/mysql" + "vitess.io/vitess/go/sqltypes" + "vitess.io/vitess/go/vt/schema" + throttlebase "vitess.io/vitess/go/vt/vttablet/tabletserver/throttle/base" + + "vitess.io/vitess/go/test/endtoend/cluster" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var ( + clusterInstance *cluster.LocalProcessCluster + vtParams mysql.ConnParams + httpClient = throttlebase.SetupHTTPClient(time.Second) + throttlerAppName = "vreplication" + + hostname = "localhost" + keyspaceName = "ks" + cell = "zone1" + schemaChangeDirectory = "" + totalTableCount = 4 + createTable = ` + CREATE TABLE %s ( + id bigint(20) NOT NULL, + msg varchar(64), + PRIMARY KEY (id) + ) ENGINE=InnoDB;` + // To verify non online-DDL behavior + alterTableNormalStatement = ` + ALTER TABLE %s + ADD COLUMN non_online int UNSIGNED NOT NULL` + // A trivial statement which must succeed and does not change the schema + alterTableTrivialStatement = ` + ALTER TABLE %s + ENGINE=InnoDB` + // The following statement is valid + alterTableSuccessfulStatement = ` + ALTER TABLE %s + MODIFY id bigint UNSIGNED NOT NULL, + ADD COLUMN vrepl_col int NOT NULL, + ADD INDEX idx_msg(msg)` + // The following statement will fail because vreplication requires shared PRIMARY KEY columns + alterTableFailedStatement = ` + ALTER TABLE %s + DROP PRIMARY KEY, + DROP COLUMN vrepl_col` + // We will run this query while throttling vreplication + alterTableThrottlingStatement = ` + ALTER TABLE %s + DROP COLUMN vrepl_col` + onlineDDLCreateTableStatement = ` + CREATE TABLE %s ( + id bigint NOT NULL, + online_ddl_create_col INT NOT NULL, + PRIMARY KEY (id) + ) ENGINE=InnoDB;` + onlineDDLDropTableStatement = ` + DROP TABLE %s` + onlineDDLDropTableIfExistsStatement = ` + DROP TABLE IF EXISTS %s` +) + +func fullWordUUIDRegexp(uuid, 
searchWord string) *regexp.Regexp { + return regexp.MustCompile(uuid + `.*?\b` + searchWord + `\b`) +} +func fullWordRegexp(searchWord string) *regexp.Regexp { + return regexp.MustCompile(`.*?\b` + searchWord + `\b`) +} + +func TestMain(m *testing.M) { + defer cluster.PanicHandler(nil) + flag.Parse() + + exitcode, err := func() (int, error) { + clusterInstance = cluster.NewCluster(cell, hostname) + schemaChangeDirectory = path.Join("/tmp", fmt.Sprintf("schema_change_dir_%d", clusterInstance.GetAndReserveTabletUID())) + defer os.RemoveAll(schemaChangeDirectory) + defer clusterInstance.Teardown() + + if _, err := os.Stat(schemaChangeDirectory); os.IsNotExist(err) { + _ = os.Mkdir(schemaChangeDirectory, 0700) + } + + clusterInstance.VtctldExtraArgs = []string{ + "-schema_change_dir", schemaChangeDirectory, + "-schema_change_controller", "local", + "-schema_change_check_interval", "1"} + + clusterInstance.VtTabletExtraArgs = []string{ + "-migration_check_interval", "5s", + } + clusterInstance.VtGateExtraArgs = []string{ + "-ddl_strategy", "online", + } + + if err := clusterInstance.StartTopo(); err != nil { + return 1, err + } + + // Start keyspace + keyspace := &cluster.Keyspace{ + Name: keyspaceName, + } + + if err := clusterInstance.StartUnshardedKeyspace(*keyspace, 2, true); err != nil { + return 1, err + } + if err := clusterInstance.StartKeyspace(*keyspace, []string{"1"}, 1, false); err != nil { + return 1, err + } + + vtgateInstance := clusterInstance.NewVtgateInstance() + // set the gateway we want to use + vtgateInstance.GatewayImplementation = "tabletgateway" + // Start vtgate + if err := vtgateInstance.Setup(); err != nil { + return 1, err + } + // ensure it is torn down during cluster TearDown + clusterInstance.VtgateProcess = *vtgateInstance + vtParams = mysql.ConnParams{ + Host: clusterInstance.Hostname, + Port: clusterInstance.VtgateMySQLPort, + } + + return m.Run(), nil + }() + if err != nil { + fmt.Printf("%v\n", err) + os.Exit(1) + } else { + 
os.Exit(exitcode) + } + +} + +func throttleResponse(tablet *cluster.Vttablet, path string) (resp *http.Response, respBody string, err error) { + apiURL := fmt.Sprintf("http://%s:%d/%s", tablet.VttabletProcess.TabletHostname, tablet.HTTPPort, path) + resp, err = httpClient.Get(apiURL) + if err != nil { + return resp, respBody, err + } + b, err := ioutil.ReadAll(resp.Body) + respBody = string(b) + return resp, respBody, err +} + +func throttleApp(tablet *cluster.Vttablet, app string) (*http.Response, string, error) { + return throttleResponse(tablet, fmt.Sprintf("throttler/throttle-app?app=%s&duration=1h", app)) +} + +func unthrottleApp(tablet *cluster.Vttablet, app string) (*http.Response, string, error) { + return throttleResponse(tablet, fmt.Sprintf("throttler/unthrottle-app?app=%s", app)) +} + +func TestSchemaChange(t *testing.T) { + defer cluster.PanicHandler(t) + assert.Equal(t, 2, len(clusterInstance.Keyspaces[0].Shards)) + testWithInitialSchema(t) + t.Run("create non_online", func(t *testing.T) { + _ = testOnlineDDLStatement(t, alterTableNormalStatement, string(schema.DDLStrategyDirect), "vtctl", "non_online") + }) + t.Run("successful online alter, vtgate", func(t *testing.T) { + uuid := testOnlineDDLStatement(t, alterTableSuccessfulStatement, "online", "vtgate", "vrepl_col") + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) + checkCancelMigration(t, uuid, false) + checkRetryMigration(t, uuid, false) + }) + t.Run("successful online alter, vtctl", func(t *testing.T) { + uuid := testOnlineDDLStatement(t, alterTableTrivialStatement, "online", "vtctl", "vrepl_col") + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) + checkCancelMigration(t, uuid, false) + checkRetryMigration(t, uuid, false) + }) + t.Run("throttled migration", func(t *testing.T) { + // TODO(shlomi): throttle app, unthrottle + for i := range clusterInstance.Keyspaces[0].Shards { + throttleApp(clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], throttlerAppName) + defer 
unthrottleApp(clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], throttlerAppName) + } + uuid := testOnlineDDLStatement(t, alterTableThrottlingStatement, "online", "vtgate", "vrepl_col") + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusRunning) + checkCancelMigration(t, uuid, true) + time.Sleep(2 * time.Second) + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusFailed) + }) + t.Run("failed migration", func(t *testing.T) { + uuid := testOnlineDDLStatement(t, alterTableFailedStatement, "online", "vtgate", "vrepl_col") + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusFailed) + checkCancelMigration(t, uuid, false) + checkRetryMigration(t, uuid, true) + // migration will fail again + }) + t.Run("cancel all migrations: nothing to cancel", func(t *testing.T) { + // no migrations pending at this time + time.Sleep(10 * time.Second) + checkCancelAllMigrations(t, 0) + }) + t.Run("cancel all migrations: some migrations to cancel", func(t *testing.T) { + // spawn n migrations; cancel them via cancel-all + var wg sync.WaitGroup + count := 4 + for i := 0; i < count; i++ { + wg.Add(1) + go func() { + defer wg.Done() + _ = testOnlineDDLStatement(t, alterTableThrottlingStatement, "online --max-load=Threads_running=1", "vtgate", "vrepl_col") + }() + } + wg.Wait() + checkCancelAllMigrations(t, count) + }) + t.Run("Online DROP, vtctl", func(t *testing.T) { + uuid := testOnlineDDLStatement(t, onlineDDLDropTableStatement, "online", "vtctl", "") + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) + checkCancelMigration(t, uuid, false) + checkRetryMigration(t, uuid, false) + }) + t.Run("Online CREATE, vtctl", func(t *testing.T) { + uuid := testOnlineDDLStatement(t, onlineDDLCreateTableStatement, "online", "vtctl", "online_ddl_create_col") + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) + checkCancelMigration(t, uuid, false) + checkRetryMigration(t, uuid, false) + }) + t.Run("Online DROP TABLE IF EXISTS, vtgate", func(t *testing.T) { + uuid := 
testOnlineDDLStatement(t, onlineDDLDropTableIfExistsStatement, "online", "vtgate", "") + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) + checkCancelMigration(t, uuid, false) + checkRetryMigration(t, uuid, false) + // this table existed + checkTables(t, schema.OnlineDDLToGCUUID(uuid), 1) + }) + t.Run("Online DROP TABLE IF EXISTS for nonexistent table, vtgate", func(t *testing.T) { + uuid := testOnlineDDLStatement(t, onlineDDLDropTableIfExistsStatement, "online", "vtgate", "") + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) + checkCancelMigration(t, uuid, false) + checkRetryMigration(t, uuid, false) + // this table did not exist + checkTables(t, schema.OnlineDDLToGCUUID(uuid), 0) + }) + t.Run("Online DROP TABLE for nonexistent table, expect error, vtgate", func(t *testing.T) { + uuid := testOnlineDDLStatement(t, onlineDDLDropTableStatement, "online", "vtgate", "") + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusFailed) + checkCancelMigration(t, uuid, false) + checkRetryMigration(t, uuid, true) + }) +} + +func testWithInitialSchema(t *testing.T) { + // Create 4 tables + var sqlQuery = "" //nolint + for i := 0; i < totalTableCount; i++ { + sqlQuery = fmt.Sprintf(createTable, fmt.Sprintf("vt_onlineddl_test_%02d", i)) + err := clusterInstance.VtctlclientProcess.ApplySchema(keyspaceName, sqlQuery) + require.Nil(t, err) + } + + // Check if 4 tables are created + checkTables(t, "", totalTableCount) +} + +// testOnlineDDLStatement runs an online DDL, ALTER statement +func testOnlineDDLStatement(t *testing.T, alterStatement string, ddlStrategy string, executeStrategy string, expectColumn string) (uuid string) { + tableName := fmt.Sprintf("vt_onlineddl_test_%02d", 3) + sqlQuery := fmt.Sprintf(alterStatement, tableName) + if executeStrategy == "vtgate" { + row := vtgateExec(t, ddlStrategy, sqlQuery, "").Named().Row() + if row != nil { + uuid = row.AsString("uuid", "") + } + } else { + var err error + uuid, err = 
clusterInstance.VtctlclientProcess.ApplySchemaWithOutput(keyspaceName, sqlQuery, ddlStrategy) + assert.NoError(t, err) + } + uuid = strings.TrimSpace(uuid) + fmt.Println("# Generated UUID (for debug purposes):") + fmt.Printf("<%s>\n", uuid) + + strategy, _, err := schema.ParseDDLStrategy(ddlStrategy) + assert.NoError(t, err) + if !strategy.IsDirect() { + time.Sleep(time.Second * 20) + } + + if expectColumn != "" { + checkMigratedTable(t, tableName, expectColumn) + } + return uuid +} + +// checkTables checks the number of tables in the first two shards. +func checkTables(t *testing.T, showTableName string, expectCount int) { + for i := range clusterInstance.Keyspaces[0].Shards { + checkTablesCount(t, clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], showTableName, expectCount) + } +} + +// checkTablesCount checks the number of tables in the given tablet +func checkTablesCount(t *testing.T, tablet *cluster.Vttablet, showTableName string, expectCount int) { + query := fmt.Sprintf(`show tables like '%%%s%%';`, showTableName) + queryResult, err := tablet.VttabletProcess.QueryTablet(query, keyspaceName, true) + require.Nil(t, err) + assert.Equal(t, expectCount, len(queryResult.Rows)) +} + +// checkRecentMigrations checks 'OnlineDDL show recent' output. 
Example of such output: +// +------------------+-------+--------------+----------------------+--------------------------------------+----------+---------------------+---------------------+------------------+ +// | Tablet | shard | mysql_schema | mysql_table | migration_uuid | strategy | started_timestamp | completed_timestamp | migration_status | +// +------------------+-------+--------------+----------------------+--------------------------------------+----------+---------------------+---------------------+------------------+ +// | zone1-0000003880 | 0 | vt_ks | vt_onlineddl_test_03 | a0638f6b_ec7b_11ea_9bf8_000d3a9b8a9a | online | 2020-09-01 17:50:40 | 2020-09-01 17:50:41 | complete | +// | zone1-0000003884 | 1 | vt_ks | vt_onlineddl_test_03 | a0638f6b_ec7b_11ea_9bf8_000d3a9b8a9a | online | 2020-09-01 17:50:40 | 2020-09-01 17:50:41 | complete | +// +------------------+-------+--------------+----------------------+--------------------------------------+----------+---------------------+---------------------+------------------+ + +func checkRecentMigrations(t *testing.T, uuid string, expectStatus schema.OnlineDDLStatus) { + result, err := clusterInstance.VtctlclientProcess.OnlineDDLShowRecent(keyspaceName) + assert.NoError(t, err) + fmt.Println("# 'vtctlclient OnlineDDL show recent' output (for debug purposes):") + fmt.Println(result) + assert.Equal(t, len(clusterInstance.Keyspaces[0].Shards), strings.Count(result, uuid)) + // We ensure "full word" regexp because some column names may conflict + expectStatusRegexp := fullWordUUIDRegexp(uuid, string(expectStatus)) + m := expectStatusRegexp.FindAllString(result, -1) + assert.Equal(t, len(clusterInstance.Keyspaces[0].Shards), len(m)) +} + +// checkCancelMigration attempts to cancel a migration, and expects rejection +func checkCancelMigration(t *testing.T, uuid string, expectCancelPossible bool) { + result, err := clusterInstance.VtctlclientProcess.OnlineDDLCancelMigration(keyspaceName, uuid) + fmt.Println("# 
'vtctlclient OnlineDDL cancel ' output (for debug purposes):") + fmt.Println(result) + assert.NoError(t, err) + + var r *regexp.Regexp + if expectCancelPossible { + r = fullWordRegexp("1") + } else { + r = fullWordRegexp("0") + } + m := r.FindAllString(result, -1) + assert.Equal(t, len(clusterInstance.Keyspaces[0].Shards), len(m)) +} + +// checkCancelAllMigrations all pending migrations +func checkCancelAllMigrations(t *testing.T, expectCount int) { + result, err := clusterInstance.VtctlclientProcess.OnlineDDLCancelAllMigrations(keyspaceName) + fmt.Println("# 'vtctlclient OnlineDDL cancel-all' output (for debug purposes):") + fmt.Println(result) + assert.NoError(t, err) + + r := fullWordRegexp(fmt.Sprintf("%d", expectCount)) + m := r.FindAllString(result, -1) + assert.Equal(t, len(clusterInstance.Keyspaces[0].Shards), len(m)) +} + +// checkRetryMigration attempts to retry a migration, and expects rejection +func checkRetryMigration(t *testing.T, uuid string, expectRetryPossible bool) { + result, err := clusterInstance.VtctlclientProcess.OnlineDDLRetryMigration(keyspaceName, uuid) + fmt.Println("# 'vtctlclient OnlineDDL retry ' output (for debug purposes):") + fmt.Println(result) + assert.NoError(t, err) + + var r *regexp.Regexp + if expectRetryPossible { + r = fullWordRegexp("1") + } else { + r = fullWordRegexp("0") + } + m := r.FindAllString(result, -1) + assert.Equal(t, len(clusterInstance.Keyspaces[0].Shards), len(m)) +} + +// checkMigratedTables checks the CREATE STATEMENT of a table after migration +func checkMigratedTable(t *testing.T, tableName, expectColumn string) { + for i := range clusterInstance.Keyspaces[0].Shards { + createStatement := getCreateTableStatement(t, clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], tableName) + assert.Contains(t, createStatement, expectColumn) + } +} + +// getCreateTableStatement returns the CREATE TABLE statement for a given table +func getCreateTableStatement(t *testing.T, tablet *cluster.Vttablet, tableName string) 
(statement string) { + queryResult, err := tablet.VttabletProcess.QueryTablet(fmt.Sprintf("show create table %s;", tableName), keyspaceName, true) + require.Nil(t, err) + + assert.Equal(t, len(queryResult.Rows), 1) + assert.Equal(t, len(queryResult.Rows[0]), 2) // table name, create statement + statement = queryResult.Rows[0][1].ToString() + return statement +} + +func vtgateExec(t *testing.T, ddlStrategy string, query string, expectError string) *sqltypes.Result { + t.Helper() + + ctx := context.Background() + conn, err := mysql.Connect(ctx, &vtParams) + require.Nil(t, err) + defer conn.Close() + + setSession := fmt.Sprintf("set @@ddl_strategy='%s'", ddlStrategy) + _, err = conn.ExecuteFetch(setSession, 1000, true) + assert.NoError(t, err) + + qr, err := conn.ExecuteFetch(query, 1000, true) + if expectError == "" { + require.NoError(t, err) + } else { + require.Error(t, err, "error should not be nil") + assert.Contains(t, err.Error(), expectError, "Unexpected error") + } + return qr +} From 05676f152ceb4712d8611204d8d51f37a1ee4811 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 4 Feb 2021 16:31:58 +0200 Subject: [PATCH 47/72] abort cancel when migration is in complete/failed statis Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 1950e86b714..788180d5430 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -1155,6 +1155,8 @@ func (e *Executor) cancelMigration(ctx context.Context, uuid string, terminateRu } switch onlineDDL.Status { + case schema.OnlineDDLStatusComplete, schema.OnlineDDLStatusFailed: + return emptyResult, nil case schema.OnlineDDLStatusQueued, schema.OnlineDDLStatusReady: if err := e.updateMigrationStatus(ctx, onlineDDL.UUID, 
schema.OnlineDDLStatusCancelled); err != nil { return nil, err From 29fa14bf3239322478e180ac5575b245c342342b Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 4 Feb 2021 16:57:19 +0200 Subject: [PATCH 48/72] Reload schema before starting vreplication migration Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 788180d5430..6b194debe07 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -628,6 +628,11 @@ func (e *Executor) ExecuteWithVReplication(ctx context.Context, onlineDDL *schem if err != nil { return err } + // reload schema + if err := tmClient.ReloadSchema(ctx, tablet.Tablet, ""); err != nil { + return err + } + // create vreplication entry insertVReplicationQuery, err := v.generateInsertStatement(ctx) if err != nil { From d3d3e313bf63c198b99b304980b0de18f26ec06b Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 4 Feb 2021 17:31:00 +0200 Subject: [PATCH 49/72] fixing endtoend tests Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- .../endtoend/onlineddl_vrepl/onlineddl_vrepl_test.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/go/test/endtoend/onlineddl_vrepl/onlineddl_vrepl_test.go b/go/test/endtoend/onlineddl_vrepl/onlineddl_vrepl_test.go index b6f4c4fa6fc..19cceffb039 100644 --- a/go/test/endtoend/onlineddl_vrepl/onlineddl_vrepl_test.go +++ b/go/test/endtoend/onlineddl_vrepl/onlineddl_vrepl_test.go @@ -120,6 +120,10 @@ func TestMain(m *testing.M) { "-schema_change_check_interval", "1"} clusterInstance.VtTabletExtraArgs = []string{ + "-enable-lag-throttler", + "-throttle_threshold", "1s", + "-heartbeat_enable", + "-heartbeat_interval", "250ms", 
"-migration_check_interval", "5s", } clusterInstance.VtGateExtraArgs = []string{ @@ -206,7 +210,6 @@ func TestSchemaChange(t *testing.T) { checkRetryMigration(t, uuid, false) }) t.Run("throttled migration", func(t *testing.T) { - // TODO(shlomi): throttle app, unthrottle for i := range clusterInstance.Keyspaces[0].Shards { throttleApp(clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], throttlerAppName) defer unthrottleApp(clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], throttlerAppName) @@ -230,6 +233,10 @@ func TestSchemaChange(t *testing.T) { checkCancelAllMigrations(t, 0) }) t.Run("cancel all migrations: some migrations to cancel", func(t *testing.T) { + for i := range clusterInstance.Keyspaces[0].Shards { + throttleApp(clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], throttlerAppName) + defer unthrottleApp(clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], throttlerAppName) + } // spawn n migrations; cancel them via cancel-all var wg sync.WaitGroup count := 4 @@ -237,7 +244,7 @@ func TestSchemaChange(t *testing.T) { wg.Add(1) go func() { defer wg.Done() - _ = testOnlineDDLStatement(t, alterTableThrottlingStatement, "online --max-load=Threads_running=1", "vtgate", "vrepl_col") + _ = testOnlineDDLStatement(t, alterTableThrottlingStatement, "online", "vtgate", "vrepl_col") }() } wg.Wait() @@ -312,6 +319,7 @@ func testOnlineDDLStatement(t *testing.T, alterStatement string, ddlStrategy str strategy, _, err := schema.ParseDDLStrategy(ddlStrategy) assert.NoError(t, err) + if !strategy.IsDirect() { time.Sleep(time.Second * 20) } From c27ea9d5752ed9387d6cb312593bad91ffb5d8c0 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 4 Feb 2021 18:49:38 +0200 Subject: [PATCH 50/72] cut-over timeout for wait-for-pos and stop writes Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git 
a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 6b194debe07..07e986c48a1 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -471,7 +471,7 @@ func (e *Executor) terminateVReplMigration(ctx context.Context, uuid string) err func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream) error { // sanity checks: if s == nil { - return vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "No vreplicatoin stream migration %s", s.workflow) + return vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "No vreplication stream migration %s", s.workflow) } if s.bls.Filter == nil { return vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "No binlog source filter for migration %s", s.workflow) @@ -541,6 +541,8 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream) er return err } + ctx, cancel := context.WithTimeout(ctx, 2*cutOverThreshold) + defer cancel() // Wait for target to reach the up-to-date pos if err := tmClient.VReplicationWaitForPos(ctx, tablet.Tablet, int(s.id), s.pos); err != nil { return err From 40063e83fdb58e49e025b6417e0c5e47686bc7e2 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 4 Feb 2021 20:14:08 +0200 Subject: [PATCH 51/72] fix ctx with cancel() Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 07e986c48a1..8b44d02ab74 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -541,14 +541,19 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream) er return err } - ctx, cancel := context.WithTimeout(ctx, 2*cutOverThreshold) - defer cancel() - // Wait for target to reach the up-to-date pos - if err := 
tmClient.VReplicationWaitForPos(ctx, tablet.Tablet, int(s.id), s.pos); err != nil { + waitForPos := func() error { + ctx, cancel := context.WithTimeout(ctx, 2*cutOverThreshold) + defer cancel() + // Wait for target to reach the up-to-date pos + if err := tmClient.VReplicationWaitForPos(ctx, tablet.Tablet, int(s.id), s.pos); err != nil { + return err + } + // Target is now in sync with source! + return nil + } + if err := waitForPos(); err != nil { return err } - // Target is now in sync with source! - // Stop vreplication if _, err := tmClient.VReplicationExec(ctx, tablet.Tablet, binlogplayer.StopVReplication(uint32(s.id), "stopped for online DDL cutover")); err != nil { return err From 86f6d2fc75997afa9ea041ae0b9c765c8111ad70 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Mon, 8 Feb 2021 07:59:37 +0200 Subject: [PATCH 52/72] schema_migrations.message column Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 39 ++++++++++++++++++++++------ go/vt/vttablet/onlineddl/schema.go | 7 +++++ go/vt/vttablet/onlineddl/util.go | 3 ++- 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 8b44d02ab74..982c3a03236 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -825,6 +825,8 @@ curl -s 'http://localhost:%d/schema-migration/report-status?uuid=%s&status=%s&dr if err := runGhost(false); err != nil { // perhaps gh-ost was interrupted midway and didn't have the chance to send a "failes" status _ = e.updateMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusFailed) + _ = e.updateMigrationMessage(ctx, onlineDDL.UUID, err.Error()) + log.Errorf("Error executing gh-ost dry run: %+v", err) return err } @@ -835,6 +837,7 @@ curl -s 'http://localhost:%d/schema-migration/report-status?uuid=%s&status=%s&dr if err := runGhost(true); 
err != nil { // perhaps gh-ost was interrupted midway and didn't have the chance to send a "failes" status _ = e.updateMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusFailed) + _ = e.updateMigrationMessage(ctx, onlineDDL.UUID, err.Error()) failedMigrations.Add(1) log.Errorf("Error running gh-ost: %+v", err) return err @@ -1048,6 +1051,7 @@ export MYSQL_PWD if err := runPTOSC(false); err != nil { // perhaps pt-osc was interrupted midway and didn't have the chance to send a "failes" status _ = e.updateMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusFailed) + _ = e.updateMigrationMessage(ctx, onlineDDL.UUID, err.Error()) _ = e.updateMigrationTimestamp(ctx, "completed_timestamp", onlineDDL.UUID) log.Errorf("Error executing pt-online-schema-change dry run: %+v", err) return err @@ -1059,6 +1063,7 @@ export MYSQL_PWD if err := runPTOSC(true); err != nil { // perhaps pt-osc was interrupted midway and didn't have the chance to send a "failes" status _ = e.updateMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusFailed) + _ = e.updateMigrationMessage(ctx, onlineDDL.UUID, err.Error()) _ = e.updateMigrationTimestamp(ctx, "completed_timestamp", onlineDDL.UUID) _ = e.dropPTOSCMigrationTriggers(ctx, onlineDDL) failedMigrations.Add(1) @@ -1155,7 +1160,7 @@ func (e *Executor) terminateMigration(ctx context.Context, onlineDDL *schema.Onl } // cancelMigration attempts to abort a scheduled or a running migration -func (e *Executor) cancelMigration(ctx context.Context, uuid string, terminateRunningMigration bool) (result *sqltypes.Result, err error) { +func (e *Executor) cancelMigration(ctx context.Context, uuid string, terminateRunningMigration bool, message string) (result *sqltypes.Result, err error) { e.migrationMutex.Lock() defer e.migrationMutex.Unlock() @@ -1178,6 +1183,8 @@ func (e *Executor) cancelMigration(ctx context.Context, uuid string, terminateRu if terminateRunningMigration { migrationFound, err := e.terminateMigration(ctx, onlineDDL, 
e.lastMigrationUUID) + defer e.updateMigrationMessage(ctx, onlineDDL.UUID, message) + if migrationFound { rowsAffected = 1 } @@ -1194,10 +1201,10 @@ func (e *Executor) cancelMigration(ctx context.Context, uuid string, terminateRu } // cancelMigrations attempts to abort a list of migrations -func (e *Executor) cancelMigrations(ctx context.Context, uuids []string) (err error) { +func (e *Executor) cancelMigrations(ctx context.Context, uuids []string, message string) (err error) { for _, uuid := range uuids { log.Infof("cancelMigrations: cancelling %s", uuid) - if _, err := e.cancelMigration(ctx, uuid, true); err != nil { + if _, err := e.cancelMigration(ctx, uuid, true, message); err != nil { return err } } @@ -1206,7 +1213,7 @@ func (e *Executor) cancelMigrations(ctx context.Context, uuids []string) (err er // cancelPendingMigrations cancels all pending migrations (that are expected to run or are running) // for this keyspace -func (e *Executor) cancelPendingMigrations(ctx context.Context) (result *sqltypes.Result, err error) { +func (e *Executor) cancelPendingMigrations(ctx context.Context, message string) (result *sqltypes.Result, err error) { r, err := e.execQuery(ctx, sqlSelectPendingMigrations) if err != nil { return result, err @@ -1220,7 +1227,7 @@ func (e *Executor) cancelPendingMigrations(ctx context.Context) (result *sqltype result = &sqltypes.Result{} for _, uuid := range uuids { log.Infof("cancelPendingMigrations: cancelling %s", uuid) - res, err := e.cancelMigration(ctx, uuid, true) + res, err := e.cancelMigration(ctx, uuid, true, message) if err != nil { return result, err } @@ -1266,6 +1273,9 @@ func (e *Executor) scheduleNextMigration(ctx context.Context) error { func (e *Executor) executeMigration(ctx context.Context, onlineDDL *schema.OnlineDDL) error { failMigration := func(err error) error { _ = e.updateMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusFailed) + if err != nil { + _ = e.updateMigrationMessage(ctx, onlineDDL.UUID, 
err.Error()) + } e.triggerNextCheckInterval() return err } @@ -1671,6 +1681,7 @@ func (e *Executor) reviewStaleMigrations(ctx context.Context) error { if err := e.updateMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusFailed); err != nil { return err } + _ = e.updateMigrationMessage(ctx, onlineDDL.UUID, "stale migration") } return nil @@ -1774,7 +1785,7 @@ func (e *Executor) onMigrationCheckTick() { } if _, cancellable, err := e.reviewRunningMigrations(ctx); err != nil { log.Error(err) - } else if err := e.cancelMigrations(ctx, cancellable); err != nil { + } else if err := e.cancelMigrations(ctx, cancellable, "auto cancel"); err != nil { log.Error(err) } if err := e.reviewStaleMigrations(ctx); err != nil { @@ -1884,6 +1895,18 @@ func (e *Executor) updateMigrationStatus(ctx context.Context, uuid string, statu return err } +func (e *Executor) updateMigrationMessage(ctx context.Context, uuid string, message string) error { + query, err := sqlparser.ParseAndBind(sqlUpdateMessage, + sqltypes.StringBindVariable(message), + sqltypes.StringBindVariable(uuid), + ) + if err != nil { + return err + } + _, err = e.execQuery(ctx, query) + return err +} + func (e *Executor) updateMigrationProgress(ctx context.Context, uuid string, progress float64) error { if progress <= 0 { // progress starts at 0, and can only increase. 
@@ -2044,13 +2067,13 @@ func (e *Executor) VExec(ctx context.Context, vx *vexec.TabletVExec) (qr *queryp if !schema.IsOnlineDDLUUID(uuid) { return nil, fmt.Errorf("Not an Online DDL UUID: %s", uuid) } - return response(e.cancelMigration(ctx, uuid, true)) + return response(e.cancelMigration(ctx, uuid, true, "cancel by user")) case cancelAllMigrationHint: uuid, _ := vx.ColumnStringVal(vx.WhereCols, "migration_uuid") if uuid != "" { return nil, fmt.Errorf("Unexpetced UUID: %s", uuid) } - return response(e.cancelPendingMigrations(ctx)) + return response(e.cancelPendingMigrations(ctx, "cancel-all by user")) default: return nil, fmt.Errorf("Unexpected value for migration_status: %v. Supported values are: %s, %s", statusVal, retryMigrationHint, cancelMigrationHint) diff --git a/go/vt/vttablet/onlineddl/schema.go b/go/vt/vttablet/onlineddl/schema.go index 7bed9f12d1f..3d7959fc19f 100644 --- a/go/vt/vttablet/onlineddl/schema.go +++ b/go/vt/vttablet/onlineddl/schema.go @@ -53,6 +53,7 @@ const ( alterSchemaMigrationsTableProgress = "ALTER TABLE _vt.schema_migrations add column progress float NOT NULL DEFAULT 0" alterSchemaMigrationsTableContext = "ALTER TABLE _vt.schema_migrations add column migration_context varchar(1024) NOT NULL DEFAULT ''" alterSchemaMigrationsTableDDLAction = "ALTER TABLE _vt.schema_migrations add column ddl_action varchar(16) NOT NULL DEFAULT ''" + alterSchemaMigrationsTableMessage = "ALTER TABLE _vt.schema_migrations add column message TEXT NOT NULL" sqlScheduleSingleMigration = `UPDATE _vt.schema_migrations SET @@ -99,6 +100,11 @@ const ( WHERE migration_uuid=%a ` + sqlUpdateMessage = `UPDATE _vt.schema_migrations + SET message=%a + WHERE + migration_uuid=%a + ` sqlRetryMigration = `UPDATE _vt.schema_migrations SET migration_status='queued', @@ -271,4 +277,5 @@ var applyDDL = []string{ alterSchemaMigrationsTableProgress, alterSchemaMigrationsTableContext, alterSchemaMigrationsTableDDLAction, + alterSchemaMigrationsTableMessage, } diff --git 
a/go/vt/vttablet/onlineddl/util.go b/go/vt/vttablet/onlineddl/util.go index e901fe7dabd..00c7f3fd844 100644 --- a/go/vt/vttablet/onlineddl/util.go +++ b/go/vt/vttablet/onlineddl/util.go @@ -25,6 +25,7 @@ import ( "io/ioutil" "os/exec" "path/filepath" + "strings" "time" "vitess.io/vitess/go/vt/log" @@ -55,7 +56,7 @@ func execCmd(name string, args, env []string, dir string, input io.Reader, outpu } err = cmd.Run() if err != nil { - err = fmt.Errorf("execCmd failed: %v, %v", name, err) + err = fmt.Errorf("failed running command: %v %s; error=%v", name, strings.Join(args, " "), err) log.Errorf(err.Error()) } log.Infof("execCmd success: %v", name) From 187f93d1e92aaa8a4b5cff1c566b1e7074e80773 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 10 Feb 2021 09:15:07 +0200 Subject: [PATCH 53/72] update workflo gen Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- .github/workflows/cluster_endtoend_onlineddl_vrepl.yml | 2 ++ test/ci_workflow_gen.go | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cluster_endtoend_onlineddl_vrepl.yml b/.github/workflows/cluster_endtoend_onlineddl_vrepl.yml index 5cbe31e6161..a3aee6a6ab5 100644 --- a/.github/workflows/cluster_endtoend_onlineddl_vrepl.yml +++ b/.github/workflows/cluster_endtoend_onlineddl_vrepl.yml @@ -1,3 +1,5 @@ +# DO NOT MODIFY: THIS FILE IS GENERATED USING "make generate_ci_workflows" + name: Cluster (onlineddl_vrepl) on: [push, pull_request] jobs: diff --git a/test/ci_workflow_gen.go b/test/ci_workflow_gen.go index 2f4bbb126ff..6050f8a49c5 100644 --- a/test/ci_workflow_gen.go +++ b/test/ci_workflow_gen.go @@ -32,7 +32,7 @@ const ( unitTestDatabases = "percona56, mysql57, mysql80, mariadb101, mariadb102, mariadb103" clusterTestTemplate = "templates/cluster_endtoend_test.tpl" - clusterList = 
"11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,vreplication_basic,vreplication_multicell,vreplication_cellalias,vreplication_v2" + clusterList = "11,12,13,14,15,16,17,18,19,20,21,22,23,24,27,vreplication_basic,vreplication_multicell,vreplication_cellalias,vreplication_v2,onlineddl_ghost,onlineddl_vrepl" // TODO: currently some percona tools including xtrabackup are installed on all clusters, we can possibly optimize // this by only installing them in the required clusters clustersRequiringXtraBackup = clusterList From 32987c6634336643db6dea05da2eaa6e428ad1af Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 10 Feb 2021 09:37:19 +0200 Subject: [PATCH 54/72] restored shard 26 Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- .github/workflows/cluster_endtoend_26.yml | 40 +++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 .github/workflows/cluster_endtoend_26.yml diff --git a/.github/workflows/cluster_endtoend_26.yml b/.github/workflows/cluster_endtoend_26.yml new file mode 100644 index 00000000000..998162f91bd --- /dev/null +++ b/.github/workflows/cluster_endtoend_26.yml @@ -0,0 +1,40 @@ +# DO NOT MODIFY: THIS FILE IS GENERATED USING "make generate_ci_workflows" + +name: Cluster (26) +on: [push, pull_request] +jobs: + + build: + name: Run endtoend tests on Cluster (26) + runs-on: ubuntu-latest + + steps: + - name: Set up Go + uses: actions/setup-go@v1 + with: + go-version: 1.15 + + - name: Check out code + uses: actions/checkout@v2 + + - name: Get dependencies + run: | + sudo apt-get update + sudo apt-get install -y mysql-server mysql-client make unzip g++ etcd curl git wget eatmydata + sudo service mysql stop + sudo service etcd stop + sudo ln -s /etc/apparmor.d/usr.sbin.mysqld /etc/apparmor.d/disable/ + sudo apparmor_parser -R /etc/apparmor.d/usr.sbin.mysqld + go mod download + + wget https://repo.percona.com/apt/percona-release_latest.$(lsb_release 
-sc)_all.deb + sudo apt-get install -y gnupg2 + sudo dpkg -i percona-release_latest.$(lsb_release -sc)_all.deb + sudo apt-get update + sudo apt-get install percona-xtrabackup-24 + + - name: Run cluster endtoend test + timeout-minutes: 30 + run: | + source build.env + eatmydata -- go run test.go -docker=false -print-log -follow -shard 26 From b358297f375f2b3a38c6db767d2f6d813ba7c042 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 10 Feb 2021 16:06:33 +0200 Subject: [PATCH 55/72] endtoend test: validate table data is as expected Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- .../onlineddl_vrepl/onlineddl_vrepl_test.go | 72 ++++++++++++++++++- 1 file changed, 70 insertions(+), 2 deletions(-) diff --git a/go/test/endtoend/onlineddl_vrepl/onlineddl_vrepl_test.go b/go/test/endtoend/onlineddl_vrepl/onlineddl_vrepl_test.go index 19cceffb039..e594d79a997 100644 --- a/go/test/endtoend/onlineddl_vrepl/onlineddl_vrepl_test.go +++ b/go/test/endtoend/onlineddl_vrepl/onlineddl_vrepl_test.go @@ -55,13 +55,14 @@ var ( createTable = ` CREATE TABLE %s ( id bigint(20) NOT NULL, + test_val bigint unsigned NOT NULL DEFAULT 0, msg varchar(64), PRIMARY KEY (id) ) ENGINE=InnoDB;` // To verify non online-DDL behavior alterTableNormalStatement = ` ALTER TABLE %s - ADD COLUMN non_online int UNSIGNED NOT NULL` + ADD COLUMN non_online int UNSIGNED NOT NULL DEFAULT 0` // A trivial statement which must succeed and does not change the schema alterTableTrivialStatement = ` ALTER TABLE %s @@ -70,7 +71,7 @@ var ( alterTableSuccessfulStatement = ` ALTER TABLE %s MODIFY id bigint UNSIGNED NOT NULL, - ADD COLUMN vrepl_col int NOT NULL, + ADD COLUMN vrepl_col int NOT NULL DEFAULT 0, ADD INDEX idx_msg(msg)` // The following statement will fail because vreplication requires shared PRIMARY KEY columns alterTableFailedStatement = ` @@ -84,6 +85,7 @@ var ( onlineDDLCreateTableStatement = ` CREATE TABLE %s ( id bigint NOT 
NULL, + test_val bigint unsigned NOT NULL DEFAULT 0, online_ddl_create_col INT NOT NULL, PRIMARY KEY (id) ) ENGINE=InnoDB;` @@ -91,6 +93,14 @@ var ( DROP TABLE %s` onlineDDLDropTableIfExistsStatement = ` DROP TABLE IF EXISTS %s` + insertRowStatement = ` + INSERT INTO %s (id, test_val) VALUES (%d, 1) + ` + selectCountRowsStatement = ` + SELECT COUNT(*) AS c FROM %s + ` + countInserts int64 + insertMutex sync.Mutex ) func fullWordUUIDRegexp(uuid, searchWord string) *regexp.Regexp { @@ -196,33 +206,43 @@ func TestSchemaChange(t *testing.T) { testWithInitialSchema(t) t.Run("create non_online", func(t *testing.T) { _ = testOnlineDDLStatement(t, alterTableNormalStatement, string(schema.DDLStrategyDirect), "vtctl", "non_online") + insertRows(t, 2) + testRows(t) }) t.Run("successful online alter, vtgate", func(t *testing.T) { + insertRows(t, 2) uuid := testOnlineDDLStatement(t, alterTableSuccessfulStatement, "online", "vtgate", "vrepl_col") checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) + testRows(t) checkCancelMigration(t, uuid, false) checkRetryMigration(t, uuid, false) }) t.Run("successful online alter, vtctl", func(t *testing.T) { + insertRows(t, 2) uuid := testOnlineDDLStatement(t, alterTableTrivialStatement, "online", "vtctl", "vrepl_col") checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) + testRows(t) checkCancelMigration(t, uuid, false) checkRetryMigration(t, uuid, false) }) t.Run("throttled migration", func(t *testing.T) { + insertRows(t, 2) for i := range clusterInstance.Keyspaces[0].Shards { throttleApp(clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], throttlerAppName) defer unthrottleApp(clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], throttlerAppName) } uuid := testOnlineDDLStatement(t, alterTableThrottlingStatement, "online", "vtgate", "vrepl_col") checkRecentMigrations(t, uuid, schema.OnlineDDLStatusRunning) + testRows(t) checkCancelMigration(t, uuid, true) time.Sleep(2 * time.Second) checkRecentMigrations(t, uuid, 
schema.OnlineDDLStatusFailed) }) t.Run("failed migration", func(t *testing.T) { + insertRows(t, 2) uuid := testOnlineDDLStatement(t, alterTableFailedStatement, "online", "vtgate", "vrepl_col") checkRecentMigrations(t, uuid, schema.OnlineDDLStatusFailed) + testRows(t) checkCancelMigration(t, uuid, false) checkRetryMigration(t, uuid, true) // migration will fail again @@ -286,6 +306,36 @@ func TestSchemaChange(t *testing.T) { }) } +func insertRow(t *testing.T) { + insertMutex.Lock() + defer insertMutex.Unlock() + + tableName := fmt.Sprintf("vt_onlineddl_test_%02d", 3) + sqlQuery := fmt.Sprintf(insertRowStatement, tableName, countInserts) + r := vtgateExecQuery(t, sqlQuery, "") + require.NotNil(t, r) + countInserts++ +} + +func insertRows(t *testing.T, count int) { + for i := 0; i < count; i++ { + insertRow(t) + } +} + +func testRows(t *testing.T) { + insertMutex.Lock() + defer insertMutex.Unlock() + + tableName := fmt.Sprintf("vt_onlineddl_test_%02d", 3) + sqlQuery := fmt.Sprintf(selectCountRowsStatement, tableName) + r := vtgateExecQuery(t, sqlQuery, "") + require.NotNil(t, r) + row := r.Named().Row() + require.NotNil(t, row) + require.Equal(t, countInserts, row.AsInt64("c", 0)) +} + func testWithInitialSchema(t *testing.T) { // Create 4 tables var sqlQuery = "" //nolint @@ -430,6 +480,24 @@ func getCreateTableStatement(t *testing.T, tablet *cluster.Vttablet, tableName s return statement } +func vtgateExecQuery(t *testing.T, query string, expectError string) *sqltypes.Result { + t.Helper() + + ctx := context.Background() + conn, err := mysql.Connect(ctx, &vtParams) + require.Nil(t, err) + defer conn.Close() + + qr, err := conn.ExecuteFetch(query, 1000, true) + if expectError == "" { + require.NoError(t, err) + } else { + require.Error(t, err, "error should not be nil") + assert.Contains(t, err.Error(), expectError, "Unexpected error") + } + return qr +} + func vtgateExec(t *testing.T, ddlStrategy string, query string, expectError string) *sqltypes.Result { 
t.Helper() From 56b96a72a9cc057d15b872b2447c77247aacc5cb Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 10 Feb 2021 17:31:54 +0200 Subject: [PATCH 56/72] towards online ddl/vreplication mini stress tests in CI Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- ...luster_endtoend_onlineddl_vrepl_stress.yml | 40 ++ .../onlineddl_vrepl_mini_stress_test.go | 521 ++++++++++++++++++ test/ci_workflow_gen.go | 2 +- test/config.json | 9 + 4 files changed, 571 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/cluster_endtoend_onlineddl_vrepl_stress.yml create mode 100644 go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go diff --git a/.github/workflows/cluster_endtoend_onlineddl_vrepl_stress.yml b/.github/workflows/cluster_endtoend_onlineddl_vrepl_stress.yml new file mode 100644 index 00000000000..5ae84cee56e --- /dev/null +++ b/.github/workflows/cluster_endtoend_onlineddl_vrepl_stress.yml @@ -0,0 +1,40 @@ +# DO NOT MODIFY: THIS FILE IS GENERATED USING "make generate_ci_workflows" + +name: Cluster (onlineddl_vrepl_stress) +on: [push, pull_request] +jobs: + + build: + name: Run endtoend tests on Cluster (onlineddl_vrepl_stress) + runs-on: ubuntu-latest + + steps: + - name: Set up Go + uses: actions/setup-go@v1 + with: + go-version: 1.15 + + - name: Check out code + uses: actions/checkout@v2 + + - name: Get dependencies + run: | + sudo apt-get update + sudo apt-get install -y mysql-server mysql-client make unzip g++ etcd curl git wget eatmydata + sudo service mysql stop + sudo service etcd stop + sudo ln -s /etc/apparmor.d/usr.sbin.mysqld /etc/apparmor.d/disable/ + sudo apparmor_parser -R /etc/apparmor.d/usr.sbin.mysqld + go mod download + + wget https://repo.percona.com/apt/percona-release_latest.$(lsb_release -sc)_all.deb + sudo apt-get install -y gnupg2 + sudo dpkg -i percona-release_latest.$(lsb_release -sc)_all.deb + sudo apt-get update + sudo 
apt-get install percona-xtrabackup-24 + + - name: Run cluster endtoend test + timeout-minutes: 30 + run: | + source build.env + eatmydata -- go run test.go -docker=false -print-log -follow -shard onlineddl_vrepl_stress diff --git a/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go b/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go new file mode 100644 index 00000000000..e594d79a997 --- /dev/null +++ b/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go @@ -0,0 +1,521 @@ +/* +Copyright 2019 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package onlineddl + +import ( + "context" + "flag" + "fmt" + "io/ioutil" + "net/http" + "os" + "path" + "regexp" + "strings" + "sync" + "testing" + "time" + + "vitess.io/vitess/go/mysql" + "vitess.io/vitess/go/sqltypes" + "vitess.io/vitess/go/vt/schema" + throttlebase "vitess.io/vitess/go/vt/vttablet/tabletserver/throttle/base" + + "vitess.io/vitess/go/test/endtoend/cluster" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var ( + clusterInstance *cluster.LocalProcessCluster + vtParams mysql.ConnParams + httpClient = throttlebase.SetupHTTPClient(time.Second) + throttlerAppName = "vreplication" + + hostname = "localhost" + keyspaceName = "ks" + cell = "zone1" + schemaChangeDirectory = "" + totalTableCount = 4 + createTable = ` + CREATE TABLE %s ( + id bigint(20) NOT NULL, + test_val bigint unsigned NOT NULL DEFAULT 0, + msg varchar(64), + PRIMARY KEY (id) + ) ENGINE=InnoDB;` + // To verify non online-DDL behavior + alterTableNormalStatement = ` + ALTER TABLE %s + ADD COLUMN non_online int UNSIGNED NOT NULL DEFAULT 0` + // A trivial statement which must succeed and does not change the schema + alterTableTrivialStatement = ` + ALTER TABLE %s + ENGINE=InnoDB` + // The following statement is valid + alterTableSuccessfulStatement = ` + ALTER TABLE %s + MODIFY id bigint UNSIGNED NOT NULL, + ADD COLUMN vrepl_col int NOT NULL DEFAULT 0, + ADD INDEX idx_msg(msg)` + // The following statement will fail because vreplication requires shared PRIMARY KEY columns + alterTableFailedStatement = ` + ALTER TABLE %s + DROP PRIMARY KEY, + DROP COLUMN vrepl_col` + // We will run this query while throttling vreplication + alterTableThrottlingStatement = ` + ALTER TABLE %s + DROP COLUMN vrepl_col` + onlineDDLCreateTableStatement = ` + CREATE TABLE %s ( + id bigint NOT NULL, + test_val bigint unsigned NOT NULL DEFAULT 0, + online_ddl_create_col INT NOT NULL, + PRIMARY KEY (id) + ) ENGINE=InnoDB;` + onlineDDLDropTableStatement = ` + DROP TABLE %s` 
+ onlineDDLDropTableIfExistsStatement = ` + DROP TABLE IF EXISTS %s` + insertRowStatement = ` + INSERT INTO %s (id, test_val) VALUES (%d, 1) + ` + selectCountRowsStatement = ` + SELECT COUNT(*) AS c FROM %s + ` + countInserts int64 + insertMutex sync.Mutex +) + +func fullWordUUIDRegexp(uuid, searchWord string) *regexp.Regexp { + return regexp.MustCompile(uuid + `.*?\b` + searchWord + `\b`) +} +func fullWordRegexp(searchWord string) *regexp.Regexp { + return regexp.MustCompile(`.*?\b` + searchWord + `\b`) +} + +func TestMain(m *testing.M) { + defer cluster.PanicHandler(nil) + flag.Parse() + + exitcode, err := func() (int, error) { + clusterInstance = cluster.NewCluster(cell, hostname) + schemaChangeDirectory = path.Join("/tmp", fmt.Sprintf("schema_change_dir_%d", clusterInstance.GetAndReserveTabletUID())) + defer os.RemoveAll(schemaChangeDirectory) + defer clusterInstance.Teardown() + + if _, err := os.Stat(schemaChangeDirectory); os.IsNotExist(err) { + _ = os.Mkdir(schemaChangeDirectory, 0700) + } + + clusterInstance.VtctldExtraArgs = []string{ + "-schema_change_dir", schemaChangeDirectory, + "-schema_change_controller", "local", + "-schema_change_check_interval", "1"} + + clusterInstance.VtTabletExtraArgs = []string{ + "-enable-lag-throttler", + "-throttle_threshold", "1s", + "-heartbeat_enable", + "-heartbeat_interval", "250ms", + "-migration_check_interval", "5s", + } + clusterInstance.VtGateExtraArgs = []string{ + "-ddl_strategy", "online", + } + + if err := clusterInstance.StartTopo(); err != nil { + return 1, err + } + + // Start keyspace + keyspace := &cluster.Keyspace{ + Name: keyspaceName, + } + + if err := clusterInstance.StartUnshardedKeyspace(*keyspace, 2, true); err != nil { + return 1, err + } + if err := clusterInstance.StartKeyspace(*keyspace, []string{"1"}, 1, false); err != nil { + return 1, err + } + + vtgateInstance := clusterInstance.NewVtgateInstance() + // set the gateway we want to use + vtgateInstance.GatewayImplementation = "tabletgateway" 
+ // Start vtgate + if err := vtgateInstance.Setup(); err != nil { + return 1, err + } + // ensure it is torn down during cluster TearDown + clusterInstance.VtgateProcess = *vtgateInstance + vtParams = mysql.ConnParams{ + Host: clusterInstance.Hostname, + Port: clusterInstance.VtgateMySQLPort, + } + + return m.Run(), nil + }() + if err != nil { + fmt.Printf("%v\n", err) + os.Exit(1) + } else { + os.Exit(exitcode) + } + +} + +func throttleResponse(tablet *cluster.Vttablet, path string) (resp *http.Response, respBody string, err error) { + apiURL := fmt.Sprintf("http://%s:%d/%s", tablet.VttabletProcess.TabletHostname, tablet.HTTPPort, path) + resp, err = httpClient.Get(apiURL) + if err != nil { + return resp, respBody, err + } + b, err := ioutil.ReadAll(resp.Body) + respBody = string(b) + return resp, respBody, err +} + +func throttleApp(tablet *cluster.Vttablet, app string) (*http.Response, string, error) { + return throttleResponse(tablet, fmt.Sprintf("throttler/throttle-app?app=%s&duration=1h", app)) +} + +func unthrottleApp(tablet *cluster.Vttablet, app string) (*http.Response, string, error) { + return throttleResponse(tablet, fmt.Sprintf("throttler/unthrottle-app?app=%s", app)) +} + +func TestSchemaChange(t *testing.T) { + defer cluster.PanicHandler(t) + assert.Equal(t, 2, len(clusterInstance.Keyspaces[0].Shards)) + testWithInitialSchema(t) + t.Run("create non_online", func(t *testing.T) { + _ = testOnlineDDLStatement(t, alterTableNormalStatement, string(schema.DDLStrategyDirect), "vtctl", "non_online") + insertRows(t, 2) + testRows(t) + }) + t.Run("successful online alter, vtgate", func(t *testing.T) { + insertRows(t, 2) + uuid := testOnlineDDLStatement(t, alterTableSuccessfulStatement, "online", "vtgate", "vrepl_col") + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) + testRows(t) + checkCancelMigration(t, uuid, false) + checkRetryMigration(t, uuid, false) + }) + t.Run("successful online alter, vtctl", func(t *testing.T) { + insertRows(t, 2) + 
uuid := testOnlineDDLStatement(t, alterTableTrivialStatement, "online", "vtctl", "vrepl_col") + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) + testRows(t) + checkCancelMigration(t, uuid, false) + checkRetryMigration(t, uuid, false) + }) + t.Run("throttled migration", func(t *testing.T) { + insertRows(t, 2) + for i := range clusterInstance.Keyspaces[0].Shards { + throttleApp(clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], throttlerAppName) + defer unthrottleApp(clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], throttlerAppName) + } + uuid := testOnlineDDLStatement(t, alterTableThrottlingStatement, "online", "vtgate", "vrepl_col") + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusRunning) + testRows(t) + checkCancelMigration(t, uuid, true) + time.Sleep(2 * time.Second) + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusFailed) + }) + t.Run("failed migration", func(t *testing.T) { + insertRows(t, 2) + uuid := testOnlineDDLStatement(t, alterTableFailedStatement, "online", "vtgate", "vrepl_col") + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusFailed) + testRows(t) + checkCancelMigration(t, uuid, false) + checkRetryMigration(t, uuid, true) + // migration will fail again + }) + t.Run("cancel all migrations: nothing to cancel", func(t *testing.T) { + // no migrations pending at this time + time.Sleep(10 * time.Second) + checkCancelAllMigrations(t, 0) + }) + t.Run("cancel all migrations: some migrations to cancel", func(t *testing.T) { + for i := range clusterInstance.Keyspaces[0].Shards { + throttleApp(clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], throttlerAppName) + defer unthrottleApp(clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], throttlerAppName) + } + // spawn n migrations; cancel them via cancel-all + var wg sync.WaitGroup + count := 4 + for i := 0; i < count; i++ { + wg.Add(1) + go func() { + defer wg.Done() + _ = testOnlineDDLStatement(t, alterTableThrottlingStatement, "online", "vtgate", "vrepl_col") + }() + } 
+ wg.Wait() + checkCancelAllMigrations(t, count) + }) + t.Run("Online DROP, vtctl", func(t *testing.T) { + uuid := testOnlineDDLStatement(t, onlineDDLDropTableStatement, "online", "vtctl", "") + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) + checkCancelMigration(t, uuid, false) + checkRetryMigration(t, uuid, false) + }) + t.Run("Online CREATE, vtctl", func(t *testing.T) { + uuid := testOnlineDDLStatement(t, onlineDDLCreateTableStatement, "online", "vtctl", "online_ddl_create_col") + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) + checkCancelMigration(t, uuid, false) + checkRetryMigration(t, uuid, false) + }) + t.Run("Online DROP TABLE IF EXISTS, vtgate", func(t *testing.T) { + uuid := testOnlineDDLStatement(t, onlineDDLDropTableIfExistsStatement, "online", "vtgate", "") + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) + checkCancelMigration(t, uuid, false) + checkRetryMigration(t, uuid, false) + // this table existed + checkTables(t, schema.OnlineDDLToGCUUID(uuid), 1) + }) + t.Run("Online DROP TABLE IF EXISTS for nonexistent table, vtgate", func(t *testing.T) { + uuid := testOnlineDDLStatement(t, onlineDDLDropTableIfExistsStatement, "online", "vtgate", "") + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) + checkCancelMigration(t, uuid, false) + checkRetryMigration(t, uuid, false) + // this table did not exist + checkTables(t, schema.OnlineDDLToGCUUID(uuid), 0) + }) + t.Run("Online DROP TABLE for nonexistent table, expect error, vtgate", func(t *testing.T) { + uuid := testOnlineDDLStatement(t, onlineDDLDropTableStatement, "online", "vtgate", "") + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusFailed) + checkCancelMigration(t, uuid, false) + checkRetryMigration(t, uuid, true) + }) +} + +func insertRow(t *testing.T) { + insertMutex.Lock() + defer insertMutex.Unlock() + + tableName := fmt.Sprintf("vt_onlineddl_test_%02d", 3) + sqlQuery := fmt.Sprintf(insertRowStatement, tableName, countInserts) + 
r := vtgateExecQuery(t, sqlQuery, "") + require.NotNil(t, r) + countInserts++ +} + +func insertRows(t *testing.T, count int) { + for i := 0; i < count; i++ { + insertRow(t) + } +} + +func testRows(t *testing.T) { + insertMutex.Lock() + defer insertMutex.Unlock() + + tableName := fmt.Sprintf("vt_onlineddl_test_%02d", 3) + sqlQuery := fmt.Sprintf(selectCountRowsStatement, tableName) + r := vtgateExecQuery(t, sqlQuery, "") + require.NotNil(t, r) + row := r.Named().Row() + require.NotNil(t, row) + require.Equal(t, countInserts, row.AsInt64("c", 0)) +} + +func testWithInitialSchema(t *testing.T) { + // Create 4 tables + var sqlQuery = "" //nolint + for i := 0; i < totalTableCount; i++ { + sqlQuery = fmt.Sprintf(createTable, fmt.Sprintf("vt_onlineddl_test_%02d", i)) + err := clusterInstance.VtctlclientProcess.ApplySchema(keyspaceName, sqlQuery) + require.Nil(t, err) + } + + // Check if 4 tables are created + checkTables(t, "", totalTableCount) +} + +// testOnlineDDLStatement runs an online DDL, ALTER statement +func testOnlineDDLStatement(t *testing.T, alterStatement string, ddlStrategy string, executeStrategy string, expectColumn string) (uuid string) { + tableName := fmt.Sprintf("vt_onlineddl_test_%02d", 3) + sqlQuery := fmt.Sprintf(alterStatement, tableName) + if executeStrategy == "vtgate" { + row := vtgateExec(t, ddlStrategy, sqlQuery, "").Named().Row() + if row != nil { + uuid = row.AsString("uuid", "") + } + } else { + var err error + uuid, err = clusterInstance.VtctlclientProcess.ApplySchemaWithOutput(keyspaceName, sqlQuery, ddlStrategy) + assert.NoError(t, err) + } + uuid = strings.TrimSpace(uuid) + fmt.Println("# Generated UUID (for debug purposes):") + fmt.Printf("<%s>\n", uuid) + + strategy, _, err := schema.ParseDDLStrategy(ddlStrategy) + assert.NoError(t, err) + + if !strategy.IsDirect() { + time.Sleep(time.Second * 20) + } + + if expectColumn != "" { + checkMigratedTable(t, tableName, expectColumn) + } + return uuid +} + +// checkTables checks the number 
of tables in the first two shards. +func checkTables(t *testing.T, showTableName string, expectCount int) { + for i := range clusterInstance.Keyspaces[0].Shards { + checkTablesCount(t, clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], showTableName, expectCount) + } +} + +// checkTablesCount checks the number of tables in the given tablet +func checkTablesCount(t *testing.T, tablet *cluster.Vttablet, showTableName string, expectCount int) { + query := fmt.Sprintf(`show tables like '%%%s%%';`, showTableName) + queryResult, err := tablet.VttabletProcess.QueryTablet(query, keyspaceName, true) + require.Nil(t, err) + assert.Equal(t, expectCount, len(queryResult.Rows)) +} + +// checkRecentMigrations checks 'OnlineDDL show recent' output. Example to such output: +// +------------------+-------+--------------+----------------------+--------------------------------------+----------+---------------------+---------------------+------------------+ +// | Tablet | shard | mysql_schema | mysql_table | migration_uuid | strategy | started_timestamp | completed_timestamp | migration_status | +// +------------------+-------+--------------+----------------------+--------------------------------------+----------+---------------------+---------------------+------------------+ +// | zone1-0000003880 | 0 | vt_ks | vt_onlineddl_test_03 | a0638f6b_ec7b_11ea_9bf8_000d3a9b8a9a | online | 2020-09-01 17:50:40 | 2020-09-01 17:50:41 | complete | +// | zone1-0000003884 | 1 | vt_ks | vt_onlineddl_test_03 | a0638f6b_ec7b_11ea_9bf8_000d3a9b8a9a | online | 2020-09-01 17:50:40 | 2020-09-01 17:50:41 | complete | +// +------------------+-------+--------------+----------------------+--------------------------------------+----------+---------------------+---------------------+------------------+ + +func checkRecentMigrations(t *testing.T, uuid string, expectStatus schema.OnlineDDLStatus) { + result, err := clusterInstance.VtctlclientProcess.OnlineDDLShowRecent(keyspaceName) + assert.NoError(t, err) + 
fmt.Println("# 'vtctlclient OnlineDDL show recent' output (for debug purposes):") + fmt.Println(result) + assert.Equal(t, len(clusterInstance.Keyspaces[0].Shards), strings.Count(result, uuid)) + // We ensure "full word" regexp becuase some column names may conflict + expectStatusRegexp := fullWordUUIDRegexp(uuid, string(expectStatus)) + m := expectStatusRegexp.FindAllString(result, -1) + assert.Equal(t, len(clusterInstance.Keyspaces[0].Shards), len(m)) +} + +// checkCancelMigration attempts to cancel a migration, and expects rejection +func checkCancelMigration(t *testing.T, uuid string, expectCancelPossible bool) { + result, err := clusterInstance.VtctlclientProcess.OnlineDDLCancelMigration(keyspaceName, uuid) + fmt.Println("# 'vtctlclient OnlineDDL cancel ' output (for debug purposes):") + fmt.Println(result) + assert.NoError(t, err) + + var r *regexp.Regexp + if expectCancelPossible { + r = fullWordRegexp("1") + } else { + r = fullWordRegexp("0") + } + m := r.FindAllString(result, -1) + assert.Equal(t, len(clusterInstance.Keyspaces[0].Shards), len(m)) +} + +// checkCancelAllMigrations all pending migrations +func checkCancelAllMigrations(t *testing.T, expectCount int) { + result, err := clusterInstance.VtctlclientProcess.OnlineDDLCancelAllMigrations(keyspaceName) + fmt.Println("# 'vtctlclient OnlineDDL cancel-all' output (for debug purposes):") + fmt.Println(result) + assert.NoError(t, err) + + r := fullWordRegexp(fmt.Sprintf("%d", expectCount)) + m := r.FindAllString(result, -1) + assert.Equal(t, len(clusterInstance.Keyspaces[0].Shards), len(m)) +} + +// checkRetryMigration attempts to retry a migration, and expects rejection +func checkRetryMigration(t *testing.T, uuid string, expectRetryPossible bool) { + result, err := clusterInstance.VtctlclientProcess.OnlineDDLRetryMigration(keyspaceName, uuid) + fmt.Println("# 'vtctlclient OnlineDDL retry ' output (for debug purposes):") + fmt.Println(result) + assert.NoError(t, err) + + var r *regexp.Regexp + if 
expectRetryPossible { + r = fullWordRegexp("1") + } else { + r = fullWordRegexp("0") + } + m := r.FindAllString(result, -1) + assert.Equal(t, len(clusterInstance.Keyspaces[0].Shards), len(m)) +} + +// checkMigratedTables checks the CREATE STATEMENT of a table after migration +func checkMigratedTable(t *testing.T, tableName, expectColumn string) { + for i := range clusterInstance.Keyspaces[0].Shards { + createStatement := getCreateTableStatement(t, clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], tableName) + assert.Contains(t, createStatement, expectColumn) + } +} + +// getCreateTableStatement returns the CREATE TABLE statement for a given table +func getCreateTableStatement(t *testing.T, tablet *cluster.Vttablet, tableName string) (statement string) { + queryResult, err := tablet.VttabletProcess.QueryTablet(fmt.Sprintf("show create table %s;", tableName), keyspaceName, true) + require.Nil(t, err) + + assert.Equal(t, len(queryResult.Rows), 1) + assert.Equal(t, len(queryResult.Rows[0]), 2) // table name, create statement + statement = queryResult.Rows[0][1].ToString() + return statement +} + +func vtgateExecQuery(t *testing.T, query string, expectError string) *sqltypes.Result { + t.Helper() + + ctx := context.Background() + conn, err := mysql.Connect(ctx, &vtParams) + require.Nil(t, err) + defer conn.Close() + + qr, err := conn.ExecuteFetch(query, 1000, true) + if expectError == "" { + require.NoError(t, err) + } else { + require.Error(t, err, "error should not be nil") + assert.Contains(t, err.Error(), expectError, "Unexpected error") + } + return qr +} + +func vtgateExec(t *testing.T, ddlStrategy string, query string, expectError string) *sqltypes.Result { + t.Helper() + + ctx := context.Background() + conn, err := mysql.Connect(ctx, &vtParams) + require.Nil(t, err) + defer conn.Close() + + setSession := fmt.Sprintf("set @@ddl_strategy='%s'", ddlStrategy) + _, err = conn.ExecuteFetch(setSession, 1000, true) + assert.NoError(t, err) + + qr, err := 
conn.ExecuteFetch(query, 1000, true) + if expectError == "" { + require.NoError(t, err) + } else { + require.Error(t, err, "error should not be nil") + assert.Contains(t, err.Error(), expectError, "Unexpected error") + } + return qr +} diff --git a/test/ci_workflow_gen.go b/test/ci_workflow_gen.go index 6050f8a49c5..72650ea52f7 100644 --- a/test/ci_workflow_gen.go +++ b/test/ci_workflow_gen.go @@ -32,7 +32,7 @@ const ( unitTestDatabases = "percona56, mysql57, mysql80, mariadb101, mariadb102, mariadb103" clusterTestTemplate = "templates/cluster_endtoend_test.tpl" - clusterList = "11,12,13,14,15,16,17,18,19,20,21,22,23,24,27,vreplication_basic,vreplication_multicell,vreplication_cellalias,vreplication_v2,onlineddl_ghost,onlineddl_vrepl" + clusterList = "11,12,13,14,15,16,17,18,19,20,21,22,23,24,27,vreplication_basic,vreplication_multicell,vreplication_cellalias,vreplication_v2,onlineddl_ghost,onlineddl_vrepl,onlineddl_vrepl_stress" // TODO: currently some percona tools including xtrabackup are installed on all clusters, we can possibly optimize // this by only installing them in the required clusters clustersRequiringXtraBackup = clusterList diff --git a/test/config.json b/test/config.json index ed678fb272c..f20214175b3 100644 --- a/test/config.json +++ b/test/config.json @@ -284,6 +284,15 @@ "RetryMax": 0, "Tags": [] }, + "onlineddl_vrepl_stress": { + "File": "unused.go", + "Args": ["vitess.io/vitess/go/test/endtoend/onlineddl_vrepl_stress"], + "Command": [], + "Manual": false, + "Shard": "onlineddl_vrepl_stress", + "RetryMax": 0, + "Tags": [] + }, "pitr": { "File": "unused.go", "Args": ["vitess.io/vitess/go/test/endtoend/recovery/pitr"], From 28e1e9309be5ade26694ae7ce84f5e51e1e5d44c Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Mon, 15 Feb 2021 09:33:54 +0200 Subject: [PATCH 57/72] mini stress testing for vreplication based online DDL Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- 
.../endtoend/cluster/vtctlclient_process.go | 9 + .../onlineddl_vrepl_mini_stress_test.go | 526 ++++++++---------- test/config.json | 2 +- 3 files changed, 256 insertions(+), 281 deletions(-) diff --git a/go/test/endtoend/cluster/vtctlclient_process.go b/go/test/endtoend/cluster/vtctlclient_process.go index ee47a9e5fae..420656d4ff9 100644 --- a/go/test/endtoend/cluster/vtctlclient_process.go +++ b/go/test/endtoend/cluster/vtctlclient_process.go @@ -122,6 +122,15 @@ func (vtctlclient *VtctlClientProcess) OnlineDDLRetryMigration(Keyspace, uuid st ) } +// VExec runs a VExec query +func (vtctlclient *VtctlClientProcess) VExec(Keyspace, workflow, query string) (result string, err error) { + return vtctlclient.ExecuteCommandWithOutput( + "VExec", + fmt.Sprintf("%s.%s", Keyspace, workflow), + query, + ) +} + // ExecuteCommand executes any vtctlclient command func (vtctlclient *VtctlClientProcess) ExecuteCommand(args ...string) (err error) { output, err := vtctlclient.ExecuteCommandWithOutput(args...) 
diff --git a/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go b/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go index e594d79a997..a9ced2ba3cf 100644 --- a/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go +++ b/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go @@ -20,8 +20,7 @@ import ( "context" "flag" "fmt" - "io/ioutil" - "net/http" + "math/rand" "os" "path" "regexp" @@ -32,8 +31,8 @@ import ( "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/sqltypes" + "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/schema" - throttlebase "vitess.io/vitess/go/vt/vttablet/tabletserver/throttle/base" "vitess.io/vitess/go/test/endtoend/cluster" @@ -41,74 +40,76 @@ import ( "github.com/stretchr/testify/require" ) +type WriteMetrics struct { + mu sync.Mutex + inserts, updates, deletes int64 +} + +func (w *WriteMetrics) Clear() { + w.mu.Lock() + defer w.mu.Unlock() + + w.inserts = 0 + w.updates = 0 + w.deletes = 0 +} + +func (w *WriteMetrics) String() string { + return fmt.Sprintf("WriteMetrics: inserts=%d, updates=%d, deletes=%d", w.inserts, w.updates, w.deletes) +} + var ( - clusterInstance *cluster.LocalProcessCluster - vtParams mysql.ConnParams - httpClient = throttlebase.SetupHTTPClient(time.Second) - throttlerAppName = "vreplication" + clusterInstance *cluster.LocalProcessCluster + vtParams mysql.ConnParams hostname = "localhost" keyspaceName = "ks" cell = "zone1" schemaChangeDirectory = "" - totalTableCount = 4 - createTable = ` - CREATE TABLE %s ( - id bigint(20) NOT NULL, - test_val bigint unsigned NOT NULL DEFAULT 0, - msg varchar(64), - PRIMARY KEY (id) - ) ENGINE=InnoDB;` - // To verify non online-DDL behavior - alterTableNormalStatement = ` - ALTER TABLE %s - ADD COLUMN non_online int UNSIGNED NOT NULL DEFAULT 0` - // A trivial statement which must succeed and does not change the schema - alterTableTrivialStatement = ` - ALTER TABLE %s - ENGINE=InnoDB` - // 
The following statement is valid - alterTableSuccessfulStatement = ` - ALTER TABLE %s - MODIFY id bigint UNSIGNED NOT NULL, - ADD COLUMN vrepl_col int NOT NULL DEFAULT 0, - ADD INDEX idx_msg(msg)` - // The following statement will fail because vreplication requires shared PRIMARY KEY columns - alterTableFailedStatement = ` - ALTER TABLE %s - DROP PRIMARY KEY, - DROP COLUMN vrepl_col` - // We will run this query while throttling vreplication - alterTableThrottlingStatement = ` - ALTER TABLE %s - DROP COLUMN vrepl_col` - onlineDDLCreateTableStatement = ` - CREATE TABLE %s ( - id bigint NOT NULL, - test_val bigint unsigned NOT NULL DEFAULT 0, - online_ddl_create_col INT NOT NULL, - PRIMARY KEY (id) - ) ENGINE=InnoDB;` - onlineDDLDropTableStatement = ` - DROP TABLE %s` - onlineDDLDropTableIfExistsStatement = ` - DROP TABLE IF EXISTS %s` + tableName = `stress_test` + createStatement = ` + CREATE TABLE stress_test ( + id bigint(20) not null, + rand_val varchar(32) null default '', + hint_col varchar(64) not null default '', + created_timestamp timestamp not null default current_timestamp, + updates int unsigned not null default 0, + PRIMARY KEY (id), + key created_idx(created_timestamp), + key updates_idx(updates) + ) ENGINE=InnoDB + ` + alterHintStatement = ` + ALTER TABLE stress_test modify hint_col varchar(64) not null default '%s' + ` insertRowStatement = ` - INSERT INTO %s (id, test_val) VALUES (%d, 1) + INSERT IGNORE INTO stress_test (id, rand_val) VALUES (%d, left(md5(rand()), 8)) + ` + updateRowStatement = ` + UPDATE stress_test SET updates=updates+1 ORDER BY RAND() LIMIT 1 ` + deleteRowStatement = ` + DELETE FROM stress_test WHERE updates=1 ORDER BY RAND() LIMIT 1 + ` + // We use CAST(SUM(updates) AS SIGNED) because SUM() returns a DECIMAL datatype, and we want to read a SIGNED INTEGER type selectCountRowsStatement = ` - SELECT COUNT(*) AS c FROM %s + SELECT COUNT(*) AS num_rows, CAST(SUM(updates) AS SIGNED) AS num_updates FROM stress_test + ` + 
truncateStatement = ` + TRUNCATE TABLE stress_test ` - countInserts int64 - insertMutex sync.Mutex + writeMetrics WriteMetrics +) + +const ( + maxTableRows = 4096 + maxConcurrency = 5 + countIterations = 5 ) func fullWordUUIDRegexp(uuid, searchWord string) *regexp.Regexp { return regexp.MustCompile(uuid + `.*?\b` + searchWord + `\b`) } -func fullWordRegexp(searchWord string) *regexp.Regexp { - return regexp.MustCompile(`.*?\b` + searchWord + `\b`) -} func TestMain(m *testing.M) { defer cluster.PanicHandler(nil) @@ -181,186 +182,63 @@ func TestMain(m *testing.M) { } -func throttleResponse(tablet *cluster.Vttablet, path string) (resp *http.Response, respBody string, err error) { - apiURL := fmt.Sprintf("http://%s:%d/%s", tablet.VttabletProcess.TabletHostname, tablet.HTTPPort, path) - resp, err = httpClient.Get(apiURL) - if err != nil { - return resp, respBody, err - } - b, err := ioutil.ReadAll(resp.Body) - respBody = string(b) - return resp, respBody, err -} - -func throttleApp(tablet *cluster.Vttablet, app string) (*http.Response, string, error) { - return throttleResponse(tablet, fmt.Sprintf("throttler/throttle-app?app=%s&duration=1h", app)) -} - -func unthrottleApp(tablet *cluster.Vttablet, app string) (*http.Response, string, error) { - return throttleResponse(tablet, fmt.Sprintf("throttler/unthrottle-app?app=%s", app)) -} - func TestSchemaChange(t *testing.T) { defer cluster.PanicHandler(t) - assert.Equal(t, 2, len(clusterInstance.Keyspaces[0].Shards)) - testWithInitialSchema(t) - t.Run("create non_online", func(t *testing.T) { - _ = testOnlineDDLStatement(t, alterTableNormalStatement, string(schema.DDLStrategyDirect), "vtctl", "non_online") - insertRows(t, 2) - testRows(t) - }) - t.Run("successful online alter, vtgate", func(t *testing.T) { - insertRows(t, 2) - uuid := testOnlineDDLStatement(t, alterTableSuccessfulStatement, "online", "vtgate", "vrepl_col") - checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) - testRows(t) - checkCancelMigration(t, 
uuid, false) - checkRetryMigration(t, uuid, false) - }) - t.Run("successful online alter, vtctl", func(t *testing.T) { - insertRows(t, 2) - uuid := testOnlineDDLStatement(t, alterTableTrivialStatement, "online", "vtctl", "vrepl_col") - checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) - testRows(t) - checkCancelMigration(t, uuid, false) - checkRetryMigration(t, uuid, false) - }) - t.Run("throttled migration", func(t *testing.T) { - insertRows(t, 2) - for i := range clusterInstance.Keyspaces[0].Shards { - throttleApp(clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], throttlerAppName) - defer unthrottleApp(clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], throttlerAppName) - } - uuid := testOnlineDDLStatement(t, alterTableThrottlingStatement, "online", "vtgate", "vrepl_col") - checkRecentMigrations(t, uuid, schema.OnlineDDLStatusRunning) - testRows(t) - checkCancelMigration(t, uuid, true) - time.Sleep(2 * time.Second) - checkRecentMigrations(t, uuid, schema.OnlineDDLStatusFailed) - }) - t.Run("failed migration", func(t *testing.T) { - insertRows(t, 2) - uuid := testOnlineDDLStatement(t, alterTableFailedStatement, "online", "vtgate", "vrepl_col") - checkRecentMigrations(t, uuid, schema.OnlineDDLStatusFailed) - testRows(t) - checkCancelMigration(t, uuid, false) - checkRetryMigration(t, uuid, true) - // migration will fail again - }) - t.Run("cancel all migrations: nothing to cancel", func(t *testing.T) { - // no migrations pending at this time - time.Sleep(10 * time.Second) - checkCancelAllMigrations(t, 0) - }) - t.Run("cancel all migrations: some migrations to cancel", func(t *testing.T) { - for i := range clusterInstance.Keyspaces[0].Shards { - throttleApp(clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], throttlerAppName) - defer unthrottleApp(clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], throttlerAppName) - } - // spawn n migrations; cancel them via cancel-all - var wg sync.WaitGroup - count := 4 - for i := 0; i < count; i++ { - 
wg.Add(1) - go func() { - defer wg.Done() - _ = testOnlineDDLStatement(t, alterTableThrottlingStatement, "online", "vtgate", "vrepl_col") - }() - } - wg.Wait() - checkCancelAllMigrations(t, count) - }) - t.Run("Online DROP, vtctl", func(t *testing.T) { - uuid := testOnlineDDLStatement(t, onlineDDLDropTableStatement, "online", "vtctl", "") - checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) - checkCancelMigration(t, uuid, false) - checkRetryMigration(t, uuid, false) - }) - t.Run("Online CREATE, vtctl", func(t *testing.T) { - uuid := testOnlineDDLStatement(t, onlineDDLCreateTableStatement, "online", "vtctl", "online_ddl_create_col") - checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) - checkCancelMigration(t, uuid, false) - checkRetryMigration(t, uuid, false) + + ctx := context.Background() + + t.Run("create schema", func(t *testing.T) { + assert.Equal(t, 2, len(clusterInstance.Keyspaces[0].Shards)) + testWithInitialSchema(t) }) - t.Run("Online DROP TABLE IF EXISTS, vtgate", func(t *testing.T) { - uuid := testOnlineDDLStatement(t, onlineDDLDropTableIfExistsStatement, "online", "vtgate", "") - checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) - checkCancelMigration(t, uuid, false) - checkRetryMigration(t, uuid, false) - // this table existed - checkTables(t, schema.OnlineDDLToGCUUID(uuid), 1) + + t.Run("workload without ALTER TABLE", func(t *testing.T) { + initTable(t) + testSelectTableMetrics(t) }) - t.Run("Online DROP TABLE IF EXISTS for nonexistent table, vtgate", func(t *testing.T) { - uuid := testOnlineDDLStatement(t, onlineDDLDropTableIfExistsStatement, "online", "vtgate", "") + t.Run("ALTER TABLE without workload", func(t *testing.T) { + initTable(t) + hint := "hint-alter-without-workload" + uuid := testOnlineDDLStatement(t, fmt.Sprintf(alterHintStatement, hint), "online", "vtgate", hint) checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) - checkCancelMigration(t, uuid, false) - checkRetryMigration(t, uuid, 
false) - // this table did not exist - checkTables(t, schema.OnlineDDLToGCUUID(uuid), 0) - }) - t.Run("Online DROP TABLE for nonexistent table, expect error, vtgate", func(t *testing.T) { - uuid := testOnlineDDLStatement(t, onlineDDLDropTableStatement, "online", "vtgate", "") - checkRecentMigrations(t, uuid, schema.OnlineDDLStatusFailed) - checkCancelMigration(t, uuid, false) - checkRetryMigration(t, uuid, true) + testSelectTableMetrics(t) }) -} - -func insertRow(t *testing.T) { - insertMutex.Lock() - defer insertMutex.Unlock() - - tableName := fmt.Sprintf("vt_onlineddl_test_%02d", 3) - sqlQuery := fmt.Sprintf(insertRowStatement, tableName, countInserts) - r := vtgateExecQuery(t, sqlQuery, "") - require.NotNil(t, r) - countInserts++ -} -func insertRows(t *testing.T, count int) { - for i := 0; i < count; i++ { - insertRow(t) + for i := 0; i < countIterations; i++ { + testName := fmt.Sprintf("ALTER TABLE with workload %d/%d", (i + 1), countIterations) + t.Run(testName, func(t *testing.T) { + initTable(t) + done := make(chan bool) + go runMultipleConnections(ctx, t, done) + hint := "hint-alter-with-workload" + uuid := testOnlineDDLStatement(t, fmt.Sprintf(alterHintStatement, hint), "online", "vtgate", hint) + checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) + done <- true + time.Sleep(2 * time.Second) + testSelectTableMetrics(t) + }) } } -func testRows(t *testing.T) { - insertMutex.Lock() - defer insertMutex.Unlock() - - tableName := fmt.Sprintf("vt_onlineddl_test_%02d", 3) - sqlQuery := fmt.Sprintf(selectCountRowsStatement, tableName) - r := vtgateExecQuery(t, sqlQuery, "") - require.NotNil(t, r) - row := r.Named().Row() - require.NotNil(t, row) - require.Equal(t, countInserts, row.AsInt64("c", 0)) -} - func testWithInitialSchema(t *testing.T) { - // Create 4 tables - var sqlQuery = "" //nolint - for i := 0; i < totalTableCount; i++ { - sqlQuery = fmt.Sprintf(createTable, fmt.Sprintf("vt_onlineddl_test_%02d", i)) - err := 
clusterInstance.VtctlclientProcess.ApplySchema(keyspaceName, sqlQuery) - require.Nil(t, err) - } + // Create the stress table + err := clusterInstance.VtctlclientProcess.ApplySchema(keyspaceName, createStatement) + require.Nil(t, err) - // Check if 4 tables are created - checkTables(t, "", totalTableCount) + // Check if table is created + checkTable(t, tableName) } // testOnlineDDLStatement runs an online DDL, ALTER statement -func testOnlineDDLStatement(t *testing.T, alterStatement string, ddlStrategy string, executeStrategy string, expectColumn string) (uuid string) { - tableName := fmt.Sprintf("vt_onlineddl_test_%02d", 3) - sqlQuery := fmt.Sprintf(alterStatement, tableName) +func testOnlineDDLStatement(t *testing.T, alterStatement string, ddlStrategy string, executeStrategy string, expectHint string) (uuid string) { if executeStrategy == "vtgate" { - row := vtgateExec(t, ddlStrategy, sqlQuery, "").Named().Row() + row := vtgateExec(t, ddlStrategy, alterStatement, "").Named().Row() if row != nil { uuid = row.AsString("uuid", "") } } else { var err error - uuid, err = clusterInstance.VtctlclientProcess.ApplySchemaWithOutput(keyspaceName, sqlQuery, ddlStrategy) + uuid, err = clusterInstance.VtctlclientProcess.ApplySchemaWithOutput(keyspaceName, alterStatement, ddlStrategy) assert.NoError(t, err) } uuid = strings.TrimSpace(uuid) @@ -374,16 +252,16 @@ func testOnlineDDLStatement(t *testing.T, alterStatement string, ddlStrategy str time.Sleep(time.Second * 20) } - if expectColumn != "" { - checkMigratedTable(t, tableName, expectColumn) + if expectHint != "" { + checkMigratedTable(t, tableName, expectHint) } return uuid } -// checkTables checks the number of tables in the first two shards. -func checkTables(t *testing.T, showTableName string, expectCount int) { +// checkTable checks the number of tables in the first two shards. 
+func checkTable(t *testing.T, showTableName string) { for i := range clusterInstance.Keyspaces[0].Shards { - checkTablesCount(t, clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], showTableName, expectCount) + checkTablesCount(t, clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], showTableName, 1) } } @@ -396,12 +274,12 @@ func checkTablesCount(t *testing.T, tablet *cluster.Vttablet, showTableName stri } // checkRecentMigrations checks 'OnlineDDL show recent' output. Example to such output: -// +------------------+-------+--------------+----------------------+--------------------------------------+----------+---------------------+---------------------+------------------+ -// | Tablet | shard | mysql_schema | mysql_table | migration_uuid | strategy | started_timestamp | completed_timestamp | migration_status | -// +------------------+-------+--------------+----------------------+--------------------------------------+----------+---------------------+---------------------+------------------+ -// | zone1-0000003880 | 0 | vt_ks | vt_onlineddl_test_03 | a0638f6b_ec7b_11ea_9bf8_000d3a9b8a9a | online | 2020-09-01 17:50:40 | 2020-09-01 17:50:41 | complete | -// | zone1-0000003884 | 1 | vt_ks | vt_onlineddl_test_03 | a0638f6b_ec7b_11ea_9bf8_000d3a9b8a9a | online | 2020-09-01 17:50:40 | 2020-09-01 17:50:41 | complete | -// +------------------+-------+--------------+----------------------+--------------------------------------+----------+---------------------+---------------------+------------------+ +// +------------------+-------+--------------+-------------+--------------------------------------+----------+---------------------+---------------------+------------------+ +// | Tablet | shard | mysql_schema | mysql_table | migration_uuid | strategy | started_timestamp | completed_timestamp | migration_status | +// 
+------------------+-------+--------------+-------------+--------------------------------------+----------+---------------------+---------------------+------------------+ +// | zone1-0000003880 | 0 | vt_ks | stress_test | a0638f6b_ec7b_11ea_9bf8_000d3a9b8a9a | online | 2020-09-01 17:50:40 | 2020-09-01 17:50:41 | complete | +// | zone1-0000003884 | 1 | vt_ks | stress_test | a0638f6b_ec7b_11ea_9bf8_000d3a9b8a9a | online | 2020-09-01 17:50:40 | 2020-09-01 17:50:41 | complete | +// +------------------+-------+--------------+-------------+--------------------------------------+----------+---------------------+---------------------+------------------+ func checkRecentMigrations(t *testing.T, uuid string, expectStatus schema.OnlineDDLStatus) { result, err := clusterInstance.VtctlclientProcess.OnlineDDLShowRecent(keyspaceName) @@ -413,59 +291,18 @@ func checkRecentMigrations(t *testing.T, uuid string, expectStatus schema.Online expectStatusRegexp := fullWordUUIDRegexp(uuid, string(expectStatus)) m := expectStatusRegexp.FindAllString(result, -1) assert.Equal(t, len(clusterInstance.Keyspaces[0].Shards), len(m)) -} - -// checkCancelMigration attempts to cancel a migration, and expects rejection -func checkCancelMigration(t *testing.T, uuid string, expectCancelPossible bool) { - result, err := clusterInstance.VtctlclientProcess.OnlineDDLCancelMigration(keyspaceName, uuid) - fmt.Println("# 'vtctlclient OnlineDDL cancel ' output (for debug purposes):") - fmt.Println(result) - assert.NoError(t, err) - - var r *regexp.Regexp - if expectCancelPossible { - r = fullWordRegexp("1") - } else { - r = fullWordRegexp("0") - } - m := r.FindAllString(result, -1) - assert.Equal(t, len(clusterInstance.Keyspaces[0].Shards), len(m)) -} -// checkCancelAllMigrations all pending migrations -func checkCancelAllMigrations(t *testing.T, expectCount int) { - result, err := clusterInstance.VtctlclientProcess.OnlineDDLCancelAllMigrations(keyspaceName) - fmt.Println("# 'vtctlclient OnlineDDL cancel-all' 
output (for debug purposes):") - fmt.Println(result) + result, err = clusterInstance.VtctlclientProcess.VExec(keyspaceName, uuid, `select migration_status, message from _vt.schema_migrations`) assert.NoError(t, err) - - r := fullWordRegexp(fmt.Sprintf("%d", expectCount)) - m := r.FindAllString(result, -1) - assert.Equal(t, len(clusterInstance.Keyspaces[0].Shards), len(m)) -} - -// checkRetryMigration attempts to retry a migration, and expects rejection -func checkRetryMigration(t *testing.T, uuid string, expectRetryPossible bool) { - result, err := clusterInstance.VtctlclientProcess.OnlineDDLRetryMigration(keyspaceName, uuid) - fmt.Println("# 'vtctlclient OnlineDDL retry ' output (for debug purposes):") + fmt.Println("# 'vtctlclient VExec' output (for debug purposes):") fmt.Println(result) - assert.NoError(t, err) - - var r *regexp.Regexp - if expectRetryPossible { - r = fullWordRegexp("1") - } else { - r = fullWordRegexp("0") - } - m := r.FindAllString(result, -1) - assert.Equal(t, len(clusterInstance.Keyspaces[0].Shards), len(m)) } // checkMigratedTables checks the CREATE STATEMENT of a table after migration -func checkMigratedTable(t *testing.T, tableName, expectColumn string) { +func checkMigratedTable(t *testing.T, tableName, expectHint string) { for i := range clusterInstance.Keyspaces[0].Shards { createStatement := getCreateTableStatement(t, clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], tableName) - assert.Contains(t, createStatement, expectColumn) + assert.Contains(t, createStatement, expectHint) } } @@ -480,22 +317,151 @@ func getCreateTableStatement(t *testing.T, tablet *cluster.Vttablet, tableName s return statement } -func vtgateExecQuery(t *testing.T, query string, expectError string) *sqltypes.Result { - t.Helper() +func generateInsert(t *testing.T, conn *mysql.Conn) error { + id := rand.Int31n(int32(maxTableRows)) + query := fmt.Sprintf(insertRowStatement, id) + qr, err := conn.ExecuteFetch(query, 1000, true) + if err != nil { + return err + } 
+ if qr.RowsAffected > 0 { + writeMetrics.mu.Lock() + defer writeMetrics.mu.Unlock() + writeMetrics.inserts++ + } + return nil +} + +func generateUpdate(t *testing.T, conn *mysql.Conn) error { + qr, err := conn.ExecuteFetch(updateRowStatement, 1000, true) + if err != nil { + return err + } + if qr.RowsAffected > 0 { + writeMetrics.mu.Lock() + defer writeMetrics.mu.Unlock() + writeMetrics.updates++ + } + return nil +} + +func generateDelete(t *testing.T, conn *mysql.Conn) error { + qr, err := conn.ExecuteFetch(deleteRowStatement, 1000, true) + if err != nil { + return err + } + if qr.RowsAffected > 0 { + writeMetrics.mu.Lock() + defer writeMetrics.mu.Unlock() + writeMetrics.deletes++ + writeMetrics.updates-- // because we delete `where updates=1` + } + return nil +} + +func runSingleConnection(ctx context.Context, t *testing.T, done chan bool) { + log.Infof("Running single connection") + conn, err := mysql.Connect(ctx, &vtParams) + require.Nil(t, err) + defer conn.Close() + + _, err = conn.ExecuteFetch("set autocommit=1", 1000, true) + require.Nil(t, err) + + for { + select { + case <-done: + log.Infof("Terminating single connection") + return + default: + } + switch rand.Int31n(3) { + case 0: + err = generateInsert(t, conn) + case 1: + err = generateUpdate(t, conn) + case 2: + err = generateDelete(t, conn) + } + if err != nil { + if strings.Contains(err.Error(), "disallowed due to rule: enforce blacklisted tables") { + err = nil + } else if strings.Contains(err.Error(), "Deadlock found when trying to get lock") { + err = nil + } + } + assert.Nil(t, err) + time.Sleep(50 * time.Millisecond) + } +} + +func runMultipleConnections(ctx context.Context, t *testing.T, done chan bool) { + log.Infof("Running multiple connections") + var chans []chan bool + for i := 0; i < maxConcurrency; i++ { + d := make(chan bool) + chans = append(chans, d) + go runSingleConnection(ctx, t, d) + } + <-done + log.Infof("Running multiple connections: done") + for _, d := range chans { + 
log.Infof("Cancelling single connection") + d <- true + } + log.Infof("All connections cancelled") +} + +func initTable(t *testing.T) { + log.Infof("initTable begin") + defer log.Infof("initTable complete") ctx := context.Background() conn, err := mysql.Connect(ctx, &vtParams) require.Nil(t, err) defer conn.Close() - qr, err := conn.ExecuteFetch(query, 1000, true) - if expectError == "" { - require.NoError(t, err) - } else { - require.Error(t, err, "error should not be nil") - assert.Contains(t, err.Error(), expectError, "Unexpected error") + writeMetrics.Clear() + _, err = conn.ExecuteFetch(truncateStatement, 1000, true) + require.Nil(t, err) + + for i := 0; i < maxTableRows/2; i++ { + generateInsert(t, conn) } - return qr + for i := 0; i < maxTableRows/4; i++ { + generateUpdate(t, conn) + } + for i := 0; i < maxTableRows/4; i++ { + generateDelete(t, conn) + } + log.Infof("writeMetrics: %v", writeMetrics.String()) +} + +func testSelectTableMetrics(t *testing.T) { + writeMetrics.mu.Lock() + defer writeMetrics.mu.Unlock() + + ctx := context.Background() + conn, err := mysql.Connect(ctx, &vtParams) + require.Nil(t, err) + defer conn.Close() + + rs, err := conn.ExecuteFetch(selectCountRowsStatement, 1000, true) + require.Nil(t, err) + + row := rs.Named().Row() + require.NotNil(t, row) + log.Infof("testSelectTableMetrics, row: %v", row) + numRows := row.AsInt64("num_rows", 0) + numUpdates := row.AsInt64("num_updates", 0) + + assert.NotZero(t, numRows) + assert.NotZero(t, numUpdates) + assert.NotZero(t, writeMetrics.inserts) + assert.NotZero(t, writeMetrics.deletes) + assert.NotZero(t, writeMetrics.updates) + assert.Equal(t, numRows, writeMetrics.inserts-writeMetrics.deletes) + assert.Equal(t, numUpdates, writeMetrics.updates) } func vtgateExec(t *testing.T, ddlStrategy string, query string, expectError string) *sqltypes.Result { diff --git a/test/config.json b/test/config.json index f20214175b3..a8e4d3b6eeb 100644 --- a/test/config.json +++ b/test/config.json @@ -290,7 
+290,7 @@ "Command": [], "Manual": false, "Shard": "onlineddl_vrepl_stress", - "RetryMax": 0, + "RetryMax": 1, "Tags": [] }, "pitr": { From 80e9f0423061b23ed2555c5e89f1385de5bf6d5b Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Mon, 15 Feb 2021 11:22:11 +0200 Subject: [PATCH 58/72] eliminating deadlock with point UPDATE and point DELETE Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- .../onlineddl_vrepl_mini_stress_test.go | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go b/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go index a9ced2ba3cf..0e45dae9ae7 100644 --- a/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go +++ b/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go @@ -55,7 +55,7 @@ func (w *WriteMetrics) Clear() { } func (w *WriteMetrics) String() string { - return fmt.Sprintf("WriteMetrics: inserts=%d, updates=%d, deletes=%d", w.inserts, w.updates, w.deletes) + return fmt.Sprintf("WriteMetrics: inserts=%d, updates=%d, deletes=%d, inserts-deletes=%d, updates-deletes=%d", w.inserts, w.updates, w.deletes, w.inserts-w.deletes, w.updates-w.deletes) } var ( @@ -86,14 +86,14 @@ var ( INSERT IGNORE INTO stress_test (id, rand_val) VALUES (%d, left(md5(rand()), 8)) ` updateRowStatement = ` - UPDATE stress_test SET updates=updates+1 ORDER BY RAND() LIMIT 1 + UPDATE stress_test SET updates=updates+1 WHERE id=%d ` deleteRowStatement = ` - DELETE FROM stress_test WHERE updates=1 ORDER BY RAND() LIMIT 1 + DELETE FROM stress_test WHERE id=%d AND updates=1 ` // We use CAST(SUM(updates) AS SIGNED) because SUM() returns a DECIMAL datatype, and we want to read a SIGNED INTEGER type selectCountRowsStatement = ` - SELECT COUNT(*) AS num_rows, CAST(SUM(updates) AS SIGNED) AS num_updates FROM stress_test + 
SELECT COUNT(*) AS num_rows, CAST(SUM(updates) AS SIGNED) AS sum_updates FROM stress_test ` truncateStatement = ` TRUNCATE TABLE stress_test @@ -210,7 +210,7 @@ func TestSchemaChange(t *testing.T) { initTable(t) done := make(chan bool) go runMultipleConnections(ctx, t, done) - hint := "hint-alter-with-workload" + hint := fmt.Sprintf("hint-alter-with-workload-%d", i) uuid := testOnlineDDLStatement(t, fmt.Sprintf(alterHintStatement, hint), "online", "vtgate", hint) checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) done <- true @@ -324,6 +324,7 @@ func generateInsert(t *testing.T, conn *mysql.Conn) error { if err != nil { return err } + assert.Less(t, qr.RowsAffected, uint64(2)) if qr.RowsAffected > 0 { writeMetrics.mu.Lock() defer writeMetrics.mu.Unlock() @@ -333,10 +334,13 @@ func generateInsert(t *testing.T, conn *mysql.Conn) error { } func generateUpdate(t *testing.T, conn *mysql.Conn) error { - qr, err := conn.ExecuteFetch(updateRowStatement, 1000, true) + id := rand.Int31n(int32(maxTableRows)) + query := fmt.Sprintf(updateRowStatement, id) + qr, err := conn.ExecuteFetch(query, 1000, true) if err != nil { return err } + assert.Less(t, qr.RowsAffected, uint64(2)) if qr.RowsAffected > 0 { writeMetrics.mu.Lock() defer writeMetrics.mu.Unlock() @@ -346,15 +350,17 @@ func generateUpdate(t *testing.T, conn *mysql.Conn) error { } func generateDelete(t *testing.T, conn *mysql.Conn) error { - qr, err := conn.ExecuteFetch(deleteRowStatement, 1000, true) + id := rand.Int31n(int32(maxTableRows)) + query := fmt.Sprintf(deleteRowStatement, id) + qr, err := conn.ExecuteFetch(query, 1000, true) if err != nil { return err } + assert.Less(t, qr.RowsAffected, uint64(2)) if qr.RowsAffected > 0 { writeMetrics.mu.Lock() defer writeMetrics.mu.Unlock() writeMetrics.deletes++ - writeMetrics.updates-- // because we delete `where updates=1` } return nil } @@ -386,12 +392,10 @@ func runSingleConnection(ctx context.Context, t *testing.T, done chan bool) { if err != nil { if 
strings.Contains(err.Error(), "disallowed due to rule: enforce blacklisted tables") { err = nil - } else if strings.Contains(err.Error(), "Deadlock found when trying to get lock") { - err = nil } } assert.Nil(t, err) - time.Sleep(50 * time.Millisecond) + time.Sleep(20 * time.Millisecond) } } @@ -434,13 +438,14 @@ func initTable(t *testing.T) { for i := 0; i < maxTableRows/4; i++ { generateDelete(t, conn) } - log.Infof("writeMetrics: %v", writeMetrics.String()) } func testSelectTableMetrics(t *testing.T) { writeMetrics.mu.Lock() defer writeMetrics.mu.Unlock() + log.Infof("writeMetrics: %v", writeMetrics.String()) + ctx := context.Background() conn, err := mysql.Connect(ctx, &vtParams) require.Nil(t, err) @@ -453,15 +458,15 @@ func testSelectTableMetrics(t *testing.T) { require.NotNil(t, row) log.Infof("testSelectTableMetrics, row: %v", row) numRows := row.AsInt64("num_rows", 0) - numUpdates := row.AsInt64("num_updates", 0) + sumUpdates := row.AsInt64("sum_updates", 0) assert.NotZero(t, numRows) - assert.NotZero(t, numUpdates) + assert.NotZero(t, sumUpdates) assert.NotZero(t, writeMetrics.inserts) assert.NotZero(t, writeMetrics.deletes) assert.NotZero(t, writeMetrics.updates) assert.Equal(t, numRows, writeMetrics.inserts-writeMetrics.deletes) - assert.Equal(t, numUpdates, writeMetrics.updates) + assert.Equal(t, sumUpdates, writeMetrics.updates-writeMetrics.deletes) // because we DELETE WHERE updates=1 } func vtgateExec(t *testing.T, ddlStrategy string, query string, expectError string) *sqltypes.Result { From a1bff1c201bd124d71e8a4f29541a3a50554d80b Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Mon, 15 Feb 2021 15:10:03 +0200 Subject: [PATCH 59/72] more metrics, count metrics in goroutine Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- .../onlineddl_vrepl_mini_stress_test.go | 102 +++++++++++++----- 1 file changed, 75 insertions(+), 27 deletions(-) diff --git 
a/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go b/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go index 0e45dae9ae7..1e83152855a 100644 --- a/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go +++ b/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go @@ -41,8 +41,11 @@ import ( ) type WriteMetrics struct { - mu sync.Mutex - inserts, updates, deletes int64 + mu sync.Mutex + inserts, updates, deletes int64 + insertsAttempts, insertsFailures, insertsNoops int64 + updatesAttempts, updatesFailures, updatesNoops int64 + deletesAttempts, deletesFailures, deletesNoops int64 } func (w *WriteMetrics) Clear() { @@ -52,10 +55,31 @@ func (w *WriteMetrics) Clear() { w.inserts = 0 w.updates = 0 w.deletes = 0 + + w.insertsAttempts = 0 + w.insertsFailures = 0 + w.insertsNoops = 0 + + w.updatesAttempts = 0 + w.updatesFailures = 0 + w.updatesNoops = 0 + + w.deletesAttempts = 0 + w.deletesFailures = 0 + w.deletesNoops = 0 } func (w *WriteMetrics) String() string { - return fmt.Sprintf("WriteMetrics: inserts=%d, updates=%d, deletes=%d, inserts-deletes=%d, updates-deletes=%d", w.inserts, w.updates, w.deletes, w.inserts-w.deletes, w.updates-w.deletes) + return fmt.Sprintf(`WriteMetrics: inserts=%d, updates=%d, deletes=%d, inserts-deletes=%d, updates-deletes=%d, +insertsAttempts=%d, insertsFailures=%d, insertsNoops=%d, inserts=%d, +updatesAttempts=%d, updatesFailures=%d, updatesNoops=%d, updates=%d, +deletesAttempts=%d, deletesFailures=%d, deletesNoops=%d, deletes=%d, +`, + w.inserts, w.updates, w.deletes, w.inserts-w.deletes, w.updates-w.deletes, + w.insertsAttempts, w.insertsFailures, w.insertsNoops, w.inserts, + w.updatesAttempts, w.updatesFailures, w.updatesNoops, w.updates, + w.deletesAttempts, w.deletesFailures, w.deletesNoops, w.deletes, + ) } var ( @@ -321,48 +345,72 @@ func generateInsert(t *testing.T, conn *mysql.Conn) error { id := rand.Int31n(int32(maxTableRows)) query := 
fmt.Sprintf(insertRowStatement, id) qr, err := conn.ExecuteFetch(query, 1000, true) - if err != nil { - return err - } - assert.Less(t, qr.RowsAffected, uint64(2)) - if qr.RowsAffected > 0 { + + go func() { writeMetrics.mu.Lock() defer writeMetrics.mu.Unlock() + + writeMetrics.insertsAttempts++ + if err != nil { + writeMetrics.insertsFailures++ + return + } + assert.Less(t, qr.RowsAffected, uint64(2)) + if qr.RowsAffected == 0 { + writeMetrics.insertsNoops++ + return + } writeMetrics.inserts++ - } - return nil + }() + return err } func generateUpdate(t *testing.T, conn *mysql.Conn) error { id := rand.Int31n(int32(maxTableRows)) query := fmt.Sprintf(updateRowStatement, id) qr, err := conn.ExecuteFetch(query, 1000, true) - if err != nil { - return err - } - assert.Less(t, qr.RowsAffected, uint64(2)) - if qr.RowsAffected > 0 { + + go func() { writeMetrics.mu.Lock() defer writeMetrics.mu.Unlock() + + writeMetrics.updatesAttempts++ + if err != nil { + writeMetrics.updatesFailures++ + return + } + assert.Less(t, qr.RowsAffected, uint64(2)) + if qr.RowsAffected == 0 { + writeMetrics.updatesNoops++ + return + } writeMetrics.updates++ - } - return nil + }() + return err } func generateDelete(t *testing.T, conn *mysql.Conn) error { id := rand.Int31n(int32(maxTableRows)) query := fmt.Sprintf(deleteRowStatement, id) qr, err := conn.ExecuteFetch(query, 1000, true) - if err != nil { - return err - } - assert.Less(t, qr.RowsAffected, uint64(2)) - if qr.RowsAffected > 0 { + + go func() { writeMetrics.mu.Lock() defer writeMetrics.mu.Unlock() + + writeMetrics.deletesAttempts++ + if err != nil { + writeMetrics.deletesFailures++ + return + } + assert.Less(t, qr.RowsAffected, uint64(2)) + if qr.RowsAffected == 0 { + writeMetrics.deletesNoops++ + return + } writeMetrics.deletes++ - } - return nil + }() + return err } func runSingleConnection(ctx context.Context, t *testing.T, done chan bool) { @@ -444,7 +492,7 @@ func testSelectTableMetrics(t *testing.T) { writeMetrics.mu.Lock() defer 
writeMetrics.mu.Unlock() - log.Infof("writeMetrics: %v", writeMetrics.String()) + log.Infof("%s", writeMetrics.String()) ctx := context.Background() conn, err := mysql.Connect(ctx, &vtParams) @@ -465,8 +513,8 @@ func testSelectTableMetrics(t *testing.T) { assert.NotZero(t, writeMetrics.inserts) assert.NotZero(t, writeMetrics.deletes) assert.NotZero(t, writeMetrics.updates) - assert.Equal(t, numRows, writeMetrics.inserts-writeMetrics.deletes) - assert.Equal(t, sumUpdates, writeMetrics.updates-writeMetrics.deletes) // because we DELETE WHERE updates=1 + assert.Equal(t, writeMetrics.inserts-writeMetrics.deletes, numRows) + assert.Equal(t, writeMetrics.updates-writeMetrics.deletes, sumUpdates) // because we DELETE WHERE updates=1 } func vtgateExec(t *testing.T, ddlStrategy string, query string, expectError string) *sqltypes.Result { From 80c119d555f6b6f30d15d48b0a7faf17759f7b09 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Mon, 15 Feb 2021 16:22:36 +0200 Subject: [PATCH 60/72] multiple iterations for 'workload without ALTER TABLE' Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- .../onlineddl_vrepl_mini_stress_test.go | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go b/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go index 1e83152855a..fb3ca0be3a2 100644 --- a/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go +++ b/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go @@ -41,11 +41,10 @@ import ( ) type WriteMetrics struct { - mu sync.Mutex - inserts, updates, deletes int64 - insertsAttempts, insertsFailures, insertsNoops int64 - updatesAttempts, updatesFailures, updatesNoops int64 - deletesAttempts, deletesFailures, deletesNoops int64 + mu sync.Mutex + insertsAttempts, insertsFailures, insertsNoops, 
inserts int64 + updatesAttempts, updatesFailures, updatesNoops, updates int64 + deletesAttempts, deletesFailures, deletesNoops, deletes int64 } func (w *WriteMetrics) Clear() { @@ -70,12 +69,12 @@ func (w *WriteMetrics) Clear() { } func (w *WriteMetrics) String() string { - return fmt.Sprintf(`WriteMetrics: inserts=%d, updates=%d, deletes=%d, inserts-deletes=%d, updates-deletes=%d, + return fmt.Sprintf(`WriteMetrics: inserts-deletes=%d, updates-deletes=%d, insertsAttempts=%d, insertsFailures=%d, insertsNoops=%d, inserts=%d, updatesAttempts=%d, updatesFailures=%d, updatesNoops=%d, updates=%d, deletesAttempts=%d, deletesFailures=%d, deletesNoops=%d, deletes=%d, `, - w.inserts, w.updates, w.deletes, w.inserts-w.deletes, w.updates-w.deletes, + w.inserts-w.deletes, w.updates-w.deletes, w.insertsAttempts, w.insertsFailures, w.insertsNoops, w.inserts, w.updatesAttempts, w.updatesFailures, w.updatesNoops, w.updates, w.deletesAttempts, w.deletesFailures, w.deletesNoops, w.deletes, @@ -209,17 +208,17 @@ func TestMain(m *testing.M) { func TestSchemaChange(t *testing.T) { defer cluster.PanicHandler(t) - ctx := context.Background() - t.Run("create schema", func(t *testing.T) { assert.Equal(t, 2, len(clusterInstance.Keyspaces[0].Shards)) testWithInitialSchema(t) }) - - t.Run("workload without ALTER TABLE", func(t *testing.T) { - initTable(t) - testSelectTableMetrics(t) - }) + for i := 0; i < countIterations; i++ { + testName := fmt.Sprintf("workload without ALTER TABLE %d/%d", (i + 1), countIterations) + t.Run(testName, func(t *testing.T) { + initTable(t) + testSelectTableMetrics(t) + }) + } t.Run("ALTER TABLE without workload", func(t *testing.T) { initTable(t) hint := "hint-alter-without-workload" @@ -228,6 +227,7 @@ func TestSchemaChange(t *testing.T) { testSelectTableMetrics(t) }) + ctx := context.Background() for i := 0; i < countIterations; i++ { testName := fmt.Sprintf("ALTER TABLE with workload %d/%d", (i + 1), countIterations) t.Run(testName, func(t *testing.T) { @@ 
-238,7 +238,6 @@ func TestSchemaChange(t *testing.T) { uuid := testOnlineDDLStatement(t, fmt.Sprintf(alterHintStatement, hint), "online", "vtgate", hint) checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) done <- true - time.Sleep(2 * time.Second) testSelectTableMetrics(t) }) } @@ -421,6 +420,8 @@ func runSingleConnection(ctx context.Context, t *testing.T, done chan bool) { _, err = conn.ExecuteFetch("set autocommit=1", 1000, true) require.Nil(t, err) + _, err = conn.ExecuteFetch("set transaction isolation level read committed", 1000, true) + require.Nil(t, err) for { select { @@ -443,7 +444,7 @@ func runSingleConnection(ctx context.Context, t *testing.T, done chan bool) { } } assert.Nil(t, err) - time.Sleep(20 * time.Millisecond) + time.Sleep(10 * time.Millisecond) } } @@ -489,6 +490,9 @@ func initTable(t *testing.T) { } func testSelectTableMetrics(t *testing.T) { + // More than reasonable safety margin to sllow goroutines to complete. + time.Sleep(2 * time.Second) + writeMetrics.mu.Lock() defer writeMetrics.mu.Unlock() From e5ab8b335346b50b7e4f00fadf4663794d939799 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Mon, 15 Feb 2021 19:04:25 +0200 Subject: [PATCH 61/72] using waitgroup Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- .../onlineddl_vrepl_mini_stress_test.go | 35 +++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go b/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go index fb3ca0be3a2..c3bb2577c86 100644 --- a/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go +++ b/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go @@ -340,12 +340,14 @@ func getCreateTableStatement(t *testing.T, tablet *cluster.Vttablet, tableName s return statement } -func generateInsert(t *testing.T, conn 
*mysql.Conn) error { +func generateInsert(t *testing.T, conn *mysql.Conn, wg *sync.WaitGroup) error { + wg.Add(1) id := rand.Int31n(int32(maxTableRows)) query := fmt.Sprintf(insertRowStatement, id) qr, err := conn.ExecuteFetch(query, 1000, true) go func() { + defer wg.Done() writeMetrics.mu.Lock() defer writeMetrics.mu.Unlock() @@ -364,12 +366,14 @@ func generateInsert(t *testing.T, conn *mysql.Conn) error { return err } -func generateUpdate(t *testing.T, conn *mysql.Conn) error { +func generateUpdate(t *testing.T, conn *mysql.Conn, wg *sync.WaitGroup) error { + wg.Add(1) id := rand.Int31n(int32(maxTableRows)) query := fmt.Sprintf(updateRowStatement, id) qr, err := conn.ExecuteFetch(query, 1000, true) go func() { + defer wg.Done() writeMetrics.mu.Lock() defer writeMetrics.mu.Unlock() @@ -388,12 +392,14 @@ func generateUpdate(t *testing.T, conn *mysql.Conn) error { return err } -func generateDelete(t *testing.T, conn *mysql.Conn) error { +func generateDelete(t *testing.T, conn *mysql.Conn, wg *sync.WaitGroup) error { + wg.Add(1) id := rand.Int31n(int32(maxTableRows)) query := fmt.Sprintf(deleteRowStatement, id) qr, err := conn.ExecuteFetch(query, 1000, true) go func() { + defer wg.Done() writeMetrics.mu.Lock() defer writeMetrics.mu.Unlock() @@ -412,7 +418,7 @@ func generateDelete(t *testing.T, conn *mysql.Conn) error { return err } -func runSingleConnection(ctx context.Context, t *testing.T, done chan bool) { +func runSingleConnection(ctx context.Context, t *testing.T, done chan bool, wg *sync.WaitGroup) { log.Infof("Running single connection") conn, err := mysql.Connect(ctx, &vtParams) require.Nil(t, err) @@ -432,11 +438,11 @@ func runSingleConnection(ctx context.Context, t *testing.T, done chan bool) { } switch rand.Int31n(3) { case 0: - err = generateInsert(t, conn) + err = generateInsert(t, conn, wg) case 1: - err = generateUpdate(t, conn) + err = generateUpdate(t, conn, wg) case 2: - err = generateDelete(t, conn) + err = generateDelete(t, conn, wg) } if err != 
nil { if strings.Contains(err.Error(), "disallowed due to rule: enforce blacklisted tables") { @@ -451,10 +457,11 @@ func runSingleConnection(ctx context.Context, t *testing.T, done chan bool) { func runMultipleConnections(ctx context.Context, t *testing.T, done chan bool) { log.Infof("Running multiple connections") var chans []chan bool + var wg sync.WaitGroup for i := 0; i < maxConcurrency; i++ { d := make(chan bool) chans = append(chans, d) - go runSingleConnection(ctx, t, d) + go runSingleConnection(ctx, t, d, &wg) } <-done log.Infof("Running multiple connections: done") @@ -462,6 +469,7 @@ func runMultipleConnections(ctx context.Context, t *testing.T, done chan bool) { log.Infof("Cancelling single connection") d <- true } + wg.Wait() log.Infof("All connections cancelled") } @@ -478,21 +486,20 @@ func initTable(t *testing.T) { _, err = conn.ExecuteFetch(truncateStatement, 1000, true) require.Nil(t, err) + var wg sync.WaitGroup for i := 0; i < maxTableRows/2; i++ { - generateInsert(t, conn) + generateInsert(t, conn, &wg) } for i := 0; i < maxTableRows/4; i++ { - generateUpdate(t, conn) + generateUpdate(t, conn, &wg) } for i := 0; i < maxTableRows/4; i++ { - generateDelete(t, conn) + generateDelete(t, conn, &wg) } + wg.Wait() } func testSelectTableMetrics(t *testing.T) { - // More than reasonable safety margin to sllow goroutines to complete. 
- time.Sleep(2 * time.Second) - writeMetrics.mu.Lock() defer writeMetrics.mu.Unlock() From e5dabb43f300ab9af053ae32d7b2bc231e82491a Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Mon, 15 Feb 2021 19:07:54 +0200 Subject: [PATCH 62/72] typo Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/vrepl.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/vt/vttablet/onlineddl/vrepl.go b/go/vt/vttablet/onlineddl/vrepl.go index 1d6f2fc10cb..6a3d4035ce8 100644 --- a/go/vt/vttablet/onlineddl/vrepl.go +++ b/go/vt/vttablet/onlineddl/vrepl.go @@ -196,7 +196,7 @@ func (v *VRepl) getSharedColumns(sourceColumns, targetColumns *vrepl.ColumnList, return vrepl.NewColumnList(sharedColumnNames), vrepl.NewColumnList(mappedSharedColumnNames), sharedColumnsMap } -// getSharedPKColumns returns the intersection of PRIMARY KEY columns (taking renaming into consideration) etween source and target tables +// getSharedPKColumns returns the intersection of PRIMARY KEY columns (taking renaming into consideration) between source and target tables func (v *VRepl) getSharedPKColumns(sourcePKColumns, targetPKColumns *vrepl.ColumnList, columnRenameMap map[string]string) ( sharedPKColumns *vrepl.ColumnList, ) { From 114642b3ae2de09250c30132ed8b1dbad8b2182c Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Mon, 15 Feb 2021 19:11:11 +0200 Subject: [PATCH 63/72] remove transactionTimestamp evaluation Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 9fdd92349a3..686104be29b 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -1515,10 +1515,10 @@ func (e *Executor) 
isVReplMigrationReadyToCutOver(ctx context.Context, s *VReplS if durationDiff(time.Now(), timeUpdated) > cutOverThreshold { return false, err } - transactionTimestamp := time.Unix(s.transactionTimestamp, 0) - if durationDiff(transactionTimestamp, timeUpdated) > cutOverThreshold { - return false, err - } + // previously, we also tested for transactionTimestamp. However, as pointed out in + // https://github.com/vitessio/vitess/pull/7419#discussion_r576316376, if the table is idle and + // receives no traffic, transactionTimestamp natually goes stale, and this should not prevent + // cut-over from taking place. } { // copy_state must have no entries for this vreplication id: if entries are From 323562ba69b98dc8ab82b415ebf6c236b4c73963 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Mon, 15 Feb 2021 19:13:55 +0200 Subject: [PATCH 64/72] fixes error message case Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 686104be29b..6500e57f7f2 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -65,11 +65,11 @@ import ( var ( // ErrExecutorNotWritableTablet is generated when executor is asked to run gh-ost on a read-only server - ErrExecutorNotWritableTablet = errors.New("Cannot run migration on non-writable tablet") + ErrExecutorNotWritableTablet = errors.New("cannot run migration on non-writable tablet") // ErrExecutorMigrationAlreadyRunning is generated when an attempt is made to run an operation that conflicts with a running migration - ErrExecutorMigrationAlreadyRunning = errors.New("Cannot run migration since a migration is already running") + ErrExecutorMigrationAlreadyRunning = errors.New("cannot run migration since a migration is already running") // 
ErrMigrationNotFound is returned by readMigration when given UUI cannot be found - ErrMigrationNotFound = errors.New("Migration not found") + ErrMigrationNotFound = errors.New("migration not found") ) var vexecUpdateTemplates = []string{ From 8650ef92b7d9bc0bf8ba02362919eaeea1c533e6 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Tue, 16 Feb 2021 08:36:26 +0200 Subject: [PATCH 65/72] no need for replicas, make test more lightweight Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- .../onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go b/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go index c3bb2577c86..61cfbc5c634 100644 --- a/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go +++ b/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go @@ -173,7 +173,7 @@ func TestMain(m *testing.M) { Name: keyspaceName, } - if err := clusterInstance.StartUnshardedKeyspace(*keyspace, 2, true); err != nil { + if err := clusterInstance.StartUnshardedKeyspace(*keyspace, 0, false); err != nil { return 1, err } if err := clusterInstance.StartKeyspace(*keyspace, []string{"1"}, 1, false); err != nil { From f1344692162d1314c83997ba17cdbf2ca9835957 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Tue, 16 Feb 2021 09:24:35 +0200 Subject: [PATCH 66/72] wait for runMultipleConnections() to complete Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- .../onlineddl_vrepl_mini_stress_test.go | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go 
b/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go index 61cfbc5c634..58c730fa50e 100644 --- a/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go +++ b/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go @@ -106,7 +106,7 @@ var ( ALTER TABLE stress_test modify hint_col varchar(64) not null default '%s' ` insertRowStatement = ` - INSERT IGNORE INTO stress_test (id, rand_val) VALUES (%d, left(md5(rand()), 8)) + INSERT INTO stress_test (id, rand_val) VALUES (%d, left(md5(rand()), 8)) ` updateRowStatement = ` UPDATE stress_test SET updates=updates+1 WHERE id=%d @@ -212,10 +212,28 @@ func TestSchemaChange(t *testing.T) { assert.Equal(t, 2, len(clusterInstance.Keyspaces[0].Shards)) testWithInitialSchema(t) }) + for i := 0; i < countIterations; i++ { + testName := fmt.Sprintf("init table %d/%d", (i + 1), countIterations) + t.Run(testName, func(t *testing.T) { + initTable(t) + testSelectTableMetrics(t) + }) + } for i := 0; i < countIterations; i++ { testName := fmt.Sprintf("workload without ALTER TABLE %d/%d", (i + 1), countIterations) t.Run(testName, func(t *testing.T) { + ctx := context.Background() initTable(t) + done := make(chan bool) + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + runMultipleConnections(ctx, t, done) + }() + time.Sleep(5 * time.Second) + done <- true + wg.Wait() testSelectTableMetrics(t) }) } @@ -227,17 +245,23 @@ func TestSchemaChange(t *testing.T) { testSelectTableMetrics(t) }) - ctx := context.Background() for i := 0; i < countIterations; i++ { testName := fmt.Sprintf("ALTER TABLE with workload %d/%d", (i + 1), countIterations) t.Run(testName, func(t *testing.T) { + ctx := context.Background() initTable(t) done := make(chan bool) - go runMultipleConnections(ctx, t, done) + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + runMultipleConnections(ctx, t, done) + }() hint := fmt.Sprintf("hint-alter-with-workload-%d", i) uuid := 
testOnlineDDLStatement(t, fmt.Sprintf(alterHintStatement, hint), "online", "vtgate", hint) checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) done <- true + wg.Wait() testSelectTableMetrics(t) }) } @@ -447,6 +471,8 @@ func runSingleConnection(ctx context.Context, t *testing.T, done chan bool, wg * if err != nil { if strings.Contains(err.Error(), "disallowed due to rule: enforce blacklisted tables") { err = nil + } else if strings.Contains(err.Error(), "AlreadyExists") { + err = nil } } assert.Nil(t, err) From 534201e882a45c307738913216c80677b53e87a5 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Tue, 16 Feb 2021 09:54:07 +0200 Subject: [PATCH 67/72] using context.WithCancel, simplify logic Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- .../onlineddl_vrepl_mini_stress_test.go | 90 +++++++++---------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go b/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go index 58c730fa50e..6fe4f6cbb52 100644 --- a/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go +++ b/go/test/endtoend/onlineddl_vrepl_stress/onlineddl_vrepl_mini_stress_test.go @@ -26,6 +26,7 @@ import ( "regexp" "strings" "sync" + "sync/atomic" "testing" "time" @@ -106,7 +107,7 @@ var ( ALTER TABLE stress_test modify hint_col varchar(64) not null default '%s' ` insertRowStatement = ` - INSERT INTO stress_test (id, rand_val) VALUES (%d, left(md5(rand()), 8)) + INSERT IGNORE INTO stress_test (id, rand_val) VALUES (%d, left(md5(rand()), 8)) ` updateRowStatement = ` UPDATE stress_test SET updates=updates+1 WHERE id=%d @@ -173,6 +174,7 @@ func TestMain(m *testing.M) { Name: keyspaceName, } + // No need for replicas in this stress test if err := clusterInstance.StartUnshardedKeyspace(*keyspace, 0, false); err != nil { return 1, err } @@ -213,6 
+215,8 @@ func TestSchemaChange(t *testing.T) { testWithInitialSchema(t) }) for i := 0; i < countIterations; i++ { + // This first tests the general functionality of initializing the table with data, + // no concurrency involved. Just counting. testName := fmt.Sprintf("init table %d/%d", (i + 1), countIterations) t.Run(testName, func(t *testing.T) { initTable(t) @@ -220,24 +224,28 @@ func TestSchemaChange(t *testing.T) { }) } for i := 0; i < countIterations; i++ { + // This tests running a workload on the table, then comparing expected metrics with + // actual table metrics. All this without any ALTER TABLE: this is to validate + // that our testing/metrics logic is sound in the first place. testName := fmt.Sprintf("workload without ALTER TABLE %d/%d", (i + 1), countIterations) t.Run(testName, func(t *testing.T) { - ctx := context.Background() + ctx, cancel := context.WithCancel(context.Background()) initTable(t) - done := make(chan bool) var wg sync.WaitGroup wg.Add(1) go func() { defer wg.Done() - runMultipleConnections(ctx, t, done) + runMultipleConnections(ctx, t) }() time.Sleep(5 * time.Second) - done <- true + cancel() // will cause runMultipleConnections() to terminate wg.Wait() testSelectTableMetrics(t) }) } t.Run("ALTER TABLE without workload", func(t *testing.T) { + // A single ALTER TABLE. Generally this is covered in endtoend/onlineddl_vrepl, + // but we wish to verify the ALTER statement used in these tests is sound initTable(t) hint := "hint-alter-without-workload" uuid := testOnlineDDLStatement(t, fmt.Sprintf(alterHintStatement, hint), "online", "vtgate", hint) @@ -246,21 +254,26 @@ func TestSchemaChange(t *testing.T) { }) for i := 0; i < countIterations; i++ { + // Finally, this is the real test: + // We populate a table, and begin a concurrent workload (this is the "mini stress") + // We then ALTER TABLE via vreplication. + // Once convinced ALTER TABLE is complete, we stop the workload. + // We then compare expected metrics with table metrics. 
If they agree, then + // the vreplication/ALTER TABLE did not corrupt our data and we are happy. testName := fmt.Sprintf("ALTER TABLE with workload %d/%d", (i + 1), countIterations) t.Run(testName, func(t *testing.T) { - ctx := context.Background() + ctx, cancel := context.WithCancel(context.Background()) initTable(t) - done := make(chan bool) var wg sync.WaitGroup wg.Add(1) go func() { defer wg.Done() - runMultipleConnections(ctx, t, done) + runMultipleConnections(ctx, t) }() hint := fmt.Sprintf("hint-alter-with-workload-%d", i) uuid := testOnlineDDLStatement(t, fmt.Sprintf(alterHintStatement, hint), "online", "vtgate", hint) checkRecentMigrations(t, uuid, schema.OnlineDDLStatusComplete) - done <- true + cancel() // will cause runMultipleConnections() to terminate wg.Wait() testSelectTableMetrics(t) }) @@ -364,14 +377,12 @@ func getCreateTableStatement(t *testing.T, tablet *cluster.Vttablet, tableName s return statement } -func generateInsert(t *testing.T, conn *mysql.Conn, wg *sync.WaitGroup) error { - wg.Add(1) +func generateInsert(t *testing.T, conn *mysql.Conn) error { id := rand.Int31n(int32(maxTableRows)) query := fmt.Sprintf(insertRowStatement, id) qr, err := conn.ExecuteFetch(query, 1000, true) - go func() { - defer wg.Done() + func() { writeMetrics.mu.Lock() defer writeMetrics.mu.Unlock() @@ -390,14 +401,12 @@ func generateInsert(t *testing.T, conn *mysql.Conn, wg *sync.WaitGroup) error { return err } -func generateUpdate(t *testing.T, conn *mysql.Conn, wg *sync.WaitGroup) error { - wg.Add(1) +func generateUpdate(t *testing.T, conn *mysql.Conn) error { id := rand.Int31n(int32(maxTableRows)) query := fmt.Sprintf(updateRowStatement, id) qr, err := conn.ExecuteFetch(query, 1000, true) - go func() { - defer wg.Done() + func() { writeMetrics.mu.Lock() defer writeMetrics.mu.Unlock() @@ -416,14 +425,12 @@ func generateUpdate(t *testing.T, conn *mysql.Conn, wg *sync.WaitGroup) error { return err } -func generateDelete(t *testing.T, conn *mysql.Conn, wg 
*sync.WaitGroup) error { - wg.Add(1) +func generateDelete(t *testing.T, conn *mysql.Conn) error { id := rand.Int31n(int32(maxTableRows)) query := fmt.Sprintf(deleteRowStatement, id) qr, err := conn.ExecuteFetch(query, 1000, true) - go func() { - defer wg.Done() + func() { writeMetrics.mu.Lock() defer writeMetrics.mu.Unlock() @@ -442,7 +449,7 @@ func generateDelete(t *testing.T, conn *mysql.Conn, wg *sync.WaitGroup) error { return err } -func runSingleConnection(ctx context.Context, t *testing.T, done chan bool, wg *sync.WaitGroup) { +func runSingleConnection(ctx context.Context, t *testing.T, done *int64) { log.Infof("Running single connection") conn, err := mysql.Connect(ctx, &vtParams) require.Nil(t, err) @@ -454,25 +461,21 @@ func runSingleConnection(ctx context.Context, t *testing.T, done chan bool, wg * require.Nil(t, err) for { - select { - case <-done: + if atomic.LoadInt64(done) == 1 { log.Infof("Terminating single connection") return - default: } switch rand.Int31n(3) { case 0: - err = generateInsert(t, conn, wg) + err = generateInsert(t, conn) case 1: - err = generateUpdate(t, conn, wg) + err = generateUpdate(t, conn) case 2: - err = generateDelete(t, conn, wg) + err = generateDelete(t, conn) } if err != nil { if strings.Contains(err.Error(), "disallowed due to rule: enforce blacklisted tables") { err = nil - } else if strings.Contains(err.Error(), "AlreadyExists") { - err = nil } } assert.Nil(t, err) @@ -480,21 +483,20 @@ func runSingleConnection(ctx context.Context, t *testing.T, done chan bool, wg * } } -func runMultipleConnections(ctx context.Context, t *testing.T, done chan bool) { +func runMultipleConnections(ctx context.Context, t *testing.T) { log.Infof("Running multiple connections") - var chans []chan bool + var done int64 var wg sync.WaitGroup for i := 0; i < maxConcurrency; i++ { - d := make(chan bool) - chans = append(chans, d) - go runSingleConnection(ctx, t, d, &wg) + wg.Add(1) + go func() { + defer wg.Done() + runSingleConnection(ctx, t, 
&done) + }() } - <-done + <-ctx.Done() + atomic.StoreInt64(&done, 1) log.Infof("Running multiple connections: done") - for _, d := range chans { - log.Infof("Cancelling single connection") - d <- true - } wg.Wait() log.Infof("All connections cancelled") } @@ -512,17 +514,15 @@ func initTable(t *testing.T) { _, err = conn.ExecuteFetch(truncateStatement, 1000, true) require.Nil(t, err) - var wg sync.WaitGroup for i := 0; i < maxTableRows/2; i++ { - generateInsert(t, conn, &wg) + generateInsert(t, conn) } for i := 0; i < maxTableRows/4; i++ { - generateUpdate(t, conn, &wg) + generateUpdate(t, conn) } for i := 0; i < maxTableRows/4; i++ { - generateDelete(t, conn, &wg) + generateDelete(t, conn) } - wg.Wait() } func testSelectTableMetrics(t *testing.T) { From 942b5bec1dbe082920aef846e6774039bd2def98 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Tue, 16 Feb 2021 10:49:57 +0200 Subject: [PATCH 68/72] async ReloadScema at cut-over Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 6500e57f7f2..406710d5a04 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -569,9 +569,23 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream) er } } + go func() { + // Tables are swapped! Let's take the opportunity to ReloadSchema now + // We do this in a goroutine because it might take time on a schema with thousands of tables, and we don't want to delay + // the cut-over. + // this means ReloadSchema is not in sync with the actual schema change. Users will still need to run tracker if they want to sync. + // In the future, we will want to reload the single table, instead of reloading the schema. 
+ if err := tmClient.ReloadSchema(ctx, tablet.Tablet, ""); err != nil { + vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "Error on ReloadSchema while cutting over vreplication migration UUID: %+v", onlineDDL.UUID) + } + }() + // Tables are now swapped! Migration is successful _ = e.onSchemaMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusComplete, false, progressPctFull) return nil + + // deferred function will re-enable writes now + // deferred function will unlock keyspace } // ExecuteWithVReplication sets up the grounds for a vreplication schema migration From 1428eead3777545c526152b0ed5cf6371c0b423c Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Tue, 16 Feb 2021 11:21:06 +0200 Subject: [PATCH 69/72] restore transaction_timestamp test Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 406710d5a04..b883ddd1c96 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -1511,7 +1511,7 @@ func (e *Executor) isVReplMigrationReadyToCutOver(ctx context.Context, s *VReplS { // when ready to cut-over, pos must have some value if s.pos == "" { - return false, err + return false, nil } } { @@ -1527,12 +1527,19 @@ func (e *Executor) isVReplMigrationReadyToCutOver(ctx context.Context, s *VReplS } timeUpdated := time.Unix(s.timeUpdated, 0) if durationDiff(time.Now(), timeUpdated) > cutOverThreshold { - return false, err + return false, nil + } + // Let's look at transaction timestamp. 
This gets written by any ongoing + // writes on the server (whether on this table or any other table) + transactionTimestamp := time.Unix(s.transactionTimestamp, 0) + if durationDiff(transactionTimestamp, timeUpdated) > cutOverThreshold { + // There's two ways the diff can be high: + // 1. High workload: vreplication is unable to apply events in a timely manner. This is + // a normal situation and is why we're testing transaction_timestamp + // 2. Lack of writes on the server. Possibly the server is stale. In which case, cut-over + // should be good to go. But we need (TODO) some mechanism to inform us that this is indeed the case. + return false, nil } - // previously, we also tested for transactionTimestamp. However, as pointed out in - // https://github.com/vitessio/vitess/pull/7419#discussion_r576316376, if the table is idle and - // receives no traffic, transactionTimestamp natually goes stale, and this should not prevent - // cut-over from taking place. } { // copy_state must have no entries for this vreplication id: if entries are @@ -1554,7 +1561,7 @@ func (e *Executor) isVReplMigrationReadyToCutOver(ctx context.Context, s *VReplS count := csRow.AsInt64("cnt", 0) if count > 0 { // Still copying - return false, err + return false, nil } } return true, nil From 7e2a0748a50d79ca51f14bb69b65765e87376ffd Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Tue, 16 Feb 2021 17:05:54 +0200 Subject: [PATCH 70/72] inject dummy statements when vreplication transaction_timestamp is stale Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 43 ++++++++++++++++++++++++++-- go/vt/vttablet/onlineddl/schema.go | 3 +- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index b883ddd1c96..9abb0821a8d 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ 
b/go/vt/vttablet/onlineddl/executor.go @@ -140,6 +140,8 @@ type Executor struct { lastMigrationUUID string tickReentranceFlag int64 + requestForDummyInjection int64 + ticks *timer.Timer isOpen bool schemaInitialized bool @@ -387,6 +389,24 @@ func (e *Executor) tableExists(ctx context.Context, tableName string) (bool, err return (row != nil), nil } +// injectDummyStatements issues several no-op statements on the backend MySQL server. This is done to +// ensure binary logs are being written, to solve a possible scenario for vreplication-based migrations +// where the server is otherwise completely stale. Vreplication does not write heartbeats like gh-ost does, +// and if the server is stale (and assuming lag-throttler is not enabled) then binary logs may be completely silent, +// which makes it impossible to know when to cut-over. +func (e *Executor) injectDummyStatements(ctx context.Context) (err error) { + conn, err := dbconnpool.NewDBConnection(ctx, e.env.Config().DB.DbaWithDB()) + if err != nil { + return err + } + defer conn.Close() + + if _, err := conn.ExecuteFetch(sqlDummyDropView, 0, false); err != nil { + return err + } + return nil +} + func (e *Executor) parseAlterOptions(ctx context.Context, onlineDDL *schema.OnlineDDL) string { // Temporary hack (2020-08-11) // Because sqlparser does not do full blown ALTER TABLE parsing, @@ -1525,19 +1545,26 @@ func (e *Executor) isVReplMigrationReadyToCutOver(ctx context.Context, s *VReplS } return diff } + timeNow := time.Now() timeUpdated := time.Unix(s.timeUpdated, 0) - if durationDiff(time.Now(), timeUpdated) > cutOverThreshold { + if durationDiff(timeNow, timeUpdated) > cutOverThreshold { return false, nil } // Let's look at transaction timestamp. 
This gets written by any ongoing // writes on the server (whether on this table or any other table) transactionTimestamp := time.Unix(s.transactionTimestamp, 0) - if durationDiff(transactionTimestamp, timeUpdated) > cutOverThreshold { + if diff := durationDiff(timeNow, transactionTimestamp); diff > cutOverThreshold { // There's two ways the diff can be high: // 1. High workload: vreplication is unable to apply events in a timely manner. This is // a normal situation and is why we're testing transaction_timestamp // 2. Lack of writes on the server. Possibly the server is stale. In which case, cut-over // should be good to go. But we need (TODO) some mechanism to inform us that this is indeed the case. + + // To handle (2), we request an injection of a dummy transaction, heuristically in good time for the next check. + // To not overdo it, and avoid spamming the server with dummy statements, we only request injection if lag is very high. + if diff > *migrationCheckInterval { + atomic.StoreInt64(&e.requestForDummyInjection, 1) + } return false, nil } } @@ -1793,6 +1820,18 @@ func (e *Executor) onMigrationCheckTick() { log.Error(err) return } + + if atomic.LoadInt64(&e.requestForDummyInjection) > 0 { + // This is a special scenario. VReplication-based online-DDL notices transaction timestamp is old. Either it + // is very busy, or the server is compeltely stale. In th elatter case, it requests dummy injection of + // statements. We comply here. 
+ // We inject a few well-timed statements, scheduled to be fresh by *next iteration*, to be intercepted by e.reviewRunningMigrations(ctx), following + atomic.StoreInt64(&e.requestForDummyInjection, 0) + intervals := []time.Duration{*migrationCheckInterval - 2*time.Second, *migrationCheckInterval - time.Second, *migrationCheckInterval} + for _, d := range intervals { + time.AfterFunc(d, func() { e.injectDummyStatements(ctx) }) + } + } if err := e.retryTabletFailureMigrations(ctx); err != nil { log.Error(err) } diff --git a/go/vt/vttablet/onlineddl/schema.go b/go/vt/vttablet/onlineddl/schema.go index 3d7959fc19f..2e8f5b128b9 100644 --- a/go/vt/vttablet/onlineddl/schema.go +++ b/go/vt/vttablet/onlineddl/schema.go @@ -245,7 +245,8 @@ const ( _vt.copy_state WHERE vrepl_id=%a ` - sqlSwapTables = "RENAME TABLE `%a` TO `%a`, `%a` TO `%a`, `%a` TO `%a`" + sqlSwapTables = "RENAME TABLE `%a` TO `%a`, `%a` TO `%a`, `%a` TO `%a`" + sqlDummyDropView = "DROP VIEW IF EXISTS `_vt:onlineddl:executor:inject:binlog:entry`" ) const ( From ed4207ef47bdd6305683e3f274a3552d7666c40a Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 17 Feb 2021 15:03:13 +0200 Subject: [PATCH 71/72] minor naming change Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/test/endtoend/onlineddl_vrepl/onlineddl_vrepl_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/test/endtoend/onlineddl_vrepl/onlineddl_vrepl_test.go b/go/test/endtoend/onlineddl_vrepl/onlineddl_vrepl_test.go index e594d79a997..91ac3246a60 100644 --- a/go/test/endtoend/onlineddl_vrepl/onlineddl_vrepl_test.go +++ b/go/test/endtoend/onlineddl_vrepl/onlineddl_vrepl_test.go @@ -204,7 +204,7 @@ func TestSchemaChange(t *testing.T) { defer cluster.PanicHandler(t) assert.Equal(t, 2, len(clusterInstance.Keyspaces[0].Shards)) testWithInitialSchema(t) - t.Run("create non_online", func(t *testing.T) { + t.Run("alter non_online", func(t 
*testing.T) { _ = testOnlineDDLStatement(t, alterTableNormalStatement, string(schema.DDLStrategyDirect), "vtctl", "non_online") insertRows(t, 2) testRows(t) From 7f6a800be4f9e77dc96c37936f602bfc33c3cb7d Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 18 Feb 2021 07:25:29 +0200 Subject: [PATCH 72/72] revert 7e2a0748a50d79ca51f14bb69b65765e87376ffd: no need for dummy injections Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/vttablet/onlineddl/executor.go | 44 +--------------------------- go/vt/vttablet/onlineddl/schema.go | 3 +- 2 files changed, 2 insertions(+), 45 deletions(-) diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 9abb0821a8d..1307660a7eb 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -140,8 +140,6 @@ type Executor struct { lastMigrationUUID string tickReentranceFlag int64 - requestForDummyInjection int64 - ticks *timer.Timer isOpen bool schemaInitialized bool @@ -389,24 +387,6 @@ func (e *Executor) tableExists(ctx context.Context, tableName string) (bool, err return (row != nil), nil } -// injectDummyStatements issues several no-op statements on the backend MySQL server. This is done to -// ensure binary logs are being written, to solve a possible scenario for vreplication-based migrations -// where the server is otherwise completely stale. Vreplication does not write heartbeats like gh-ost does, -// and if the server is stale (and assuming lag-throttler is not enabled) then binary logs may be completely silent, -// which makes it impossible to know when to cut-over. 
-func (e *Executor) injectDummyStatements(ctx context.Context) (err error) { - conn, err := dbconnpool.NewDBConnection(ctx, e.env.Config().DB.DbaWithDB()) - if err != nil { - return err - } - defer conn.Close() - - if _, err := conn.ExecuteFetch(sqlDummyDropView, 0, false); err != nil { - return err - } - return nil -} - func (e *Executor) parseAlterOptions(ctx context.Context, onlineDDL *schema.OnlineDDL) string { // Temporary hack (2020-08-11) // Because sqlparser does not do full blown ALTER TABLE parsing, @@ -1553,18 +1533,7 @@ func (e *Executor) isVReplMigrationReadyToCutOver(ctx context.Context, s *VReplS // Let's look at transaction timestamp. This gets written by any ongoing // writes on the server (whether on this table or any other table) transactionTimestamp := time.Unix(s.transactionTimestamp, 0) - if diff := durationDiff(timeNow, transactionTimestamp); diff > cutOverThreshold { - // There's two ways the diff can be high: - // 1. High workload: vreplication is unable to apply events in a timely manner. This is - // a normal situation and is why we're testing transaction_timestamp - // 2. Lack of writes on the server. Possibly the server is stale. In which case, cut-over - // should be good to go. But we need (TODO) some mechanism to inform us that this is indeed the case. - - // To handle (2), we request an injection of a dummy transaction, heuristically in good time for the next check. - // To not overdo it, and avoid spamming the server with dummy statements, we only request injection if lag is very high. - if diff > *migrationCheckInterval { - atomic.StoreInt64(&e.requestForDummyInjection, 1) - } + if durationDiff(timeNow, transactionTimestamp) > cutOverThreshold { return false, nil } } @@ -1821,17 +1790,6 @@ func (e *Executor) onMigrationCheckTick() { return } - if atomic.LoadInt64(&e.requestForDummyInjection) > 0 { - // This is a special scenario. VReplication-based online-DDL notices transaction timestamp is old. 
Either it - // is very busy, or the server is compeltely stale. In th elatter case, it requests dummy injection of - // statements. We comply here. - // We inject a few well-timed statements, scheduled to be fresh by *next iteration*, to be intercepted by e.reviewRunningMigrations(ctx), following - atomic.StoreInt64(&e.requestForDummyInjection, 0) - intervals := []time.Duration{*migrationCheckInterval - 2*time.Second, *migrationCheckInterval - time.Second, *migrationCheckInterval} - for _, d := range intervals { - time.AfterFunc(d, func() { e.injectDummyStatements(ctx) }) - } - } if err := e.retryTabletFailureMigrations(ctx); err != nil { log.Error(err) } diff --git a/go/vt/vttablet/onlineddl/schema.go b/go/vt/vttablet/onlineddl/schema.go index 2e8f5b128b9..3d7959fc19f 100644 --- a/go/vt/vttablet/onlineddl/schema.go +++ b/go/vt/vttablet/onlineddl/schema.go @@ -245,8 +245,7 @@ const ( _vt.copy_state WHERE vrepl_id=%a ` - sqlSwapTables = "RENAME TABLE `%a` TO `%a`, `%a` TO `%a`, `%a` TO `%a`" - sqlDummyDropView = "DROP VIEW IF EXISTS `_vt:onlineddl:executor:inject:binlog:entry`" + sqlSwapTables = "RENAME TABLE `%a` TO `%a`, `%a` TO `%a`, `%a` TO `%a`" ) const (