From 5d5e6d72d389e860fd8b12b36b6a0fe7aaba1d6f Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Wed, 26 Jul 2023 19:04:19 -0400 Subject: [PATCH 01/56] Get initial PoC implementation working for initializing sequence tables being used by tables being moved Signed-off-by: Matt Lord --- go/vt/wrangler/traffic_switcher.go | 207 ++++++++++++++++++++++++++++- 1 file changed, 206 insertions(+), 1 deletion(-) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index e3f45283ee2..4ba9b119052 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -28,6 +28,7 @@ import ( "vitess.io/vitess/go/json2" "vitess.io/vitess/go/sqlescape" + "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/vt/binlog/binlogplayer" "vitess.io/vitess/go/vt/concurrency" "vitess.io/vitess/go/vt/discovery" @@ -56,11 +57,31 @@ const ( renameTableTemplate = "_%.59s_old" // limit table name to 64 characters sqlDeleteWorkflow = "delete from _vt.vreplication where db_name = %s and workflow = %s" + + sqlGetMaxSequenceVal = "select max(%a) as maxval from %a.%a" + sqlInitSequenceTable = "insert into %a.%a (id, next_id, cache) values (0, %d, 1000) on duplicate key update next_id = if(next_id < %d, %d, next_id)" ) // accessType specifies the type of access for a shard (allow/disallow writes). type accessType int +// sequenceMetadata contains all of the relevant metadata for a sequence that +// is being used by a table involved in a vreplication workflow. +type sequenceMetadata struct { + // The name of the sequence table. + backingTableName string + // The keyspace where the backing table lives. + backingTableKeyspace string + // The dbName in use by the keyspace where the backing table lives. + backingTableDBName string + // The name of the table using the sequence. + usingTableName string + // The dbName in use by the keyspace where the using table lives. + usingTableDBName string + // The using table definition. 
+ usingTableDefinition *vschemapb.Table +} + const ( allowWrites = accessType(iota) disallowWrites @@ -611,6 +632,12 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowNa ts.Logger().Errorf("createJournals failed: %v", err) return 0, nil, err } + // Initialize any target sequences before allowing new writes. + if err := ts.initializeTargetSequenceTables(ctx); err != nil { + werr := vterrors.Wrapf(err, "initializeTargetSequenceTables failed") + ts.Logger().Error(werr) + return 0, nil, werr + } if err := sw.allowTargetWrites(ctx); err != nil { ts.Logger().Errorf("allowTargetWrites failed: %v", err) return 0, nil, err @@ -1886,7 +1913,7 @@ func (ts *trafficSwitcher) isSequenceParticipating(ctx context.Context) (bool, e if err != nil { return false, err } - if vschema == nil || vschema.Tables == nil { + if vschema == nil || vschema.Tables == nil || len(vschema.Tables) == 0 { return false, nil } sequenceFound := false @@ -1903,6 +1930,184 @@ func (ts *trafficSwitcher) isSequenceParticipating(ctx context.Context) (bool, e return sequenceFound, nil } +func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context) error { + log.Error("DEBUG: initializeTargetSequenceTables") + vschema, err := ts.TopoServer().GetVSchema(ctx, ts.targetKeyspace) + if err != nil { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for target keyspace %s: %v", + ts.targetKeyspace, err) + } + if vschema == nil || vschema.Tables == nil || len(vschema.Tables) == 0 { // Nothing to do + return nil + } + + // We maintain two maps of the same sequence metadata so + // that we have fast lookups for both the using table and + // the backing sequence table. 
+ sequencesByUsingTable := make(map[string]*sequenceMetadata) + sequencesByBackingTable := make(map[string]*sequenceMetadata) + for _, table := range ts.Tables() { + vs, ok := vschema.Tables[table] + if !ok || vs == nil { + continue + } + if vs.AutoIncrement != nil && vs.AutoIncrement.Sequence != "" { + sm := &sequenceMetadata{ + usingTableName: table, + usingTableDefinition: vs, + backingTableName: vs.AutoIncrement.Sequence, + // TODO: get and set this properly to deal with db_name_overrides + usingTableDBName: "vt_" + ts.targetKeyspace, + } + sequencesByUsingTable[table] = sm + sequencesByBackingTable[vs.AutoIncrement.Sequence] = sm + } + } + if len(sequencesByUsingTable) == 0 { // Nothing to do + return nil + } + + log.Errorf("DEBUG: sequences: %+v", sequencesByUsingTable) + + // Now we need to locate the backing sequence tables which will + // be in another unsharded keyspace. + keyspaces, err := ts.TopoServer().GetKeyspaces(ctx) + if err != nil { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get keyspaces: %v", err) + } + log.Errorf("DEBUG: keyspaces: %+v", keyspaces) + for _, keyspace := range keyspaces { + vschema, err = ts.TopoServer().GetVSchema(ctx, keyspace) + if err != nil { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", + keyspace, err) + } + if vschema == nil || vschema.Sharded || vschema.Tables == nil || len(vschema.Tables) == 0 { + continue + } + for tableName, tableDef := range vschema.Tables { + sm := sequencesByBackingTable[tableName] + if tableDef != nil && tableDef.Type == vindexes.TypeSequence && + sm != nil && tableName == sm.backingTableName { + // If the sequence backing table is being moved then we do not + // want to initialize it. 
+ if keyspace == ts.targetKeyspace { + delete(sequencesByBackingTable, tableName) + delete(sequencesByUsingTable, tableName) + continue + } + sm.backingTableKeyspace = keyspace + // TODO: get and set this properly in order to deal with db_name_overrides + sm.backingTableDBName = "vt_" + keyspace + } + } + } + // Now we need to make sure we found all of the backing sequence tables. + for _, sm := range sequencesByUsingTable { + if sm.backingTableKeyspace == "" { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to locate all of the backing sequence tables being used; sequence tables metadata: %+v", + sequencesByUsingTable) + } + } + log.Errorf("DEBUG: sequence backing tables: %+v", sequencesByBackingTable) + + // Now we need to initialize the backing sequence tables so that + // the next values they generate are greater than those that + // currently exist in the using table on the target keyspace. + for sequenceTableName, sequenceMetadata := range sequencesByBackingTable { + log.Errorf("DEBUG: sequence table: %v, sequenceMetadata: %+v", sequenceTableName, sequenceMetadata) + // Now we need to run this query on the target shards in order + // to get the max value and set the next id for the sequence to + // a higher value. 
+ shardResults := make([]int64, 0, len(ts.TargetShards())) + srMu := sync.Mutex{} + err = ts.ForAllTargets(func(target *workflow.MigrationTarget) error { + query := sqlparser.BuildParsedQuery(sqlGetMaxSequenceVal, + sqlescape.EscapeID(sequenceMetadata.usingTableDefinition.AutoIncrement.Column), + sqlescape.EscapeID(sequenceMetadata.usingTableDBName), + sqlescape.EscapeID(sequenceMetadata.usingTableName), + ) + ts.Logger().Errorf("DEBUG: query: %s on shard: %s", query.Query, target.GetShard().ShardName()) + qr, err := ts.wr.ExecuteFetchAsApp(ctx, target.GetPrimary().GetAlias(), true, query.Query, 1) + if err != nil || len(qr.Rows) != 1 { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get max used value for target table %s in order to initialize the backing sequence table %s: %v", + sequenceMetadata.usingTableName, ts.targetKeyspace, err) + } + maxID, err := sqltypes.Proto3ToResult(qr).Rows[0][0].ToInt64() + if err != nil { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get max used value for target table %s in order to initialize the backing sequence table %s: %v", + sequenceMetadata.usingTableName, ts.targetKeyspace, err) + } + ts.Logger().Errorf("DEBUG: max ID seen on shard %s: %d", target.GetShard().ShardName(), maxID) + srMu.Lock() + shardResults = append(shardResults, maxID) + srMu.Unlock() + + return nil + }) + if err != nil { + return err + } + // Sort the values to find the max value across all shards. + sort.Slice(shardResults, func(i, j int) bool { + return shardResults[i] < shardResults[j] + }) + nextVal := shardResults[len(shardResults)-1] + 1 + // Now we need to update the sequence table, if needed, in order to + // ensure that that the next value it provides is > the current max. 
+ query := sqlparser.BuildParsedQuery(sqlInitSequenceTable, + sqlescape.EscapeID(sequenceMetadata.backingTableDBName), + sqlescape.EscapeID(sequenceMetadata.backingTableName), + nextVal, + nextVal, + nextVal, + ) + log.Errorf("DEBUG: query: %s", query.Query) + // Execute this on the primary tablet of the keyspace housing + // the backing table. + sequenceShard, err := ts.wr.TopoServer().GetOnlyShard(ctx, sequenceMetadata.backingTableKeyspace) + if err != nil || sequenceShard == nil || sequenceShard.PrimaryAlias == nil { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", + sequenceMetadata.backingTableKeyspace, err) + } + _, err = ts.wr.ExecuteFetchAsApp(ctx, sequenceShard.PrimaryAlias, true, query.Query, 1) + if err != nil { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to initialize the sequence table %s.%s: %v", + sequenceMetadata.backingTableDBName, sequenceMetadata.backingTableName, err) + } + } + + // Now force the primary tablets managing the sequences to refresh their + // sequence caches for the tables we're moving. 
+ ksDone := make(map[string]bool) + for _, sm := range sequencesByUsingTable { + if ksDone[sm.backingTableKeyspace] { + continue + } + si, err := ts.TopoServer().GetOnlyShard(ctx, sm.backingTableKeyspace) + if err != nil { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get shard for keyspace %s: %v", + sm.backingTableKeyspace, err) + } + ts.Logger().Infof("Resetting sequence caches for shard %s.%s on tablet %s", + si.Keyspace(), si.ShardName(), si.PrimaryAlias) + ti, err := ts.TopoServer().GetTablet(ctx, si.PrimaryAlias) + if err != nil { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get primary tablet for keyspace %s: %v", + sm.backingTableKeyspace, err) + } + err = ts.TabletManagerClient().ResetSequences(ctx, ti.Tablet, ts.Tables()) + if err != nil { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset sequence caches for shard %s.%s on tablet %s: %v", + si.Keyspace(), si.ShardName(), si.PrimaryAlias, err) + } + ksDone[sm.backingTableKeyspace] = true + } + + // We completed the work w/o errors. 
+ return nil +} + func (ts *trafficSwitcher) mustResetSequences(ctx context.Context) (bool, error) { switch ts.workflowType { case binlogdatapb.VReplicationWorkflowType_Migrate, From 31d3895a94182c57e95f179b9f6369c987d78efd Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Thu, 27 Jul 2023 21:38:13 -0400 Subject: [PATCH 02/56] Reset cache per table, only when needed Signed-off-by: Matt Lord --- go/vt/wrangler/traffic_switcher.go | 37 +++++++++++------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 4ba9b119052..4e37e5dd3cb 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -2063,45 +2063,36 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context) e nextVal, ) log.Errorf("DEBUG: query: %s", query.Query) - // Execute this on the primary tablet of the keyspace housing - // the backing table. + // Execute this on the primary tablet of the unsharded keyspace + // housing the backing table. sequenceShard, err := ts.wr.TopoServer().GetOnlyShard(ctx, sequenceMetadata.backingTableKeyspace) if err != nil || sequenceShard == nil || sequenceShard.PrimaryAlias == nil { return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", sequenceMetadata.backingTableKeyspace, err) } - _, err = ts.wr.ExecuteFetchAsApp(ctx, sequenceShard.PrimaryAlias, true, query.Query, 1) + qr, err := ts.wr.ExecuteFetchAsApp(ctx, sequenceShard.PrimaryAlias, true, query.Query, 1) if err != nil { return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to initialize the sequence table %s.%s: %v", sequenceMetadata.backingTableDBName, sequenceMetadata.backingTableName, err) } - } - - // Now force the primary tablets managing the sequences to refresh their - // sequence caches for the tables we're moving. 
- ksDone := make(map[string]bool) - for _, sm := range sequencesByUsingTable { - if ksDone[sm.backingTableKeyspace] { + // If we actually updated the backing sequence table, then we need + // to tell the primary tablet managing the sequence to refresh/reset + // its cache for the table. + if qr.RowsAffected == 0 { continue } - si, err := ts.TopoServer().GetOnlyShard(ctx, sm.backingTableKeyspace) - if err != nil { - return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get shard for keyspace %s: %v", - sm.backingTableKeyspace, err) - } - ts.Logger().Infof("Resetting sequence caches for shard %s.%s on tablet %s", - si.Keyspace(), si.ShardName(), si.PrimaryAlias) - ti, err := ts.TopoServer().GetTablet(ctx, si.PrimaryAlias) + ts.Logger().Infof("Resetting sequence cache for table %s on shard %s.%s using tablet %s", + sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias) + ti, err := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) if err != nil { return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get primary tablet for keyspace %s: %v", - sm.backingTableKeyspace, err) + sequenceMetadata.backingTableKeyspace, err) } - err = ts.TabletManagerClient().ResetSequences(ctx, ti.Tablet, ts.Tables()) + err = ts.TabletManagerClient().ResetSequences(ctx, ti.Tablet, []string{sequenceMetadata.backingTableName}) if err != nil { - return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset sequence caches for shard %s.%s on tablet %s: %v", - si.Keyspace(), si.ShardName(), si.PrimaryAlias, err) + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset sequence cache for table %s on shard %s.%s using tablet %s: %v", + sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, err) } - ksDone[sm.backingTableKeyspace] = true } // We completed the work w/o errors. 
From ed62652d9848990605b1745abba048ac618c8cf0 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Fri, 28 Jul 2023 07:11:36 -0400 Subject: [PATCH 03/56] Gather sequence details after keyspace lock but before blocking writes Signed-off-by: Matt Lord --- go/vt/wrangler/traffic_switcher.go | 55 +++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 13 deletions(-) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 4e37e5dd3cb..6f715ed198b 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -531,6 +531,16 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowNa defer targetUnlock(&err) } + // Find out if the target is using any sequence tables for auto_increment + // value generation. If so, then we'll need to ensure that they are + // initialized properly before allowing new writes on the target. + sequenceMetadata, err := ts.getSequenceMetadata(ctx) + if err != nil { + werr := vterrors.Wrapf(err, "getSequenceMetadata failed") + ts.Logger().Error(werr) + return 0, nil, werr + } + // If no journals exist, sourceWorkflows will be initialized by sm.MigrateStreams. journalsExist, sourceWorkflows, err := ts.checkJournals(ctx) if err != nil { @@ -632,11 +642,13 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowNa ts.Logger().Errorf("createJournals failed: %v", err) return 0, nil, err } - // Initialize any target sequences before allowing new writes. - if err := ts.initializeTargetSequenceTables(ctx); err != nil { - werr := vterrors.Wrapf(err, "initializeTargetSequenceTables failed") - ts.Logger().Error(werr) - return 0, nil, werr + // Initialize any target sequences, if there are any, before allowing new writes. 
+ if len(sequenceMetadata) > 0 { + if err := ts.initializeTargetSequenceTables(ctx, sequenceMetadata); err != nil { + werr := vterrors.Wrapf(err, "initializeTargetSequenceTables failed") + ts.Logger().Error(werr) + return 0, nil, werr + } } if err := sw.allowTargetWrites(ctx); err != nil { ts.Logger().Errorf("allowTargetWrites failed: %v", err) @@ -1930,15 +1942,19 @@ func (ts *trafficSwitcher) isSequenceParticipating(ctx context.Context) (bool, e return sequenceFound, nil } -func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context) error { +// getSequenceMetadata returns a map of sequence metadata keyed by the +// backing sequence table name. If the target keyspace has no tables +// defined that use sequences for auto_increment generation then a nil +// map will be returned. +func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string]*sequenceMetadata, error) { log.Error("DEBUG: initializeTargetSequenceTables") vschema, err := ts.TopoServer().GetVSchema(ctx, ts.targetKeyspace) if err != nil { - return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for target keyspace %s: %v", + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for target keyspace %s: %v", ts.targetKeyspace, err) } if vschema == nil || vschema.Tables == nil || len(vschema.Tables) == 0 { // Nothing to do - return nil + return nil, nil } // We maintain two maps of the same sequence metadata so @@ -1964,7 +1980,7 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context) e } } if len(sequencesByUsingTable) == 0 { // Nothing to do - return nil + return nil, nil } log.Errorf("DEBUG: sequences: %+v", sequencesByUsingTable) @@ -1973,13 +1989,13 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context) e // be in another unsharded keyspace. 
keyspaces, err := ts.TopoServer().GetKeyspaces(ctx) if err != nil { - return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get keyspaces: %v", err) + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get keyspaces: %v", err) } log.Errorf("DEBUG: keyspaces: %+v", keyspaces) for _, keyspace := range keyspaces { vschema, err = ts.TopoServer().GetVSchema(ctx, keyspace) if err != nil { - return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", keyspace, err) } if vschema == nil || vschema.Sharded || vschema.Tables == nil || len(vschema.Tables) == 0 { @@ -2005,12 +2021,25 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context) e // Now we need to make sure we found all of the backing sequence tables. for _, sm := range sequencesByUsingTable { if sm.backingTableKeyspace == "" { - return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to locate all of the backing sequence tables being used; sequence tables metadata: %+v", + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to locate all of the backing sequence tables being used; sequence tables metadata: %+v", sequencesByUsingTable) } } log.Errorf("DEBUG: sequence backing tables: %+v", sequencesByBackingTable) + return sequencesByBackingTable, nil +} + +// initializeTargetSequenceTables initializes the backing sequence tables +// using a map keyed by the backing sequence table name. +// +// The backing tables must have already been created. This function will +// then ensure that the next value is set to a value greater than any +// currently stored in the using table on the target keyspace. If the +// backing table is updated to a new higher value then it will also tell +// the primary tablet serving the sequence to refresh/reset its cache to +// be sure that it does not provide a value that is less than the current max. 
+func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error { // Now we need to initialize the backing sequence tables so that // the next values they generate are greater than those that // currently exist in the using table on the target keyspace. @@ -2021,7 +2050,7 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context) e // a higher value. shardResults := make([]int64, 0, len(ts.TargetShards())) srMu := sync.Mutex{} - err = ts.ForAllTargets(func(target *workflow.MigrationTarget) error { + err := ts.ForAllTargets(func(target *workflow.MigrationTarget) error { query := sqlparser.BuildParsedQuery(sqlGetMaxSequenceVal, sqlescape.EscapeID(sequenceMetadata.usingTableDefinition.AutoIncrement.Column), sqlescape.EscapeID(sequenceMetadata.usingTableDBName), From 5ac2e4dcbae70ce7ab49ec447f4be2058adee88f Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Fri, 28 Jul 2023 07:24:42 -0400 Subject: [PATCH 04/56] Check for context cancellation often Signed-off-by: Matt Lord --- go/vt/wrangler/traffic_switcher.go | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 6f715ed198b..014de52c157 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -1982,9 +1982,14 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] if len(sequencesByUsingTable) == 0 { // Nothing to do return nil, nil } - log.Errorf("DEBUG: sequences: %+v", sequencesByUsingTable) + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } + // Now we need to locate the backing sequence tables which will // be in another unsharded keyspace. 
keyspaces, err := ts.TopoServer().GetKeyspaces(ctx) @@ -2017,6 +2022,11 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] sm.backingTableDBName = "vt_" + keyspace } } + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } } // Now we need to make sure we found all of the backing sequence tables. for _, sm := range sequencesByUsingTable { @@ -2077,6 +2087,11 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, s if err != nil { return err } + select { + case <-ctx.Done(): + return ctx.Err() + default: + } // Sort the values to find the max value across all shards. sort.Slice(shardResults, func(i, j int) bool { return shardResults[i] < shardResults[j] @@ -2122,6 +2137,11 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, s return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset sequence cache for table %s on shard %s.%s using tablet %s: %v", sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, err) } + select { + case <-ctx.Done(): + return ctx.Err() + default: + } } // We completed the work w/o errors. From 52ab8b753f65387b2372e1061e56fc95ddaeaaf5 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Fri, 28 Jul 2023 08:08:54 -0400 Subject: [PATCH 05/56] Skip sequence work for sharded to sharded migrations Signed-off-by: Matt Lord --- go/vt/wrangler/traffic_switcher.go | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 014de52c157..cc2b429081d 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -534,11 +534,15 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowNa // Find out if the target is using any sequence tables for auto_increment // value generation. 
If so, then we'll need to ensure that they are // initialized properly before allowing new writes on the target. - sequenceMetadata, err := ts.getSequenceMetadata(ctx) - if err != nil { - werr := vterrors.Wrapf(err, "getSequenceMetadata failed") - ts.Logger().Error(werr) - return 0, nil, werr + sequenceMetadata := make(map[string]*sequenceMetadata) + // For sharded to sharded migrations the sequence must already be setup. + if ts.SourceKeyspaceSchema() != nil && ts.SourceKeyspaceSchema().Keyspace != nil && ts.SourceKeyspaceSchema().Keyspace.Sharded { + sequenceMetadata, err = ts.getSequenceMetadata(ctx) + if err != nil { + werr := vterrors.Wrapf(err, "getSequenceMetadata failed") + ts.Logger().Error(werr) + return 0, nil, werr + } } // If no journals exist, sourceWorkflows will be initialized by sm.MigrateStreams. From f2b73137c62354313e4c902dd18253f4509a05d8 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Fri, 28 Jul 2023 08:22:32 -0400 Subject: [PATCH 06/56] Efficiency improvements Signed-off-by: Matt Lord --- go/vt/wrangler/traffic_switcher.go | 56 +++++++++++++++++++----------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index cc2b429081d..f6753ab7111 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -1961,10 +1961,6 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] return nil, nil } - // We maintain two maps of the same sequence metadata so - // that we have fast lookups for both the using table and - // the backing sequence table. 
- sequencesByUsingTable := make(map[string]*sequenceMetadata) sequencesByBackingTable := make(map[string]*sequenceMetadata) for _, table := range ts.Tables() { vs, ok := vschema.Tables[table] @@ -1973,20 +1969,29 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] } if vs.AutoIncrement != nil && vs.AutoIncrement.Sequence != "" { sm := &sequenceMetadata{ + backingTableName: vs.AutoIncrement.Sequence, usingTableName: table, usingTableDefinition: vs, - backingTableName: vs.AutoIncrement.Sequence, // TODO: get and set this properly to deal with db_name_overrides usingTableDBName: "vt_" + ts.targetKeyspace, } - sequencesByUsingTable[table] = sm - sequencesByBackingTable[vs.AutoIncrement.Sequence] = sm + // If the sequence table is fully qualified in the vschema then + // we don't need to find it later. + if strings.Contains(vs.AutoIncrement.Sequence, ".") { + parts := strings.Split(vs.AutoIncrement.Sequence, ".") + if len(parts) != 2 { + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "invalid sequence table name %s", vs.AutoIncrement.Sequence) + } + sm.backingTableName = parts[1] + sm.backingTableKeyspace = parts[0] + } + sequencesByBackingTable[sm.backingTableName] = sm } } - if len(sequencesByUsingTable) == 0 { // Nothing to do + if len(sequencesByBackingTable) == 0 { // Nothing to do return nil, nil } - log.Errorf("DEBUG: sequences: %+v", sequencesByUsingTable) + log.Errorf("DEBUG: sequences: %+v", sequencesByBackingTable) select { case <-ctx.Done(): @@ -1994,6 +1999,19 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] default: } + // If all of the sequence tables were defined using qualified table + // names in the vschema, then we don't need to look for them. 
+ mustSearch := false + for _, sm := range sequencesByBackingTable { + if sm.backingTableKeyspace == "" { + mustSearch = true + break + } + } + if !mustSearch { + return sequencesByBackingTable, nil + } + // Now we need to locate the backing sequence tables which will // be in another unsharded keyspace. keyspaces, err := ts.TopoServer().GetKeyspaces(ctx) @@ -2001,6 +2019,8 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get keyspaces: %v", err) } log.Errorf("DEBUG: keyspaces: %+v", keyspaces) + tableCount := len(sequencesByBackingTable) + tablesFound := 0 for _, keyspace := range keyspaces { vschema, err = ts.TopoServer().GetVSchema(ctx, keyspace) if err != nil { @@ -2014,16 +2034,20 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] sm := sequencesByBackingTable[tableName] if tableDef != nil && tableDef.Type == vindexes.TypeSequence && sm != nil && tableName == sm.backingTableName { + tablesFound++ // If the sequence backing table is being moved then we do not // want to initialize it. if keyspace == ts.targetKeyspace { delete(sequencesByBackingTable, tableName) - delete(sequencesByUsingTable, tableName) continue } sm.backingTableKeyspace = keyspace // TODO: get and set this properly in order to deal with db_name_overrides sm.backingTableDBName = "vt_" + keyspace + if tablesFound == tableCount { + log.Errorf("DEBUG: sequence backing tables found: %+v", sequencesByBackingTable) + return sequencesByBackingTable, nil + } } } select { @@ -2032,16 +2056,8 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] default: } } - // Now we need to make sure we found all of the backing sequence tables. 
- for _, sm := range sequencesByUsingTable { - if sm.backingTableKeyspace == "" { - return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to locate all of the backing sequence tables being used; sequence tables metadata: %+v", - sequencesByUsingTable) - } - } - log.Errorf("DEBUG: sequence backing tables: %+v", sequencesByBackingTable) - - return sequencesByBackingTable, nil + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to locate all of the backing sequence tables being used; sequence tables metadata: %+v", + sequencesByBackingTable) } // initializeTargetSequenceTables initializes the backing sequence tables From ca06b321057e7877a513c65a6fe50324dbc84e22 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Fri, 28 Jul 2023 17:21:23 -0400 Subject: [PATCH 07/56] More tweaks Signed-off-by: Matt Lord --- go/vt/wrangler/traffic_switcher.go | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index f6753ab7111..4fb3d0f1f2d 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -26,6 +26,8 @@ import ( "sync" "time" + "golang.org/x/exp/maps" + "vitess.io/vitess/go/json2" "vitess.io/vitess/go/sqlescape" "vitess.io/vitess/go/sqltypes" @@ -536,7 +538,10 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowNa // initialized properly before allowing new writes on the target. sequenceMetadata := make(map[string]*sequenceMetadata) // For sharded to sharded migrations the sequence must already be setup. - if ts.SourceKeyspaceSchema() != nil && ts.SourceKeyspaceSchema().Keyspace != nil && ts.SourceKeyspaceSchema().Keyspace.Sharded { + // For reshards the sequence usage is not changed. 
+ if ts.workflowType == binlogdatapb.VReplicationWorkflowType_MoveTables && + ts.SourceKeyspaceSchema() != nil && ts.SourceKeyspaceSchema().Keyspace != nil && + !ts.SourceKeyspaceSchema().Keyspace.Sharded { sequenceMetadata, err = ts.getSequenceMetadata(ctx) if err != nil { werr := vterrors.Wrapf(err, "getSequenceMetadata failed") @@ -1961,6 +1966,7 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] return nil, nil } + targetDBName := maps.Values(ts.Targets())[0].GetPrimary().DbName() sequencesByBackingTable := make(map[string]*sequenceMetadata) for _, table := range ts.Tables() { vs, ok := vschema.Tables[table] @@ -1972,8 +1978,7 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] backingTableName: vs.AutoIncrement.Sequence, usingTableName: table, usingTableDefinition: vs, - // TODO: get and set this properly to deal with db_name_overrides - usingTableDBName: "vt_" + ts.targetKeyspace, + usingTableDBName: targetDBName, } // If the sequence table is fully qualified in the vschema then // we don't need to find it later. @@ -2035,17 +2040,11 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] if tableDef != nil && tableDef.Type == vindexes.TypeSequence && sm != nil && tableName == sm.backingTableName { tablesFound++ - // If the sequence backing table is being moved then we do not - // want to initialize it. 
- if keyspace == ts.targetKeyspace { - delete(sequencesByBackingTable, tableName) - continue - } sm.backingTableKeyspace = keyspace // TODO: get and set this properly in order to deal with db_name_overrides sm.backingTableDBName = "vt_" + keyspace if tablesFound == tableCount { - log.Errorf("DEBUG: sequence backing tables found: %+v", sequencesByBackingTable) + log.Errorf("DEBUG: sequence backing table found: %+v", sequencesByBackingTable) return sequencesByBackingTable, nil } } @@ -2086,7 +2085,7 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, s sqlescape.EscapeID(sequenceMetadata.usingTableDBName), sqlescape.EscapeID(sequenceMetadata.usingTableName), ) - ts.Logger().Errorf("DEBUG: query: %s on shard: %s", query.Query, target.GetShard().ShardName()) + log.Errorf("DEBUG: query: %s on shard: %s", query.Query, target.GetShard().ShardName()) qr, err := ts.wr.ExecuteFetchAsApp(ctx, target.GetPrimary().GetAlias(), true, query.Query, 1) if err != nil || len(qr.Rows) != 1 { return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get max used value for target table %s in order to initialize the backing sequence table %s: %v", @@ -2097,7 +2096,7 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, s return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get max used value for target table %s in order to initialize the backing sequence table %s: %v", sequenceMetadata.usingTableName, ts.targetKeyspace, err) } - ts.Logger().Errorf("DEBUG: max ID seen on shard %s: %d", target.GetShard().ShardName(), maxID) + log.Errorf("DEBUG: max ID seen on shard %s: %d", target.GetShard().ShardName(), maxID) srMu.Lock() shardResults = append(shardResults, maxID) srMu.Unlock() From 91112ae6cbda66d7dbc5e68145552bd8d4c81494 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Fri, 28 Jul 2023 19:07:00 -0400 Subject: [PATCH 08/56] Address todo Signed-off-by: Matt Lord --- go/vt/wrangler/traffic_switcher.go | 31 
+++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 4fb3d0f1f2d..e27aa5c5e94 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -2041,7 +2041,6 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] sm != nil && tableName == sm.backingTableName { tablesFound++ sm.backingTableKeyspace = keyspace - // TODO: get and set this properly in order to deal with db_name_overrides sm.backingTableDBName = "vt_" + keyspace if tablesFound == tableCount { log.Errorf("DEBUG: sequence backing table found: %+v", sequencesByBackingTable) @@ -2080,13 +2079,19 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, s shardResults := make([]int64, 0, len(ts.TargetShards())) srMu := sync.Mutex{} err := ts.ForAllTargets(func(target *workflow.MigrationTarget) error { + primary := target.GetPrimary() + if primary == nil { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary tablet found for target shard %s/%s", + ts.targetKeyspace, target.GetShard().ShardName()) + } query := sqlparser.BuildParsedQuery(sqlGetMaxSequenceVal, sqlescape.EscapeID(sequenceMetadata.usingTableDefinition.AutoIncrement.Column), sqlescape.EscapeID(sequenceMetadata.usingTableDBName), sqlescape.EscapeID(sequenceMetadata.usingTableName), ) - log.Errorf("DEBUG: query: %s on shard: %s", query.Query, target.GetShard().ShardName()) - qr, err := ts.wr.ExecuteFetchAsApp(ctx, target.GetPrimary().GetAlias(), true, query.Query, 1) + log.Errorf("DEBUG: query: %s on shard: %s/%s", + query.Query, ts.targetKeyspace, target.GetShard().ShardName()) + qr, err := ts.wr.ExecuteFetchAsApp(ctx, primary.GetAlias(), true, query.Query, 1) if err != nil || len(qr.Rows) != 1 { return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get max used value for target table %s in order to initialize the backing sequence table %s: 
%v", sequenceMetadata.usingTableName, ts.targetKeyspace, err) @@ -2118,6 +2123,19 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, s nextVal := shardResults[len(shardResults)-1] + 1 // Now we need to update the sequence table, if needed, in order to // ensure that that the next value it provides is > the current max. + sequenceShard, err := ts.wr.TopoServer().GetOnlyShard(ctx, sequenceMetadata.backingTableKeyspace) + if err != nil || sequenceShard == nil || sequenceShard.PrimaryAlias == nil { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for %s: %v", + sequenceMetadata.backingTableKeyspace, err) + } + sequenceTablet, err := ts.wr.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) + if err != nil || sequenceTablet == nil { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", + sequenceMetadata.backingTableKeyspace, err) + } + if sequenceTablet.DbNameOverride != "" { + sequenceMetadata.backingTableDBName = sequenceTablet.DbNameOverride + } query := sqlparser.BuildParsedQuery(sqlInitSequenceTable, sqlescape.EscapeID(sequenceMetadata.backingTableDBName), sqlescape.EscapeID(sequenceMetadata.backingTableName), @@ -2126,13 +2144,8 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, s nextVal, ) log.Errorf("DEBUG: query: %s", query.Query) - // Execute this on the primary tablet of the unsharded keyspace + // Now execute this on the primary tablet of the unsharded keyspace // housing the backing table. 
- sequenceShard, err := ts.wr.TopoServer().GetOnlyShard(ctx, sequenceMetadata.backingTableKeyspace) - if err != nil || sequenceShard == nil || sequenceShard.PrimaryAlias == nil { - return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", - sequenceMetadata.backingTableKeyspace, err) - } qr, err := ts.wr.ExecuteFetchAsApp(ctx, sequenceShard.PrimaryAlias, true, query.Query, 1) if err != nil { return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to initialize the sequence table %s.%s: %v", From 0c3e0f6252812120440b5ef2e9fc15cd21b9618f Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Sat, 29 Jul 2023 11:51:37 -0400 Subject: [PATCH 09/56] Search keyspaces in parallel Signed-off-by: Matt Lord --- go/vt/wrangler/traffic_switcher.go | 84 +++++++++++++++++++++++------- 1 file changed, 66 insertions(+), 18 deletions(-) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index e27aa5c5e94..09c10c7d3f3 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -1968,6 +1968,7 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] targetDBName := maps.Values(ts.Targets())[0].GetPrimary().DbName() sequencesByBackingTable := make(map[string]*sequenceMetadata) + smMu := sync.Mutex{} for _, table := range ts.Tables() { vs, ok := vschema.Tables[table] if !ok || vs == nil { @@ -2025,37 +2026,84 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] } log.Errorf("DEBUG: keyspaces: %+v", keyspaces) tableCount := len(sequencesByBackingTable) - tablesFound := 0 - for _, keyspace := range keyspaces { - vschema, err = ts.TopoServer().GetVSchema(ctx, keyspace) + tablesFound := 0 // Used to short circuit the search + ksCtx, ksCancel := context.WithCancel(ctx) // Used to cancel the goroutines + defer ksCancel() // Cancel all of the goroutines when we are done + ksErr := make(chan error) // Used if we encountered an error during 
the search + ksDone := make(chan struct{}) // The search has completed + ksWg := sync.WaitGroup{} // All of the goroutines finished + ksFunc := func(ks string) { // The function used to search each keyspace + defer ksWg.Done() + vschema, err = ts.TopoServer().GetVSchema(ctx, ks) if err != nil { - return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", - keyspace, err) + ksErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", + ks, err) + return } if vschema == nil || vschema.Sharded || vschema.Tables == nil || len(vschema.Tables) == 0 { - continue + return + } + select { + case <-ctx.Done(): // The command timed out + return + default: + } + select { + case <-ksCtx.Done(): // The search has been cancelled + return + default: } for tableName, tableDef := range vschema.Tables { + smMu.Lock() // Prevent concurrent access to the map sm := sequencesByBackingTable[tableName] if tableDef != nil && tableDef.Type == vindexes.TypeSequence && sm != nil && tableName == sm.backingTableName { - tablesFound++ - sm.backingTableKeyspace = keyspace - sm.backingTableDBName = "vt_" + keyspace - if tablesFound == tableCount { - log.Errorf("DEBUG: sequence backing table found: %+v", sequencesByBackingTable) - return sequencesByBackingTable, nil + tablesFound++ // This is also protected by the mutex + sm.backingTableKeyspace = ks + sm.backingTableDBName = "vt_" + ks + if tablesFound == tableCount { // Short circuit the search + log.Errorf("DEBUG: all sequence backing tables found: %+v", sequencesByBackingTable) + smMu.Unlock() + ksDone <- struct{}{} + return } } + smMu.Unlock() + select { + case <-ctx.Done(): // The command timed out + return + default: + } + select { + case <-ksCtx.Done(): // The search has been cancelled + return + default: + } } - select { - case <-ctx.Done(): - return nil, ctx.Err() - default: + } + for _, keyspace := range keyspaces { + ksWg.Add(1) + go ksFunc(keyspace) + } + // Wait for 
all the goroutines to finish on their own, which means we + // probably did not find all of the tables. + go func() { + ksWg.Wait() + close(ksDone) + }() + + select { + case <-ksDone: + if tablesFound != tableCount { + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to locate all of the backing sequence tables being used; sequence tables metadata: %+v", + sequencesByBackingTable) } + return sequencesByBackingTable, nil + case ksErr := <-ksErr: // We encountered an error + return nil, ksErr + case <-ctx.Done(): // The command timed out + return nil, ctx.Err() } - return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to locate all of the backing sequence tables being used; sequence tables metadata: %+v", - sequencesByBackingTable) } // initializeTargetSequenceTables initializes the backing sequence tables From fcab61240564b32ef52b787de87aed8471068a1a Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Sat, 29 Jul 2023 17:08:07 -0400 Subject: [PATCH 10/56] Parallelize init work Signed-off-by: Matt Lord --- go/vt/wrangler/traffic_switcher.go | 80 +++++++++++++++++++----------- 1 file changed, 51 insertions(+), 29 deletions(-) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 09c10c7d3f3..f2321977a06 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -653,7 +653,12 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowNa } // Initialize any target sequences, if there are any, before allowing new writes. if len(sequenceMetadata) > 0 { - if err := ts.initializeTargetSequenceTables(ctx, sequenceMetadata); err != nil { + // Writes are blocked so we can safely initialize the sequence tables but + // we also want to use a shorter timeout than the parent context. + // We use up at most half of the overall timeout. 
+ initSeqCtx, cancel := context.WithTimeout(ctx, timeout/2) + defer cancel() + if err := ts.initializeTargetSequenceTables(initSeqCtx, sequenceMetadata); err != nil { werr := vterrors.Wrapf(err, "initializeTargetSequenceTables failed") ts.Logger().Error(werr) return 0, nil, werr @@ -1956,7 +1961,7 @@ func (ts *trafficSwitcher) isSequenceParticipating(ctx context.Context) (bool, e // defined that use sequences for auto_increment generation then a nil // map will be returned. func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string]*sequenceMetadata, error) { - log.Error("DEBUG: initializeTargetSequenceTables") + log.Error("DEBUG: getSequenceMetadata") vschema, err := ts.TopoServer().GetVSchema(ctx, ts.targetKeyspace) if err != nil { return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for target keyspace %s: %v", @@ -2044,12 +2049,12 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] return } select { - case <-ctx.Done(): // The command timed out + case <-ctx.Done(): return default: } select { - case <-ksCtx.Done(): // The search has been cancelled + case <-ksCtx.Done(): return default: } @@ -2070,7 +2075,7 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] } smMu.Unlock() select { - case <-ctx.Done(): // The command timed out + case <-ctx.Done(): return default: } @@ -2099,9 +2104,9 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] sequencesByBackingTable) } return sequencesByBackingTable, nil - case ksErr := <-ksErr: // We encountered an error + case ksErr := <-ksErr: return nil, ksErr - case <-ctx.Done(): // The command timed out + case <-ctx.Done(): return nil, ctx.Err() } } @@ -2116,11 +2121,13 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] // the primary tablet serving the sequence to refresh/reset its cache to // be sure that it does not provide a value that is less than the 
current max. func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error { - // Now we need to initialize the backing sequence tables so that - // the next values they generate are greater than those that - // currently exist in the using table on the target keyspace. - for sequenceTableName, sequenceMetadata := range sequencesByBackingTable { + log.Error("DEBUG: initializeTargetSequenceTables") + initErr := make(chan error) // Used if we encounter an error + initDone := make(chan struct{}) // The initialization has completed + initWg := sync.WaitGroup{} // All of the goroutines finished + initFunc := func(sequenceTableName string, sequenceMetadata *sequenceMetadata) { log.Errorf("DEBUG: sequence table: %v, sequenceMetadata: %+v", sequenceTableName, sequenceMetadata) + defer initWg.Done() // Now we need to run this query on the target shards in order // to get the max value and set the next id for the sequence to // a higher value. 
@@ -2128,8 +2135,8 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, s srMu := sync.Mutex{} err := ts.ForAllTargets(func(target *workflow.MigrationTarget) error { primary := target.GetPrimary() - if primary == nil { - return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary tablet found for target shard %s/%s", + if primary == nil || primary.GetAlias() == nil { + initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary tablet found for target shard %s/%s", ts.targetKeyspace, target.GetShard().ShardName()) } query := sqlparser.BuildParsedQuery(sqlGetMaxSequenceVal, @@ -2141,12 +2148,12 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, s query.Query, ts.targetKeyspace, target.GetShard().ShardName()) qr, err := ts.wr.ExecuteFetchAsApp(ctx, primary.GetAlias(), true, query.Query, 1) if err != nil || len(qr.Rows) != 1 { - return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get max used value for target table %s in order to initialize the backing sequence table %s: %v", + initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get max used value for target table %s in order to initialize the backing sequence table %s: %v", sequenceMetadata.usingTableName, ts.targetKeyspace, err) } maxID, err := sqltypes.Proto3ToResult(qr).Rows[0][0].ToInt64() if err != nil { - return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get max used value for target table %s in order to initialize the backing sequence table %s: %v", + initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get max used value for target table %s in order to initialize the backing sequence table %s: %v", sequenceMetadata.usingTableName, ts.targetKeyspace, err) } log.Errorf("DEBUG: max ID seen on shard %s: %d", target.GetShard().ShardName(), maxID) @@ -2157,11 +2164,11 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, s return nil }) if err != nil { - return err + initErr <- err } select { case 
<-ctx.Done(): - return ctx.Err() + return default: } // Sort the values to find the max value across all shards. @@ -2173,12 +2180,12 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, s // ensure that that the next value it provides is > the current max. sequenceShard, err := ts.wr.TopoServer().GetOnlyShard(ctx, sequenceMetadata.backingTableKeyspace) if err != nil || sequenceShard == nil || sequenceShard.PrimaryAlias == nil { - return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for %s: %v", + initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for %s: %v", sequenceMetadata.backingTableKeyspace, err) } sequenceTablet, err := ts.wr.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) if err != nil || sequenceTablet == nil { - return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", + initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", sequenceMetadata.backingTableKeyspace, err) } if sequenceTablet.DbNameOverride != "" { @@ -2196,36 +2203,51 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, s // housing the backing table. qr, err := ts.wr.ExecuteFetchAsApp(ctx, sequenceShard.PrimaryAlias, true, query.Query, 1) if err != nil { - return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to initialize the sequence table %s.%s: %v", + initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to initialize the sequence table %s.%s: %v", sequenceMetadata.backingTableDBName, sequenceMetadata.backingTableName, err) } // If we actually updated the backing sequence table, then we need // to tell the primary tablet managing the sequence to refresh/reset // its cache for the table. 
if qr.RowsAffected == 0 { - continue + return + } + select { + case <-ctx.Done(): + return + default: } ts.Logger().Infof("Resetting sequence cache for table %s on shard %s.%s using tablet %s", sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias) ti, err := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) if err != nil { - return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get primary tablet for keyspace %s: %v", + initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get primary tablet for keyspace %s: %v", sequenceMetadata.backingTableKeyspace, err) } err = ts.TabletManagerClient().ResetSequences(ctx, ti.Tablet, []string{sequenceMetadata.backingTableName}) if err != nil { - return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset sequence cache for table %s on shard %s.%s using tablet %s: %v", + initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset sequence cache for table %s on shard %s.%s using tablet %s: %v", sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, err) } - select { - case <-ctx.Done(): - return ctx.Err() - default: - } } + for sequenceTableName, sequenceMetadata := range sequencesByBackingTable { + initWg.Add(1) + go initFunc(sequenceTableName, sequenceMetadata) + } + go func() { + initWg.Wait() + close(initDone) + }() + select { // We completed the work w/o errors. 
- return nil + case <-initDone: + return nil + case err := <-initErr: + return err + case <-ctx.Done(): + return ctx.Err() + } } func (ts *trafficSwitcher) mustResetSequences(ctx context.Context) (bool, error) { From b4037da4a55e1cae4486cd06e54a51f3ca2a0ddd Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Sat, 29 Jul 2023 17:48:50 -0400 Subject: [PATCH 11/56] Consolidate selects Signed-off-by: Matt Lord --- go/vt/wrangler/traffic_switcher.go | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index f2321977a06..c4b6fbd5b22 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -1971,7 +1971,11 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] return nil, nil } - targetDBName := maps.Values(ts.Targets())[0].GetPrimary().DbName() + targets := maps.Values(ts.Targets()) + if len(targets) == 0 || targets[0].GetPrimary() == nil { // This should never happen + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary tablet for target keyspace %s", ts.targetKeyspace) + } + targetDBName := targets[0].GetPrimary().DbName() sequencesByBackingTable := make(map[string]*sequenceMetadata) smMu := sync.Mutex{} for _, table := range ts.Tables() { @@ -2051,9 +2055,6 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] select { case <-ctx.Done(): return - default: - } - select { case <-ksCtx.Done(): return default: @@ -2077,9 +2078,6 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] select { case <-ctx.Done(): return - default: - } - select { case <-ksCtx.Done(): // The search has been cancelled return default: @@ -2126,8 +2124,8 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, s initDone := make(chan struct{}) // The initialization has completed initWg := sync.WaitGroup{} // All of the goroutines finished initFunc 
:= func(sequenceTableName string, sequenceMetadata *sequenceMetadata) { - log.Errorf("DEBUG: sequence table: %v, sequenceMetadata: %+v", sequenceTableName, sequenceMetadata) defer initWg.Done() + log.Errorf("DEBUG: sequence table: %v, sequenceMetadata: %+v", sequenceTableName, sequenceMetadata) // Now we need to run this query on the target shards in order // to get the max value and set the next id for the sequence to // a higher value. From f1c95b772217b1055867a189a3eea1f904d966b3 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Sat, 29 Jul 2023 23:01:47 -0400 Subject: [PATCH 12/56] Minor tweaks after self review Signed-off-by: Matt Lord --- go/vt/wrangler/traffic_switcher.go | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index c4b6fbd5b22..545640550f9 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -1973,7 +1973,7 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] targets := maps.Values(ts.Targets()) if len(targets) == 0 || targets[0].GetPrimary() == nil { // This should never happen - return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary tablet for target keyspace %s", ts.targetKeyspace) + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary tablet found for target keyspace %s", ts.targetKeyspace) } targetDBName := targets[0].GetPrimary().DbName() sequencesByBackingTable := make(map[string]*sequenceMetadata) @@ -1995,7 +1995,8 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] if strings.Contains(vs.AutoIncrement.Sequence, ".") { parts := strings.Split(vs.AutoIncrement.Sequence, ".") if len(parts) != 2 { - return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "invalid sequence table name %s", vs.AutoIncrement.Sequence) + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "invalid sequence table name %s defined in the 
%s keyspace", + vs.AutoIncrement.Sequence, ts.targetKeyspace) } sm.backingTableName = parts[1] sm.backingTableKeyspace = parts[0] @@ -2027,7 +2028,7 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] return sequencesByBackingTable, nil } - // Now we need to locate the backing sequence tables which will + // Now we need to locate the backing sequence table(s) which will // be in another unsharded keyspace. keyspaces, err := ts.TopoServer().GetKeyspaces(ctx) if err != nil { @@ -2084,6 +2085,7 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] } } } + for _, keyspace := range keyspaces { ksWg.Add(1) go ksFunc(keyspace) @@ -2098,7 +2100,7 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] select { case <-ksDone: if tablesFound != tableCount { - return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to locate all of the backing sequence tables being used; sequence tables metadata: %+v", + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to locate all of the backing sequence tables being used; sequence table metadata: %+v", sequencesByBackingTable) } return sequencesByBackingTable, nil @@ -2146,19 +2148,18 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, s query.Query, ts.targetKeyspace, target.GetShard().ShardName()) qr, err := ts.wr.ExecuteFetchAsApp(ctx, primary.GetAlias(), true, query.Query, 1) if err != nil || len(qr.Rows) != 1 { - initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get max used value for target table %s in order to initialize the backing sequence table %s: %v", - sequenceMetadata.usingTableName, ts.targetKeyspace, err) + initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the max used sequence value for target table %s.%s in order to initialize the backing sequence table: %v", + ts.targetKeyspace, sequenceMetadata.usingTableName, err) } maxID, err := 
sqltypes.Proto3ToResult(qr).Rows[0][0].ToInt64() if err != nil { - initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get max used value for target table %s in order to initialize the backing sequence table %s: %v", - sequenceMetadata.usingTableName, ts.targetKeyspace, err) + initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the max used sequence value for target table %s.%s in order to initialize the backing sequence table: %v", + ts.targetKeyspace, sequenceMetadata.usingTableName, err) } log.Errorf("DEBUG: max ID seen on shard %s: %d", target.GetShard().ShardName(), maxID) srMu.Lock() + defer srMu.Unlock() shardResults = append(shardResults, maxID) - srMu.Unlock() - return nil }) if err != nil { @@ -2178,7 +2179,7 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, s // ensure that that the next value it provides is > the current max. sequenceShard, err := ts.wr.TopoServer().GetOnlyShard(ctx, sequenceMetadata.backingTableKeyspace) if err != nil || sequenceShard == nil || sequenceShard.PrimaryAlias == nil { - initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for %s: %v", + initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", sequenceMetadata.backingTableKeyspace, err) } sequenceTablet, err := ts.wr.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) @@ -2201,7 +2202,7 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, s // housing the backing table. 
qr, err := ts.wr.ExecuteFetchAsApp(ctx, sequenceShard.PrimaryAlias, true, query.Query, 1) if err != nil { - initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to initialize the sequence table %s.%s: %v", + initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to initialize the backing sequence table %s.%s: %v", sequenceMetadata.backingTableDBName, sequenceMetadata.backingTableName, err) } // If we actually updated the backing sequence table, then we need @@ -2215,7 +2216,7 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, s return default: } - ts.Logger().Infof("Resetting sequence cache for table %s on shard %s.%s using tablet %s", + ts.Logger().Infof("Resetting sequence cache for backing table %s on shard %s.%s using tablet %s", sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias) ti, err := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) if err != nil { @@ -2224,10 +2225,11 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, s } err = ts.TabletManagerClient().ResetSequences(ctx, ti.Tablet, []string{sequenceMetadata.backingTableName}) if err != nil { - initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset sequence cache for table %s on shard %s.%s using tablet %s: %v", + initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset the sequence cache for backing table %s on shard %s.%s using tablet %s: %v", sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, err) } } + for sequenceTableName, sequenceMetadata := range sequencesByBackingTable { initWg.Add(1) go initFunc(sequenceTableName, sequenceMetadata) From fecfc4934fbc175469cf61d23446f9926cfbe681 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Sun, 30 Jul 2023 16:02:38 -0400 Subject: [PATCH 13/56] Add flag and switcher ifc impl Signed-off-by: Matt Lord --- go/vt/vtctl/vtctl.go | 4 +++- 
go/vt/wrangler/stream_migrater_test.go | 26 ++++++++++---------- go/vt/wrangler/switcher.go | 4 ++++ go/vt/wrangler/switcher_dry_run.go | 8 +++++++ go/vt/wrangler/switcher_interface.go | 1 + go/vt/wrangler/traffic_switcher.go | 19 +++++++-------- go/vt/wrangler/traffic_switcher_test.go | 32 ++++++++++++------------- go/vt/wrangler/workflow.go | 12 ++++++---- 8 files changed, 61 insertions(+), 45 deletions(-) diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index fa9d8a1e1d0..b0faf9658e3 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -443,7 +443,7 @@ var commands = []commandGroup{ { name: "MoveTables", method: commandMoveTables, - params: "[--source=] [--tables=] [--cells=] [--tablet_types=] [--all] [--exclude=] [--auto_start] [--stop_after_copy] [--defer-secondary-keys] [--on-ddl=] [--source_shards=] 'action must be one of the following: Create, Complete, Cancel, SwitchTraffic, ReverseTrafffic, Show, or Progress' ", + params: "[--source=] [--tables=] [--cells=] [--tablet_types=] [--all] [--exclude=] [--auto_start] [--stop_after_copy] [--defer-secondary-keys] [--on-ddl=] [--source_shards=] [--initialize-target-sequences] 'action must be one of the following: Create, Complete, Cancel, SwitchTraffic, ReverseTrafffic, Show, or Progress' ", help: `Move table(s) to another keyspace, table_specs is a list of tables or the tables section of the vschema for the target keyspace. Example: '{"t1":{"column_vindexes": [{"column": "id1", "name": "hash"}]}, "t2":{"column_vindexes": [{"column": "id2", "name": "hash"}]}}'. In the case of an unsharded target keyspace the vschema for each table may be empty. Example: '{"t1":{}, "t2":{}}'.`, }, { @@ -2107,6 +2107,7 @@ func commandVReplicationWorkflow(ctx context.Context, wr *wrangler.Wrangler, sub allTables := subFlags.Bool("all", false, "MoveTables only. Move all tables from the source keyspace. Either table_specs or --all needs to be specified.") excludes := subFlags.String("exclude", "", "MoveTables only. 
Tables to exclude (comma-separated) if --all is specified") sourceKeyspace := subFlags.String("source", "", "MoveTables only. Source keyspace") + initializeTargetSequences := subFlags.Bool("initialize-target-sequences", false, "MoveTables only. When moving tables from an unsharded keyspace to a sharded keyspace, initialize any sequences that are being used on the target.") // if sourceTimeZone is specified, the target needs to have time zones loaded // note we make an opinionated decision to not allow specifying a different target time zone than UTC. @@ -2288,6 +2289,7 @@ func commandVReplicationWorkflow(ctx context.Context, wr *wrangler.Wrangler, sub vrwp.Timeout = *timeout vrwp.EnableReverseReplication = *reverseReplication vrwp.MaxAllowedTransactionLagSeconds = int64(math.Ceil(maxReplicationLagAllowed.Seconds())) + vrwp.InitializeTargetSequences = *initializeTargetSequences case vReplicationWorkflowActionCancel: vrwp.KeepData = *keepData vrwp.KeepRoutingRules = *keepRoutingRules diff --git a/go/vt/wrangler/stream_migrater_test.go b/go/vt/wrangler/stream_migrater_test.go index c0835760bde..d7b93e07f59 100644 --- a/go/vt/wrangler/stream_migrater_test.go +++ b/go/vt/wrangler/stream_migrater_test.go @@ -169,7 +169,7 @@ func TestStreamMigrateMainflow(t *testing.T) { tme.expectCreateReverseVReplication() tme.expectStartReverseVReplication() tme.expectFrozenTargetVReplication() - if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false); err != nil { + if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false); err != nil { t.Fatal(err) } @@ -345,7 +345,7 @@ func TestStreamMigrateTwoStreams(t *testing.T) { tme.expectStartReverseVReplication() tme.expectFrozenTargetVReplication() - if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false); err != nil { + if _, _, err := tme.wr.SwitchWrites(ctx, 
tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false); err != nil { t.Fatal(err) } @@ -480,7 +480,7 @@ func TestStreamMigrateOneToMany(t *testing.T) { tme.expectStartReverseVReplication() tme.expectFrozenTargetVReplication() - if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false); err != nil { + if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false); err != nil { t.Fatal(err) } @@ -618,7 +618,7 @@ func TestStreamMigrateManyToOne(t *testing.T) { tme.expectStartReverseVReplication() tme.expectFrozenTargetVReplication() - if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false); err != nil { + if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false); err != nil { t.Fatal(err) } @@ -810,7 +810,7 @@ func TestStreamMigrateSyncSuccess(t *testing.T) { tme.expectStartReverseVReplication() tme.expectFrozenTargetVReplication() - if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false); err != nil { + if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false); err != nil { t.Fatal(err) } @@ -941,7 +941,7 @@ func TestStreamMigrateSyncFail(t *testing.T) { tme.expectCancelMigration() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) want := "does not match" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites err: %v, want %s", err, want) @@ -1037,7 +1037,7 @@ func TestStreamMigrateCancel(t *testing.T) { } cancelMigration() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, 
true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) want := "intentionally failed" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites err: %v, want %s", err, want) @@ -1107,7 +1107,7 @@ func TestStreamMigrateStoppedStreams(t *testing.T) { } stopStreams() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) want := "cannot migrate until all streams are running: 0: 10" if err == nil || err.Error() != want { t.Errorf("SwitchWrites err: %v, want %v", err, want) @@ -1175,7 +1175,7 @@ func TestStreamMigrateCancelWithStoppedStreams(t *testing.T) { tme.expectCancelMigration() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false, false, false) if err != nil { t.Fatal(err) } @@ -1237,7 +1237,7 @@ func TestStreamMigrateStillCopying(t *testing.T) { } stopStreams() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) want := "cannot migrate while vreplication streams in source shards are still copying: 0" if err == nil || err.Error() != want { t.Errorf("SwitchWrites err: %v, want %v", err, want) @@ -1299,7 +1299,7 @@ func TestStreamMigrateEmptyWorkflow(t *testing.T) { } stopStreams() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) want := "VReplication streams must have named workflows for migration: shard: 
ks:0, stream: 1" if err == nil || err.Error() != want { t.Errorf("SwitchWrites err: %v, want %v", err, want) @@ -1361,7 +1361,7 @@ func TestStreamMigrateDupWorkflow(t *testing.T) { } stopStreams() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) want := "VReplication stream has the same workflow name as the resharding workflow: shard: ks:0, stream: 1" if err == nil || err.Error() != want { t.Errorf("SwitchWrites err: %v, want %v", err, want) @@ -1434,7 +1434,7 @@ func TestStreamMigrateStreamsMismatch(t *testing.T) { } stopStreams() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) want := "streams are mismatched across source shards" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites err: %v, must contain %v", err, want) diff --git a/go/vt/wrangler/switcher.go b/go/vt/wrangler/switcher.go index c151f5d62cc..acf1ea194b0 100644 --- a/go/vt/wrangler/switcher.go +++ b/go/vt/wrangler/switcher.go @@ -144,3 +144,7 @@ func (r *switcher) logs() *[]string { func (r *switcher) resetSequences(ctx context.Context) error { return r.ts.resetSequences(ctx) } + +func (r *switcher) initializeTargetSequences(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error { + return r.ts.initializeTargetSequences(ctx, sequencesByBackingTable) +} diff --git a/go/vt/wrangler/switcher_dry_run.go b/go/vt/wrangler/switcher_dry_run.go index 231f6846928..017a2a03b68 100644 --- a/go/vt/wrangler/switcher_dry_run.go +++ b/go/vt/wrangler/switcher_dry_run.go @@ -23,6 +23,8 @@ import ( "strings" "time" + "golang.org/x/exp/maps" + "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/vt/vtctl/workflow" @@ -383,3 
+385,9 @@ func (dr *switcherDryRun) resetSequences(ctx context.Context) error { dr.drLog.Log("The sequence caches will be reset on the source since sequence tables are being moved") return nil } + +func (dr *switcherDryRun) initializeTargetSequences(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error { + dr.drLog.Log(fmt.Sprintf("The following sequence backing tables used by tables being moved will be initialized: %s", + strings.Join(maps.Keys(sequencesByBackingTable), ","))) + return nil +} diff --git a/go/vt/wrangler/switcher_interface.go b/go/vt/wrangler/switcher_interface.go index e46992975d3..a29e178e9eb 100644 --- a/go/vt/wrangler/switcher_interface.go +++ b/go/vt/wrangler/switcher_interface.go @@ -53,5 +53,6 @@ type iswitcher interface { deleteShardRoutingRules(ctx context.Context) error addParticipatingTablesToKeyspace(ctx context.Context, keyspace, tableSpecs string) error resetSequences(ctx context.Context) error + initializeTargetSequences(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error logs() *[]string } diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 545640550f9..67771bdec85 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -477,7 +477,7 @@ func (wr *Wrangler) areTabletsAvailableToStreamFrom(ctx context.Context, ts *tra // SwitchWrites is a generic way of migrating write traffic for a resharding workflow. 
func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowName string, timeout time.Duration, - cancel, reverse, reverseReplication bool, dryRun bool) (journalID int64, dryRunResults *[]string, err error) { + cancel, reverse, reverseReplication bool, dryRun, initTargetSequences bool) (journalID int64, dryRunResults *[]string, err error) { ts, ws, err := wr.getWorkflowState(ctx, targetKeyspace, workflowName) _ = ws if err != nil { @@ -539,7 +539,7 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowNa sequenceMetadata := make(map[string]*sequenceMetadata) // For sharded to sharded migrations the sequence must already be setup. // For reshards the sequence usage is not changed. - if ts.workflowType == binlogdatapb.VReplicationWorkflowType_MoveTables && + if initTargetSequences && ts.workflowType == binlogdatapb.VReplicationWorkflowType_MoveTables && ts.SourceKeyspaceSchema() != nil && ts.SourceKeyspaceSchema().Keyspace != nil && !ts.SourceKeyspaceSchema().Keyspace.Sharded { sequenceMetadata, err = ts.getSequenceMetadata(ctx) @@ -652,14 +652,14 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowNa return 0, nil, err } // Initialize any target sequences, if there are any, before allowing new writes. - if len(sequenceMetadata) > 0 { + if initTargetSequences && len(sequenceMetadata) > 0 { // Writes are blocked so we can safely initialize the sequence tables but // we also want to use a shorter timeout than the parent context. // We use up at most half of the overall timeout. 
initSeqCtx, cancel := context.WithTimeout(ctx, timeout/2) defer cancel() - if err := ts.initializeTargetSequenceTables(initSeqCtx, sequenceMetadata); err != nil { - werr := vterrors.Wrapf(err, "initializeTargetSequenceTables failed") + if err := sw.initializeTargetSequences(initSeqCtx, sequenceMetadata); err != nil { + werr := vterrors.Wrapf(err, "initializeTargetSequences failed") ts.Logger().Error(werr) return 0, nil, werr } @@ -2111,7 +2111,7 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] } } -// initializeTargetSequenceTables initializes the backing sequence tables +// initializeTargetSequences initializes the backing sequence tables // using a map keyed by the backing sequence table name. // // The backing tables must have already been created. This function will @@ -2120,8 +2120,8 @@ func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string] // backing table is updated to a new higher value then it will also tell // the primary tablet serving the sequence to refresh/reset its cache to // be sure that it does not provide a value that is less than the current max. -func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error { - log.Error("DEBUG: initializeTargetSequenceTables") +func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error { + log.Error("DEBUG: initializeTargetSequences") initErr := make(chan error) // Used if we encounter an error initDone := make(chan struct{}) // The initialization has completed initWg := sync.WaitGroup{} // All of the goroutines finished @@ -2240,8 +2240,7 @@ func (ts *trafficSwitcher) initializeTargetSequenceTables(ctx context.Context, s }() select { - // We completed the work w/o errors. 
- case <-initDone: + case <-initDone: // We completed the work w/o errors return nil case err := <-initErr: return err diff --git a/go/vt/wrangler/traffic_switcher_test.go b/go/vt/wrangler/traffic_switcher_test.go index f7ae7998410..ecad4d5db9d 100644 --- a/go/vt/wrangler/traffic_switcher_test.go +++ b/go/vt/wrangler/traffic_switcher_test.go @@ -347,7 +347,7 @@ func TestTableMigrateMainflow(t *testing.T) { cancelMigration() switchWrites(tme) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false, false) want = "DeadlineExceeded" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites(0 timeout) err: %v, must contain %v", err, want) @@ -455,7 +455,7 @@ func TestTableMigrateMainflow(t *testing.T) { } deleteTargetVReplication() - journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) + journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) if err != nil { t.Fatal(err) } @@ -662,7 +662,7 @@ func TestShardMigrateMainflow(t *testing.T) { } cancelMigration() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false, false) want = "DeadlineExceeded" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites(0 timeout) err: %v, must contain %v", err, want) @@ -751,7 +751,7 @@ func TestShardMigrateMainflow(t *testing.T) { } freezeTargetVReplication() - journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) + journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) if 
err != nil { t.Fatal(err) } @@ -870,7 +870,7 @@ func testTableMigrateOneToMany(t *testing.T, keepData, keepRoutingRules bool) { tme.dbSourceClients[0].addQueryRE(tsCheckJournals, &sqltypes.Result{}, nil) switchWrites(tme) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false, false, false) if err != nil { t.Fatal(err) } @@ -1092,7 +1092,7 @@ func TestTableMigrateOneToManyDryRun(t *testing.T) { deleteTargetVReplication() switchWrites(tme) - _, results, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false, true) + _, results, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false, true, false) require.NoError(t, err) require.Empty(t, cmp.Diff(wantdryRunWrites, *results)) } @@ -1180,7 +1180,7 @@ func TestMigrateFailJournal(t *testing.T) { tme.dbSourceClients[1].addQueryRE("insert into _vt.resharding_journal", nil, errors.New("journaling intentionally failed")) switchWrites(tme) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) want := "journaling intentionally failed" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites(0 timeout) err: %v, must contain %v", err, want) @@ -1242,7 +1242,7 @@ func TestTableMigrateJournalExists(t *testing.T) { tme.dbTargetClients[1].addQuery("select * from _vt.vreplication where id = 2", stoppedResult(2), nil) switchWrites(tme) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) if err != nil { t.Fatal(err) } @@ -1320,7 +1320,7 @@ func 
TestShardMigrateJournalExists(t *testing.T) { tme.dbTargetClients[1].addQuery("select * from _vt.vreplication where id = 2", stoppedResult(2), nil) switchWrites(tme) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) if err != nil { t.Fatal(err) } @@ -1385,7 +1385,7 @@ func TestTableMigrateCancel(t *testing.T) { cancelMigration() switchWrites(tme) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false, false, false) if err != nil { t.Fatal(err) } @@ -1447,7 +1447,7 @@ func TestTableMigrateCancelDryRun(t *testing.T) { cancelMigration() switchWrites(tme) - _, dryRunResults, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false, true) + _, dryRunResults, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false, true, false) require.NoError(t, err) require.Empty(t, cmp.Diff(want, *dryRunResults)) } @@ -1548,7 +1548,7 @@ func TestTableMigrateNoReverse(t *testing.T) { deleteTargetVReplication() switchWrites(tme) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false, false, false) if err != nil { t.Fatal(err) } @@ -1590,7 +1590,7 @@ func TestMigrateFrozen(t *testing.T) { tme.dbTargetClients[1].addQuery(streamInfoKs2, &sqltypes.Result{}, nil) switchWrites(tme) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false, false) if err != nil { t.Fatal(err) } @@ 
-1965,7 +1965,7 @@ func TestShardMigrateNoAvailableTabletsForReverseReplication(t *testing.T) { cancelMigration() switchWrites(tme) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false, false) want = "DeadlineExceeded" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites(0 timeout) err: %v, must contain %v", err, want) @@ -2060,7 +2060,7 @@ func TestShardMigrateNoAvailableTabletsForReverseReplication(t *testing.T) { invariants[fmt.Sprintf("%s-%d", streamInfoKs, i)] = tme.dbTargetClients[i].getInvariant(streamInfoKs) tme.dbTargetClients[i].addInvariant(streamInfoKs, tme.dbTargetClients[i].getInvariant(streamInfoKs+"-rdonly")) } - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) require.Error(t, err) require.True(t, strings.Contains(err.Error(), "no tablet found")) require.True(t, strings.Contains(err.Error(), "-80")) @@ -2070,7 +2070,7 @@ func TestShardMigrateNoAvailableTabletsForReverseReplication(t *testing.T) { tme.dbTargetClients[i].addInvariant(streamInfoKs, invariants[fmt.Sprintf("%s-%d", streamInfoKs, i)]) } - journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) + journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) if err != nil { t.Fatal(err) } diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go index 40acaf1f55d..ade24cd6d40 100644 --- a/go/vt/wrangler/workflow.go +++ b/go/vt/wrangler/workflow.go @@ -57,10 +57,11 @@ type VReplicationWorkflowParams struct { OnDDL string // MoveTables/Migrate specific - SourceKeyspace, Tables string - AllTables, 
RenameTables bool - SourceTimeZone string - DropForeignKeys bool + SourceKeyspace, Tables string + AllTables, RenameTables bool + SourceTimeZone string + DropForeignKeys bool + InitializeTargetSequences bool // Reshard specific SourceShards, TargetShards []string @@ -478,7 +479,8 @@ func (vrw *VReplicationWorkflow) switchWrites() (*[]string, error) { log.Infof("In VReplicationWorkflow.switchWrites(reverse) for %+v", vrw) } journalID, dryRunResults, err = vrw.wr.SwitchWrites(vrw.ctx, vrw.params.TargetKeyspace, vrw.params.Workflow, vrw.params.Timeout, - false, vrw.params.Direction == workflow.DirectionBackward, vrw.params.EnableReverseReplication, vrw.params.DryRun) + false, vrw.params.Direction == workflow.DirectionBackward, vrw.params.EnableReverseReplication, vrw.params.DryRun, + vrw.params.InitializeTargetSequences) if err != nil { return nil, err } From 72a5c218427e57e99f3c549334093302ffa91040 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Sun, 30 Jul 2023 18:41:12 -0400 Subject: [PATCH 14/56] Add flag and vtctldclient impl Signed-off-by: Matt Lord --- go/cmd/vtctldclient/command/movetables.go | 16 +- go/vt/proto/vtctldata/vtctldata.pb.go | 32 +- go/vt/proto/vtctldata/vtctldata_vtproto.pb.go | 33 ++ go/vt/vtctl/vtctl.go | 2 +- go/vt/vtctl/workflow/server.go | 47 +++ go/vt/vtctl/workflow/switcher.go | 4 + go/vt/vtctl/workflow/switcher_dry_run.go | 8 + go/vt/vtctl/workflow/switcher_interface.go | 3 +- go/vt/vtctl/workflow/traffic_switcher.go | 318 +++++++++++++++++- go/vt/wrangler/traffic_switcher.go | 27 +- proto/vtctldata.proto | 1 + web/vtadmin/src/proto/vtadmin.d.ts | 6 + web/vtadmin/src/proto/vtadmin.js | 23 ++ 13 files changed, 489 insertions(+), 31 deletions(-) diff --git a/go/cmd/vtctldclient/command/movetables.go b/go/cmd/vtctldclient/command/movetables.go index 50caa99669b..194b45f5ad5 100644 --- a/go/cmd/vtctldclient/command/movetables.go +++ b/go/cmd/vtctldclient/command/movetables.go @@ -237,13 +237,14 @@ var ( StopAfterCopy bool }{} 
moveTablesSwitchTrafficOptions = struct { - Cells []string - TabletTypes []topodatapb.TabletType - MaxReplicationLagAllowed time.Duration - EnableReverseReplication bool - Timeout time.Duration - DryRun bool - Direction workflow.TrafficSwitchDirection + Cells []string + TabletTypes []topodatapb.TabletType + MaxReplicationLagAllowed time.Duration + EnableReverseReplication bool + Timeout time.Duration + DryRun bool + InitializeTargetSequences bool + Direction workflow.TrafficSwitchDirection }{} ) @@ -533,6 +534,7 @@ func init() { MoveTablesSwitchTraffic.Flags().DurationVar(&moveTablesSwitchTrafficOptions.Timeout, "timeout", timeoutDefault, "Specifies the maximum time to wait, in seconds, for VReplication to catch up on primary tablets. The traffic switch will be cancelled on timeout.") MoveTablesSwitchTraffic.Flags().DurationVar(&moveTablesSwitchTrafficOptions.MaxReplicationLagAllowed, "max-replication-lag-allowed", maxReplicationLagDefault, "Allow traffic to be switched only if VReplication lag is below this") MoveTablesSwitchTraffic.Flags().BoolVar(&moveTablesSwitchTrafficOptions.DryRun, "dry-run", false, "Print the actions that would be taken and report any known errors that would have occurred") + MoveTablesSwitchTraffic.Flags().BoolVar(&moveTablesSwitchTrafficOptions.InitializeTargetSequences, "initialize-target-sequences", false, "When moving tables from an unsharded keyspace to a sharded keyspace, initialize any sequences that are being used on the target when switching writes.") MoveTables.AddCommand(MoveTablesSwitchTraffic) MoveTablesReverseTraffic.Flags().StringSliceVarP(&moveTablesSwitchTrafficOptions.Cells, "cells", "c", nil, "Cells and/or CellAliases to switch traffic in") diff --git a/go/vt/proto/vtctldata/vtctldata.pb.go b/go/vt/proto/vtctldata/vtctldata.pb.go index 214778c47d9..cdf344dd6dd 100644 --- a/go/vt/proto/vtctldata/vtctldata.pb.go +++ b/go/vt/proto/vtctldata/vtctldata.pb.go @@ -10919,15 +10919,16 @@ type WorkflowSwitchTrafficRequest struct { 
sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Keyspace string `protobuf:"bytes,1,opt,name=keyspace,proto3" json:"keyspace,omitempty"` - Workflow string `protobuf:"bytes,2,opt,name=workflow,proto3" json:"workflow,omitempty"` - Cells []string `protobuf:"bytes,3,rep,name=cells,proto3" json:"cells,omitempty"` - TabletTypes []topodata.TabletType `protobuf:"varint,4,rep,packed,name=tablet_types,json=tabletTypes,proto3,enum=topodata.TabletType" json:"tablet_types,omitempty"` - MaxReplicationLagAllowed *vttime.Duration `protobuf:"bytes,5,opt,name=max_replication_lag_allowed,json=maxReplicationLagAllowed,proto3" json:"max_replication_lag_allowed,omitempty"` - EnableReverseReplication bool `protobuf:"varint,6,opt,name=enable_reverse_replication,json=enableReverseReplication,proto3" json:"enable_reverse_replication,omitempty"` - Direction int32 `protobuf:"varint,7,opt,name=direction,proto3" json:"direction,omitempty"` - Timeout *vttime.Duration `protobuf:"bytes,8,opt,name=timeout,proto3" json:"timeout,omitempty"` - DryRun bool `protobuf:"varint,9,opt,name=dry_run,json=dryRun,proto3" json:"dry_run,omitempty"` + Keyspace string `protobuf:"bytes,1,opt,name=keyspace,proto3" json:"keyspace,omitempty"` + Workflow string `protobuf:"bytes,2,opt,name=workflow,proto3" json:"workflow,omitempty"` + Cells []string `protobuf:"bytes,3,rep,name=cells,proto3" json:"cells,omitempty"` + TabletTypes []topodata.TabletType `protobuf:"varint,4,rep,packed,name=tablet_types,json=tabletTypes,proto3,enum=topodata.TabletType" json:"tablet_types,omitempty"` + MaxReplicationLagAllowed *vttime.Duration `protobuf:"bytes,5,opt,name=max_replication_lag_allowed,json=maxReplicationLagAllowed,proto3" json:"max_replication_lag_allowed,omitempty"` + EnableReverseReplication bool `protobuf:"varint,6,opt,name=enable_reverse_replication,json=enableReverseReplication,proto3" json:"enable_reverse_replication,omitempty"` + Direction int32 `protobuf:"varint,7,opt,name=direction,proto3" 
json:"direction,omitempty"` + Timeout *vttime.Duration `protobuf:"bytes,8,opt,name=timeout,proto3" json:"timeout,omitempty"` + DryRun bool `protobuf:"varint,9,opt,name=dry_run,json=dryRun,proto3" json:"dry_run,omitempty"` + InitializeTargetSequences bool `protobuf:"varint,10,opt,name=initialize_target_sequences,json=initializeTargetSequences,proto3" json:"initialize_target_sequences,omitempty"` } func (x *WorkflowSwitchTrafficRequest) Reset() { @@ -11025,6 +11026,13 @@ func (x *WorkflowSwitchTrafficRequest) GetDryRun() bool { return false } +func (x *WorkflowSwitchTrafficRequest) GetInitializeTargetSequences() bool { + if x != nil { + return x.InitializeTargetSequences + } + return false +} + type WorkflowSwitchTrafficResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -13640,7 +13648,7 @@ var file_vtctldata_proto_rawDesc = []byte{ 0x63, 0x74, 0x6c, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x53, 0x68, 0x61, 0x72, 0x64, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x73, 0x52, 0x05, 0x76, 0x61, 0x6c, - 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x97, 0x03, 0x0a, 0x1c, 0x57, 0x6f, 0x72, 0x6b, 0x66, + 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xd7, 0x03, 0x0a, 0x1c, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x53, 0x77, 0x69, 0x74, 0x63, 0x68, 0x54, 0x72, 0x61, 0x66, 0x66, 0x69, 0x63, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1a, 0x0a, 0x08, 0x6b, 0x65, 0x79, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6b, 0x65, 0x79, 0x73, 0x70, @@ -13666,6 +13674,10 @@ var file_vtctldata_proto_rawDesc = []byte{ 0x74, 0x74, 0x69, 0x6d, 0x65, 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x07, 0x74, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74, 0x12, 0x17, 0x0a, 0x07, 0x64, 0x72, 0x79, 0x5f, 0x72, 0x75, 0x6e, 0x18, 0x09, 0x20, 0x01, 0x28, 0x08, 0x52, 0x06, 0x64, 0x72, 0x79, 0x52, 0x75, 0x6e, 
+ 0x12, 0x3e, 0x0a, 0x1b, 0x69, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x5f, 0x74, + 0x61, 0x72, 0x67, 0x65, 0x74, 0x5f, 0x73, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x65, 0x73, 0x18, + 0x0a, 0x20, 0x01, 0x28, 0x08, 0x52, 0x19, 0x69, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, + 0x65, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x53, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x65, 0x73, 0x22, 0xa7, 0x01, 0x0a, 0x1d, 0x57, 0x6f, 0x72, 0x6b, 0x66, 0x6c, 0x6f, 0x77, 0x53, 0x77, 0x69, 0x74, 0x63, 0x68, 0x54, 0x72, 0x61, 0x66, 0x66, 0x69, 0x63, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x6d, 0x6d, 0x61, 0x72, 0x79, 0x18, 0x01, 0x20, diff --git a/go/vt/proto/vtctldata/vtctldata_vtproto.pb.go b/go/vt/proto/vtctldata/vtctldata_vtproto.pb.go index aba656454d6..80de53404fe 100644 --- a/go/vt/proto/vtctldata/vtctldata_vtproto.pb.go +++ b/go/vt/proto/vtctldata/vtctldata_vtproto.pb.go @@ -10689,6 +10689,16 @@ func (m *WorkflowSwitchTrafficRequest) MarshalToSizedBufferVT(dAtA []byte) (int, i -= len(m.unknownFields) copy(dAtA[i:], m.unknownFields) } + if m.InitializeTargetSequences { + i-- + if m.InitializeTargetSequences { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i-- + dAtA[i] = 0x50 + } if m.DryRun { i-- if m.DryRun { @@ -15009,6 +15019,9 @@ func (m *WorkflowSwitchTrafficRequest) SizeVT() (n int) { if m.DryRun { n += 2 } + if m.InitializeTargetSequences { + n += 2 + } n += len(m.unknownFields) return n } @@ -40629,6 +40642,26 @@ func (m *WorkflowSwitchTrafficRequest) UnmarshalVT(dAtA []byte) error { } } m.DryRun = bool(v != 0) + case 10: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field InitializeTargetSequences", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflow + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + m.InitializeTargetSequences = bool(v != 
0) default: iNdEx = preIndex skippy, err := skip(dAtA[iNdEx:]) diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index b0faf9658e3..bca586f338a 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -2107,7 +2107,7 @@ func commandVReplicationWorkflow(ctx context.Context, wr *wrangler.Wrangler, sub allTables := subFlags.Bool("all", false, "MoveTables only. Move all tables from the source keyspace. Either table_specs or --all needs to be specified.") excludes := subFlags.String("exclude", "", "MoveTables only. Tables to exclude (comma-separated) if --all is specified") sourceKeyspace := subFlags.String("source", "", "MoveTables only. Source keyspace") - initializeTargetSequences := subFlags.Bool("initialize-target-sequences", false, "MoveTables only. When moving tables from an unsharded keyspace to a sharded keyspace, initialize any sequences that are being used on the target.") + initializeTargetSequences := subFlags.Bool("initialize-target-sequences", false, "MoveTables only. When moving tables from an unsharded keyspace to a sharded keyspace, initialize any sequences that are being used on the target when switching writes.") // if sourceTimeZone is specified, the target needs to have time zones loaded // note we make an opinionated decision to not allow specifying a different target time zone than UTC. diff --git a/go/vt/vtctl/workflow/server.go b/go/vt/vtctl/workflow/server.go index b0200a3c95a..202460422fe 100644 --- a/go/vt/vtctl/workflow/server.go +++ b/go/vt/vtctl/workflow/server.go @@ -68,6 +68,23 @@ type TableCopyProgress struct { // CopyProgress stores the TableCopyProgress for all tables still being copied type CopyProgress map[string]*TableCopyProgress +// sequenceMetadata contains all of the relevant metadata for a sequence that +// is being used by a table involved in a vreplication workflow. +type sequenceMetadata struct { + // The name of the sequence table. + backingTableName string + // The keyspace where the backing table lives. 
+ backingTableKeyspace string + // The dbName in use by the keyspace where the backing table lives. + backingTableDBName string + // The name of the table using the sequence. + usingTableName string + // The dbName in use by the keyspace where the using table lives. + usingTableDBName string + // The using table definition. + usingTableDefinition *vschemapb.Table +} + const ( cannotSwitchError = "workflow has errors" cannotSwitchCopyIncomplete = "copy is still in progress" @@ -2436,6 +2453,23 @@ func (s *Server) switchWrites(ctx context.Context, req *vtctldatapb.WorkflowSwit defer targetUnlock(&err) } + // Find out if the target is using any sequence tables for auto_increment + // value generation. If so, then we'll need to ensure that they are + // initialized properly before allowing new writes on the target. + sequenceMetadata := make(map[string]*sequenceMetadata) + // For sharded to sharded migrations the sequence must already be setup. + // For reshards the sequence usage is not changed. + if req.InitializeTargetSequences && ts.workflowType == binlogdatapb.VReplicationWorkflowType_MoveTables && + ts.SourceKeyspaceSchema() != nil && ts.SourceKeyspaceSchema().Keyspace != nil && + !ts.SourceKeyspaceSchema().Keyspace.Sharded { + sequenceMetadata, err = ts.getTargetSequenceMetadata(ctx) + if err != nil { + werr := vterrors.Wrapf(err, "getSequenceMetadata failed") + ts.Logger().Error(werr) + return 0, nil, werr + } + } + // If no journals exist, sourceWorkflows will be initialized by sm.MigrateStreams. journalsExist, sourceWorkflows, err := ts.checkJournals(ctx) if err != nil { @@ -2537,6 +2571,19 @@ func (s *Server) switchWrites(ctx context.Context, req *vtctldatapb.WorkflowSwit ts.Logger().Errorf("createJournals failed: %v", err) return 0, nil, err } + // Initialize any target sequences, if there are any, before allowing new writes. 
+ if req.InitializeTargetSequences && len(sequenceMetadata) > 0 { + // Writes are blocked so we can safely initialize the sequence tables but + // we also want to use a shorter timeout than the parent context. + // We use up at most half of the overall timeout. + initSeqCtx, cancel := context.WithTimeout(ctx, timeout/2) + defer cancel() + if err := sw.initializeTargetSequences(initSeqCtx, sequenceMetadata); err != nil { + werr := vterrors.Wrapf(err, "initializeTargetSequences failed") + ts.Logger().Error(werr) + return 0, nil, werr + } + } if err := sw.allowTargetWrites(ctx); err != nil { ts.Logger().Errorf("allowTargetWrites failed: %v", err) return 0, nil, err diff --git a/go/vt/vtctl/workflow/switcher.go b/go/vt/vtctl/workflow/switcher.go index 2199cbe3641..e609d10d279 100644 --- a/go/vt/vtctl/workflow/switcher.go +++ b/go/vt/vtctl/workflow/switcher.go @@ -141,3 +141,7 @@ func (r *switcher) logs() *[]string { func (r *switcher) resetSequences(ctx context.Context) error { return r.ts.resetSequences(ctx) } + +func (r *switcher) initializeTargetSequences(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error { + return r.ts.initializeTargetSequences(ctx, sequencesByBackingTable) +} diff --git a/go/vt/vtctl/workflow/switcher_dry_run.go b/go/vt/vtctl/workflow/switcher_dry_run.go index 05bee1246b9..de502b97102 100644 --- a/go/vt/vtctl/workflow/switcher_dry_run.go +++ b/go/vt/vtctl/workflow/switcher_dry_run.go @@ -23,6 +23,8 @@ import ( "strings" "time" + "golang.org/x/exp/maps" + "vitess.io/vitess/go/mysql" binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" @@ -365,3 +367,9 @@ func (dr *switcherDryRun) resetSequences(ctx context.Context) error { dr.drLog.Log("The sequence caches will be reset on the source since sequence tables are being moved") return nil } + +func (dr *switcherDryRun) initializeTargetSequences(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error { + dr.drLog.Log(fmt.Sprintf("The following 
sequence backing tables used by tables being moved will be initialized: %s", + strings.Join(maps.Keys(sequencesByBackingTable), ","))) + return nil +} diff --git a/go/vt/vtctl/workflow/switcher_interface.go b/go/vt/vtctl/workflow/switcher_interface.go index c0b5265e63f..a9e803dd714 100644 --- a/go/vt/vtctl/workflow/switcher_interface.go +++ b/go/vt/vtctl/workflow/switcher_interface.go @@ -50,6 +50,7 @@ type iswitcher interface { deleteRoutingRules(ctx context.Context) error deleteShardRoutingRules(ctx context.Context) error addParticipatingTablesToKeyspace(ctx context.Context, keyspace, tableSpecs string) error - logs() *[]string resetSequences(ctx context.Context) error + initializeTargetSequences(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error + logs() *[]string } diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index e0482dedeb0..06794657c9b 100644 --- a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -25,8 +25,11 @@ import ( "sync" "time" + "golang.org/x/exp/maps" + "vitess.io/vitess/go/json2" "vitess.io/vitess/go/sqlescape" + "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/vt/binlog/binlogplayer" "vitess.io/vitess/go/vt/concurrency" "vitess.io/vitess/go/vt/key" @@ -68,7 +71,9 @@ const ( // Use pt-osc's naming convention, this format also ensures vstreamer ignores such tables. renameTableTemplate = "_%.59s_old" // limit table name to 64 characters - sqlDeleteWorkflow = "delete from _vt.vreplication where db_name = %s and workflow = %s" + sqlDeleteWorkflow = "delete from _vt.vreplication where db_name = %s and workflow = %s" + sqlGetMaxSequenceVal = "select max(%a) as maxval from %a.%a" + sqlInitSequenceTable = "insert into %a.%a (id, next_id, cache) values (0, %d, 1000) on duplicate key update next_id = if(next_id < %d, %d, next_id)" ) // accessType specifies the type of access for a shard (allow/disallow writes). 
@@ -1195,6 +1200,317 @@ func (ts *trafficSwitcher) isSequenceParticipating(ctx context.Context) (bool, e return sequenceFound, nil } +// getTargetSequenceMetadata returns a map of sequence metadata keyed by the +// backing sequence table name. If the target keyspace has no tables +// defined that use sequences for auto_increment generation then a nil +// map will be returned. +func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[string]*sequenceMetadata, error) { + log.Error("DEBUG: getTargetSequenceMetadata") + vschema, err := ts.TopoServer().GetVSchema(ctx, ts.targetKeyspace) + if err != nil { + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for target keyspace %s: %v", + ts.targetKeyspace, err) + } + if vschema == nil || vschema.Tables == nil || len(vschema.Tables) == 0 { // Nothing to do + return nil, nil + } + + targets := maps.Values(ts.Targets()) + if len(targets) == 0 || targets[0].GetPrimary() == nil { // This should never happen + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary tablet found for target keyspace %s", ts.targetKeyspace) + } + targetDBName := targets[0].GetPrimary().DbName() + sequencesByBackingTable := make(map[string]*sequenceMetadata) + smMu := sync.Mutex{} + for _, table := range ts.Tables() { + vs, ok := vschema.Tables[table] + if !ok || vs == nil { + continue + } + if vs.AutoIncrement != nil && vs.AutoIncrement.Sequence != "" { + sm := &sequenceMetadata{ + backingTableName: vs.AutoIncrement.Sequence, + usingTableName: table, + usingTableDefinition: vs, + usingTableDBName: targetDBName, + } + // If the sequence table is fully qualified in the vschema then + // we don't need to find it later. 
+ if strings.Contains(vs.AutoIncrement.Sequence, ".") { + parts := strings.Split(vs.AutoIncrement.Sequence, ".") + if len(parts) != 2 { + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "invalid sequence table name %s defined in the %s keyspace", + vs.AutoIncrement.Sequence, ts.targetKeyspace) + } + sm.backingTableName = parts[1] + sm.backingTableKeyspace = parts[0] + } + sequencesByBackingTable[sm.backingTableName] = sm + } + } + if len(sequencesByBackingTable) == 0 { // Nothing to do + return nil, nil + } + log.Errorf("DEBUG: sequences: %+v", sequencesByBackingTable) + + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + } + + // If all of the sequence tables were defined using qualified table + // names in the vschema, then we don't need to look for them. + mustSearch := false + for _, sm := range sequencesByBackingTable { + if sm.backingTableKeyspace == "" { + mustSearch = true + break + } + } + if !mustSearch { + return sequencesByBackingTable, nil + } + + // Now we need to locate the backing sequence table(s) which will + // be in another unsharded keyspace. 
+ keyspaces, err := ts.TopoServer().GetKeyspaces(ctx) + if err != nil { + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get keyspaces: %v", err) + } + log.Errorf("DEBUG: keyspaces: %+v", keyspaces) + tableCount := len(sequencesByBackingTable) + tablesFound := 0 // Used to short circuit the search + ksCtx, ksCancel := context.WithCancel(ctx) // Used to cancel the goroutines + defer ksCancel() // Cancel all of the goroutines when we are done + ksErr := make(chan error) // Used if we encountered an error during the search + ksDone := make(chan struct{}) // The search has completed + ksWg := sync.WaitGroup{} // All of the goroutines finished + ksFunc := func(ks string) { // The function used to search each keyspace + defer ksWg.Done() + vschema, err = ts.TopoServer().GetVSchema(ctx, ks) + if err != nil { + ksErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", + ks, err) + return + } + if vschema == nil || vschema.Sharded || vschema.Tables == nil || len(vschema.Tables) == 0 { + return + } + select { + case <-ctx.Done(): + return + case <-ksCtx.Done(): + return + default: + } + for tableName, tableDef := range vschema.Tables { + smMu.Lock() // Prevent concurrent access to the map + sm := sequencesByBackingTable[tableName] + if tableDef != nil && tableDef.Type == vindexes.TypeSequence && + sm != nil && tableName == sm.backingTableName { + tablesFound++ // This is also protected by the mutex + sm.backingTableKeyspace = ks + sm.backingTableDBName = "vt_" + ks + if tablesFound == tableCount { // Short circuit the search + log.Errorf("DEBUG: all sequence backing tables found: %+v", sequencesByBackingTable) + smMu.Unlock() + ksDone <- struct{}{} + return + } + } + smMu.Unlock() + select { + case <-ctx.Done(): + return + case <-ksCtx.Done(): // The search has been cancelled + return + default: + } + } + } + + for _, keyspace := range keyspaces { + ksWg.Add(1) + go ksFunc(keyspace) + } + // Wait for all the goroutines to 
finish on their own, which means we + // probably did not find all of the tables. + go func() { + ksWg.Wait() + close(ksDone) + }() + + select { + case <-ksDone: + if tablesFound != tableCount { + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to locate all of the backing sequence tables being used; sequence table metadata: %+v", + sequencesByBackingTable) + } + return sequencesByBackingTable, nil + case ksErr := <-ksErr: + return nil, ksErr + case <-ctx.Done(): + return nil, ctx.Err() + } +} + +// initializeTargetSequences initializes the backing sequence tables +// using a map keyed by the backing sequence table name. +// +// The backing tables must have already been created. This function will +// then ensure that the next value is set to a value greater than any +// currently stored in the using table on the target keyspace. If the +// backing table is updated to a new higher value then it will also tell +// the primary tablet serving the sequence to refresh/reset its cache to +// be sure that it does not provide a value that is less than the current max. +func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error { + log.Error("DEBUG: initializeTargetSequences") + initErr := make(chan error) // Used if we encounter an error + initDone := make(chan struct{}) // The initialization has completed + initWg := sync.WaitGroup{} // All of the goroutines finished + initFunc := func(sequenceTableName string, sequenceMetadata *sequenceMetadata) { + defer initWg.Done() + log.Errorf("DEBUG: sequence table: %v, sequenceMetadata: %+v", sequenceTableName, sequenceMetadata) + // Now we need to run this query on the target shards in order + // to get the max value and set the next id for the sequence to + // a higher value. 
+ shardResults := make([]int64, 0, len(ts.TargetShards())) + srMu := sync.Mutex{} + err := ts.ForAllTargets(func(target *MigrationTarget) error { + primary := target.GetPrimary() + if primary == nil || primary.GetAlias() == nil { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary tablet found for target shard %s/%s", + ts.targetKeyspace, target.GetShard().ShardName()) + } + query := sqlparser.BuildParsedQuery(sqlGetMaxSequenceVal, + sqlescape.EscapeID(sequenceMetadata.usingTableDefinition.AutoIncrement.Column), + sqlescape.EscapeID(sequenceMetadata.usingTableDBName), + sqlescape.EscapeID(sequenceMetadata.usingTableName), + ) + log.Errorf("DEBUG: query: %s on shard: %s/%s", + query.Query, ts.targetKeyspace, target.GetShard().ShardName()) + qr, err := ts.ws.tmc.ExecuteFetchAsApp(ctx, primary.Tablet, true, &tabletmanagerdatapb.ExecuteFetchAsAppRequest{ + Query: []byte(query.Query), + MaxRows: 1, + }) + if err != nil || len(qr.Rows) != 1 { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the max used sequence value for target table %s.%s in order to initialize the backing sequence table: %v", + ts.targetKeyspace, sequenceMetadata.usingTableName, err) + } + maxID, err := sqltypes.Proto3ToResult(qr).Rows[0][0].ToInt64() + if err != nil { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the max used sequence value for target table %s.%s in order to initialize the backing sequence table: %v", + ts.targetKeyspace, sequenceMetadata.usingTableName, err) + } + log.Errorf("DEBUG: max ID seen on shard %s: %d", target.GetShard().ShardName(), maxID) + srMu.Lock() + defer srMu.Unlock() + shardResults = append(shardResults, maxID) + return nil + }) + if err != nil { + initErr <- err + } + select { + case <-ctx.Done(): + return + default: + } + // Sort the values to find the max value across all shards. 
+ sort.Slice(shardResults, func(i, j int) bool { + return shardResults[i] < shardResults[j] + }) + nextVal := shardResults[len(shardResults)-1] + 1 + // Now we need to update the sequence table, if needed, in order to + // ensure that that the next value it provides is > the current max. + sequenceShard, err := ts.TopoServer().GetOnlyShard(ctx, sequenceMetadata.backingTableKeyspace) + if err != nil || sequenceShard == nil || sequenceShard.PrimaryAlias == nil { + initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", + sequenceMetadata.backingTableKeyspace, err) + return + } + sequenceTablet, err := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) + if err != nil || sequenceTablet == nil { + initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", + sequenceMetadata.backingTableKeyspace, err) + return + } + if sequenceTablet.DbNameOverride != "" { + sequenceMetadata.backingTableDBName = sequenceTablet.DbNameOverride + } + query := sqlparser.BuildParsedQuery(sqlInitSequenceTable, + sqlescape.EscapeID(sequenceMetadata.backingTableDBName), + sqlescape.EscapeID(sequenceMetadata.backingTableName), + nextVal, + nextVal, + nextVal, + ) + log.Errorf("DEBUG: query: %s", query.Query) + // Now execute this on the primary tablet of the unsharded keyspace + // housing the backing table. 
+ primaryTablet, err := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) + if err != nil { + initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for %s.%s using alias %s: %v", + sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, err) + return + } + qr, err := ts.ws.tmc.ExecuteFetchAsApp(ctx, primaryTablet.Tablet, true, &tabletmanagerdatapb.ExecuteFetchAsAppRequest{ + Query: []byte(query.Query), + MaxRows: 1, + }) + + if err != nil { + initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to initialize the backing sequence table %s.%s: %v", + sequenceMetadata.backingTableDBName, sequenceMetadata.backingTableName, err) + return + } + // If we actually updated the backing sequence table, then we need + // to tell the primary tablet managing the sequence to refresh/reset + // its cache for the table. + if qr.RowsAffected == 0 { + return + } + select { + case <-ctx.Done(): + return + default: + } + ts.Logger().Infof("Resetting sequence cache for backing table %s on shard %s.%s using tablet %s", + sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias) + ti, err := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) + if err != nil { + initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get primary tablet for keyspace %s: %v", + sequenceMetadata.backingTableKeyspace, err) + return + } + err = ts.TabletManagerClient().ResetSequences(ctx, ti.Tablet, []string{sequenceMetadata.backingTableName}) + if err != nil { + initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset the sequence cache for backing table %s on shard %s.%s using tablet %s: %v", + sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, err) + return + } + } + + for sequenceTableName, sequenceMetadata := range sequencesByBackingTable { + initWg.Add(1) + go initFunc(sequenceTableName, 
sequenceMetadata) + } + go func() { + initWg.Wait() + close(initDone) + }() + + select { + case <-initDone: // We completed the work w/o errors + return nil + case err := <-initErr: + return err + case <-ctx.Done(): + return ctx.Err() + } +} + func (ts *trafficSwitcher) mustResetSequences(ctx context.Context) (bool, error) { switch ts.workflowType { case binlogdatapb.VReplicationWorkflowType_Migrate, diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 67771bdec85..2f486490ef8 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -477,7 +477,7 @@ func (wr *Wrangler) areTabletsAvailableToStreamFrom(ctx context.Context, ts *tra // SwitchWrites is a generic way of migrating write traffic for a resharding workflow. func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowName string, timeout time.Duration, - cancel, reverse, reverseReplication bool, dryRun, initTargetSequences bool) (journalID int64, dryRunResults *[]string, err error) { + cancel, reverse, reverseReplication bool, dryRun, initializeTargetSequences bool) (journalID int64, dryRunResults *[]string, err error) { ts, ws, err := wr.getWorkflowState(ctx, targetKeyspace, workflowName) _ = ws if err != nil { @@ -539,12 +539,12 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowNa sequenceMetadata := make(map[string]*sequenceMetadata) // For sharded to sharded migrations the sequence must already be setup. // For reshards the sequence usage is not changed. 
- if initTargetSequences && ts.workflowType == binlogdatapb.VReplicationWorkflowType_MoveTables && + if initializeTargetSequences && ts.workflowType == binlogdatapb.VReplicationWorkflowType_MoveTables && ts.SourceKeyspaceSchema() != nil && ts.SourceKeyspaceSchema().Keyspace != nil && !ts.SourceKeyspaceSchema().Keyspace.Sharded { - sequenceMetadata, err = ts.getSequenceMetadata(ctx) + sequenceMetadata, err = ts.getTargetSequenceMetadata(ctx) if err != nil { - werr := vterrors.Wrapf(err, "getSequenceMetadata failed") + werr := vterrors.Wrapf(err, "getTargetSequenceMetadata failed") ts.Logger().Error(werr) return 0, nil, werr } @@ -652,7 +652,7 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowNa return 0, nil, err } // Initialize any target sequences, if there are any, before allowing new writes. - if initTargetSequences && len(sequenceMetadata) > 0 { + if initializeTargetSequences && len(sequenceMetadata) > 0 { // Writes are blocked so we can safely initialize the sequence tables but // we also want to use a shorter timeout than the parent context. // We use up at most half of the overall timeout. @@ -1956,12 +1956,12 @@ func (ts *trafficSwitcher) isSequenceParticipating(ctx context.Context) (bool, e return sequenceFound, nil } -// getSequenceMetadata returns a map of sequence metadata keyed by the +// getTargetSequenceMetadata returns a map of sequence metadata keyed by the // backing sequence table name. If the target keyspace has no tables // defined that use sequences for auto_increment generation then a nil // map will be returned. 
-func (ts *trafficSwitcher) getSequenceMetadata(ctx context.Context) (map[string]*sequenceMetadata, error) { - log.Error("DEBUG: getSequenceMetadata") +func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[string]*sequenceMetadata, error) { + log.Error("DEBUG: getTargetSequenceMetadata") vschema, err := ts.TopoServer().GetVSchema(ctx, ts.targetKeyspace) if err != nil { return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for target keyspace %s: %v", @@ -2136,7 +2136,7 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen err := ts.ForAllTargets(func(target *workflow.MigrationTarget) error { primary := target.GetPrimary() if primary == nil || primary.GetAlias() == nil { - initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary tablet found for target shard %s/%s", + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary tablet found for target shard %s/%s", ts.targetKeyspace, target.GetShard().ShardName()) } query := sqlparser.BuildParsedQuery(sqlGetMaxSequenceVal, @@ -2148,12 +2148,12 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen query.Query, ts.targetKeyspace, target.GetShard().ShardName()) qr, err := ts.wr.ExecuteFetchAsApp(ctx, primary.GetAlias(), true, query.Query, 1) if err != nil || len(qr.Rows) != 1 { - initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the max used sequence value for target table %s.%s in order to initialize the backing sequence table: %v", + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the max used sequence value for target table %s.%s in order to initialize the backing sequence table: %v", ts.targetKeyspace, sequenceMetadata.usingTableName, err) } maxID, err := sqltypes.Proto3ToResult(qr).Rows[0][0].ToInt64() if err != nil { - initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the max used sequence value for target table %s.%s in order to initialize the backing sequence 
table: %v", + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the max used sequence value for target table %s.%s in order to initialize the backing sequence table: %v", ts.targetKeyspace, sequenceMetadata.usingTableName, err) } log.Errorf("DEBUG: max ID seen on shard %s: %d", target.GetShard().ShardName(), maxID) @@ -2181,11 +2181,13 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen if err != nil || sequenceShard == nil || sequenceShard.PrimaryAlias == nil { initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", sequenceMetadata.backingTableKeyspace, err) + return } sequenceTablet, err := ts.wr.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) if err != nil || sequenceTablet == nil { initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", sequenceMetadata.backingTableKeyspace, err) + return } if sequenceTablet.DbNameOverride != "" { sequenceMetadata.backingTableDBName = sequenceTablet.DbNameOverride @@ -2204,6 +2206,7 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen if err != nil { initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to initialize the backing sequence table %s.%s: %v", sequenceMetadata.backingTableDBName, sequenceMetadata.backingTableName, err) + return } // If we actually updated the backing sequence table, then we need // to tell the primary tablet managing the sequence to refresh/reset @@ -2222,11 +2225,13 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen if err != nil { initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get primary tablet for keyspace %s: %v", sequenceMetadata.backingTableKeyspace, err) + return } err = ts.TabletManagerClient().ResetSequences(ctx, ti.Tablet, []string{sequenceMetadata.backingTableName}) if err != nil { initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset the 
sequence cache for backing table %s on shard %s.%s using tablet %s: %v", sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, err) + return } } diff --git a/proto/vtctldata.proto b/proto/vtctldata.proto index 56a31bbb86a..7099788edba 100644 --- a/proto/vtctldata.proto +++ b/proto/vtctldata.proto @@ -1426,6 +1426,7 @@ message WorkflowSwitchTrafficRequest { int32 direction = 7; vttime.Duration timeout = 8; bool dry_run = 9; + bool initialize_target_sequences = 10; } message WorkflowSwitchTrafficResponse { diff --git a/web/vtadmin/src/proto/vtadmin.d.ts b/web/vtadmin/src/proto/vtadmin.d.ts index b39cb64655a..6aaf09dcebf 100644 --- a/web/vtadmin/src/proto/vtadmin.d.ts +++ b/web/vtadmin/src/proto/vtadmin.d.ts @@ -62320,6 +62320,9 @@ export namespace vtctldata { /** WorkflowSwitchTrafficRequest dry_run */ dry_run?: (boolean|null); + + /** WorkflowSwitchTrafficRequest initialize_target_sequences */ + initialize_target_sequences?: (boolean|null); } /** Represents a WorkflowSwitchTrafficRequest. */ @@ -62358,6 +62361,9 @@ export namespace vtctldata { /** WorkflowSwitchTrafficRequest dry_run. */ public dry_run: boolean; + /** WorkflowSwitchTrafficRequest initialize_target_sequences. */ + public initialize_target_sequences: boolean; + /** * Creates a new WorkflowSwitchTrafficRequest instance using the specified properties. 
* @param [properties] Properties to set diff --git a/web/vtadmin/src/proto/vtadmin.js b/web/vtadmin/src/proto/vtadmin.js index 6030c37f7c9..1a928296c26 100644 --- a/web/vtadmin/src/proto/vtadmin.js +++ b/web/vtadmin/src/proto/vtadmin.js @@ -150167,6 +150167,7 @@ export const vtctldata = $root.vtctldata = (() => { * @property {number|null} [direction] WorkflowSwitchTrafficRequest direction * @property {vttime.IDuration|null} [timeout] WorkflowSwitchTrafficRequest timeout * @property {boolean|null} [dry_run] WorkflowSwitchTrafficRequest dry_run + * @property {boolean|null} [initialize_target_sequences] WorkflowSwitchTrafficRequest initialize_target_sequences */ /** @@ -150258,6 +150259,14 @@ export const vtctldata = $root.vtctldata = (() => { */ WorkflowSwitchTrafficRequest.prototype.dry_run = false; + /** + * WorkflowSwitchTrafficRequest initialize_target_sequences. + * @member {boolean} initialize_target_sequences + * @memberof vtctldata.WorkflowSwitchTrafficRequest + * @instance + */ + WorkflowSwitchTrafficRequest.prototype.initialize_target_sequences = false; + /** * Creates a new WorkflowSwitchTrafficRequest instance using the specified properties. 
* @function create @@ -150305,6 +150314,8 @@ export const vtctldata = $root.vtctldata = (() => { $root.vttime.Duration.encode(message.timeout, writer.uint32(/* id 8, wireType 2 =*/66).fork()).ldelim(); if (message.dry_run != null && Object.hasOwnProperty.call(message, "dry_run")) writer.uint32(/* id 9, wireType 0 =*/72).bool(message.dry_run); + if (message.initialize_target_sequences != null && Object.hasOwnProperty.call(message, "initialize_target_sequences")) + writer.uint32(/* id 10, wireType 0 =*/80).bool(message.initialize_target_sequences); return writer; }; @@ -150384,6 +150395,10 @@ export const vtctldata = $root.vtctldata = (() => { message.dry_run = reader.bool(); break; } + case 10: { + message.initialize_target_sequences = reader.bool(); + break; + } default: reader.skipType(tag & 7); break; @@ -150472,6 +150487,9 @@ export const vtctldata = $root.vtctldata = (() => { if (message.dry_run != null && message.hasOwnProperty("dry_run")) if (typeof message.dry_run !== "boolean") return "dry_run: boolean expected"; + if (message.initialize_target_sequences != null && message.hasOwnProperty("initialize_target_sequences")) + if (typeof message.initialize_target_sequences !== "boolean") + return "initialize_target_sequences: boolean expected"; return null; }; @@ -150571,6 +150589,8 @@ export const vtctldata = $root.vtctldata = (() => { } if (object.dry_run != null) message.dry_run = Boolean(object.dry_run); + if (object.initialize_target_sequences != null) + message.initialize_target_sequences = Boolean(object.initialize_target_sequences); return message; }; @@ -150599,6 +150619,7 @@ export const vtctldata = $root.vtctldata = (() => { object.direction = 0; object.timeout = null; object.dry_run = false; + object.initialize_target_sequences = false; } if (message.keyspace != null && message.hasOwnProperty("keyspace")) object.keyspace = message.keyspace; @@ -150624,6 +150645,8 @@ export const vtctldata = $root.vtctldata = (() => { object.timeout = 
$root.vttime.Duration.toObject(message.timeout, options); if (message.dry_run != null && message.hasOwnProperty("dry_run")) object.dry_run = message.dry_run; + if (message.initialize_target_sequences != null && message.hasOwnProperty("initialize_target_sequences")) + object.initialize_target_sequences = message.initialize_target_sequences; return object; }; From 142109a2fcd96ee09192e6bb9bfe31737882bba5 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Sun, 30 Jul 2023 19:05:02 -0400 Subject: [PATCH 15/56] Add missing returns Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/traffic_switcher.go | 1 + go/vt/wrangler/traffic_switcher.go | 1 + 2 files changed, 2 insertions(+) diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index 06794657c9b..36ab79b40ee 100644 --- a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -1411,6 +1411,7 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen }) if err != nil { initErr <- err + return } select { case <-ctx.Done(): diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 2f486490ef8..cc1b22ef382 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -2164,6 +2164,7 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen }) if err != nil { initErr <- err + return } select { case <-ctx.Done(): From e92b498c15879f84cec39f8f6e81d8438a5d1b70 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Sun, 30 Jul 2023 21:45:28 -0400 Subject: [PATCH 16/56] Concurrency improvements Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/traffic_switcher.go | 56 +++++++++++++------ go/vt/wrangler/traffic_switcher.go | 68 ++++++++++++++++++------ 2 files changed, 93 insertions(+), 31 deletions(-) diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index 36ab79b40ee..9b80ebff271 100644 --- 
a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -1290,8 +1290,11 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s defer ksWg.Done() vschema, err = ts.TopoServer().GetVSchema(ctx, ks) if err != nil { - ksErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", - ks, err) + select { + case ksErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", + ks, err): + default: + } return } if vschema == nil || vschema.Sharded || vschema.Tables == nil || len(vschema.Tables) == 0 { @@ -1339,6 +1342,7 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s go func() { ksWg.Wait() close(ksDone) + close(ksErr) }() select { @@ -1410,7 +1414,10 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen return nil }) if err != nil { - initErr <- err + select { + case initErr <- err: + default: + } return } select { @@ -1427,14 +1434,20 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen // ensure that that the next value it provides is > the current max. 
sequenceShard, err := ts.TopoServer().GetOnlyShard(ctx, sequenceMetadata.backingTableKeyspace) if err != nil || sequenceShard == nil || sequenceShard.PrimaryAlias == nil { - initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", - sequenceMetadata.backingTableKeyspace, err) + select { + case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", + sequenceMetadata.backingTableKeyspace, err): + default: + } return } sequenceTablet, err := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) if err != nil || sequenceTablet == nil { - initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", - sequenceMetadata.backingTableKeyspace, err) + select { + case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", + sequenceMetadata.backingTableKeyspace, err): + default: + } return } if sequenceTablet.DbNameOverride != "" { @@ -1452,8 +1465,11 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen // housing the backing table. 
primaryTablet, err := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) if err != nil { - initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for %s.%s using alias %s: %v", - sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, err) + select { + case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for %s.%s using alias %s: %v", + sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, err): + default: + } return } qr, err := ts.ws.tmc.ExecuteFetchAsApp(ctx, primaryTablet.Tablet, true, &tabletmanagerdatapb.ExecuteFetchAsAppRequest{ @@ -1462,8 +1478,11 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen }) if err != nil { - initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to initialize the backing sequence table %s.%s: %v", - sequenceMetadata.backingTableDBName, sequenceMetadata.backingTableName, err) + select { + case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to initialize the backing sequence table %s.%s: %v", + sequenceMetadata.backingTableDBName, sequenceMetadata.backingTableName, err): + default: + } return } // If we actually updated the backing sequence table, then we need @@ -1481,14 +1500,20 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias) ti, err := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) if err != nil { - initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get primary tablet for keyspace %s: %v", - sequenceMetadata.backingTableKeyspace, err) + select { + case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get primary tablet for keyspace %s: %v", + sequenceMetadata.backingTableKeyspace, err): + default: + } return } err = ts.TabletManagerClient().ResetSequences(ctx, ti.Tablet, 
[]string{sequenceMetadata.backingTableName}) if err != nil { - initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset the sequence cache for backing table %s on shard %s.%s using tablet %s: %v", - sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, err) + select { + case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset the sequence cache for backing table %s on shard %s.%s using tablet %s: %v", + sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, err): + default: + } return } } @@ -1500,6 +1525,7 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen go func() { initWg.Wait() close(initDone) + close(initErr) }() select { diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index cc1b22ef382..3cd528c56e1 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -2046,8 +2046,11 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s defer ksWg.Done() vschema, err = ts.TopoServer().GetVSchema(ctx, ks) if err != nil { - ksErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", - ks, err) + select { + case ksErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", + ks, err): + default: + } return } if vschema == nil || vschema.Sharded || vschema.Tables == nil || len(vschema.Tables) == 0 { @@ -2095,6 +2098,7 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s go func() { ksWg.Wait() close(ksDone) + close(ksErr) }() select { @@ -2122,9 +2126,11 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s // be sure that it does not provide a value that is less than the current max. 
func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error { log.Error("DEBUG: initializeTargetSequences") - initErr := make(chan error) // Used if we encounter an error - initDone := make(chan struct{}) // The initialization has completed - initWg := sync.WaitGroup{} // All of the goroutines finished + initCtx, initCancel := context.WithCancel(ctx) // Used to cancel the goroutines + defer initCancel() // Cancel all of the goroutines when we are done + initErr := make(chan error) // Used if we encounter an error + initDone := make(chan struct{}) // The initialization has completed + initWg := sync.WaitGroup{} // All of the goroutines finished initFunc := func(sequenceTableName string, sequenceMetadata *sequenceMetadata) { defer initWg.Done() log.Errorf("DEBUG: sequence table: %v, sequenceMetadata: %+v", sequenceTableName, sequenceMetadata) @@ -2163,12 +2169,17 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen return nil }) if err != nil { - initErr <- err + select { + case initErr <- err: + default: + } return } select { case <-ctx.Done(): return + case <-initCtx.Done(): // The initialization work has been cancelled + return default: } // Sort the values to find the max value across all shards. @@ -2180,16 +2191,29 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen // ensure that that the next value it provides is > the current max. 
sequenceShard, err := ts.wr.TopoServer().GetOnlyShard(ctx, sequenceMetadata.backingTableKeyspace) if err != nil || sequenceShard == nil || sequenceShard.PrimaryAlias == nil { - initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", - sequenceMetadata.backingTableKeyspace, err) + select { + case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", + sequenceMetadata.backingTableKeyspace, err): + default: + } return } sequenceTablet, err := ts.wr.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) if err != nil || sequenceTablet == nil { - initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", - sequenceMetadata.backingTableKeyspace, err) + select { + case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", + sequenceMetadata.backingTableKeyspace, err): + default: + } return } + select { + case <-ctx.Done(): + return + case <-initCtx.Done(): // The initialization work has been cancelled + return + default: + } if sequenceTablet.DbNameOverride != "" { sequenceMetadata.backingTableDBName = sequenceTablet.DbNameOverride } @@ -2205,8 +2229,11 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen // housing the backing table. 
qr, err := ts.wr.ExecuteFetchAsApp(ctx, sequenceShard.PrimaryAlias, true, query.Query, 1) if err != nil { - initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to initialize the backing sequence table %s.%s: %v", - sequenceMetadata.backingTableDBName, sequenceMetadata.backingTableName, err) + select { + case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to initialize the backing sequence table %s.%s: %v", + sequenceMetadata.backingTableDBName, sequenceMetadata.backingTableName, err): + default: + } return } // If we actually updated the backing sequence table, then we need @@ -2218,20 +2245,28 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen select { case <-ctx.Done(): return + case <-initCtx.Done(): // The initialization work has been cancelled + return default: } ts.Logger().Infof("Resetting sequence cache for backing table %s on shard %s.%s using tablet %s", sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias) ti, err := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) if err != nil { - initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get primary tablet for keyspace %s: %v", - sequenceMetadata.backingTableKeyspace, err) + select { + case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get primary tablet for keyspace %s: %v", + sequenceMetadata.backingTableKeyspace, err): + default: + } return } err = ts.TabletManagerClient().ResetSequences(ctx, ti.Tablet, []string{sequenceMetadata.backingTableName}) if err != nil { - initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset the sequence cache for backing table %s on shard %s.%s using tablet %s: %v", - sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, err) + select { + case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset the sequence cache for backing table %s on shard 
%s.%s using tablet %s: %v", + sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, err): + default: + } return } } @@ -2243,6 +2278,7 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen go func() { initWg.Wait() close(initDone) + close(initErr) }() select { From 65c17d1682e8fabfedc90c9f5c1a5bd0a3831e1e Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Mon, 31 Jul 2023 12:30:49 -0400 Subject: [PATCH 17/56] Update wrangler unit tests Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/traffic_switcher.go | 4 +- go/vt/wrangler/traffic_switcher.go | 4 +- go/vt/wrangler/traffic_switcher_env_test.go | 170 +++++++++++++++----- go/vt/wrangler/traffic_switcher_test.go | 33 ++-- 4 files changed, 152 insertions(+), 59 deletions(-) diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index 9b80ebff271..58d863a8b0a 100644 --- a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -1496,7 +1496,7 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen return default: } - ts.Logger().Infof("Resetting sequence cache for backing table %s on shard %s.%s using tablet %s", + ts.Logger().Infof("Resetting sequence cache for backing table %s on shard %s/%s using tablet %s", sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias) ti, err := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) if err != nil { @@ -1510,7 +1510,7 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen err = ts.TabletManagerClient().ResetSequences(ctx, ti.Tablet, []string{sequenceMetadata.backingTableName}) if err != nil { select { - case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset the sequence cache for backing table %s on shard %s.%s using tablet %s: %v", + case initErr <- 
vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset the sequence cache for backing table %s on shard %s/%s using tablet %s: %v", sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, err): default: } diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 3cd528c56e1..d868a96520a 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -2249,7 +2249,7 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen return default: } - ts.Logger().Infof("Resetting sequence cache for backing table %s on shard %s.%s using tablet %s", + ts.Logger().Infof("Resetting sequence cache for backing table %s on shard %s/%s using tablet %s", sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias) ti, err := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) if err != nil { @@ -2263,7 +2263,7 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen err = ts.TabletManagerClient().ResetSequences(ctx, ti.Tablet, []string{sequenceMetadata.backingTableName}) if err != nil { select { - case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset the sequence cache for backing table %s on shard %s.%s using tablet %s: %v", + case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset the sequence cache for backing table %s on shard %s/%s using tablet %s: %v", sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, err): default: } diff --git a/go/vt/wrangler/traffic_switcher_env_test.go b/go/vt/wrangler/traffic_switcher_env_test.go index 6da0e386487..adc7c6f879e 100644 --- a/go/vt/wrangler/traffic_switcher_env_test.go +++ b/go/vt/wrangler/traffic_switcher_env_test.go @@ -26,33 +26,38 @@ import ( "github.com/stretchr/testify/require" 
"golang.org/x/sync/semaphore" + "google.golang.org/protobuf/proto" "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/mysql/fakesqldb" + "vitess.io/vitess/go/sqlescape" "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/vt/binlog/binlogplayer" "vitess.io/vitess/go/vt/grpcclient" "vitess.io/vitess/go/vt/key" "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/logutil" - binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" - topodatapb "vitess.io/vitess/go/vt/proto/topodata" - "vitess.io/vitess/go/vt/proto/vschema" - vschemapb "vitess.io/vitess/go/vt/proto/vschema" + "vitess.io/vitess/go/vt/sqlparser" "vitess.io/vitess/go/vt/topo" "vitess.io/vitess/go/vt/topo/memorytopo" "vitess.io/vitess/go/vt/topotools" + "vitess.io/vitess/go/vt/vtgate/vindexes" "vitess.io/vitess/go/vt/vttablet/queryservice" "vitess.io/vitess/go/vt/vttablet/tabletconn" "vitess.io/vitess/go/vt/vttablet/tabletconntest" "vitess.io/vitess/go/vt/vttablet/tabletmanager/vreplication" "vitess.io/vitess/go/vt/vttablet/tmclient" + + binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" + topodatapb "vitess.io/vitess/go/vt/proto/topodata" + vschemapb "vitess.io/vitess/go/vt/proto/vschema" ) const ( streamInfoQuery = "select id, source, message, cell, tablet_types, workflow_type, workflow_sub_type, defer_secondary_keys from _vt.vreplication where workflow='%s' and db_name='vt_%s'" streamExtInfoQuery = "select id, source, pos, stop_pos, max_replication_lag, state, db_name, time_updated, transaction_timestamp, time_heartbeat, time_throttled, component_throttled, message, tags, workflow_type, workflow_sub_type, defer_secondary_keys, rows_copied from _vt.vreplication where db_name = 'vt_%s' and workflow = '%s'" copyStateQuery = "select table_name, lastpk from _vt.copy_state where vrepl_id = %d and id in (select max(id) from _vt.copy_state where vrepl_id = %d group by vrepl_id, table_name)" + maxValForSequence = "select max(`id`) as maxval from `vt_%s`.`%s`" ) var ( @@ -67,20 +72,22 @@ var ( ) type 
testMigraterEnv struct { - ts *topo.Server - wr *Wrangler - sourcePrimaries []*fakeTablet - targetPrimaries []*fakeTablet - dbSourceClients []*fakeDBClient - dbTargetClients []*fakeDBClient - allDBClients []*fakeDBClient - targetKeyspace string - sourceShards []string - targetShards []string - sourceKeyRanges []*topodatapb.KeyRange - targetKeyRanges []*topodatapb.KeyRange - tmeDB *fakesqldb.DB - mu sync.Mutex + ts *topo.Server + wr *Wrangler + sourcePrimaries []*fakeTablet + targetPrimaries []*fakeTablet + additionalPrimaries []*fakeTablet + dbSourceClients []*fakeDBClient + dbTargetClients []*fakeDBClient + dbAdditionalClients []*fakeDBClient + allDBClients []*fakeDBClient + targetKeyspace string + sourceShards []string + targetShards []string + sourceKeyRanges []*topodatapb.KeyRange + targetKeyRanges []*topodatapb.KeyRange + tmeDB *fakesqldb.DB + mu sync.Mutex } // testShardMigraterEnv has some convenience functions for adding expected queries. @@ -116,6 +123,10 @@ func newTestTableMigraterCustom(ctx context.Context, t *testing.T, sourceShards, tme.sourceShards = sourceShards tme.targetShards = targetShards tme.tmeDB = fakesqldb.New(t) + useSequences := false + if len(sourceShards) == 1 && len(targetShards) > 1 { + useSequences = true + } expectVDiffQueries(tme.tmeDB) tabletID := 10 for _, shard := range sourceShards { @@ -189,6 +200,73 @@ func newTestTableMigraterCustom(ctx context.Context, t *testing.T, sourceShards, t.Fatal(err) } } + if useSequences { + // Add another unsharded keyspace with sequence tables in + // order to test sequence handling. 
+ uvs := &vschemapb.Keyspace{ + Sharded: false, + Tables: map[string]*vschemapb.Table{ + "t1_seq": { + Type: vindexes.TypeSequence, + }, + "t2_seq": { + Type: vindexes.TypeSequence, + }, + }, + } + tabletID += 10 + gfdb := fakesqldb.New(t) + tme.additionalPrimaries = append(tme.additionalPrimaries, newFakeTablet(t, tme.wr, "cell1", uint32(tabletID), topodatapb.TabletType_PRIMARY, gfdb, TabletKeyspaceShard(t, "global", "0"))) + if err := tme.ts.SaveVSchema(ctx, "global", uvs); err != nil { + t.Fatal(err) + } + + // Now use these sequence tables in the target sharded keyspace. + tks := proto.Clone(vs).(*vschemapb.Keyspace) + tks.Tables["t1"].AutoIncrement = &vschemapb.AutoIncrement{ + Column: "id", + Sequence: "t1_seq", + } + tks.Tables["t2"].AutoIncrement = &vschemapb.AutoIncrement{ + Column: "id", + Sequence: "t2_seq", + } + if err := tme.ts.SaveVSchema(ctx, "ks2", tks); err != nil { + t.Fatal(err) + } + + // Now tell the fakesqldb used by the target keyspace tablets to expect + // the sequence management related queries against the target keyspace. + tme.tmeDB.AddQuery(fmt.Sprintf(maxValForSequence, "ks2", "t1"), + sqltypes.MakeTestResult( + sqltypes.MakeTestFields( + "maxval", + "int64", + ), + "5", + ), + ) + tme.tmeDB.AddQuery(fmt.Sprintf(maxValForSequence, "ks2", "t2"), + sqltypes.MakeTestResult( + sqltypes.MakeTestFields( + "maxval", + "int64", + ), + "7", + ), + ) + + // Now tell the fakesqldb used by the global keyspace tablets to expect + // the sequence management related queries against the target keyspace. 
+ gfdb.AddQuery( + sqlparser.BuildParsedQuery(sqlInitSequenceTable, sqlescape.EscapeID("vt_global"), sqlescape.EscapeID("t1_seq"), 6, 6, 6).Query, + &sqltypes.Result{RowsAffected: 0}, + ) + gfdb.AddQuery( + sqlparser.BuildParsedQuery(sqlInitSequenceTable, sqlescape.EscapeID("vt_global"), sqlescape.EscapeID("t2_seq"), 8, 8, 8).Query, + &sqltypes.Result{RowsAffected: 0}, + ) + } if err := tme.ts.RebuildSrvVSchema(ctx, nil); err != nil { t.Fatal(err) } @@ -222,13 +300,13 @@ func newTestTableMigraterCustom(ctx context.Context, t *testing.T, sourceShards, }}, }, } - streamInfoRows = append(streamInfoRows, fmt.Sprintf("%d|%v|||", j+1, bls)) - streamExtInfoRows = append(streamExtInfoRows, fmt.Sprintf("%d|||||Running|vt_ks1|%d|%d|0|0||||0", j+1, now, now)) + streamInfoRows = append(streamInfoRows, fmt.Sprintf("%d|%v||||1|0|0", j+1, bls)) + streamExtInfoRows = append(streamExtInfoRows, fmt.Sprintf("%d|||||Running|vt_ks1|%d|%d|0|0||1||0", j+1, now, now)) tme.dbTargetClients[i].addInvariant(fmt.Sprintf(copyStateQuery, j+1, j+1), noResult) } tme.dbTargetClients[i].addInvariant(streamInfoKs2, sqltypes.MakeTestResult(sqltypes.MakeTestFields( - "id|source|message|cell|tablet_types", - "int64|varchar|varchar|varchar|varchar"), + "id|source|message|cell|tablet_types|workflow_type|workflow_sub_type|defer_secondary_keys", + "int64|varchar|varchar|varchar|varchar|int64|int64|int64"), streamInfoRows...)) tme.dbTargetClients[i].addInvariant(streamExtInfoKs2, sqltypes.MakeTestResult(sqltypes.MakeTestFields( "id|source|pos|stop_pos|max_replication_lag|state|db_name|time_updated|transaction_timestamp|time_heartbeat|time_throttled|component_throttled|message|tags|workflow_type|workflow_sub_type|defer_secondary_keys", @@ -256,12 +334,12 @@ func newTestTableMigraterCustom(ctx context.Context, t *testing.T, sourceShards, }}, }, } - streamInfoRows = append(streamInfoRows, fmt.Sprintf("%d|%v|||", j+1, bls)) + streamInfoRows = append(streamInfoRows, fmt.Sprintf("%d|%v||||1|0|0", j+1, bls)) 
tme.dbTargetClients[i].addInvariant(fmt.Sprintf(copyStateQuery, j+1, j+1), noResult) } tme.dbSourceClients[i].addInvariant(reverseStreamInfoKs1, sqltypes.MakeTestResult(sqltypes.MakeTestFields( - "id|source|message|cell|tablet_types", - "int64|varchar|varchar|varchar|varchar"), + "id|source|message|cell|tablet_types|workflow_type|workflow_sub_type|defer_secondary_keys", + "int64|varchar|varchar|varchar|varchar|int64|int64|int64"), streamInfoRows...), ) } @@ -394,13 +472,13 @@ func newTestTablePartialMigrater(ctx context.Context, t *testing.T, shards, shar }}, }, } - streamInfoRows = append(streamInfoRows, fmt.Sprintf("%d|%v|||", i+1, bls)) - streamExtInfoRows = append(streamExtInfoRows, fmt.Sprintf("%d|||||Running|vt_ks1|%d|%d|0|0||||0", i+1, now, now)) + streamInfoRows = append(streamInfoRows, fmt.Sprintf("%d|%v||||1|0|0", i+1, bls)) + streamExtInfoRows = append(streamExtInfoRows, fmt.Sprintf("%d|||||Running|vt_ks1|%d|%d|0|0|||1||0", i+1, now, now)) } tme.dbTargetClients[i].addInvariant(fmt.Sprintf(copyStateQuery, i+1, i+1), noResult) tme.dbTargetClients[i].addInvariant(streamInfoKs2, sqltypes.MakeTestResult(sqltypes.MakeTestFields( - "id|source|message|cell|tablet_types", - "int64|varchar|varchar|varchar|varchar"), + "id|source|message|cell|tablet_types|workflow_type|workflow_sub_type|defer_secondary_keys", + "int64|varchar|varchar|varchar|varchar|int64|int64|int64"), streamInfoRows...)) tme.dbTargetClients[i].addInvariant(streamExtInfoKs2, sqltypes.MakeTestResult(sqltypes.MakeTestFields( "id|source|pos|stop_pos|max_replication_lag|state|db_name|time_updated|transaction_timestamp|time_heartbeat|time_throttled|component_throttled|message|tags|workflow_type|workflow_sub_type|defer_secondary_keys", @@ -430,12 +508,12 @@ func newTestTablePartialMigrater(ctx context.Context, t *testing.T, shards, shar }}, }, } - streamInfoRows = append(streamInfoRows, fmt.Sprintf("%d|%v|||", i+1, bls)) + streamInfoRows = append(streamInfoRows, fmt.Sprintf("%d|%v||||1|0|0", i+1, bls)) 
tme.dbTargetClients[i].addInvariant(fmt.Sprintf(copyStateQuery, i+1, i+1), noResult) } tme.dbSourceClients[i].addInvariant(reverseStreamInfoKs1, sqltypes.MakeTestResult(sqltypes.MakeTestFields( - "id|source|message|cell|tablet_types", - "int64|varchar|varchar|varchar|varchar"), + "id|source|message|cell|tablet_types|workflow_type|workflow_sub_type|defer_secondary_keys", + "int64|varchar|varchar|varchar|varchar|int64|int64|int64"), streamInfoRows...), ) } @@ -498,26 +576,26 @@ func newTestShardMigrater(ctx context.Context, t *testing.T, sourceShards, targe vs := &vschemapb.Keyspace{ Sharded: true, - Vindexes: map[string]*vschema.Vindex{ + Vindexes: map[string]*vschemapb.Vindex{ "thash": { Type: "hash", }, }, - Tables: map[string]*vschema.Table{ + Tables: map[string]*vschemapb.Table{ "t1": { - ColumnVindexes: []*vschema.ColumnVindex{{ + ColumnVindexes: []*vschemapb.ColumnVindex{{ Columns: []string{"c1"}, Name: "thash", }}, }, "t2": { - ColumnVindexes: []*vschema.ColumnVindex{{ + ColumnVindexes: []*vschemapb.ColumnVindex{{ Columns: []string{"c1"}, Name: "thash", }}, }, "t3": { - ColumnVindexes: []*vschema.ColumnVindex{{ + ColumnVindexes: []*vschemapb.ColumnVindex{{ Columns: []string{"c1"}, Name: "thash", }}, @@ -556,8 +634,8 @@ func newTestShardMigrater(ctx context.Context, t *testing.T, sourceShards, targe }}, }, } - rows = append(rows, fmt.Sprintf("%d|%v||||0|0|0", j+1, bls)) - rowsRdOnly = append(rows, fmt.Sprintf("%d|%v|||RDONLY|0|0|0", j+1, bls)) + rows = append(rows, fmt.Sprintf("%d|%v||||1|0|0", j+1, bls)) + rowsRdOnly = append(rows, fmt.Sprintf("%d|%v|||RDONLY|1|0|0", j+1, bls)) streamExtInfoRows = append(streamExtInfoRows, fmt.Sprintf("%d|||||Running|vt_ks1|%d|%d|0|0|||", j+1, now, now)) tme.dbTargetClients[i].addInvariant(fmt.Sprintf(copyStateQuery, j+1, j+1), noResult) } @@ -597,6 +675,7 @@ func (tme *testMigraterEnv) startTablets(t *testing.T) { tme.mu.Lock() defer tme.mu.Unlock() allPrimarys := append(tme.sourcePrimaries, tme.targetPrimaries...) 
+ allPrimarys = append(allPrimarys, tme.additionalPrimaries...) for _, primary := range allPrimarys { primary.StartActionLoop(t, tme.wr) } @@ -627,6 +706,9 @@ func (tme *testMigraterEnv) stopTablets(t *testing.T) { for _, primary := range tme.targetPrimaries { primary.StopActionLoop(t) } + for _, primary := range tme.additionalPrimaries { + primary.StopActionLoop(t) + } } func (tme *testMigraterEnv) createDBClients(ctx context.Context, t *testing.T) { @@ -649,7 +731,17 @@ func (tme *testMigraterEnv) createDBClients(ctx context.Context, t *testing.T) { primary.TM.VREngine = vreplication.NewTestEngine(tme.ts, primary.Tablet.GetAlias().GetCell(), primary.FakeMysqlDaemon, dbClientFactory, dbClientFactory, dbclient.DBName(), nil) primary.TM.VREngine.Open(ctx) } + for _, primary := range tme.additionalPrimaries { + log.Infof("Adding as additionalPrimary %s", primary.Tablet.Alias) + dbclient := newFakeDBClient(primary.Tablet.Alias.String()) + tme.dbAdditionalClients = append(tme.dbTargetClients, dbclient) + dbClientFactory := func() binlogplayer.DBClient { return dbclient } + // Replace existing engine with a new one + primary.TM.VREngine = vreplication.NewTestEngine(tme.ts, primary.Tablet.GetAlias().GetCell(), primary.FakeMysqlDaemon, dbClientFactory, dbClientFactory, dbclient.DBName(), nil) + primary.TM.VREngine.Open(ctx) + } tme.allDBClients = append(tme.dbSourceClients, tme.dbTargetClients...) + tme.allDBClients = append(tme.allDBClients, tme.dbAdditionalClients...) 
} func (tme *testMigraterEnv) setPrimaryPositions() { diff --git a/go/vt/wrangler/traffic_switcher_test.go b/go/vt/wrangler/traffic_switcher_test.go index ecad4d5db9d..0fb8a786e4b 100644 --- a/go/vt/wrangler/traffic_switcher_test.go +++ b/go/vt/wrangler/traffic_switcher_test.go @@ -347,7 +347,7 @@ func TestTableMigrateMainflow(t *testing.T) { cancelMigration() switchWrites(tme) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false, true) want = "DeadlineExceeded" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites(0 timeout) err: %v, must contain %v", err, want) @@ -455,7 +455,7 @@ func TestTableMigrateMainflow(t *testing.T) { } deleteTargetVReplication() - journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) + journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, true) if err != nil { t.Fatal(err) } @@ -662,7 +662,7 @@ func TestShardMigrateMainflow(t *testing.T) { } cancelMigration() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false, true) want = "DeadlineExceeded" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites(0 timeout) err: %v, must contain %v", err, want) @@ -751,7 +751,7 @@ func TestShardMigrateMainflow(t *testing.T) { } freezeTargetVReplication() - journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) + journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, true) if err != nil { t.Fatal(err) } @@ -870,7 
+870,7 @@ func testTableMigrateOneToMany(t *testing.T, keepData, keepRoutingRules bool) { tme.dbSourceClients[0].addQueryRE(tsCheckJournals, &sqltypes.Result{}, nil) switchWrites(tme) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false, false, true) if err != nil { t.Fatal(err) } @@ -1000,6 +1000,7 @@ func TestTableMigrateOneToManyDryRun(t *testing.T) { "Wait for VReplication on stopped streams to catchup for up to 1s", "Create reverse replication workflow test_reverse", "Create journal entries on source databases", + "The following sequence backing tables used by tables being moved will be initialized: t1_seq,t2_seq", "Enable writes on keyspace ks2 tables [t1,t2]", "Switch routing from keyspace ks1 to keyspace ks2", "Routing rules for tables [t1,t2] will be updated", @@ -1092,7 +1093,7 @@ func TestTableMigrateOneToManyDryRun(t *testing.T) { deleteTargetVReplication() switchWrites(tme) - _, results, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false, true, false) + _, results, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false, true, true) require.NoError(t, err) require.Empty(t, cmp.Diff(wantdryRunWrites, *results)) } @@ -1180,7 +1181,7 @@ func TestMigrateFailJournal(t *testing.T) { tme.dbSourceClients[1].addQueryRE("insert into _vt.resharding_journal", nil, errors.New("journaling intentionally failed")) switchWrites(tme) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, true) want := "journaling intentionally failed" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites(0 timeout) err: %v, must contain %v", err, 
want) @@ -1242,7 +1243,7 @@ func TestTableMigrateJournalExists(t *testing.T) { tme.dbTargetClients[1].addQuery("select * from _vt.vreplication where id = 2", stoppedResult(2), nil) switchWrites(tme) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, true) if err != nil { t.Fatal(err) } @@ -1320,7 +1321,7 @@ func TestShardMigrateJournalExists(t *testing.T) { tme.dbTargetClients[1].addQuery("select * from _vt.vreplication where id = 2", stoppedResult(2), nil) switchWrites(tme) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, true) if err != nil { t.Fatal(err) } @@ -1385,7 +1386,7 @@ func TestTableMigrateCancel(t *testing.T) { cancelMigration() switchWrites(tme) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false, false, true) if err != nil { t.Fatal(err) } @@ -1447,7 +1448,7 @@ func TestTableMigrateCancelDryRun(t *testing.T) { cancelMigration() switchWrites(tme) - _, dryRunResults, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false, true, false) + _, dryRunResults, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false, true, true) require.NoError(t, err) require.Empty(t, cmp.Diff(want, *dryRunResults)) } @@ -1548,7 +1549,7 @@ func TestTableMigrateNoReverse(t *testing.T) { deleteTargetVReplication() switchWrites(tme) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, 
tme.targetKeyspace, "test", 1*time.Second, false, false, false, false, true) if err != nil { t.Fatal(err) } @@ -1590,7 +1591,7 @@ func TestMigrateFrozen(t *testing.T) { tme.dbTargetClients[1].addQuery(streamInfoKs2, &sqltypes.Result{}, nil) switchWrites(tme) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false, true) if err != nil { t.Fatal(err) } @@ -1965,7 +1966,7 @@ func TestShardMigrateNoAvailableTabletsForReverseReplication(t *testing.T) { cancelMigration() switchWrites(tme) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false, true) want = "DeadlineExceeded" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites(0 timeout) err: %v, must contain %v", err, want) @@ -2060,7 +2061,7 @@ func TestShardMigrateNoAvailableTabletsForReverseReplication(t *testing.T) { invariants[fmt.Sprintf("%s-%d", streamInfoKs, i)] = tme.dbTargetClients[i].getInvariant(streamInfoKs) tme.dbTargetClients[i].addInvariant(streamInfoKs, tme.dbTargetClients[i].getInvariant(streamInfoKs+"-rdonly")) } - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, true) require.Error(t, err) require.True(t, strings.Contains(err.Error(), "no tablet found")) require.True(t, strings.Contains(err.Error(), "-80")) @@ -2070,7 +2071,7 @@ func TestShardMigrateNoAvailableTabletsForReverseReplication(t *testing.T) { tme.dbTargetClients[i].addInvariant(streamInfoKs, invariants[fmt.Sprintf("%s-%d", streamInfoKs, i)]) } - journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, 
"test", 1*time.Second, false, false, true, false, false) + journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, true) if err != nil { t.Fatal(err) } From cacf7d9d6ffb709ea55b9f6a09c5861cd0daf012 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Mon, 31 Jul 2023 13:58:44 -0400 Subject: [PATCH 18/56] Enable new flag in e2e test Signed-off-by: Matt Lord --- .../endtoend/vreplication/resharding_workflows_v2_test.go | 1 + go/test/endtoend/vreplication/vreplication_test.go | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/go/test/endtoend/vreplication/resharding_workflows_v2_test.go b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go index a55ad3047e1..0c12d217f83 100644 --- a/go/test/endtoend/vreplication/resharding_workflows_v2_test.go +++ b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go @@ -119,6 +119,7 @@ func tstWorkflowExec(t *testing.T, cells, workflow, sourceKs, targetKs, tables, switch currentWorkflowType { case wrangler.MoveTablesWorkflow, wrangler.MigrateWorkflow, wrangler.ReshardWorkflow: args = append(args, "--defer-secondary-keys") + args = append(args, "--initialize-target-sequences") // Only used for MoveTables } } if cells != "" { diff --git a/go/test/endtoend/vreplication/vreplication_test.go b/go/test/endtoend/vreplication/vreplication_test.go index 35f7062d27d..d325f9f6b38 100644 --- a/go/test/endtoend/vreplication/vreplication_test.go +++ b/go/test/endtoend/vreplication/vreplication_test.go @@ -1355,8 +1355,11 @@ func catchup(t *testing.T, vttablet *cluster.VttabletProcess, workflow, info str func moveTablesAction(t *testing.T, action, cell, workflow, sourceKs, targetKs, tables string, extraFlags ...string) { var err error args := []string{"MoveTables", "--workflow=" + workflow, "--target-keyspace=" + targetKs, action} - if strings.EqualFold(action, strings.ToLower(workflowActionCreate)) { + switch strings.ToLower(action) { + case 
strings.ToLower(workflowActionCreate): extraFlags = append(extraFlags, "--source-keyspace="+sourceKs, "--tables="+tables, "--cells="+cell, "--tablet-types=primary,replica,rdonly") + case strings.ToLower(workflowActionSwitchTraffic): + extraFlags = append(extraFlags, "--initialize-target-sequences") } args = append(args, extraFlags...) err = vc.VtctldClient.ExecuteCommand(args...) From e35208851f067bb507e81175f738516f81cbfce1 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Mon, 31 Jul 2023 15:35:38 -0400 Subject: [PATCH 19/56] Deflake SwitchTraffic dry run unit tests Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/switcher_dry_run.go | 6 +++++- go/vt/wrangler/switcher_dry_run.go | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/go/vt/vtctl/workflow/switcher_dry_run.go b/go/vt/vtctl/workflow/switcher_dry_run.go index de502b97102..c34534d7e77 100644 --- a/go/vt/vtctl/workflow/switcher_dry_run.go +++ b/go/vt/vtctl/workflow/switcher_dry_run.go @@ -369,7 +369,11 @@ func (dr *switcherDryRun) resetSequences(ctx context.Context) error { } func (dr *switcherDryRun) initializeTargetSequences(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error { + sortedBackingTableNames := maps.Keys(sequencesByBackingTable) + sort.Slice(sortedBackingTableNames, func(i, j int) bool { + return sortedBackingTableNames[i] < sortedBackingTableNames[j] + }) dr.drLog.Log(fmt.Sprintf("The following sequence backing tables used by tables being moved will be initialized: %s", - strings.Join(maps.Keys(sequencesByBackingTable), ","))) + strings.Join(sortedBackingTableNames, ","))) return nil } diff --git a/go/vt/wrangler/switcher_dry_run.go b/go/vt/wrangler/switcher_dry_run.go index 017a2a03b68..e33f16ea9f6 100644 --- a/go/vt/wrangler/switcher_dry_run.go +++ b/go/vt/wrangler/switcher_dry_run.go @@ -387,7 +387,11 @@ func (dr *switcherDryRun) resetSequences(ctx context.Context) error { } func (dr *switcherDryRun) initializeTargetSequences(ctx context.Context, sequencesByBackingTable 
map[string]*sequenceMetadata) error { + sortedBackingTableNames := maps.Keys(sequencesByBackingTable) + sort.Slice(sortedBackingTableNames, func(i, j int) bool { + return sortedBackingTableNames[i] < sortedBackingTableNames[j] + }) dr.drLog.Log(fmt.Sprintf("The following sequence backing tables used by tables being moved will be initialized: %s", - strings.Join(maps.Keys(sequencesByBackingTable), ","))) + strings.Join(sortedBackingTableNames, ","))) return nil } From bee2a67840ef97f8ac2f3a98013caf9fe4c3011f Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Mon, 31 Jul 2023 17:57:52 -0400 Subject: [PATCH 20/56] Tweakin' and deflakin' Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/traffic_switcher.go | 64 ++++++++++----------- go/vt/wrangler/traffic_switcher.go | 46 +++++++-------- go/vt/wrangler/traffic_switcher_env_test.go | 10 ++-- 3 files changed, 60 insertions(+), 60 deletions(-) diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index 58d863a8b0a..a2a6891461a 100644 --- a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -1288,16 +1288,16 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s ksWg := sync.WaitGroup{} // All of the goroutines finished ksFunc := func(ks string) { // The function used to search each keyspace defer ksWg.Done() - vschema, err = ts.TopoServer().GetVSchema(ctx, ks) - if err != nil { + kvs, kerr := ts.TopoServer().GetVSchema(ctx, ks) + if kerr != nil { select { case ksErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", - ks, err): + ks, kerr): default: } return } - if vschema == nil || vschema.Sharded || vschema.Tables == nil || len(vschema.Tables) == 0 { + if kvs == nil || kvs.Sharded || kvs.Tables == nil || len(kvs.Tables) == 0 { return } select { @@ -1307,7 +1307,7 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return default: } - for tableName, tableDef := 
range vschema.Tables { + for 
tableName, tableDef := range kvs.Tables { smMu.Lock() // Prevent concurrent access to the map sm := sequencesByBackingTable[tableName] if tableDef != nil && tableDef.Type == vindexes.TypeSequence && @@ -1381,7 +1381,7 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen // a higher value. shardResults := make([]int64, 0, len(ts.TargetShards())) srMu := sync.Mutex{} - err := ts.ForAllTargets(func(target *MigrationTarget) error { + ierr := ts.ForAllTargets(func(target *MigrationTarget) error { primary := target.GetPrimary() if primary == nil || primary.GetAlias() == nil { return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary tablet found for target shard %s/%s", @@ -1394,18 +1394,18 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen ) log.Errorf("DEBUG: query: %s on shard: %s/%s", query.Query, ts.targetKeyspace, target.GetShard().ShardName()) - qr, err := ts.ws.tmc.ExecuteFetchAsApp(ctx, primary.Tablet, true, &tabletmanagerdatapb.ExecuteFetchAsAppRequest{ + qr, ierr := ts.ws.tmc.ExecuteFetchAsApp(ctx, primary.Tablet, true, &tabletmanagerdatapb.ExecuteFetchAsAppRequest{ Query: []byte(query.Query), MaxRows: 1, }) - if err != nil || len(qr.Rows) != 1 { + if ierr != nil || len(qr.Rows) != 1 { return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the max used sequence value for target table %s.%s in order to initialize the backing sequence table: %v", - ts.targetKeyspace, sequenceMetadata.usingTableName, err) + ts.targetKeyspace, sequenceMetadata.usingTableName, ierr) } - maxID, err := sqltypes.Proto3ToResult(qr).Rows[0][0].ToInt64() - if err != nil { + maxID, ierr := sqltypes.Proto3ToResult(qr).Rows[0][0].ToInt64() + if ierr != nil { return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the max used sequence value for target table %s.%s in order to initialize the backing sequence table: %v", - ts.targetKeyspace, sequenceMetadata.usingTableName, err) + ts.targetKeyspace, 
sequenceMetadata.usingTableName, ierr) } log.Errorf("DEBUG: max ID seen on shard %s: %d", target.GetShard().ShardName(), maxID) srMu.Lock() @@ -1413,9 +1413,9 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen shardResults = append(shardResults, maxID) return nil }) - if err != nil { + if ierr != nil { select { - case initErr <- err: + case initErr <- ierr: default: } return @@ -1432,20 +1432,20 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen nextVal := shardResults[len(shardResults)-1] + 1 // Now we need to update the sequence table, if needed, in order to // ensure that that the next value it provides is > the current max. - sequenceShard, err := ts.TopoServer().GetOnlyShard(ctx, sequenceMetadata.backingTableKeyspace) - if err != nil || sequenceShard == nil || sequenceShard.PrimaryAlias == nil { + sequenceShard, ierr := ts.TopoServer().GetOnlyShard(ctx, sequenceMetadata.backingTableKeyspace) + if ierr != nil || sequenceShard == nil || sequenceShard.PrimaryAlias == nil { select { case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", - sequenceMetadata.backingTableKeyspace, err): + sequenceMetadata.backingTableKeyspace, ierr): default: } return } - sequenceTablet, err := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) - if err != nil || sequenceTablet == nil { + sequenceTablet, ierr := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) + if ierr != nil || sequenceTablet == nil { select { case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", - sequenceMetadata.backingTableKeyspace, err): + sequenceMetadata.backingTableKeyspace, ierr): default: } return @@ -1463,24 +1463,24 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen log.Errorf("DEBUG: query: %s", query.Query) // Now execute this on the primary tablet of the unsharded keyspace // 
housing the backing table. - primaryTablet, err := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) - if err != nil { + primaryTablet, ierr := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) + if ierr != nil { select { case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for %s.%s using alias %s: %v", - sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, err): + sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, ierr): default: } return } - qr, err := ts.ws.tmc.ExecuteFetchAsApp(ctx, primaryTablet.Tablet, true, &tabletmanagerdatapb.ExecuteFetchAsAppRequest{ + qr, ierr := ts.ws.tmc.ExecuteFetchAsApp(ctx, primaryTablet.Tablet, true, &tabletmanagerdatapb.ExecuteFetchAsAppRequest{ Query: []byte(query.Query), MaxRows: 1, }) - if err != nil { + if ierr != nil { select { case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to initialize the backing sequence table %s.%s: %v", - sequenceMetadata.backingTableDBName, sequenceMetadata.backingTableName, err): + sequenceMetadata.backingTableDBName, sequenceMetadata.backingTableName, ierr): default: } return @@ -1498,20 +1498,20 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen } ts.Logger().Infof("Resetting sequence cache for backing table %s on shard %s/%s using tablet %s", sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias) - ti, err := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) - if err != nil { + ti, ierr := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) + if ierr != nil { select { case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get primary tablet for keyspace %s: %v", - sequenceMetadata.backingTableKeyspace, err): + sequenceMetadata.backingTableKeyspace, ierr): default: } return } - err = ts.TabletManagerClient().ResetSequences(ctx, ti.Tablet, 
[]string{sequenceMetadata.backingTableName}) - if err != nil { + ierr = ts.TabletManagerClient().ResetSequences(ctx, ti.Tablet, []string{sequenceMetadata.backingTableName}) + if ierr != nil { select { case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset the sequence cache for backing table %s on shard %s/%s using tablet %s: %v", - sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, err): + sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, ierr): default: } return diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index d868a96520a..340988cce86 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -2044,16 +2044,16 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s ksWg := sync.WaitGroup{} // All of the goroutines finished ksFunc := func(ks string) { // The function used to search each keyspace defer ksWg.Done() - vschema, err = ts.TopoServer().GetVSchema(ctx, ks) - if err != nil { + kvs, kerr := ts.TopoServer().GetVSchema(ctx, ks) + if kerr != nil { select { case ksErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", - ks, err): + ks, kerr): default: } return } - if vschema == nil || vschema.Sharded || vschema.Tables == nil || len(vschema.Tables) == 0 { + if kvs == nil || kvs.Sharded || kvs.Tables == nil || len(kvs.Tables) == 0 { return } select { @@ -2063,7 +2063,7 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return default: } - for tableName, tableDef := range vschema.Tables { + for tableName, tableDef := range kvs.Tables { smMu.Lock() // Prevent concurrent access to the map sm := sequencesByBackingTable[tableName] if tableDef != nil && tableDef.Type == vindexes.TypeSequence && @@ -2139,7 +2139,7 @@ func (ts *trafficSwitcher) 
initializeTargetSequences(ctx context.Context, sequen // a higher value. shardResults := make([]int64, 0, len(ts.TargetShards())) srMu := sync.Mutex{} - err := ts.ForAllTargets(func(target *workflow.MigrationTarget) error { + ierr := ts.ForAllTargets(func(target *workflow.MigrationTarget) error { primary := target.GetPrimary() if primary == nil || primary.GetAlias() == nil { return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary tablet found for target shard %s/%s", @@ -2168,9 +2168,9 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen shardResults = append(shardResults, maxID) return nil }) - if err != nil { + if ierr != nil { select { - case initErr <- err: + case initErr <- ierr: default: } return @@ -2189,20 +2189,20 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen nextVal := shardResults[len(shardResults)-1] + 1 // Now we need to update the sequence table, if needed, in order to // ensure that that the next value it provides is > the current max. 
- sequenceShard, err := ts.wr.TopoServer().GetOnlyShard(ctx, sequenceMetadata.backingTableKeyspace) - if err != nil || sequenceShard == nil || sequenceShard.PrimaryAlias == nil { + sequenceShard, ierr := ts.wr.TopoServer().GetOnlyShard(ctx, sequenceMetadata.backingTableKeyspace) + if ierr != nil || sequenceShard == nil || sequenceShard.PrimaryAlias == nil { select { case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", - sequenceMetadata.backingTableKeyspace, err): + sequenceMetadata.backingTableKeyspace, ierr): default: } return } - sequenceTablet, err := ts.wr.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) - if err != nil || sequenceTablet == nil { + sequenceTablet, ierr := ts.wr.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) + if ierr != nil || sequenceTablet == nil { select { case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", - sequenceMetadata.backingTableKeyspace, err): + sequenceMetadata.backingTableKeyspace, ierr): default: } return @@ -2227,11 +2227,11 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen log.Errorf("DEBUG: query: %s", query.Query) // Now execute this on the primary tablet of the unsharded keyspace // housing the backing table. 
- qr, err := ts.wr.ExecuteFetchAsApp(ctx, sequenceShard.PrimaryAlias, true, query.Query, 1) - if err != nil { + qr, ierr := ts.wr.ExecuteFetchAsApp(ctx, sequenceShard.PrimaryAlias, true, query.Query, 1) + if ierr != nil { select { case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to initialize the backing sequence table %s.%s: %v", - sequenceMetadata.backingTableDBName, sequenceMetadata.backingTableName, err): + sequenceMetadata.backingTableDBName, sequenceMetadata.backingTableName, ierr): default: } return @@ -2251,20 +2251,20 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen } ts.Logger().Infof("Resetting sequence cache for backing table %s on shard %s/%s using tablet %s", sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias) - ti, err := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) - if err != nil { + ti, ierr := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) + if ierr != nil { select { case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get primary tablet for keyspace %s: %v", - sequenceMetadata.backingTableKeyspace, err): + sequenceMetadata.backingTableKeyspace, ierr): default: } return } - err = ts.TabletManagerClient().ResetSequences(ctx, ti.Tablet, []string{sequenceMetadata.backingTableName}) - if err != nil { + ierr = ts.TabletManagerClient().ResetSequences(ctx, ti.Tablet, []string{sequenceMetadata.backingTableName}) + if ierr != nil { select { case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset the sequence cache for backing table %s on shard %s/%s using tablet %s: %v", - sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, err): + sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, ierr): default: } return diff --git a/go/vt/wrangler/traffic_switcher_env_test.go 
b/go/vt/wrangler/traffic_switcher_env_test.go index adc7c6f879e..a752a08d4f1 100644 --- a/go/vt/wrangler/traffic_switcher_env_test.go +++ b/go/vt/wrangler/traffic_switcher_env_test.go @@ -159,7 +159,8 @@ func newTestTableMigraterCustom(ctx context.Context, t *testing.T, sourceShards, tabletconn.RegisterDialer(dialerName, func(tablet *topodatapb.Tablet, failFast grpcclient.FailFast) (queryservice.QueryService, error) { tme.mu.Lock() defer tme.mu.Unlock() - for _, ft := range append(tme.sourcePrimaries, tme.targetPrimaries...) { + allPrimaries := append(tme.sourcePrimaries, tme.targetPrimaries...) + for _, ft := range append(allPrimaries, tme.additionalPrimaries...) { if ft.Tablet.Alias.Uid == tablet.Alias.Uid { return ft, nil } @@ -735,10 +736,6 @@ func (tme *testMigraterEnv) createDBClients(ctx context.Context, t *testing.T) { log.Infof("Adding as additionalPrimary %s", primary.Tablet.Alias) dbclient := newFakeDBClient(primary.Tablet.Alias.String()) tme.dbAdditionalClients = append(tme.dbTargetClients, dbclient) - dbClientFactory := func() binlogplayer.DBClient { return dbclient } - // Replace existing engine with a new one - primary.TM.VREngine = vreplication.NewTestEngine(tme.ts, primary.Tablet.GetAlias().GetCell(), primary.FakeMysqlDaemon, dbClientFactory, dbClientFactory, dbclient.DBName(), nil) - primary.TM.VREngine.Open(ctx) } tme.allDBClients = append(tme.dbSourceClients, tme.dbTargetClients...) tme.allDBClients = append(tme.allDBClients, tme.dbAdditionalClients...) 
@@ -901,6 +898,9 @@ func (tme *testMigraterEnv) close(t *testing.T) { for _, dbclient := range tme.dbTargetClients { dbclient.Close() } + for _, dbclient := range tme.dbAdditionalClients { + dbclient.Close() + } tme.tmeDB.CloseAllConnections() tme.ts.Close() tme.wr.tmc.Close() From 252b791ac6ac8fc656b017c17603465901670f95 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Tue, 1 Aug 2023 00:04:41 -0400 Subject: [PATCH 21/56] Add e2e test integration Signed-off-by: Matt Lord --- go/cmd/vtctldclient/command/movetables.go | 24 +++---- .../vreplication/unsharded_init_data.sql | 3 +- .../vreplication/vreplication_test.go | 65 ++++++++++++------- go/vt/vtctl/workflow/server.go | 8 +-- 4 files changed, 62 insertions(+), 38 deletions(-) diff --git a/go/cmd/vtctldclient/command/movetables.go b/go/cmd/vtctldclient/command/movetables.go index 194b45f5ad5..55b964a468a 100644 --- a/go/cmd/vtctldclient/command/movetables.go +++ b/go/cmd/vtctldclient/command/movetables.go @@ -451,14 +451,15 @@ func commandMoveTablesSwitchTraffic(cmd *cobra.Command, args []string) error { cli.FinishedParsing(cmd) req := &vtctldatapb.WorkflowSwitchTrafficRequest{ - Keyspace: moveTablesOptions.TargetKeyspace, - Workflow: moveTablesOptions.Workflow, - TabletTypes: moveTablesSwitchTrafficOptions.TabletTypes, - MaxReplicationLagAllowed: protoutil.DurationToProto(moveTablesSwitchTrafficOptions.MaxReplicationLagAllowed), - Timeout: protoutil.DurationToProto(moveTablesSwitchTrafficOptions.Timeout), - DryRun: moveTablesSwitchTrafficOptions.DryRun, - EnableReverseReplication: moveTablesSwitchTrafficOptions.EnableReverseReplication, - Direction: int32(moveTablesSwitchTrafficOptions.Direction), + Keyspace: moveTablesOptions.TargetKeyspace, + Workflow: moveTablesOptions.Workflow, + TabletTypes: moveTablesSwitchTrafficOptions.TabletTypes, + MaxReplicationLagAllowed: protoutil.DurationToProto(moveTablesSwitchTrafficOptions.MaxReplicationLagAllowed), + Timeout: 
protoutil.DurationToProto(moveTablesSwitchTrafficOptions.Timeout), + DryRun: moveTablesSwitchTrafficOptions.DryRun, + EnableReverseReplication: moveTablesSwitchTrafficOptions.EnableReverseReplication, + InitializeTargetSequences: moveTablesSwitchTrafficOptions.InitializeTargetSequences, + Direction: int32(moveTablesSwitchTrafficOptions.Direction), } resp, err := client.WorkflowSwitchTraffic(commandCtx, req) if err != nil { @@ -533,15 +534,16 @@ func init() { MoveTablesSwitchTraffic.Flags().Var((*topoproto.TabletTypeListFlag)(&moveTablesSwitchTrafficOptions.TabletTypes), "tablet-types", "Tablet types to switch traffic for") MoveTablesSwitchTraffic.Flags().DurationVar(&moveTablesSwitchTrafficOptions.Timeout, "timeout", timeoutDefault, "Specifies the maximum time to wait, in seconds, for VReplication to catch up on primary tablets. The traffic switch will be cancelled on timeout.") MoveTablesSwitchTraffic.Flags().DurationVar(&moveTablesSwitchTrafficOptions.MaxReplicationLagAllowed, "max-replication-lag-allowed", maxReplicationLagDefault, "Allow traffic to be switched only if VReplication lag is below this") + MoveTablesSwitchTraffic.Flags().BoolVar(&moveTablesSwitchTrafficOptions.EnableReverseReplication, "enable-reverse-replication", true, "Setup replication going back to the original source keyspace to support rolling back the traffic cutover") MoveTablesSwitchTraffic.Flags().BoolVar(&moveTablesSwitchTrafficOptions.DryRun, "dry-run", false, "Print the actions that would be taken and report any known errors that would have occurred") MoveTablesSwitchTraffic.Flags().BoolVar(&moveTablesSwitchTrafficOptions.InitializeTargetSequences, "initialize-target-sequences", false, "When moving tables from an unsharded keyspace to a sharded keyspace, initialize any sequences that are being used on the target when switching writes.") MoveTables.AddCommand(MoveTablesSwitchTraffic) MoveTablesReverseTraffic.Flags().StringSliceVarP(&moveTablesSwitchTrafficOptions.Cells, "cells", "c", 
nil, "Cells and/or CellAliases to switch traffic in") - MoveTablesReverseTraffic.Flags().BoolVar(&moveTablesSwitchTrafficOptions.DryRun, "dry-run", false, "Print the actions that would be taken and report any known errors that would have occurred") - MoveTablesReverseTraffic.Flags().DurationVar(&moveTablesSwitchTrafficOptions.MaxReplicationLagAllowed, "max-replication-lag-allowed", maxReplicationLagDefault, "Allow traffic to be switched only if VReplication lag is below this") - MoveTablesReverseTraffic.Flags().BoolVar(&moveTablesSwitchTrafficOptions.EnableReverseReplication, "enable-reverse-replication", true, "Setup replication going back to the original source keyspace to support rolling back the traffic cutover") MoveTablesReverseTraffic.Flags().Var((*topoproto.TabletTypeListFlag)(&moveTablesSwitchTrafficOptions.TabletTypes), "tablet-types", "Tablet types to switch traffic for") MoveTablesReverseTraffic.Flags().DurationVar(&moveTablesSwitchTrafficOptions.Timeout, "timeout", timeoutDefault, "Specifies the maximum time to wait, in seconds, for VReplication to catch up on primary tablets. 
The traffic switch will be cancelled on timeout.") + MoveTablesReverseTraffic.Flags().DurationVar(&moveTablesSwitchTrafficOptions.MaxReplicationLagAllowed, "max-replication-lag-allowed", maxReplicationLagDefault, "Allow traffic to be switched only if VReplication lag is below this") + MoveTablesReverseTraffic.Flags().BoolVar(&moveTablesSwitchTrafficOptions.EnableReverseReplication, "enable-reverse-replication", true, "Setup replication going back to the original target keyspace to support switching traffic again") + MoveTablesReverseTraffic.Flags().BoolVar(&moveTablesSwitchTrafficOptions.DryRun, "dry-run", false, "Print the actions that would be taken and report any known errors that would have occurred") MoveTables.AddCommand(MoveTablesReverseTraffic) } diff --git a/go/test/endtoend/vreplication/unsharded_init_data.sql b/go/test/endtoend/vreplication/unsharded_init_data.sql index a29a4d1b405..8af0cab6608 100644 --- a/go/test/endtoend/vreplication/unsharded_init_data.sql +++ b/go/test/endtoend/vreplication/unsharded_init_data.sql @@ -1,6 +1,7 @@ insert into customer(cid, name, typ, sport, meta) values(1, 'Jøhn "❤️" Rizzolo',1,'football,baseball','{}'); insert into customer(cid, name, typ, sport, meta) values(2, 'Paül','soho','cricket',convert(x'7b7d' using utf8mb4)); -insert into customer(cid, name, typ, sport, blb) values(3, 'ringo','enterprise','','blob data'); +-- We use a high cid value here to test the target sequence initialization. 
+insert into customer(cid, name, typ, sport, blb) values(999999, 'ringo','enterprise','','blob data'); insert into merchant(mname, category) values('Monoprice', 'eléctronics'); insert into merchant(mname, category) values('newegg', 'elec†ronics'); insert into product(pid, description) values(1, 'keyböard ⌨️'); diff --git a/go/test/endtoend/vreplication/vreplication_test.go b/go/test/endtoend/vreplication/vreplication_test.go index d325f9f6b38..1777173d390 100644 --- a/go/test/endtoend/vreplication/vreplication_test.go +++ b/go/test/endtoend/vreplication/vreplication_test.go @@ -788,8 +788,15 @@ func shardCustomer(t *testing.T, testReverse bool, cells []*Cell, sourceCellOrAl } switchWritesDryRun(t, workflowType, ksWorkflow, dryRunResultsSwitchWritesCustomerShard) switchWrites(t, workflowType, ksWorkflow, false) + checkThatVDiffFails(t, targetKs, workflow) + // The original unsharded customer data included an insert with the + // vindex column (cid) of 999999, so the backing sequence table should + // now have a next_id of 1000000 after SwitchTraffic. + res := execVtgateQuery(t, vtgateConn, sourceKs, "select next_id from customer_seq where id = 0") + require.Equal(t, "1000000", res.Rows[0][0].ToString()) + if withOpenTx && commit != nil { commit(t) } @@ -1362,7 +1369,11 @@ func moveTablesAction(t *testing.T, action, cell, workflow, sourceKs, targetKs, extraFlags = append(extraFlags, "--initialize-target-sequences") } args = append(args, extraFlags...) - err = vc.VtctldClient.ExecuteCommand(args...) + output, err := vc.VtctldClient.ExecuteCommandWithOutput(args...) 
+ if output != "" { + fmt.Printf("Output of vtctldclient MoveTables %s for %s workflow:\n++++++\n%s\n--------\n", + action, workflow, output) + } if err != nil { t.Fatalf("MoveTables %s command failed with %+v\n", action, err) } @@ -1415,6 +1426,37 @@ func switchReads(t *testing.T, workflowType, cells, ksWorkflow string, reverse b require.NoError(t, err, fmt.Sprintf("%s Error: %s: %s", command, err, output)) } +func switchWrites(t *testing.T, workflowType, ksWorkflow string, reverse bool) { + if workflowType != binlogdatapb.VReplicationWorkflowType_MoveTables.String() && + workflowType != binlogdatapb.VReplicationWorkflowType_Reshard.String() { + require.FailNowf(t, "Invalid workflow type for SwitchTraffic, must be MoveTables or Reshard", + "workflow type specified: %s", workflowType) + } + command := "SwitchTraffic" + if reverse { + command = "ReverseTraffic" + } + const SwitchWritesTimeout = "91s" // max: 3 tablet picker 30s waits + 1 + // Use vtctldclient for MoveTables SwitchTraffic ~ 50% of the time. + // TODO: remove the command conditional below once I figure out why the + // vtctldclient ReverseTraffic command fails... 
+ if workflowType == binlogdatapb.VReplicationWorkflowType_MoveTables.String() && time.Now().Second()%2 == 0 && + command == "SwitchTraffic" { + parts := strings.Split(ksWorkflow, ".") + require.Equal(t, 2, len(parts)) + moveTablesAction(t, command, defaultCellName, parts[1], sourceKs, parts[0], "", "--timeout="+SwitchWritesTimeout, "--tablet-types=primary") + return + } + output, err := vc.VtctlClient.ExecuteCommandWithOutput(workflowType, "--", "--tablet_types=primary", + "--timeout="+SwitchWritesTimeout, "--initialize-target-sequences", command, ksWorkflow) + if output != "" { + fmt.Printf("Output of switching writes for %s:\n++++++\n%s\n--------\n", ksWorkflow, output) + } + // printSwitchWritesExtraDebug is useful when debugging failures in Switch writes due to corner cases/races + _ = printSwitchWritesExtraDebug + require.NoError(t, err, fmt.Sprintf("Switch writes Error: %s: %s", err, output)) +} + func switchWritesDryRun(t *testing.T, workflowType, ksWorkflow string, dryRunResults []string) { if workflowType != binlogdatapb.VReplicationWorkflowType_name[int32(binlogdatapb.VReplicationWorkflowType_MoveTables)] && workflowType != binlogdatapb.VReplicationWorkflowType_name[int32(binlogdatapb.VReplicationWorkflowType_Reshard)] { @@ -1457,27 +1499,6 @@ func printSwitchWritesExtraDebug(t *testing.T, ksWorkflow, msg string) { } } -func switchWrites(t *testing.T, workflowType, ksWorkflow string, reverse bool) { - if workflowType != binlogdatapb.VReplicationWorkflowType_name[int32(binlogdatapb.VReplicationWorkflowType_MoveTables)] && - workflowType != binlogdatapb.VReplicationWorkflowType_name[int32(binlogdatapb.VReplicationWorkflowType_Reshard)] { - require.FailNowf(t, "Invalid workflow type for SwitchTraffic, must be MoveTables or Reshard", - "workflow type specified: %s", workflowType) - } - command := "SwitchTraffic" - if reverse { - command = "ReverseTraffic" - } - const SwitchWritesTimeout = "91s" // max: 3 tablet picker 30s waits + 1 - output, err := 
vc.VtctlClient.ExecuteCommandWithOutput(workflowType, "--", "--tablet_types=primary", - "--timeout="+SwitchWritesTimeout, command, ksWorkflow) - if output != "" { - fmt.Printf("Output of switching writes for %s:\n++++++\n%s\n--------\n", ksWorkflow, output) - } - // printSwitchWritesExtraDebug is useful when debugging failures in Switch writes due to corner cases/races - _ = printSwitchWritesExtraDebug - require.NoError(t, err, fmt.Sprintf("Switch writes Error: %s: %s", err, output)) -} - // generateInnoDBRowHistory generates at least maxSourceTrxHistory rollback segment entries. // This allows us to confirm two behaviors: // 1. MoveTables blocks on starting its first copy phase until we rollback diff --git a/go/vt/vtctl/workflow/server.go b/go/vt/vtctl/workflow/server.go index 202460422fe..58e5668068f 100644 --- a/go/vt/vtctl/workflow/server.go +++ b/go/vt/vtctl/workflow/server.go @@ -2267,7 +2267,7 @@ func (s *Server) WorkflowSwitchTraffic(ctx context.Context, req *vtctldatapb.Wor dryRunResults = append(dryRunResults, *rdDryRunResults...) } if hasPrimary { - if _, wrDryRunResults, err = s.switchWrites(ctx, req, ts, timeout, false, req.EnableReverseReplication); err != nil { + if _, wrDryRunResults, err = s.switchWrites(ctx, req, ts, timeout, false); err != nil { return nil, err } } @@ -2409,7 +2409,7 @@ func (s *Server) switchReads(ctx context.Context, req *vtctldatapb.WorkflowSwitc // switchWrites is a generic way of migrating write traffic for a workflow. 
func (s *Server) switchWrites(ctx context.Context, req *vtctldatapb.WorkflowSwitchTrafficRequest, ts *trafficSwitcher, timeout time.Duration, - cancel, reverseReplication bool) (journalID int64, dryRunResults *[]string, err error) { + cancel bool) (journalID int64, dryRunResults *[]string, err error) { var sw iswitcher if req.DryRun { @@ -2428,7 +2428,7 @@ func (s *Server) switchWrites(ctx context.Context, req *vtctldatapb.WorkflowSwit return 0, nil, err } - if reverseReplication { + if req.EnableReverseReplication { err := areTabletsAvailableToStreamFrom(ctx, req, ts, ts.TargetKeyspaceName(), ts.TargetShards()) if err != nil { return 0, nil, err @@ -2596,7 +2596,7 @@ func (s *Server) switchWrites(ctx context.Context, req *vtctldatapb.WorkflowSwit ts.Logger().Errorf("finalize failed: %v", err) return 0, nil, err } - if reverseReplication { + if req.EnableReverseReplication { if err := sw.startReverseVReplication(ctx); err != nil { ts.Logger().Errorf("startReverseVReplication failed: %v", err) return 0, nil, err From 36f90596ad881c2324cad57b43511f19e138ab3a Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Tue, 1 Aug 2023 01:58:52 -0400 Subject: [PATCH 22/56] Tweaks and bug fixes Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/server.go | 18 +++++++++--------- go/vt/vtctl/workflow/traffic_switcher.go | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/go/vt/vtctl/workflow/server.go b/go/vt/vtctl/workflow/server.go index 58e5668068f..35ffde8422d 100644 --- a/go/vt/vtctl/workflow/server.go +++ b/go/vt/vtctl/workflow/server.go @@ -2310,25 +2310,25 @@ func (s *Server) WorkflowSwitchTraffic(ctx context.Context, req *vtctldatapb.Wor // switchReads is a generic way of switching read traffic for a workflow. 
func (s *Server) switchReads(ctx context.Context, req *vtctldatapb.WorkflowSwitchTrafficRequest, ts *trafficSwitcher, state *State, timeout time.Duration, cancel bool, direction TrafficSwitchDirection) (*[]string, error) { roTypesToSwitchStr := topoproto.MakeStringTypeCSV(req.TabletTypes) - var hasReplica, hasRdonly bool + var switchReplica, switchRdonly bool for _, roType := range req.TabletTypes { switch roType { case topodatapb.TabletType_REPLICA: - hasReplica = true + switchReplica = true case topodatapb.TabletType_RDONLY: - hasRdonly = true + switchRdonly = true } } log.Infof("Switching reads: %s.%s tablet types: %s, cells: %s, workflow state: %s", ts.targetKeyspace, ts.workflow, roTypesToSwitchStr, ts.optCells, state.String()) - if !hasReplica && !hasRdonly { + if !switchReplica && !switchRdonly { return nil, fmt.Errorf("tablet types must be REPLICA or RDONLY: %s", roTypesToSwitchStr) } if !ts.isPartialMigration { // shard level traffic switching is all or nothing - if direction == DirectionBackward && hasReplica && len(state.ReplicaCellsSwitched) == 0 { + if direction == DirectionBackward && switchReplica && len(state.ReplicaCellsSwitched) == 0 { return nil, fmt.Errorf("requesting reversal of read traffic for REPLICAs but REPLICA reads have not been switched") } - if direction == DirectionBackward && hasRdonly && len(state.RdonlyCellsSwitched) == 0 { + if direction == DirectionBackward && switchRdonly && len(state.RdonlyCellsSwitched) == 0 { return nil, fmt.Errorf("requesting reversal of SwitchReads for RDONLYs but RDONLY reads have not been switched") } } @@ -2342,14 +2342,14 @@ func (s *Server) switchReads(ctx context.Context, req *vtctldatapb.WorkflowSwitc // are updated for rdonly as well. Otherwise vitess will not know that the workflow has completed and will // incorrectly report that not all reads have been switched. User currently is forced to switch non-existent // rdonly tablets. 
- if hasReplica && !hasRdonly { + if switchReplica && !switchRdonly { var err error rdonlyTabletsExist, err := topotools.DoCellsHaveRdonlyTablets(ctx, s.ts, cells) if err != nil { return nil, err } - if rdonlyTabletsExist { - return nil, vterrors.New(vtrpcpb.Code_FAILED_PRECONDITION, "requesting reversal of SwitchReads for REPLICAs but RDONLY tablets also exist in the cells") + if !rdonlyTabletsExist { + req.TabletTypes = append(req.TabletTypes, topodatapb.TabletType_RDONLY) } } diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index a2a6891461a..86ad33ffd42 100644 --- a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -1146,7 +1146,7 @@ func (ts *trafficSwitcher) executeLockTablesOnSource(ctx context.Context) error return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary found for source shard %s", source.GetShard()) } tablet := primary.Tablet - _, err := ts.ws.tmc.ExecuteFetchAsDba(ctx, tablet, true, &tabletmanagerdatapb.ExecuteFetchAsDbaRequest{ + _, err := ts.ws.tmc.ExecuteFetchAsDba(ctx, tablet, false, &tabletmanagerdatapb.ExecuteFetchAsDbaRequest{ Query: []byte(lockStmt), MaxRows: uint64(1), DisableBinlogs: false, From 17227ff126876539e57b3affee2b3be0cc21ef50 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Tue, 1 Aug 2023 11:29:17 -0400 Subject: [PATCH 23/56] Fix vtctldclient bug and enable usage for ReverseTraffic Signed-off-by: Matt Lord --- .../vreplication/vreplication_test.go | 8 +++---- go/vt/vtctl/workflow/traffic_switcher.go | 23 +++++++++++++++---- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/go/test/endtoend/vreplication/vreplication_test.go b/go/test/endtoend/vreplication/vreplication_test.go index 1777173d390..ba56b0bcc68 100644 --- a/go/test/endtoend/vreplication/vreplication_test.go +++ b/go/test/endtoend/vreplication/vreplication_test.go @@ -1407,8 +1407,8 @@ func switchReadsDryRun(t *testing.T, workflowType, cells, ksWorkflow string, dry 
} func switchReads(t *testing.T, workflowType, cells, ksWorkflow string, reverse bool) { - if workflowType != binlogdatapb.VReplicationWorkflowType_name[int32(binlogdatapb.VReplicationWorkflowType_MoveTables)] && - workflowType != binlogdatapb.VReplicationWorkflowType_name[int32(binlogdatapb.VReplicationWorkflowType_Reshard)] { + if workflowType != binlogdatapb.VReplicationWorkflowType_MoveTables.String() && + workflowType != binlogdatapb.VReplicationWorkflowType_Reshard.String() { require.FailNowf(t, "Invalid workflow type for SwitchTraffic, must be MoveTables or Reshard", "workflow type specified: %s", workflowType) } @@ -1438,8 +1438,6 @@ func switchWrites(t *testing.T, workflowType, ksWorkflow string, reverse bool) { } const SwitchWritesTimeout = "91s" // max: 3 tablet picker 30s waits + 1 // Use vtctldclient for MoveTables SwitchTraffic ~ 50% of the time. - // TODO: remove the command conditional below once I figure out why the - // vtctldclient ReverseTraffic command fails... if workflowType == binlogdatapb.VReplicationWorkflowType_MoveTables.String() && time.Now().Second()%2 == 0 && command == "SwitchTraffic" { parts := strings.Split(ksWorkflow, ".") @@ -1450,7 +1448,7 @@ func switchWrites(t *testing.T, workflowType, ksWorkflow string, reverse bool) { output, err := vc.VtctlClient.ExecuteCommandWithOutput(workflowType, "--", "--tablet_types=primary", "--timeout="+SwitchWritesTimeout, "--initialize-target-sequences", command, ksWorkflow) if output != "" { - fmt.Printf("Output of switching writes for %s:\n++++++\n%s\n--------\n", ksWorkflow, output) + fmt.Printf("Output of switching writes with vtctlclient for %s:\n++++++\n%s\n--------\n", ksWorkflow, output) } // printSwitchWritesExtraDebug is useful when debugging failures in Switch writes due to corner cases/races _ = printSwitchWritesExtraDebug diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index 86ad33ffd42..626798cdee2 100644 --- 
a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -842,12 +842,25 @@ func (ts *trafficSwitcher) createReverseVReplication(ctx context.Context) error if ts.SourceKeyspaceSchema().Keyspace.Sharded { vtable, ok := ts.SourceKeyspaceSchema().Tables[rule.Match] if !ok { - return fmt.Errorf("table %s not found in vschema1", rule.Match) + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "table %s not found in vschema", rule.Match) + } + // We currently assume the primary vindex is the best way to filter rows + // for the table, which may not always be true. + // TODO: handle more of these edge cases explicitly, e.g. sequence tables. + switch vtable.Type { + case vindexes.TypeReference: + // For reference tables there are no vindexes and thus no filter to apply. + default: + // For non-reference tables we return an error if there's no primary + // vindex as it's not clear what to do. + if len(vtable.ColumnVindexes) > 0 && len(vtable.ColumnVindexes[0].Columns) > 0 { + inKeyrange = fmt.Sprintf(" where in_keyrange(%s, '%s.%s', '%s')", sqlparser.String(vtable.ColumnVindexes[0].Columns[0]), + ts.SourceKeyspaceName(), vtable.ColumnVindexes[0].Name, key.KeyRangeString(source.GetShard().KeyRange)) + } else { + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary vindex found for the %s table in the %s keyspace", + vtable.Name.String(), ts.SourceKeyspaceName()) + } } - // TODO(sougou): handle degenerate cases like sequence, etc. - // We currently assume the primary vindex is the best way to filter, which may not be true. 
- inKeyrange = fmt.Sprintf(" where in_keyrange(%s, '%s.%s', '%s')", sqlparser.String(vtable.ColumnVindexes[0].Columns[0]), - ts.SourceKeyspaceName(), vtable.ColumnVindexes[0].Name, key.KeyRangeString(source.GetShard().KeyRange)) } filter = fmt.Sprintf("select * from %s%s", sqlescape.EscapeID(rule.Match), inKeyrange) } From 026bc4b67fe30d2a3208424ac0240140109ccca8 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Tue, 1 Aug 2023 14:11:17 -0400 Subject: [PATCH 24/56] Adjust VDiff2 e2e tests The rows inserted before resume need to have a PK greater than the rows inserted in the initial customer schema, which is now high in order to test the target sequence initialization. Signed-off-by: Matt Lord --- go/test/endtoend/vreplication/vdiff2_test.go | 12 ++++++------ go/test/endtoend/vreplication/vreplication_test.go | 3 +-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/go/test/endtoend/vreplication/vdiff2_test.go b/go/test/endtoend/vreplication/vdiff2_test.go index e42cd6b73fe..2011f8613c8 100644 --- a/go/test/endtoend/vreplication/vdiff2_test.go +++ b/go/test/endtoend/vreplication/vdiff2_test.go @@ -65,9 +65,9 @@ var testCases = []*testCase{ tabletBaseID: 200, tables: "customer,Lead,Lead-1", autoRetryError: true, - retryInsert: `insert into customer(cid, name, typ) values(91234, 'Testy McTester', 'soho')`, + retryInsert: `insert into customer(cid, name, typ) values(1991234, 'Testy McTester', 'soho')`, resume: true, - resumeInsert: `insert into customer(cid, name, typ) values(92234, 'Testy McTester (redux)', 'enterprise')`, + resumeInsert: `insert into customer(cid, name, typ) values(1992234, 'Testy McTester (redux)', 'enterprise')`, testCLIErrors: true, // test for errors in the simplest workflow testCLICreateWait: true, // test wait on create feature against simplest workflow }, @@ -81,9 +81,9 @@ var testCases = []*testCase{ targetShards: "-40,40-a0,a0-", tabletBaseID: 400, autoRetryError: true, - retryInsert: `insert into customer(cid, name, typ) 
values(93234, 'Testy McTester Jr', 'enterprise'), (94234, 'Testy McTester II', 'enterprise')`, + retryInsert: `insert into customer(cid, name, typ) values(1993234, 'Testy McTester Jr', 'enterprise'), (1993235, 'Testy McTester II', 'enterprise')`, resume: true, - resumeInsert: `insert into customer(cid, name, typ) values(95234, 'Testy McTester III', 'enterprise')`, + resumeInsert: `insert into customer(cid, name, typ) values(1994234, 'Testy McTester III', 'enterprise')`, stop: true, }, { @@ -96,9 +96,9 @@ var testCases = []*testCase{ targetShards: "0", tabletBaseID: 700, autoRetryError: true, - retryInsert: `insert into customer(cid, name, typ) values(96234, 'Testy McTester IV', 'enterprise')`, + retryInsert: `insert into customer(cid, name, typ) values(1995234, 'Testy McTester IV', 'enterprise')`, resume: true, - resumeInsert: `insert into customer(cid, name, typ) values(97234, 'Testy McTester V', 'enterprise'), (98234, 'Testy McTester VI', 'enterprise')`, + resumeInsert: `insert into customer(cid, name, typ) values(1996234, 'Testy McTester V', 'enterprise'), (1996235, 'Testy McTester VI', 'enterprise')`, stop: true, }, } diff --git a/go/test/endtoend/vreplication/vreplication_test.go b/go/test/endtoend/vreplication/vreplication_test.go index ba56b0bcc68..e53970f4ba0 100644 --- a/go/test/endtoend/vreplication/vreplication_test.go +++ b/go/test/endtoend/vreplication/vreplication_test.go @@ -1438,8 +1438,7 @@ func switchWrites(t *testing.T, workflowType, ksWorkflow string, reverse bool) { } const SwitchWritesTimeout = "91s" // max: 3 tablet picker 30s waits + 1 // Use vtctldclient for MoveTables SwitchTraffic ~ 50% of the time. 
- if workflowType == binlogdatapb.VReplicationWorkflowType_MoveTables.String() && time.Now().Second()%2 == 0 && - command == "SwitchTraffic" { + if workflowType == binlogdatapb.VReplicationWorkflowType_MoveTables.String() && time.Now().Second()%2 == 0 { parts := strings.Split(ksWorkflow, ".") require.Equal(t, 2, len(parts)) moveTablesAction(t, command, defaultCellName, parts[1], sourceKs, parts[0], "", "--timeout="+SwitchWritesTimeout, "--tablet-types=primary") From dbf7d84be26aceddbd0cf4aac7992ad9b83f2da7 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Tue, 1 Aug 2023 19:14:13 -0400 Subject: [PATCH 25/56] Move keyspace search concurrency to errgroup Signed-off-by: Matt Lord --- go/vt/wrangler/traffic_switcher.go | 102 ++++++++++++----------------- 1 file changed, 42 insertions(+), 60 deletions(-) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 340988cce86..ef5da5f901c 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -27,6 +27,7 @@ import ( "time" "golang.org/x/exp/maps" + "golang.org/x/sync/errgroup" "vitess.io/vitess/go/json2" "vitess.io/vitess/go/sqlescape" @@ -2030,89 +2031,70 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s // Now we need to locate the backing sequence table(s) which will // be in another unsharded keyspace. 
- keyspaces, err := ts.TopoServer().GetKeyspaces(ctx) - if err != nil { - return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get keyspaces: %v", err) - } - log.Errorf("DEBUG: keyspaces: %+v", keyspaces) tableCount := len(sequencesByBackingTable) - tablesFound := 0 // Used to short circuit the search - ksCtx, ksCancel := context.WithCancel(ctx) // Used to cancel the goroutines - defer ksCancel() // Cancel all of the goroutines when we are done - ksErr := make(chan error) // Used if we encountered an error during the search - ksDone := make(chan struct{}) // The search has completed - ksWg := sync.WaitGroup{} // All of the goroutines finished - ksFunc := func(ks string) { // The function used to search each keyspace - defer ksWg.Done() - kvs, kerr := ts.TopoServer().GetVSchema(ctx, ks) + tablesFound := 0 // Used to short circuit the search + searchCompleted := make(chan struct{}) // The search has completed + searchKeyspace := func(sctx context.Context, keyspace string) error { // The function used to search each keyspace + kvs, kerr := ts.TopoServer().GetVSchema(ctx, keyspace) if kerr != nil { - select { - case ksErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", - ks, kerr): - default: - } - return + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", + keyspace, kerr) } if kvs == nil || kvs.Sharded || kvs.Tables == nil || len(kvs.Tables) == 0 { - return - } - select { - case <-ctx.Done(): - return - case <-ksCtx.Done(): - return - default: + return nil } for tableName, tableDef := range kvs.Tables { + select { + case <-sctx.Done(): + return sctx.Err() + case <-searchCompleted: + return nil + default: + } smMu.Lock() // Prevent concurrent access to the map sm := sequencesByBackingTable[tableName] if tableDef != nil && tableDef.Type == vindexes.TypeSequence && sm != nil && tableName == sm.backingTableName { tablesFound++ // This is also protected by the mutex - 
sm.backingTableKeyspace = ks - sm.backingTableDBName = "vt_" + ks + sm.backingTableKeyspace = keyspace + sm.backingTableDBName = "vt_" + keyspace if tablesFound == tableCount { // Short circuit the search log.Errorf("DEBUG: all sequence backing tables found: %+v", sequencesByBackingTable) smMu.Unlock() - ksDone <- struct{}{} - return + select { + case <-searchCompleted: // It's already been closed + return nil + default: + close(searchCompleted) // Mark the search as completed + return nil + } } } smMu.Unlock() - select { - case <-ctx.Done(): - return - case <-ksCtx.Done(): // The search has been cancelled - return - default: - } } + return nil } - + keyspaces, err := ts.TopoServer().GetKeyspaces(ctx) + if err != nil { + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get keyspaces: %v", err) + } + log.Errorf("DEBUG: keyspaces: %+v", keyspaces) + eg, gctx := errgroup.WithContext(ctx) for _, keyspace := range keyspaces { - ksWg.Add(1) - go ksFunc(keyspace) + keyspace := keyspace // https://golang.org/doc/faq#closures_and_goroutines + eg.Go(func() error { + return searchKeyspace(gctx, keyspace) + }) + } + if err := eg.Wait(); err != nil { + return nil, err } - // Wait for all the goroutines to finish on their own, which means we - // probably did not find all of the tables. 
- go func() { - ksWg.Wait() - close(ksDone) - close(ksErr) - }() - select { - case <-ksDone: - if tablesFound != tableCount { - return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to locate all of the backing sequence tables being used; sequence table metadata: %+v", - sequencesByBackingTable) - } - return sequencesByBackingTable, nil - case ksErr := <-ksErr: - return nil, ksErr - case <-ctx.Done(): - return nil, ctx.Err() + if tablesFound != tableCount { + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to locate all of the backing sequence tables being used; sequence table metadata: %+v", + sequencesByBackingTable) } + return sequencesByBackingTable, nil } // initializeTargetSequences initializes the backing sequence tables From c40e6cc7b6c362677ea4cdae5407ffe4a214068e Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Tue, 1 Aug 2023 20:37:36 -0400 Subject: [PATCH 26/56] Move init function to errgroup Signed-off-by: Matt Lord --- go/vt/wrangler/traffic_switcher.go | 125 +++++++++-------------------- 1 file changed, 40 insertions(+), 85 deletions(-) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index ef5da5f901c..d68c328db30 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -2079,14 +2079,14 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get keyspaces: %v", err) } log.Errorf("DEBUG: keyspaces: %+v", keyspaces) - eg, gctx := errgroup.WithContext(ctx) + searchGroup, gctx := errgroup.WithContext(ctx) for _, keyspace := range keyspaces { keyspace := keyspace // https://golang.org/doc/faq#closures_and_goroutines - eg.Go(func() error { + searchGroup.Go(func() error { return searchKeyspace(gctx, keyspace) }) } - if err := eg.Wait(); err != nil { + if err := searchGroup.Wait(); err != nil { return nil, err } @@ -2108,13 +2108,7 @@ func (ts *trafficSwitcher) 
getTargetSequenceMetadata(ctx context.Context) (map[s // be sure that it does not provide a value that is less than the current max. func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error { log.Error("DEBUG: initializeTargetSequences") - initCtx, initCancel := context.WithCancel(ctx) // Used to cancel the goroutines - defer initCancel() // Cancel all of the goroutines when we are done - initErr := make(chan error) // Used if we encounter an error - initDone := make(chan struct{}) // The initialization has completed - initWg := sync.WaitGroup{} // All of the goroutines finished - initFunc := func(sequenceTableName string, sequenceMetadata *sequenceMetadata) { - defer initWg.Done() + initSequenceTable := func(ictx context.Context, sequenceTableName string, sequenceMetadata *sequenceMetadata) error { log.Errorf("DEBUG: sequence table: %v, sequenceMetadata: %+v", sequenceTableName, sequenceMetadata) // Now we need to run this query on the target shards in order // to get the max value and set the next id for the sequence to @@ -2134,15 +2128,15 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen ) log.Errorf("DEBUG: query: %s on shard: %s/%s", query.Query, ts.targetKeyspace, target.GetShard().ShardName()) - qr, err := ts.wr.ExecuteFetchAsApp(ctx, primary.GetAlias(), true, query.Query, 1) - if err != nil || len(qr.Rows) != 1 { + qr, terr := ts.wr.ExecuteFetchAsApp(ictx, primary.GetAlias(), true, query.Query, 1) + if terr != nil || len(qr.Rows) != 1 { return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the max used sequence value for target table %s.%s in order to initialize the backing sequence table: %v", - ts.targetKeyspace, sequenceMetadata.usingTableName, err) + ts.targetKeyspace, sequenceMetadata.usingTableName, terr) } - maxID, err := sqltypes.Proto3ToResult(qr).Rows[0][0].ToInt64() - if err != nil { + maxID, terr := 
sqltypes.Proto3ToResult(qr).Rows[0][0].ToInt64() + if terr != nil { return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the max used sequence value for target table %s.%s in order to initialize the backing sequence table: %v", - ts.targetKeyspace, sequenceMetadata.usingTableName, err) + ts.targetKeyspace, sequenceMetadata.usingTableName, terr) } log.Errorf("DEBUG: max ID seen on shard %s: %d", target.GetShard().ShardName(), maxID) srMu.Lock() @@ -2151,17 +2145,11 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen return nil }) if ierr != nil { - select { - case initErr <- ierr: - default: - } - return + return ierr } select { - case <-ctx.Done(): - return - case <-initCtx.Done(): // The initialization work has been cancelled - return + case <-ictx.Done(): + return ictx.Err() default: } // Sort the values to find the max value across all shards. @@ -2171,29 +2159,19 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen nextVal := shardResults[len(shardResults)-1] + 1 // Now we need to update the sequence table, if needed, in order to // ensure that that the next value it provides is > the current max. 
- sequenceShard, ierr := ts.wr.TopoServer().GetOnlyShard(ctx, sequenceMetadata.backingTableKeyspace) + sequenceShard, ierr := ts.wr.TopoServer().GetOnlyShard(ictx, sequenceMetadata.backingTableKeyspace) if ierr != nil || sequenceShard == nil || sequenceShard.PrimaryAlias == nil { - select { - case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", - sequenceMetadata.backingTableKeyspace, ierr): - default: - } - return + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", + sequenceMetadata.backingTableKeyspace, ierr) } - sequenceTablet, ierr := ts.wr.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) + sequenceTablet, ierr := ts.wr.TopoServer().GetTablet(ictx, sequenceShard.PrimaryAlias) if ierr != nil || sequenceTablet == nil { - select { - case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", - sequenceMetadata.backingTableKeyspace, ierr): - default: - } - return + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", + sequenceMetadata.backingTableKeyspace, ierr) } select { - case <-ctx.Done(): - return - case <-initCtx.Done(): // The initialization work has been cancelled - return + case <-ictx.Done(): + return ictx.Err() default: } if sequenceTablet.DbNameOverride != "" { @@ -2209,68 +2187,45 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen log.Errorf("DEBUG: query: %s", query.Query) // Now execute this on the primary tablet of the unsharded keyspace // housing the backing table. 
- qr, ierr := ts.wr.ExecuteFetchAsApp(ctx, sequenceShard.PrimaryAlias, true, query.Query, 1) + qr, ierr := ts.wr.ExecuteFetchAsApp(ictx, sequenceShard.PrimaryAlias, true, query.Query, 1) if ierr != nil { - select { - case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to initialize the backing sequence table %s.%s: %v", - sequenceMetadata.backingTableDBName, sequenceMetadata.backingTableName, ierr): - default: - } - return + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to initialize the backing sequence table %s.%s: %v", + sequenceMetadata.backingTableDBName, sequenceMetadata.backingTableName, ierr) } // If we actually updated the backing sequence table, then we need // to tell the primary tablet managing the sequence to refresh/reset // its cache for the table. if qr.RowsAffected == 0 { - return + return nil } select { - case <-ctx.Done(): - return - case <-initCtx.Done(): // The initialization work has been cancelled - return + case <-ictx.Done(): + return ictx.Err() default: } ts.Logger().Infof("Resetting sequence cache for backing table %s on shard %s/%s using tablet %s", sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias) - ti, ierr := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) + ti, ierr := ts.TopoServer().GetTablet(ictx, sequenceShard.PrimaryAlias) if ierr != nil { - select { - case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get primary tablet for keyspace %s: %v", - sequenceMetadata.backingTableKeyspace, ierr): - default: - } - return + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get primary tablet for keyspace %s: %v", + sequenceMetadata.backingTableKeyspace, ierr) } - ierr = ts.TabletManagerClient().ResetSequences(ctx, ti.Tablet, []string{sequenceMetadata.backingTableName}) + ierr = ts.TabletManagerClient().ResetSequences(ictx, ti.Tablet, []string{sequenceMetadata.backingTableName}) if ierr != nil { - select { - case initErr 
<- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset the sequence cache for backing table %s on shard %s/%s using tablet %s: %v", - sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, ierr): - default: - } - return + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset the sequence cache for backing table %s on shard %s/%s using tablet %s: %v", + sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, ierr) } + return nil } + initGroup, gctx := errgroup.WithContext(ctx) for sequenceTableName, sequenceMetadata := range sequencesByBackingTable { - initWg.Add(1) - go initFunc(sequenceTableName, sequenceMetadata) - } - go func() { - initWg.Wait() - close(initDone) - close(initErr) - }() - - select { - case <-initDone: // We completed the work w/o errors - return nil - case err := <-initErr: - return err - case <-ctx.Done(): - return ctx.Err() + sequenceTableName, sequenceMetadata := sequenceTableName, sequenceMetadata // https://golang.org/doc/faq#closures_and_goroutines + initGroup.Go(func() error { + return initSequenceTable(gctx, sequenceTableName, sequenceMetadata) + }) } + return initGroup.Wait() } func (ts *trafficSwitcher) mustResetSequences(ctx context.Context) (bool, error) { From c0e5b9749739af04927233944ec728540c377dfd Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Tue, 1 Aug 2023 20:47:52 -0400 Subject: [PATCH 27/56] Move vtctldclient impl to errgroup Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/traffic_switcher.go | 225 +++++++++-------------- 1 file changed, 85 insertions(+), 140 deletions(-) diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index 626798cdee2..e7ce11be0db 100644 --- a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -26,6 +26,7 @@ import ( "time" "golang.org/x/exp/maps" + "golang.org/x/sync/errgroup" 
"vitess.io/vitess/go/json2" "vitess.io/vitess/go/sqlescape" @@ -1287,89 +1288,70 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s // Now we need to locate the backing sequence table(s) which will // be in another unsharded keyspace. - keyspaces, err := ts.TopoServer().GetKeyspaces(ctx) - if err != nil { - return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get keyspaces: %v", err) - } - log.Errorf("DEBUG: keyspaces: %+v", keyspaces) tableCount := len(sequencesByBackingTable) - tablesFound := 0 // Used to short circuit the search - ksCtx, ksCancel := context.WithCancel(ctx) // Used to cancel the goroutines - defer ksCancel() // Cancel all of the goroutines when we are done - ksErr := make(chan error) // Used if we encountered an error during the search - ksDone := make(chan struct{}) // The search has completed - ksWg := sync.WaitGroup{} // All of the goroutines finished - ksFunc := func(ks string) { // The function used to search each keyspace - defer ksWg.Done() - kvs, kerr := ts.TopoServer().GetVSchema(ctx, ks) + tablesFound := 0 // Used to short circuit the search + searchCompleted := make(chan struct{}) // The search has completed + searchKeyspace := func(sctx context.Context, keyspace string) error { // The function used to search each keyspace + kvs, kerr := ts.TopoServer().GetVSchema(ctx, keyspace) if kerr != nil { - select { - case ksErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", - ks, kerr): - default: - } - return + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", + keyspace, kerr) } if kvs == nil || kvs.Sharded || kvs.Tables == nil || len(kvs.Tables) == 0 { - return - } - select { - case <-ctx.Done(): - return - case <-ksCtx.Done(): - return - default: + return nil } for tableName, tableDef := range kvs.Tables { + select { + case <-sctx.Done(): + return sctx.Err() + case <-searchCompleted: + return nil + default: + } 
smMu.Lock() // Prevent concurrent access to the map sm := sequencesByBackingTable[tableName] if tableDef != nil && tableDef.Type == vindexes.TypeSequence && sm != nil && tableName == sm.backingTableName { tablesFound++ // This is also protected by the mutex - sm.backingTableKeyspace = ks - sm.backingTableDBName = "vt_" + ks + sm.backingTableKeyspace = keyspace + sm.backingTableDBName = "vt_" + keyspace if tablesFound == tableCount { // Short circuit the search log.Errorf("DEBUG: all sequence backing tables found: %+v", sequencesByBackingTable) smMu.Unlock() - ksDone <- struct{}{} - return + select { + case <-searchCompleted: // It's already been closed + return nil + default: + close(searchCompleted) // Mark the search as completed + return nil + } } } smMu.Unlock() - select { - case <-ctx.Done(): - return - case <-ksCtx.Done(): // The search has been cancelled - return - default: - } } + return nil } - + keyspaces, err := ts.TopoServer().GetKeyspaces(ctx) + if err != nil { + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get keyspaces: %v", err) + } + log.Errorf("DEBUG: keyspaces: %+v", keyspaces) + searchGroup, gctx := errgroup.WithContext(ctx) for _, keyspace := range keyspaces { - ksWg.Add(1) - go ksFunc(keyspace) + keyspace := keyspace // https://golang.org/doc/faq#closures_and_goroutines + searchGroup.Go(func() error { + return searchKeyspace(gctx, keyspace) + }) + } + if err := searchGroup.Wait(); err != nil { + return nil, err } - // Wait for all the goroutines to finish on their own, which means we - // probably did not find all of the tables. 
- go func() { - ksWg.Wait() - close(ksDone) - close(ksErr) - }() - select { - case <-ksDone: - if tablesFound != tableCount { - return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to locate all of the backing sequence tables being used; sequence table metadata: %+v", - sequencesByBackingTable) - } - return sequencesByBackingTable, nil - case ksErr := <-ksErr: - return nil, ksErr - case <-ctx.Done(): - return nil, ctx.Err() + if tablesFound != tableCount { + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to locate all of the backing sequence tables being used; sequence table metadata: %+v", + sequencesByBackingTable) } + return sequencesByBackingTable, nil } // initializeTargetSequences initializes the backing sequence tables @@ -1383,11 +1365,7 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s // be sure that it does not provide a value that is less than the current max. func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error { log.Error("DEBUG: initializeTargetSequences") - initErr := make(chan error) // Used if we encounter an error - initDone := make(chan struct{}) // The initialization has completed - initWg := sync.WaitGroup{} // All of the goroutines finished - initFunc := func(sequenceTableName string, sequenceMetadata *sequenceMetadata) { - defer initWg.Done() + initSequenceTable := func(ictx context.Context, sequenceTableName string, sequenceMetadata *sequenceMetadata) error { log.Errorf("DEBUG: sequence table: %v, sequenceMetadata: %+v", sequenceTableName, sequenceMetadata) // Now we need to run this query on the target shards in order // to get the max value and set the next id for the sequence to @@ -1407,18 +1385,18 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen ) log.Errorf("DEBUG: query: %s on shard: %s/%s", query.Query, ts.targetKeyspace, target.GetShard().ShardName()) - qr, ierr := 
ts.ws.tmc.ExecuteFetchAsApp(ctx, primary.Tablet, true, &tabletmanagerdatapb.ExecuteFetchAsAppRequest{ + qr, terr := ts.ws.tmc.ExecuteFetchAsApp(ictx, primary.Tablet, true, &tabletmanagerdatapb.ExecuteFetchAsAppRequest{ Query: []byte(query.Query), MaxRows: 1, }) - if ierr != nil || len(qr.Rows) != 1 { + if terr != nil || len(qr.Rows) != 1 { return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the max used sequence value for target table %s.%s in order to initialize the backing sequence table: %v", - ts.targetKeyspace, sequenceMetadata.usingTableName, ierr) + ts.targetKeyspace, sequenceMetadata.usingTableName, terr) } - maxID, ierr := sqltypes.Proto3ToResult(qr).Rows[0][0].ToInt64() - if ierr != nil { + maxID, terr := sqltypes.Proto3ToResult(qr).Rows[0][0].ToInt64() + if terr != nil { return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the max used sequence value for target table %s.%s in order to initialize the backing sequence table: %v", - ts.targetKeyspace, sequenceMetadata.usingTableName, ierr) + ts.targetKeyspace, sequenceMetadata.usingTableName, terr) } log.Errorf("DEBUG: max ID seen on shard %s: %d", target.GetShard().ShardName(), maxID) srMu.Lock() @@ -1427,15 +1405,11 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen return nil }) if ierr != nil { - select { - case initErr <- ierr: - default: - } - return + return ierr } select { - case <-ctx.Done(): - return + case <-ictx.Done(): + return ictx.Err() default: } // Sort the values to find the max value across all shards. @@ -1445,23 +1419,20 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen nextVal := shardResults[len(shardResults)-1] + 1 // Now we need to update the sequence table, if needed, in order to // ensure that that the next value it provides is > the current max. 
- sequenceShard, ierr := ts.TopoServer().GetOnlyShard(ctx, sequenceMetadata.backingTableKeyspace) + sequenceShard, ierr := ts.TopoServer().GetOnlyShard(ictx, sequenceMetadata.backingTableKeyspace) if ierr != nil || sequenceShard == nil || sequenceShard.PrimaryAlias == nil { - select { - case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", - sequenceMetadata.backingTableKeyspace, ierr): - default: - } - return + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", + sequenceMetadata.backingTableKeyspace, ierr) } - sequenceTablet, ierr := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) + sequenceTablet, ierr := ts.TopoServer().GetTablet(ictx, sequenceShard.PrimaryAlias) if ierr != nil || sequenceTablet == nil { - select { - case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", - sequenceMetadata.backingTableKeyspace, ierr): - default: - } - return + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for keyspace %s: %v", + sequenceMetadata.backingTableKeyspace, ierr) + } + select { + case <-ictx.Done(): + return ictx.Err() + default: } if sequenceTablet.DbNameOverride != "" { sequenceMetadata.backingTableDBName = sequenceTablet.DbNameOverride @@ -1476,79 +1447,53 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen log.Errorf("DEBUG: query: %s", query.Query) // Now execute this on the primary tablet of the unsharded keyspace // housing the backing table. 
- primaryTablet, ierr := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) + primaryTablet, ierr := ts.TopoServer().GetTablet(ictx, sequenceShard.PrimaryAlias) if ierr != nil { - select { - case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for %s.%s using alias %s: %v", - sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, ierr): - default: - } - return + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the primary tablet for %s.%s using alias %s: %v", + sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, ierr) } - qr, ierr := ts.ws.tmc.ExecuteFetchAsApp(ctx, primaryTablet.Tablet, true, &tabletmanagerdatapb.ExecuteFetchAsAppRequest{ + qr, ierr := ts.ws.tmc.ExecuteFetchAsApp(ictx, primaryTablet.Tablet, true, &tabletmanagerdatapb.ExecuteFetchAsAppRequest{ Query: []byte(query.Query), MaxRows: 1, }) - if ierr != nil { - select { - case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to initialize the backing sequence table %s.%s: %v", - sequenceMetadata.backingTableDBName, sequenceMetadata.backingTableName, ierr): - default: - } - return + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to initialize the backing sequence table %s.%s: %v", + sequenceMetadata.backingTableDBName, sequenceMetadata.backingTableName, ierr) } // If we actually updated the backing sequence table, then we need // to tell the primary tablet managing the sequence to refresh/reset // its cache for the table. 
if qr.RowsAffected == 0 { - return + return nil } select { - case <-ctx.Done(): - return + case <-ictx.Done(): + return ictx.Err() default: } ts.Logger().Infof("Resetting sequence cache for backing table %s on shard %s/%s using tablet %s", sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias) - ti, ierr := ts.TopoServer().GetTablet(ctx, sequenceShard.PrimaryAlias) + ti, ierr := ts.TopoServer().GetTablet(ictx, sequenceShard.PrimaryAlias) if ierr != nil { - select { - case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get primary tablet for keyspace %s: %v", - sequenceMetadata.backingTableKeyspace, ierr): - default: - } - return + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get primary tablet for keyspace %s: %v", + sequenceMetadata.backingTableKeyspace, ierr) } - ierr = ts.TabletManagerClient().ResetSequences(ctx, ti.Tablet, []string{sequenceMetadata.backingTableName}) + ierr = ts.TabletManagerClient().ResetSequences(ictx, ti.Tablet, []string{sequenceMetadata.backingTableName}) if ierr != nil { - select { - case initErr <- vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset the sequence cache for backing table %s on shard %s/%s using tablet %s: %v", - sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, ierr): - default: - } - return + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to reset the sequence cache for backing table %s on shard %s/%s using tablet %s: %v", + sequenceMetadata.backingTableName, sequenceShard.Keyspace(), sequenceShard.ShardName(), sequenceShard.PrimaryAlias, ierr) } + return nil } + initGroup, gctx := errgroup.WithContext(ctx) for sequenceTableName, sequenceMetadata := range sequencesByBackingTable { - initWg.Add(1) - go initFunc(sequenceTableName, sequenceMetadata) - } - go func() { - initWg.Wait() - close(initDone) - close(initErr) - }() - - select { - case <-initDone: // We 
completed the work w/o errors - return nil - case err := <-initErr: - return err - case <-ctx.Done(): - return ctx.Err() + sequenceTableName, sequenceMetadata := sequenceTableName, sequenceMetadata // https://golang.org/doc/faq#closures_and_goroutines + initGroup.Go(func() error { + return initSequenceTable(gctx, sequenceTableName, sequenceMetadata) + }) } + return initGroup.Wait() } func (ts *trafficSwitcher) mustResetSequences(ctx context.Context) (bool, error) { From 331d60feab09f5a417b60475db1d25461b6e04da Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Tue, 1 Aug 2023 21:13:46 -0400 Subject: [PATCH 28/56] Fix duh bug Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/switcher_dry_run.go | 2 +- go/vt/wrangler/switcher_dry_run.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/go/vt/vtctl/workflow/switcher_dry_run.go b/go/vt/vtctl/workflow/switcher_dry_run.go index c34534d7e77..9ebb43eb7e4 100644 --- a/go/vt/vtctl/workflow/switcher_dry_run.go +++ b/go/vt/vtctl/workflow/switcher_dry_run.go @@ -371,7 +371,7 @@ func (dr *switcherDryRun) resetSequences(ctx context.Context) error { func (dr *switcherDryRun) initializeTargetSequences(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error { sortedBackingTableNames := maps.Keys(sequencesByBackingTable) sort.Slice(sortedBackingTableNames, func(i, j int) bool { - return i < j + return sortedBackingTableNames[i] < sortedBackingTableNames[j] }) dr.drLog.Log(fmt.Sprintf("The following sequence backing tables used by tables being moved will be initialized: %s", strings.Join(sortedBackingTableNames, ","))) diff --git a/go/vt/wrangler/switcher_dry_run.go b/go/vt/wrangler/switcher_dry_run.go index e33f16ea9f6..69f7c79e56f 100644 --- a/go/vt/wrangler/switcher_dry_run.go +++ b/go/vt/wrangler/switcher_dry_run.go @@ -389,7 +389,7 @@ func (dr *switcherDryRun) resetSequences(ctx context.Context) error { func (dr *switcherDryRun) initializeTargetSequences(ctx context.Context, 
sequencesByBackingTable map[string]*sequenceMetadata) error { sortedBackingTableNames := maps.Keys(sequencesByBackingTable) sort.Slice(sortedBackingTableNames, func(i, j int) bool { - return i < j + return sortedBackingTableNames[i] < sortedBackingTableNames[j] }) dr.drLog.Log(fmt.Sprintf("The following sequence backing tables used by tables being moved will be initialized: %s", strings.Join(sortedBackingTableNames, ","))) From f52ac01c53105024fd06bc3c906658d17a8c54ef Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Tue, 1 Aug 2023 22:01:33 -0400 Subject: [PATCH 29/56] Use slices.Sort in dry run and remove DEBUG logs Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/switcher_dry_run.go | 5 ++--- go/vt/vtctl/workflow/traffic_switcher.go | 14 +------------- go/vt/wrangler/switcher_dry_run.go | 5 ++--- go/vt/wrangler/traffic_switcher.go | 10 ---------- 4 files changed, 5 insertions(+), 29 deletions(-) diff --git a/go/vt/vtctl/workflow/switcher_dry_run.go b/go/vt/vtctl/workflow/switcher_dry_run.go index 9ebb43eb7e4..eb068621510 100644 --- a/go/vt/vtctl/workflow/switcher_dry_run.go +++ b/go/vt/vtctl/workflow/switcher_dry_run.go @@ -24,6 +24,7 @@ import ( "time" "golang.org/x/exp/maps" + "golang.org/x/exp/slices" "vitess.io/vitess/go/mysql" @@ -370,9 +371,7 @@ func (dr *switcherDryRun) resetSequences(ctx context.Context) error { func (dr *switcherDryRun) initializeTargetSequences(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error { sortedBackingTableNames := maps.Keys(sequencesByBackingTable) - sort.Slice(sortedBackingTableNames, func(i, j int) bool { - return sortedBackingTableNames[i] < sortedBackingTableNames[j] - }) + slices.Sort(sortedBackingTableNames) dr.drLog.Log(fmt.Sprintf("The following sequence backing tables used by tables being moved will be initialized: %s", strings.Join(sortedBackingTableNames, ","))) return nil diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index 
e7ce11be0db..46a5c940c70 100644 --- a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -402,7 +402,6 @@ func (ts *trafficSwitcher) deleteRoutingRules(ctx context.Context) error { if err != nil { return err } - log.Errorf("DEBUG: rules: %v", rules) for _, table := range ts.Tables() { delete(rules, table) delete(rules, table+"@replica") @@ -414,7 +413,6 @@ func (ts *trafficSwitcher) deleteRoutingRules(ctx context.Context) error { delete(rules, ts.SourceKeyspaceName()+"."+table+"@replica") delete(rules, ts.SourceKeyspaceName()+"."+table+"@rdonly") } - log.Errorf("DEBUG: new rules: %v", rules) if err := topotools.SaveRoutingRules(ctx, ts.TopoServer(), rules); err != nil { return err } @@ -1160,7 +1158,7 @@ func (ts *trafficSwitcher) executeLockTablesOnSource(ctx context.Context) error return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary found for source shard %s", source.GetShard()) } tablet := primary.Tablet - _, err := ts.ws.tmc.ExecuteFetchAsDba(ctx, tablet, false, &tabletmanagerdatapb.ExecuteFetchAsDbaRequest{ + _, err := ts.ws.tmc.ExecuteFetchAsDba(ctx, tablet, true, &tabletmanagerdatapb.ExecuteFetchAsDbaRequest{ Query: []byte(lockStmt), MaxRows: uint64(1), DisableBinlogs: false, @@ -1219,7 +1217,6 @@ func (ts *trafficSwitcher) isSequenceParticipating(ctx context.Context) (bool, e // defined that use sequences for auto_increment generation then a nil // map will be returned. 
func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[string]*sequenceMetadata, error) { - log.Error("DEBUG: getTargetSequenceMetadata") vschema, err := ts.TopoServer().GetVSchema(ctx, ts.targetKeyspace) if err != nil { return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for target keyspace %s: %v", @@ -1265,7 +1262,6 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s if len(sequencesByBackingTable) == 0 { // Nothing to do return nil, nil } - log.Errorf("DEBUG: sequences: %+v", sequencesByBackingTable) select { case <-ctx.Done(): @@ -1316,7 +1312,6 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s sm.backingTableKeyspace = keyspace sm.backingTableDBName = "vt_" + keyspace if tablesFound == tableCount { // Short circuit the search - log.Errorf("DEBUG: all sequence backing tables found: %+v", sequencesByBackingTable) smMu.Unlock() select { case <-searchCompleted: // It's already been closed @@ -1335,7 +1330,6 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s if err != nil { return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get keyspaces: %v", err) } - log.Errorf("DEBUG: keyspaces: %+v", keyspaces) searchGroup, gctx := errgroup.WithContext(ctx) for _, keyspace := range keyspaces { keyspace := keyspace // https://golang.org/doc/faq#closures_and_goroutines @@ -1364,9 +1358,7 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s // the primary tablet serving the sequence to refresh/reset its cache to // be sure that it does not provide a value that is less than the current max. 
func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error { - log.Error("DEBUG: initializeTargetSequences") initSequenceTable := func(ictx context.Context, sequenceTableName string, sequenceMetadata *sequenceMetadata) error { - log.Errorf("DEBUG: sequence table: %v, sequenceMetadata: %+v", sequenceTableName, sequenceMetadata) // Now we need to run this query on the target shards in order // to get the max value and set the next id for the sequence to // a higher value. @@ -1383,8 +1375,6 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen sqlescape.EscapeID(sequenceMetadata.usingTableDBName), sqlescape.EscapeID(sequenceMetadata.usingTableName), ) - log.Errorf("DEBUG: query: %s on shard: %s/%s", - query.Query, ts.targetKeyspace, target.GetShard().ShardName()) qr, terr := ts.ws.tmc.ExecuteFetchAsApp(ictx, primary.Tablet, true, &tabletmanagerdatapb.ExecuteFetchAsAppRequest{ Query: []byte(query.Query), MaxRows: 1, @@ -1398,7 +1388,6 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the max used sequence value for target table %s.%s in order to initialize the backing sequence table: %v", ts.targetKeyspace, sequenceMetadata.usingTableName, terr) } - log.Errorf("DEBUG: max ID seen on shard %s: %d", target.GetShard().ShardName(), maxID) srMu.Lock() defer srMu.Unlock() shardResults = append(shardResults, maxID) @@ -1444,7 +1433,6 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen nextVal, nextVal, ) - log.Errorf("DEBUG: query: %s", query.Query) // Now execute this on the primary tablet of the unsharded keyspace // housing the backing table. 
primaryTablet, ierr := ts.TopoServer().GetTablet(ictx, sequenceShard.PrimaryAlias) diff --git a/go/vt/wrangler/switcher_dry_run.go b/go/vt/wrangler/switcher_dry_run.go index 69f7c79e56f..ddc30e06900 100644 --- a/go/vt/wrangler/switcher_dry_run.go +++ b/go/vt/wrangler/switcher_dry_run.go @@ -24,6 +24,7 @@ import ( "time" "golang.org/x/exp/maps" + "golang.org/x/exp/slices" "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/vt/vtctl/workflow" @@ -388,9 +389,7 @@ func (dr *switcherDryRun) resetSequences(ctx context.Context) error { func (dr *switcherDryRun) initializeTargetSequences(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error { sortedBackingTableNames := maps.Keys(sequencesByBackingTable) - sort.Slice(sortedBackingTableNames, func(i, j int) bool { - return sortedBackingTableNames[i] < sortedBackingTableNames[j] - }) + slices.Sort(sortedBackingTableNames) dr.drLog.Log(fmt.Sprintf("The following sequence backing tables used by tables being moved will be initialized: %s", strings.Join(sortedBackingTableNames, ","))) return nil diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index d68c328db30..26520874fbe 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -1962,7 +1962,6 @@ func (ts *trafficSwitcher) isSequenceParticipating(ctx context.Context) (bool, e // defined that use sequences for auto_increment generation then a nil // map will be returned. 
func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[string]*sequenceMetadata, error) { - log.Error("DEBUG: getTargetSequenceMetadata") vschema, err := ts.TopoServer().GetVSchema(ctx, ts.targetKeyspace) if err != nil { return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for target keyspace %s: %v", @@ -2008,7 +2007,6 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s if len(sequencesByBackingTable) == 0 { // Nothing to do return nil, nil } - log.Errorf("DEBUG: sequences: %+v", sequencesByBackingTable) select { case <-ctx.Done(): @@ -2059,7 +2057,6 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s sm.backingTableKeyspace = keyspace sm.backingTableDBName = "vt_" + keyspace if tablesFound == tableCount { // Short circuit the search - log.Errorf("DEBUG: all sequence backing tables found: %+v", sequencesByBackingTable) smMu.Unlock() select { case <-searchCompleted: // It's already been closed @@ -2078,7 +2075,6 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s if err != nil { return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get keyspaces: %v", err) } - log.Errorf("DEBUG: keyspaces: %+v", keyspaces) searchGroup, gctx := errgroup.WithContext(ctx) for _, keyspace := range keyspaces { keyspace := keyspace // https://golang.org/doc/faq#closures_and_goroutines @@ -2107,9 +2103,7 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s // the primary tablet serving the sequence to refresh/reset its cache to // be sure that it does not provide a value that is less than the current max. 
func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequencesByBackingTable map[string]*sequenceMetadata) error { - log.Error("DEBUG: initializeTargetSequences") initSequenceTable := func(ictx context.Context, sequenceTableName string, sequenceMetadata *sequenceMetadata) error { - log.Errorf("DEBUG: sequence table: %v, sequenceMetadata: %+v", sequenceTableName, sequenceMetadata) // Now we need to run this query on the target shards in order // to get the max value and set the next id for the sequence to // a higher value. @@ -2126,8 +2120,6 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen sqlescape.EscapeID(sequenceMetadata.usingTableDBName), sqlescape.EscapeID(sequenceMetadata.usingTableName), ) - log.Errorf("DEBUG: query: %s on shard: %s/%s", - query.Query, ts.targetKeyspace, target.GetShard().ShardName()) qr, terr := ts.wr.ExecuteFetchAsApp(ictx, primary.GetAlias(), true, query.Query, 1) if terr != nil || len(qr.Rows) != 1 { return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the max used sequence value for target table %s.%s in order to initialize the backing sequence table: %v", @@ -2138,7 +2130,6 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get the max used sequence value for target table %s.%s in order to initialize the backing sequence table: %v", ts.targetKeyspace, sequenceMetadata.usingTableName, terr) } - log.Errorf("DEBUG: max ID seen on shard %s: %d", target.GetShard().ShardName(), maxID) srMu.Lock() defer srMu.Unlock() shardResults = append(shardResults, maxID) @@ -2184,7 +2175,6 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen nextVal, nextVal, ) - log.Errorf("DEBUG: query: %s", query.Query) // Now execute this on the primary tablet of the unsharded keyspace // housing the backing table. 
qr, ierr := ts.wr.ExecuteFetchAsApp(ictx, sequenceShard.PrimaryAlias, true, query.Query, 1) From 4f428d742b760a112a737482a543b0816764661d Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Wed, 2 Aug 2023 16:15:06 -0400 Subject: [PATCH 30/56] WiP unit test work Signed-off-by: Matt Lord --- .../tabletconntest/fakequeryservice.go | 3 +- .../vttablet/tabletmanager/framework_test.go | 186 +++++---- .../tabletmanager/rpc_vreplication_test.go | 373 ++++++++++-------- 3 files changed, 337 insertions(+), 225 deletions(-) diff --git a/go/vt/vttablet/tabletconntest/fakequeryservice.go b/go/vt/vttablet/tabletconntest/fakequeryservice.go index 2be8e4eab54..0e089a1882e 100644 --- a/go/vt/vttablet/tabletconntest/fakequeryservice.go +++ b/go/vt/vttablet/tabletconntest/fakequeryservice.go @@ -20,6 +20,7 @@ import ( "context" "errors" "fmt" + "io" "testing" "vitess.io/vitess/go/vt/vttablet/queryservice" @@ -681,7 +682,7 @@ func (f *FakeQueryService) StreamHealth(ctx context.Context, callback func(*quer if shr == nil { shr = TestStreamHealthStreamHealthResponse } - if err := callback(shr); err != nil { + if err := callback(shr); err != nil && err != io.EOF { f.t.Logf("StreamHealth callback failed: %v", err) } return nil diff --git a/go/vt/vttablet/tabletmanager/framework_test.go b/go/vt/vttablet/tabletmanager/framework_test.go index 9d8f1c9a6ee..c9ef460b90a 100644 --- a/go/vt/vttablet/tabletmanager/framework_test.go +++ b/go/vt/vttablet/tabletmanager/framework_test.go @@ -26,6 +26,7 @@ import ( "github.com/stretchr/testify/require" + "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/mysql/fakesqldb" "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/vt/binlog/binlogplayer" @@ -47,46 +48,58 @@ import ( topodatapb "vitess.io/vitess/go/vt/proto/topodata" ) +const ( + gtidFlavor = "MySQL56" + gtidPosition = "16b1039f-22b6-11ed-b765-0a43f95f28a3:1-220" +) + func init() { tabletconn.RegisterDialer("grpc", func(tablet *topodatapb.Tablet, failFast grpcclient.FailFast) 
(queryservice.QueryService, error) { - return &tabletconntest.FakeQueryService{}, nil + return &tabletconntest.FakeQueryService{ + StreamHealthResponse: &querypb.StreamHealthResponse{ + Serving: true, + Target: &querypb.Target{ + Keyspace: tablet.Keyspace, + Shard: tablet.Shard, + TabletType: tablet.Type, + Cell: tablet.Alias.Cell, + }, + RealtimeStats: &querypb.RealtimeStats{}, + }, + }, nil }) } type testEnv struct { - mu sync.Mutex - ctx context.Context - vrengine *vreplication.Engine - vrdbClient *binlogplayer.MockDBClient - ts *topo.Server - cells []string - tablets map[int]*fakeTabletConn - mysqld *mysqlctl.FakeMysqlDaemon - tmc *fakeTMClient - dbName string - protoName string + mu sync.Mutex + ctx context.Context + ts *topo.Server + cells []string + mysqld *mysqlctl.FakeMysqlDaemon + tmc *fakeTMClient + dbName string + protoName string } -func newTestEnv(t *testing.T, keyspace string, shards []string) *testEnv { +func newTestEnv(t *testing.T, sourceKeyspace string, sourceShards []string) *testEnv { tenv := &testEnv{ - ctx: context.Background(), - vrdbClient: binlogplayer.NewMockDBClient(t), - tablets: make(map[int]*fakeTabletConn), - tmc: newFakeTMClient(), - cells: []string{"zone1"}, - dbName: "tmtestdb", - protoName: t.Name(), + ctx: context.Background(), + tmc: newFakeTMClient(), + cells: []string{"zone1"}, + dbName: "tmtestdb", + protoName: t.Name(), } tenv.mu.Lock() defer tenv.mu.Unlock() tenv.ts = memorytopo.NewServer(tenv.cells...) 
- tenv.tmc.keyspace = keyspace - tenv.tmc.shards = shards + tenv.tmc.sourceKeyspace = sourceKeyspace + tenv.tmc.sourceShards = sourceShards + tenv.tmc.schema = defaultSchema tabletconn.RegisterDialer(t.Name(), func(tablet *topodatapb.Tablet, failFast grpcclient.FailFast) (queryservice.QueryService, error) { tenv.mu.Lock() defer tenv.mu.Unlock() - if qs, ok := tenv.tablets[int(tablet.Alias.Uid)]; ok { + if qs, ok := tenv.tmc.tablets[int(tablet.Alias.Uid)]; ok { return qs, nil } return nil, fmt.Errorf("tablet %d not found", tablet.Alias.Uid) @@ -97,21 +110,10 @@ func newTestEnv(t *testing.T, keyspace string, shards []string) *testEnv { }) tmclienttest.SetProtocol(fmt.Sprintf("go.vt.vttablet.tabletmanager.framework_test_%s", t.Name()), tenv.protoName) - dbClientFactory := func() binlogplayer.DBClient { - return tenv.vrdbClient - } tenv.mysqld = mysqlctl.NewFakeMysqlDaemon(fakesqldb.New(t)) - tenv.vrengine = vreplication.NewTestEngine(tenv.ts, tenv.cells[0], tenv.mysqld, dbClientFactory, dbClientFactory, tenv.dbName, nil) - tenv.vrdbClient.ExpectRequest(fmt.Sprintf("select * from _vt.vreplication where db_name='%s'", tenv.dbName), &sqltypes.Result{}, nil) - tenv.vrengine.Open(tenv.ctx) - require.True(t, tenv.vrengine.IsOpen(), "vreplication engine was not open") - - tenv.tmc.tm = TabletManager{ - VREngine: tenv.vrengine, - DBConfigs: &dbconfigs.DBConfigs{ - DBName: tenv.dbName, - }, - } + var err error + tenv.mysqld.CurrentPrimaryPosition, err = mysql.ParsePosition(gtidFlavor, gtidPosition) + require.NoError(t, err) return tenv } @@ -119,14 +121,13 @@ func newTestEnv(t *testing.T, keyspace string, shards []string) *testEnv { func (tenv *testEnv) close() { tenv.mu.Lock() defer tenv.mu.Unlock() - tenv.vrengine.Close() tenv.ts.Close() } //-------------------------------------- // Tablets -func (tenv *testEnv) addTablet(id int, keyspace, shard string) *fakeTabletConn { +func (tenv *testEnv) addTablet(t *testing.T, id int, keyspace, shard string) *fakeTabletConn { 
tenv.mu.Lock() defer tenv.mu.Unlock() tablet := &topodatapb.Tablet{ @@ -136,7 +137,6 @@ func (tenv *testEnv) addTablet(id int, keyspace, shard string) *fakeTabletConn { }, Keyspace: keyspace, Shard: shard, - KeyRange: &topodatapb.KeyRange{}, Type: topodatapb.TabletType_PRIMARY, PortMap: map[string]int32{ tenv.protoName: int32(id), @@ -156,13 +156,34 @@ func (tenv *testEnv) addTablet(id int, keyspace, shard string) *fakeTabletConn { panic(err) } - tenv.tablets[id] = &fakeTabletConn{tablet: tablet} - return tenv.tablets[id] + tenv.tmc.tablets[id] = &fakeTabletConn{ + tablet: tablet, + vrdbClient: binlogplayer.NewMockDBClient(t), + } + + dbClientFactory := func() binlogplayer.DBClient { + return tenv.tmc.tablets[id].vrdbClient + } + tenv.tmc.tablets[id].vrengine = vreplication.NewTestEngine(tenv.ts, tenv.cells[0], tenv.mysqld, dbClientFactory, dbClientFactory, tenv.dbName, nil) + tenv.tmc.tablets[id].vrdbClient.ExpectRequest(fmt.Sprintf("select * from _vt.vreplication where db_name='%s'", tenv.dbName), &sqltypes.Result{}, nil) + tenv.tmc.tablets[id].vrengine.Open(tenv.ctx) + require.True(t, tenv.tmc.tablets[id].vrengine.IsOpen(), "vreplication engine was not open") + + tenv.tmc.tablets[id].tm = &TabletManager{ + VREngine: tenv.tmc.tablets[id].vrengine, + DBConfigs: &dbconfigs.DBConfigs{ + DBName: tenv.dbName, + }, + } + + return tenv.tmc.tablets[id] } func (tenv *testEnv) deleteTablet(tablet *topodatapb.Tablet) { tenv.mu.Lock() defer tenv.mu.Unlock() + tenv.tmc.tablets[int(tablet.Alias.Uid)].vrdbClient.Close() + tenv.tmc.tablets[int(tablet.Alias.Uid)].vrengine.Close() tenv.ts.DeleteTablet(tenv.ctx, tablet.Alias) // This is not automatically removed from shard replication, which results in log spam. topo.DeleteTabletReplicationData(tenv.ctx, tenv.ts, tablet) @@ -171,7 +192,10 @@ func (tenv *testEnv) deleteTablet(tablet *topodatapb.Tablet) { // fakeTabletConn implements the TabletConn and QueryService interfaces. 
type fakeTabletConn struct { queryservice.QueryService - tablet *topodatapb.Tablet + tablet *topodatapb.Tablet + tm *TabletManager + vrdbClient *binlogplayer.MockDBClient + vrengine *vreplication.Engine } // fakeTabletConn implements the QueryService interface. @@ -280,11 +304,6 @@ func (ftc *fakeTabletConn) VStreamResults(ctx context.Context, target *querypb.T return nil } -// fakeTabletConn implements the QueryService interface. -func (ftc *fakeTabletConn) StreamHealth(ctx context.Context, callback func(*querypb.StreamHealthResponse) error) error { - return nil -} - // fakeTabletConn implements the QueryService interface. func (ftc *fakeTabletConn) HandlePanic(err *error) { } @@ -332,20 +351,34 @@ func (ftc *fakeTabletConn) Close(ctx context.Context) error { return nil } +func (ftc *fakeTabletConn) StreamHealth(ctx context.Context, callback func(*querypb.StreamHealthResponse) error) error { + return callback(&querypb.StreamHealthResponse{ + Serving: true, + Target: &querypb.Target{ + Keyspace: ftc.tablet.Keyspace, + Shard: ftc.tablet.Shard, + TabletType: ftc.tablet.Type, + Cell: ftc.tablet.Alias.Cell, + }, + RealtimeStats: &querypb.RealtimeStats{}, + }) +} + //---------------------------------------------- // fakeTMClient type fakeTMClient struct { tmclient.TabletManagerClient - keyspace string - shards []string - tm TabletManager - schema *tabletmanagerdatapb.SchemaDefinition - vreQueries map[int]map[string]*querypb.QueryResult + sourceKeyspace string + sourceShards []string + tablets map[int]*fakeTabletConn + schema *tabletmanagerdatapb.SchemaDefinition + vreQueries map[int]map[string]*querypb.QueryResult } func newFakeTMClient() *fakeTMClient { return &fakeTMClient{ + tablets: make(map[int]*fakeTabletConn), vreQueries: make(map[int]map[string]*querypb.QueryResult), schema: &tabletmanagerdatapb.SchemaDefinition{}, } @@ -359,13 +392,9 @@ func (tmc *fakeTMClient) SetSchema(schema *tabletmanagerdatapb.SchemaDefinition) tmc.schema = schema } -// ExecuteFetchAsApp 
is is needed for the materializer's checkTZConversion function. func (tmc *fakeTMClient) ExecuteFetchAsApp(ctx context.Context, tablet *topodatapb.Tablet, usePool bool, req *tabletmanagerdatapb.ExecuteFetchAsAppRequest) (*querypb.QueryResult, error) { - return sqltypes.ResultToProto3( - sqltypes.MakeTestResult( - sqltypes.MakeTestFields("convert_tz", "varchar"), - "2023-07-14 09:05:01", - )), nil + // Reuse VReplicationExec + return tmc.VReplicationExec(ctx, tablet, string(req.Query)) } func (tmc *fakeTMClient) ExecuteFetchAsDba(ctx context.Context, tablet *topodatapb.Tablet, usePool bool, req *tabletmanagerdatapb.ExecuteFetchAsDbaRequest) (*querypb.QueryResult, error) { @@ -401,26 +430,31 @@ func (tmc *fakeTMClient) VReplicationExec(ctx context.Context, tablet *topodatap } func (tmc *fakeTMClient) CreateVReplicationWorkflow(ctx context.Context, tablet *topodatapb.Tablet, req *tabletmanagerdatapb.CreateVReplicationWorkflowRequest) (*tabletmanagerdatapb.CreateVReplicationWorkflowResponse, error) { - return tmc.tm.CreateVReplicationWorkflow(ctx, req) + return tmc.tablets[int(tablet.Alias.Uid)].tm.CreateVReplicationWorkflow(ctx, req) } func (tmc *fakeTMClient) ReadVReplicationWorkflow(ctx context.Context, tablet *topodatapb.Tablet, req *tabletmanagerdatapb.ReadVReplicationWorkflowRequest) (*tabletmanagerdatapb.ReadVReplicationWorkflowResponse, error) { resp := &tabletmanagerdatapb.ReadVReplicationWorkflowResponse{ - Workflow: req.Workflow, - Streams: make([]*tabletmanagerdatapb.ReadVReplicationWorkflowResponse_Stream, len(tmc.shards)), + Workflow: req.Workflow, + WorkflowType: binlogdatapb.VReplicationWorkflowType_MoveTables, + TabletTypes: []topodatapb.TabletType{topodatapb.TabletType_PRIMARY}, + Streams: make([]*tabletmanagerdatapb.ReadVReplicationWorkflowResponse_Stream, len(tmc.sourceShards)), + } + rules := make([]*binlogdatapb.Rule, len(defaultSchema.TableDefinitions)) + for i, table := range defaultSchema.TableDefinitions { + rules[i] = &binlogdatapb.Rule{ + 
Match: table.Name, + Filter: tablet.Shard, + } } - for i, shard := range tmc.shards { + for i, shard := range tmc.sourceShards { resp.Streams[i] = &tabletmanagerdatapb.ReadVReplicationWorkflowResponse_Stream{ Id: int32(i + 1), Bls: &binlogdatapb.BinlogSource{ - Keyspace: tmc.keyspace, + Keyspace: tmc.sourceKeyspace, Shard: shard, Filter: &binlogdatapb.Filter{ - Rules: []*binlogdatapb.Rule{ - { - Match: ".*", - }, - }, + Rules: rules, }, }, } @@ -428,3 +462,17 @@ func (tmc *fakeTMClient) ReadVReplicationWorkflow(ctx context.Context, tablet *t return resp, nil } + +func (tmc *fakeTMClient) PrimaryPosition(ctx context.Context, tablet *topodatapb.Tablet) (string, error) { + return fmt.Sprintf("%s/%s", gtidFlavor, gtidPosition), nil +} + +func (tmc *fakeTMClient) VReplicationWaitForPos(ctx context.Context, tablet *topodatapb.Tablet, id int32, pos string) error { + return nil +} + +func (tmc *fakeTMClient) ExecuteFetchAsAllPrivs(ctx context.Context, tablet *topodatapb.Tablet, req *tabletmanagerdatapb.ExecuteFetchAsAllPrivsRequest) (*querypb.QueryResult, error) { + return &querypb.QueryResult{ + RowsAffected: 1, + }, nil +} diff --git a/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go b/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go index 05708f924f0..280624470f6 100644 --- a/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go +++ b/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go @@ -24,16 +24,19 @@ import ( "github.com/stretchr/testify/require" + "vitess.io/vitess/go/sqlescape" "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/textutil" "vitess.io/vitess/go/vt/sidecardb" "vitess.io/vitess/go/vt/sqlparser" "vitess.io/vitess/go/vt/vtctl/workflow" + "vitess.io/vitess/go/vt/vtgate/vindexes" binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" querypb "vitess.io/vitess/go/vt/proto/query" tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata" topodatapb "vitess.io/vitess/go/vt/proto/topodata" + vschemapb 
"vitess.io/vitess/go/vt/proto/vschema" vtctldatapb "vitess.io/vitess/go/vt/proto/vtctldata" "vitess.io/vitess/go/vt/proto/vttime" ) @@ -43,6 +46,7 @@ const ( getWorkflow = "select id from _vt.vreplication where db_name='vt_%s' and workflow='%s'" checkForWorkflow = "select 1 from _vt.vreplication where db_name='vt_%s' and workflow='%s'" checkForFrozenWorkflow = "select 1 from _vt.vreplication where db_name='vt_%s' and message='FROZEN' and workflow_sub_type != 1" + freezeWorkflow = "update _vt.vreplication set message = 'FROZEN' where db_name='vt_%s' and workflow='%s'" checkForJournal = "/select val from _vt.resharding_journal where id=" getWorkflowStatus = "select id, workflow, source, pos, stop_pos, max_replication_lag, state, db_name, time_updated, transaction_timestamp, message, tags, workflow_type, workflow_sub_type from _vt.vreplication where workflow = '%s' and db_name = 'vt_%s'" getWorkflowState = "select pos, stop_pos, max_tps, max_replication_lag, state, workflow_type, workflow, workflow_sub_type, defer_secondary_keys from _vt.vreplication where id=1" @@ -63,8 +67,9 @@ const ( insertStreamsCreatedLog = "insert into _vt.vreplication_log(vrepl_id, type, state, message) values(1, 'Stream Created', '', '%s'" getVReplicationRecord = "select * from _vt.vreplication where id = 1" startWorkflow = "update _vt.vreplication set state='Running' where db_name='vt_%s' and workflow='%s'" - - position = "MySQL56/9d10e6ec-07a0-11ee-ae73-8e53f4cf3083:1-97" + stopForCutover = "update _vt.vreplication set state='Stopped', message='stopped for cutover' where id=1" + getMaxValForSequence = "select max(`id`) as maxval from `vt_%s`.`%s`" + initSequenceTable = "insert into %a.%a (id, next_id, cache) values (0, %d, 1000) on duplicate key update next_id = if(next_id < %d, %d, next_id)" ) var ( @@ -73,12 +78,13 @@ var ( TableDefinitions: []*tabletmanagerdatapb.TableDefinition{ { Name: "t1", - Columns: []string{"c1", "c2"}, - PrimaryKeyColumns: []string{"c1"}, - Fields: 
sqltypes.MakeTestFields("c1|c2", "int64|int64"), + Columns: []string{"id", "c2"}, + PrimaryKeyColumns: []string{"id"}, + Fields: sqltypes.MakeTestFields("id|c2", "int64|int64"), }, }, } + position = fmt.Sprintf("%s/%s", gtidFlavor, gtidPosition) ) // TestCreateVReplicationWorkflow tests the query generated @@ -92,22 +98,12 @@ func TestCreateVReplicationWorkflow(t *testing.T) { targetTabletUID := 300 shard := "0" wf := "testwf" - defaultSchema := &tabletmanagerdatapb.SchemaDefinition{ - TableDefinitions: []*tabletmanagerdatapb.TableDefinition{ - { - Name: "t1", - Columns: []string{"c1", "c2"}, - PrimaryKeyColumns: []string{"c1"}, - Fields: sqltypes.MakeTestFields("c1|c2", "int64|int64"), - }, - }, - } - tenv := newTestEnv(t, targetKs, []string{shard}) + tenv := newTestEnv(t, sourceKs, []string{shard}) defer tenv.close() - sourceTablet := tenv.addTablet(sourceTabletUID, sourceKs, shard) + sourceTablet := tenv.addTablet(t, sourceTabletUID, sourceKs, shard) defer tenv.deleteTablet(sourceTablet.tablet) - targetTablet := tenv.addTablet(targetTabletUID, targetKs, shard) + targetTablet := tenv.addTablet(t, targetTabletUID, targetKs, shard) defer tenv.deleteTablet(targetTablet.tablet) ws := workflow.NewServer(tenv.ts, tenv.tmc) @@ -136,15 +132,15 @@ func TestCreateVReplicationWorkflow(t *testing.T) { TableDefinitions: []*tabletmanagerdatapb.TableDefinition{ { Name: "t1", - Columns: []string{"c1", "c2"}, - PrimaryKeyColumns: []string{"c1"}, - Fields: sqltypes.MakeTestFields("c1|c2", "int64|int64"), + Columns: []string{"id", "c2"}, + PrimaryKeyColumns: []string{"id"}, + Fields: sqltypes.MakeTestFields("id|c2", "int64|int64"), }, { Name: "wut", - Columns: []string{"c1"}, - PrimaryKeyColumns: []string{"c1"}, - Fields: sqltypes.MakeTestFields("c1", "int64"), + Columns: []string{"id"}, + PrimaryKeyColumns: []string{"id"}, + Fields: sqltypes.MakeTestFields("id", "int64"), }, }, }, @@ -191,13 +187,13 @@ func TestCreateVReplicationWorkflow(t *testing.T) { } 
tenv.tmc.SetSchema(tt.schema) - tenv.vrdbClient.ExpectRequest("use _vt", &sqltypes.Result{}, nil) + tenv.tmc.tablets[targetTabletUID].vrdbClient.ExpectRequest("use _vt", &sqltypes.Result{}, nil) // This is our expected query, which will also short circuit // the test with an error as at this point we've tested what // we wanted to test. - tenv.vrdbClient.ExpectRequest(tt.query, nil, errShortCircuit) + tenv.tmc.tablets[targetTabletUID].vrdbClient.ExpectRequest(tt.query, nil, errShortCircuit) _, err := ws.MoveTablesCreate(ctx, tt.req) - tenv.vrdbClient.Wait() + tenv.tmc.tablets[targetTabletUID].vrdbClient.Wait() require.ErrorIs(t, err, errShortCircuit) }) } @@ -207,22 +203,61 @@ func TestCreateVReplicationWorkflow(t *testing.T) { // MoveTablesCreate request to ensure that the VReplication // stream(s) are created correctly. Followed by ensuring that // SwitchTraffic and ReverseTraffic work as expected. +// TODO: figure out why this fails the -race tests in GetWorkflows. func TestMoveTables(t *testing.T) { ctx := context.Background() sourceKs := "sourceks" sourceTabletUID := 200 targetKs := "targetks" - targetTabletUID := 300 - shard := "0" + targetShards := make(map[string]*fakeTabletConn) + sourceShard := "0" + globalKs := "global" + globalShard := "0" wf := "testwf" + tabletTypes := []topodatapb.TabletType{topodatapb.TabletType_PRIMARY} - tenv := newTestEnv(t, targetKs, []string{shard}) + tenv := newTestEnv(t, sourceKs, []string{sourceShard}) defer tenv.close() - sourceTablet := tenv.addTablet(sourceTabletUID, sourceKs, shard) + sourceTablet := tenv.addTablet(t, sourceTabletUID, sourceKs, sourceShard) defer tenv.deleteTablet(sourceTablet.tablet) - targetTablet := tenv.addTablet(targetTabletUID, targetKs, shard) - defer tenv.deleteTablet(targetTablet.tablet) + + targetShards["-80"] = tenv.addTablet(t, 300, targetKs, "-80") + defer tenv.deleteTablet(targetShards["-80"].tablet) + targetShards["80-"] = tenv.addTablet(t, 310, targetKs, "80-") + defer 
tenv.deleteTablet(targetShards["80-"].tablet) + + globalTablet := tenv.addTablet(t, 500, globalKs, globalShard) + defer tenv.deleteTablet(globalTablet.tablet) + + tenv.ts.SaveVSchema(ctx, globalKs, &vschemapb.Keyspace{ + Sharded: false, + Tables: map[string]*vschemapb.Table{ + "t1_seq": { + Type: vindexes.TypeSequence, + }, + }, + }) + tenv.ts.SaveVSchema(ctx, targetKs, &vschemapb.Keyspace{ + Sharded: true, + Vindexes: map[string]*vschemapb.Vindex{ + "hash": { + Type: "hash", + }, + }, + Tables: map[string]*vschemapb.Table{ + "t1": { + ColumnVindexes: []*vschemapb.ColumnVindex{{ + Column: "id", + Name: "hash", + }}, + AutoIncrement: &vschemapb.AutoIncrement{ + Column: "id", + Sequence: "t1_seq", + }, + }, + }, + }) ws := workflow.NewServer(tenv.ts, tenv.tmc) @@ -234,146 +269,174 @@ func TestMoveTables(t *testing.T) { "character_set_name|collation_name|column_name|data_type|column_type|extra", "varchar|varchar|varchar|varchar|varchar|varchar", ), - "NULL|NULL|c1|bigint|bigint|", + "NULL|NULL|id|bigint|bigint|", "NULL|NULL|c2|bigint|bigint|", ) - req := &vtctldatapb.MoveTablesCreateRequest{ - SourceKeyspace: sourceKs, - TargetKeyspace: targetKs, - Workflow: wf, - Cells: tenv.cells, - AllTables: true, - AutoStart: true, - } - insert := fmt.Sprintf(`%s values ('%s', 'keyspace:\"%s\" shard:\"%s\" filter:{rules:{match:\"t1\" filter:\"select * from t1\"}}', '', 0, 0, '%s', '', now(), 0, 'Stopped', '%s', 1, 0, 0)`, - insertVReplicationPrefix, wf, sourceKs, shard, tenv.cells[0], tenv.dbName) - bls := fmt.Sprintf("keyspace:\"%s\" shard:\"%s\" filter:{rules:{match:\"t1\" filter:\"select * from t1\"}}", sourceKs, shard) + bls := fmt.Sprintf("keyspace:\"%s\" shard:\"%s\" filter:{rules:{match:\"t1\" filter:\"select * from t1\"}}", sourceKs, sourceShard) tenv.tmc.SetSchema(defaultSchema) - tenv.tmc.setVReplicationExecResults(targetTablet.tablet, fmt.Sprintf(checkForWorkflow, targetKs, wf), &sqltypes.Result{}) - tenv.tmc.setVReplicationExecResults(targetTablet.tablet, 
fmt.Sprintf(checkForFrozenWorkflow, targetKs), &sqltypes.Result{}) - tenv.tmc.setVReplicationExecResults(targetTablet.tablet, fmt.Sprintf(getWorkflow, targetKs, wf), - sqltypes.MakeTestResult( + tenv.tmc.setVReplicationExecResults(sourceTablet.tablet, checkForJournal, &sqltypes.Result{}) + + for _, ftc := range targetShards { + tenv.tmc.setVReplicationExecResults(ftc.tablet, fmt.Sprintf(checkForWorkflow, targetKs, wf), &sqltypes.Result{}) + tenv.tmc.setVReplicationExecResults(ftc.tablet, fmt.Sprintf(checkForFrozenWorkflow, targetKs), &sqltypes.Result{}) + tenv.tmc.setVReplicationExecResults(ftc.tablet, fmt.Sprintf(getWorkflow, targetKs, wf), + sqltypes.MakeTestResult( + sqltypes.MakeTestFields( + "id", + "int64", + ), + "1", + ), + ) + tenv.tmc.setVReplicationExecResults(ftc.tablet, getCopyState, &sqltypes.Result{}) + tenv.tmc.setVReplicationExecResults(ftc.tablet, fmt.Sprintf(getWorkflowStatus, wf, targetKs), + sqltypes.MakeTestResult( + sqltypes.MakeTestFields( + "id|workflow|source|pos|stop_pos|max_replication_log|state|db_name|time_updated|transaction_timestamp|message|tags|workflow_type|workflow_sub_type", + "int64|varchar|blob|varchar|varchar|int64|varchar|varchar|int64|int64|varchar|varchar|int64|int64", + ), + fmt.Sprintf("1|%s|%s|%s|NULL|0|running|vt_%s|1686577659|0|||1|0", wf, bls, position, targetKs), + ), + ) + tenv.tmc.setVReplicationExecResults(ftc.tablet, getLatestCopyState, &sqltypes.Result{}) + + ftc.vrdbClient.ExpectRequest("use _vt", &sqltypes.Result{}, nil) + insert := fmt.Sprintf(`%s values ('%s', 'keyspace:\"%s\" shard:\"%s\" filter:{rules:{match:\"t1\" filter:\"select * from t1 where in_keyrange(id, \'%s.hash\', \'%s\')\"}}', '', 0, 0, '%s', 'primary', now(), 0, 'Stopped', '%s', 1, 0, 0)`, + insertVReplicationPrefix, wf, sourceKs, sourceShard, targetKs, ftc.tablet.Shard, tenv.cells[0], tenv.dbName) + ftc.vrdbClient.ExpectRequest(insert, &sqltypes.Result{InsertID: 1}, nil) + ftc.vrdbClient.ExpectRequest(getAutoIncrementStep, 
&sqltypes.Result{}, nil) + ftc.vrdbClient.ExpectRequest(getVReplicationRecord, + sqltypes.MakeTestResult( + sqltypes.MakeTestFields( + "id|source", + "int64|varchar", + ), + fmt.Sprintf("1|%s", bls), + ), nil) + ftc.vrdbClient.ExpectRequest(`update _vt.vreplication set message='Picked source tablet: cell:\"zone1\" uid:200' where id=1`, &sqltypes.Result{}, nil) + ftc.vrdbClient.ExpectRequest(setSessionTZ, &sqltypes.Result{}, nil) + ftc.vrdbClient.ExpectRequest(setNames, &sqltypes.Result{}, nil) + ftc.vrdbClient.ExpectRequest(setSQLMode, &sqltypes.Result{}, nil) + ftc.vrdbClient.ExpectRequest(getSQLMode, sqltypes.MakeTestResult( + sqltypes.MakeTestFields("sql_mode", "varchar"), + sqlMode, + ), nil) + ftc.vrdbClient.ExpectRequest(getWorkflowState, sqltypes.MakeTestResult( + sqltypes.MakeTestFields( + "pos|stop_pos|max_tps|max_replication_lag|state|workflow_type|workflow|workflow_sub_type|defer_secondary_keys", + "varchar|varchar|int64|int64|varchar|int64|varchar|int64|int64", + ), + fmt.Sprintf("||0|0|Stopped|1|%s|0|0", wf), + ), nil) + ftc.vrdbClient.ExpectRequest(getNumCopyStateTable, sqltypes.MakeTestResult( sqltypes.MakeTestFields( - "id", + "count(distinct table_name)", "int64", ), "1", - ), - ) - tenv.tmc.setVReplicationExecResults(sourceTablet.tablet, checkForJournal, &sqltypes.Result{}) - tenv.tmc.setVReplicationExecResults(targetTablet.tablet, getCopyState, &sqltypes.Result{}) - tenv.tmc.setVReplicationExecResults(targetTablet.tablet, fmt.Sprintf(getWorkflowStatus, wf, targetKs), - sqltypes.MakeTestResult( + ), nil) + ftc.vrdbClient.ExpectRequest(setPermissiveSQLMode, &sqltypes.Result{}, nil) + ftc.vrdbClient.ExpectRequest(getFKChecks, sqltypes.MakeTestResult( sqltypes.MakeTestFields( - "id|workflow|source|pos|stop_pos|max_replication_log|state|db_name|time_updated|transaction_timestamp|message|tags|workflow_type|workflow_sub_type", - "int64|varchar|blob|varchar|varchar|int64|varchar|varchar|int64|int64|varchar|varchar|int64|int64", + "@@foreign_key_checks", 
+ "int64", ), - fmt.Sprintf("1|%s|%s|%s|NULL|0|running|vt_%s|1686577659|0|||1|0", wf, bls, position, targetKs), - ), - ) - tenv.tmc.setVReplicationExecResults(targetTablet.tablet, getLatestCopyState, &sqltypes.Result{}) - - tenv.vrdbClient.ExpectRequest("use _vt", &sqltypes.Result{}, nil) - tenv.vrdbClient.ExpectRequest(insert, &sqltypes.Result{InsertID: 1}, nil) - tenv.vrdbClient.ExpectRequest(getAutoIncrementStep, &sqltypes.Result{}, nil) - tenv.vrdbClient.ExpectRequest(getVReplicationRecord, - sqltypes.MakeTestResult( + "1", + ), nil) + ftc.vrdbClient.ExpectRequest(getWorkflowState, sqltypes.MakeTestResult( sqltypes.MakeTestFields( - "id|source", - "int64|varchar", + "pos|stop_pos|max_tps|max_replication_lag|state|workflow_type|workflow|workflow_sub_type|defer_secondary_keys", + "varchar|varchar|int64|int64|varchar|int64|varchar|int64|int64", ), - fmt.Sprintf("1|%s", bls), + fmt.Sprintf("||0|0|Stopped|1|%s|0|0", wf), ), nil) - tenv.vrdbClient.ExpectRequest(`update _vt.vreplication set message='Picked source tablet: cell:\"zone1\" uid:200' where id=1`, &sqltypes.Result{}, nil) - tenv.vrdbClient.ExpectRequest(setSessionTZ, &sqltypes.Result{}, nil) - tenv.vrdbClient.ExpectRequest(setNames, &sqltypes.Result{}, nil) - tenv.vrdbClient.ExpectRequest(setSQLMode, &sqltypes.Result{}, nil) - tenv.vrdbClient.ExpectRequest(getSQLMode, sqltypes.MakeTestResult( - sqltypes.MakeTestFields("sql_mode", "varchar"), - sqlMode, - ), nil) - tenv.vrdbClient.ExpectRequest(getWorkflowState, sqltypes.MakeTestResult( - sqltypes.MakeTestFields( - "pos|stop_pos|max_tps|max_replication_lag|state|workflow_type|workflow|workflow_sub_type|defer_secondary_keys", - "varchar|varchar|int64|int64|varchar|int64|varchar|int64|int64", - ), - fmt.Sprintf("||0|0|Stopped|1|%s|0|0", wf), - ), nil) - tenv.vrdbClient.ExpectRequest(getNumCopyStateTable, sqltypes.MakeTestResult( - sqltypes.MakeTestFields( - "count(distinct table_name)", - "int64", - ), - "1", - ), nil) - 
tenv.vrdbClient.ExpectRequest(setPermissiveSQLMode, &sqltypes.Result{}, nil) - tenv.vrdbClient.ExpectRequest(getFKChecks, sqltypes.MakeTestResult( - sqltypes.MakeTestFields( - "@@foreign_key_checks", - "int64", - ), - "1", - ), nil) - tenv.vrdbClient.ExpectRequest(getWorkflowState, sqltypes.MakeTestResult( - sqltypes.MakeTestFields( - "pos|stop_pos|max_tps|max_replication_lag|state|workflow_type|workflow|workflow_sub_type|defer_secondary_keys", - "varchar|varchar|int64|int64|varchar|int64|varchar|int64|int64", - ), - fmt.Sprintf("||0|0|Stopped|1|%s|0|0", wf), - ), nil) - tenv.vrdbClient.ExpectRequest(getNumCopyStateTable, sqltypes.MakeTestResult( - sqltypes.MakeTestFields( - "count(distinct table_name)", - "int64", - ), - "1", - ), nil) - tenv.vrdbClient.ExpectRequest(getBinlogRowImage, sqltypes.MakeTestResult( - sqltypes.MakeTestFields( - "@@binlog_row_image", - "varchar", - ), - "FULL", - ), nil) - tenv.vrdbClient.ExpectRequest(disableFKChecks, &sqltypes.Result{}, nil) - tenv.vrdbClient.ExpectRequest(setStrictSQLMode, &sqltypes.Result{}, nil) - - tenv.vrdbClient.ExpectRequest(fmt.Sprintf(insertStreamsCreatedLog, bls), &sqltypes.Result{}, nil) - tenv.tmc.setVReplicationExecResults(targetTablet.tablet, fmt.Sprintf(getWorkflow, targetKs, wf), - sqltypes.MakeTestResult( + ftc.vrdbClient.ExpectRequest(getNumCopyStateTable, sqltypes.MakeTestResult( sqltypes.MakeTestFields( - "id", + "count(distinct table_name)", "int64", ), "1", - ), + ), nil) + ftc.vrdbClient.ExpectRequest(getBinlogRowImage, sqltypes.MakeTestResult( + sqltypes.MakeTestFields( + "@@binlog_row_image", + "varchar", + ), + "FULL", + ), nil) + ftc.vrdbClient.ExpectRequest(disableFKChecks, &sqltypes.Result{}, nil) + ftc.vrdbClient.ExpectRequest(setStrictSQLMode, &sqltypes.Result{}, nil) + + ftc.vrdbClient.ExpectRequest(fmt.Sprintf(insertStreamsCreatedLog, bls), &sqltypes.Result{}, nil) + tenv.tmc.setVReplicationExecResults(ftc.tablet, fmt.Sprintf(getWorkflow, targetKs, wf), + sqltypes.MakeTestResult( + 
sqltypes.MakeTestFields( + "id", + "int64", + ), + "1", + ), + ) + tenv.tmc.setVReplicationExecResults(ftc.tablet, fmt.Sprintf(startWorkflow, targetKs, wf), &sqltypes.Result{}) + ftc.vrdbClient.ExpectRequest(fmt.Sprintf("use %s", sidecardb.DefaultName), &sqltypes.Result{}, nil) + + tenv.tmc.setVReplicationExecResults(ftc.tablet, stopForCutover, &sqltypes.Result{}) + tenv.tmc.setVReplicationExecResults(ftc.tablet, fmt.Sprintf(freezeWorkflow, targetKs, wf), &sqltypes.Result{}) + + tenv.tmc.setVReplicationExecResults(ftc.tablet, fmt.Sprintf(getMaxValForSequence, targetKs, "t1"), + sqltypes.MakeTestResult( + sqltypes.MakeTestFields( + "maxval", + "int64", + ), + "5", + ), + ) + } + + tenv.tmc.setVReplicationExecResults(globalTablet.tablet, sqlparser.BuildParsedQuery(initSequenceTable, sqlescape.EscapeID("vt_global"), sqlescape.EscapeID("t1_seq"), 6, 6, 6).Query, + &sqltypes.Result{RowsAffected: 0}, ) - tenv.tmc.setVReplicationExecResults(targetTablet.tablet, fmt.Sprintf(startWorkflow, targetKs, wf), &sqltypes.Result{}) - _, err := ws.MoveTablesCreate(ctx, req) - require.NoError(t, err) - tenv.vrdbClient.ExpectRequest(fmt.Sprintf("use %s", sidecardb.DefaultName), &sqltypes.Result{}, nil) - _, err = ws.WorkflowSwitchTraffic(ctx, &vtctldatapb.WorkflowSwitchTrafficRequest{ - Keyspace: req.TargetKeyspace, - Workflow: req.Workflow, - Cells: req.Cells, - TabletTypes: req.TabletTypes, - MaxReplicationLagAllowed: &vttime.Duration{Seconds: 922337203}, - Direction: int32(workflow.DirectionForward), + _, err := ws.MoveTablesCreate(ctx, &vtctldatapb.MoveTablesCreateRequest{ + SourceKeyspace: sourceKs, + TargetKeyspace: targetKs, + Workflow: wf, + TabletTypes: tabletTypes, + Cells: tenv.cells, + AllTables: true, + AutoStart: true, }) require.NoError(t, err) + _, err = ws.WorkflowSwitchTraffic(ctx, &vtctldatapb.WorkflowSwitchTrafficRequest{ - Keyspace: req.TargetKeyspace, - Workflow: req.Workflow, - Cells: req.Cells, - TabletTypes: req.TabletTypes, - MaxReplicationLagAllowed: 
&vttime.Duration{Seconds: 922337203}, - Direction: int32(workflow.DirectionBackward), + Keyspace: targetKs, + Workflow: wf, + Cells: tenv.cells, + TabletTypes: tabletTypes, + MaxReplicationLagAllowed: &vttime.Duration{Seconds: 922337203}, + EnableReverseReplication: true, + InitializeTargetSequences: true, + Direction: int32(workflow.DirectionForward), }) require.NoError(t, err) + + /* + // TODO: figure out why this fails with "one or more tables are already present in the denylist" + _, err = ws.WorkflowSwitchTraffic(ctx, &vtctldatapb.WorkflowSwitchTrafficRequest{ + Keyspace: targetKs, + Workflow: wf, + Cells: tenv.cells, + TabletTypes: tabletTypes, + MaxReplicationLagAllowed: &vttime.Duration{Seconds: 922337203}, + EnableReverseReplication: true, + Direction: int32(workflow.DirectionBackward), + }) + require.NoError(t, err) + */ } func TestUpdateVReplicationWorkflow(t *testing.T) { @@ -388,7 +451,7 @@ func TestUpdateVReplicationWorkflow(t *testing.T) { tenv := newTestEnv(t, keyspace, []string{shard}) defer tenv.close() - tablet := tenv.addTablet(tabletUID, keyspace, shard) + tablet := tenv.addTablet(t, tabletUID, keyspace, shard) defer tenv.deleteTablet(tablet.tablet) parsed := sqlparser.BuildParsedQuery(sqlSelectVReplicationWorkflowConfig, sidecardb.DefaultName, ":wf") @@ -500,17 +563,17 @@ func TestUpdateVReplicationWorkflow(t *testing.T) { tt.request.State = binlogdatapb.VReplicationWorkflowState_Stopped // These are the same for each RPC call. 
- tenv.vrdbClient.ExpectRequest(fmt.Sprintf("use %s", sidecardb.DefaultName), &sqltypes.Result{}, nil) - tenv.vrdbClient.ExpectRequest(selectQuery, selectRes, nil) - tenv.vrdbClient.ExpectRequest(fmt.Sprintf("use %s", sidecardb.DefaultName), &sqltypes.Result{}, nil) - tenv.vrdbClient.ExpectRequest(idQuery, idRes, nil) + tenv.tmc.tablets[tabletUID].vrdbClient.ExpectRequest(fmt.Sprintf("use %s", sidecardb.DefaultName), &sqltypes.Result{}, nil) + tenv.tmc.tablets[tabletUID].vrdbClient.ExpectRequest(selectQuery, selectRes, nil) + tenv.tmc.tablets[tabletUID].vrdbClient.ExpectRequest(fmt.Sprintf("use %s", sidecardb.DefaultName), &sqltypes.Result{}, nil) + tenv.tmc.tablets[tabletUID].vrdbClient.ExpectRequest(idQuery, idRes, nil) // This is our expected query, which will also short circuit // the test with an error as at this point we've tested what // we wanted to test. - tenv.vrdbClient.ExpectRequest(tt.query, &sqltypes.Result{RowsAffected: 1}, errShortCircuit) - _, err = tenv.tmc.tm.UpdateVReplicationWorkflow(ctx, tt.request) - tenv.vrdbClient.Wait() + tenv.tmc.tablets[tabletUID].vrdbClient.ExpectRequest(tt.query, &sqltypes.Result{RowsAffected: 1}, errShortCircuit) + _, err = tenv.tmc.tablets[tabletUID].tm.UpdateVReplicationWorkflow(ctx, tt.request) + tenv.tmc.tablets[tabletUID].vrdbClient.Wait() require.ErrorIs(t, err, errShortCircuit) }) } From 4226af4b4315ebab6e20b3e2ad3c1c02594caad8 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Thu, 3 Aug 2023 10:20:17 -0400 Subject: [PATCH 31/56] Address some review comments Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/traffic_switcher.go | 14 +++++++------- go/vt/wrangler/traffic_switcher.go | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index 75097cb30e9..94fe0a7ed6e 100644 --- a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -1203,7 +1203,7 @@ func (ts *trafficSwitcher) 
isSequenceParticipating(ctx context.Context) (bool, e if err != nil { return false, err } - if vschema == nil || vschema.Tables == nil { + if vschema == nil || len(vschema.Tables) == 0 { return false, nil } sequenceFound := false @@ -1230,7 +1230,7 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for target keyspace %s: %v", ts.targetKeyspace, err) } - if vschema == nil || vschema.Tables == nil || len(vschema.Tables) == 0 { // Nothing to do + if vschema == nil || len(vschema.Tables) == 0 { // Nothing to do return nil, nil } @@ -1256,13 +1256,13 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s // If the sequence table is fully qualified in the vschema then // we don't need to find it later. if strings.Contains(vs.AutoIncrement.Sequence, ".") { - parts := strings.Split(vs.AutoIncrement.Sequence, ".") - if len(parts) != 2 { + keyspace, tableName, found := strings.Cut(vs.AutoIncrement.Sequence, ".") + if !found { return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "invalid sequence table name %s defined in the %s keyspace", vs.AutoIncrement.Sequence, ts.targetKeyspace) } - sm.backingTableName = parts[1] - sm.backingTableKeyspace = parts[0] + sm.backingTableName = tableName + sm.backingTableKeyspace = keyspace } sequencesByBackingTable[sm.backingTableName] = sm } @@ -1301,7 +1301,7 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", keyspace, kerr) } - if kvs == nil || kvs.Sharded || kvs.Tables == nil || len(kvs.Tables) == 0 { + if kvs == nil || kvs.Sharded || len(kvs.Tables) == 0 { return nil } for tableName, tableDef := range kvs.Tables { diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 3a93fba7837..41995dc1f1b 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ 
b/go/vt/wrangler/traffic_switcher.go @@ -1947,7 +1947,7 @@ func (ts *trafficSwitcher) isSequenceParticipating(ctx context.Context) (bool, e if err != nil { return false, err } - if vschema == nil || vschema.Tables == nil || len(vschema.Tables) == 0 { + if vschema == nil || len(vschema.Tables) == 0 { return false, nil } sequenceFound := false @@ -1974,7 +1974,7 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for target keyspace %s: %v", ts.targetKeyspace, err) } - if vschema == nil || vschema.Tables == nil || len(vschema.Tables) == 0 { // Nothing to do + if vschema == nil || len(vschema.Tables) == 0 { // Nothing to do return nil, nil } @@ -2045,7 +2045,7 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", keyspace, kerr) } - if kvs == nil || kvs.Sharded || kvs.Tables == nil || len(kvs.Tables) == 0 { + if kvs == nil || kvs.Sharded || len(kvs.Tables) == 0 { return nil } for tableName, tableDef := range kvs.Tables { From 2e5822a0162f194fd9ea1dbd4843ce2651df24d1 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Thu, 3 Aug 2023 10:57:17 -0400 Subject: [PATCH 32/56] Address unit test race Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/server.go | 24 ++++++++++--------- .../vttablet/tabletmanager/framework_test.go | 9 +++---- .../tabletmanager/rpc_vreplication_test.go | 8 +++++-- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/go/vt/vtctl/workflow/server.go b/go/vt/vtctl/workflow/server.go index 35ffde8422d..d54be6995bf 100644 --- a/go/vt/vtctl/workflow/server.go +++ b/go/vt/vtctl/workflow/server.go @@ -467,8 +467,7 @@ func (s *Server) GetWorkflows(ctx context.Context, req *vtctldatapb.GetWorkflows Message: message, Tags: tagArray, } - workflow.WorkflowType = binlogdatapb.VReplicationWorkflowType_name[workflowType] - 
workflow.WorkflowSubType = binlogdatapb.VReplicationWorkflowSubType_name[workflowSubType] + stream.CopyStates, err = s.getWorkflowCopyStates(ctx, tablet, id) if err != nil { return err @@ -476,15 +475,6 @@ func (s *Server) GetWorkflows(ctx context.Context, req *vtctldatapb.GetWorkflows span.Annotate("num_copy_states", len(stream.CopyStates)) - switch { - case strings.Contains(strings.ToLower(stream.Message), "error"): - stream.State = binlogdatapb.VReplicationWorkflowState_Error.String() - case stream.State == binlogdatapb.VReplicationWorkflowState_Running.String() && len(stream.CopyStates) > 0: - stream.State = binlogdatapb.VReplicationWorkflowState_Copying.String() - case stream.State == binlogdatapb.VReplicationWorkflowState_Running.String() && int64(time.Now().Second())-timeUpdatedSeconds > 10: - stream.State = binlogdatapb.VReplicationWorkflowState_Lagging.String() - } - // At this point, we're going to start modifying the maps defined // outside this function, as well as fields on the passed-in Workflow // pointer. Since we're running concurrently, take the lock. 
@@ -494,6 +484,18 @@ func (s *Server) GetWorkflows(ctx context.Context, req *vtctldatapb.GetWorkflows m.Lock() defer m.Unlock() + workflow.WorkflowType = binlogdatapb.VReplicationWorkflowType_name[workflowType] + workflow.WorkflowSubType = binlogdatapb.VReplicationWorkflowSubType_name[workflowSubType] + + switch { + case strings.Contains(strings.ToLower(stream.Message), "error"): + stream.State = binlogdatapb.VReplicationWorkflowState_Error.String() + case stream.State == binlogdatapb.VReplicationWorkflowState_Running.String() && len(stream.CopyStates) > 0: + stream.State = binlogdatapb.VReplicationWorkflowState_Copying.String() + case stream.State == binlogdatapb.VReplicationWorkflowState_Running.String() && int64(time.Now().Second())-timeUpdatedSeconds > 10: + stream.State = binlogdatapb.VReplicationWorkflowState_Lagging.String() + } + shardStreamKey := fmt.Sprintf("%s/%s", tablet.Shard, tablet.AliasString()) shardStream, ok := workflow.ShardStreams[shardStreamKey] if !ok { diff --git a/go/vt/vttablet/tabletmanager/framework_test.go b/go/vt/vttablet/tabletmanager/framework_test.go index c9ef460b90a..9b66f1389c3 100644 --- a/go/vt/vttablet/tabletmanager/framework_test.go +++ b/go/vt/vttablet/tabletmanager/framework_test.go @@ -435,10 +435,11 @@ func (tmc *fakeTMClient) CreateVReplicationWorkflow(ctx context.Context, tablet func (tmc *fakeTMClient) ReadVReplicationWorkflow(ctx context.Context, tablet *topodatapb.Tablet, req *tabletmanagerdatapb.ReadVReplicationWorkflowRequest) (*tabletmanagerdatapb.ReadVReplicationWorkflowResponse, error) { resp := &tabletmanagerdatapb.ReadVReplicationWorkflowResponse{ - Workflow: req.Workflow, - WorkflowType: binlogdatapb.VReplicationWorkflowType_MoveTables, - TabletTypes: []topodatapb.TabletType{topodatapb.TabletType_PRIMARY}, - Streams: make([]*tabletmanagerdatapb.ReadVReplicationWorkflowResponse_Stream, len(tmc.sourceShards)), + Workflow: req.Workflow, + WorkflowSubType: binlogdatapb.VReplicationWorkflowSubType_None, + 
WorkflowType: binlogdatapb.VReplicationWorkflowType_MoveTables, + TabletTypes: []topodatapb.TabletType{topodatapb.TabletType_PRIMARY}, + Streams: make([]*tabletmanagerdatapb.ReadVReplicationWorkflowResponse_Stream, len(tmc.sourceShards)), } rules := make([]*binlogdatapb.Rule, len(defaultSchema.TableDefinitions)) for i, table := range defaultSchema.TableDefinitions { diff --git a/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go b/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go index 280624470f6..631d2603da2 100644 --- a/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go +++ b/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go @@ -19,6 +19,7 @@ package tabletmanager import ( "context" "fmt" + "math" "runtime/debug" "testing" @@ -392,12 +393,15 @@ func TestMoveTables(t *testing.T) { "maxval", "int64", ), - "5", + fmt.Sprintf("%d", ftc.tablet.Alias.Uid), // Use the tablet's UID as the max value ), ) } - tenv.tmc.setVReplicationExecResults(globalTablet.tablet, sqlparser.BuildParsedQuery(initSequenceTable, sqlescape.EscapeID("vt_global"), sqlescape.EscapeID("t1_seq"), 6, 6, 6).Query, + // We use the tablet's UID in the mocked results for the max value used on each target shard. 
+ nextSeqVal := int(math.Max(float64(targetShards["-80"].tablet.Alias.Uid), float64(targetShards["80-"].tablet.Alias.Uid))) + 1 + tenv.tmc.setVReplicationExecResults(globalTablet.tablet, + sqlparser.BuildParsedQuery(initSequenceTable, sqlescape.EscapeID(fmt.Sprintf("vt_%s", globalKs)), sqlescape.EscapeID("t1_seq"), nextSeqVal, nextSeqVal, nextSeqVal).Query, &sqltypes.Result{RowsAffected: 0}, ) From 2fed344f19d059753e9e785c57315f94b4d4292f Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Thu, 3 Aug 2023 17:58:40 -0400 Subject: [PATCH 33/56] Finish unit test todos Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/server.go | 3 +- .../tabletmanager/rpc_vreplication_test.go | 42 +++++++++++-------- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/go/vt/vtctl/workflow/server.go b/go/vt/vtctl/workflow/server.go index d54be6995bf..0dfb9ffedc6 100644 --- a/go/vt/vtctl/workflow/server.go +++ b/go/vt/vtctl/workflow/server.go @@ -327,7 +327,8 @@ func (s *Server) GetWorkflow(ctx context.Context, keyspace, workflow string) (*v return nil, err } if len(res.Workflows) != 1 { - return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "unexpected number of workflows returned; expected 1, got %d", len(res.Workflows)) + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "unexpected number of workflows returned for %s.%s; expected 1, got %d", + keyspace, workflow, len(res.Workflows)) } return res.Workflows[0], nil } diff --git a/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go b/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go index 631d2603da2..be6e3021610 100644 --- a/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go +++ b/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go @@ -204,7 +204,6 @@ func TestCreateVReplicationWorkflow(t *testing.T) { // MoveTablesCreate request to ensure that the VReplication // stream(s) are created correctly. Followed by ensuring that // SwitchTraffic and ReverseTraffic work as expected. 
-// TODO: figure out why this fails the -race tests in GetWorkflows. func TestMoveTables(t *testing.T) { ctx := context.Background() sourceKs := "sourceks" @@ -215,7 +214,11 @@ func TestMoveTables(t *testing.T) { globalKs := "global" globalShard := "0" wf := "testwf" - tabletTypes := []topodatapb.TabletType{topodatapb.TabletType_PRIMARY} + tabletTypes := []topodatapb.TabletType{ + topodatapb.TabletType_PRIMARY, + topodatapb.TabletType_REPLICA, + topodatapb.TabletType_RDONLY, + } tenv := newTestEnv(t, sourceKs, []string{sourceShard}) defer tenv.close() @@ -305,7 +308,7 @@ func TestMoveTables(t *testing.T) { tenv.tmc.setVReplicationExecResults(ftc.tablet, getLatestCopyState, &sqltypes.Result{}) ftc.vrdbClient.ExpectRequest("use _vt", &sqltypes.Result{}, nil) - insert := fmt.Sprintf(`%s values ('%s', 'keyspace:\"%s\" shard:\"%s\" filter:{rules:{match:\"t1\" filter:\"select * from t1 where in_keyrange(id, \'%s.hash\', \'%s\')\"}}', '', 0, 0, '%s', 'primary', now(), 0, 'Stopped', '%s', 1, 0, 0)`, + insert := fmt.Sprintf(`%s values ('%s', 'keyspace:\"%s\" shard:\"%s\" filter:{rules:{match:\"t1\" filter:\"select * from t1 where in_keyrange(id, \'%s.hash\', \'%s\')\"}}', '', 0, 0, '%s', 'primary,replica,rdonly', now(), 0, 'Stopped', '%s', 1, 0, 0)`, insertVReplicationPrefix, wf, sourceKs, sourceShard, targetKs, ftc.tablet.Shard, tenv.cells[0], tenv.dbName) ftc.vrdbClient.ExpectRequest(insert, &sqltypes.Result{InsertID: 1}, nil) ftc.vrdbClient.ExpectRequest(getAutoIncrementStep, &sqltypes.Result{}, nil) @@ -420,7 +423,6 @@ func TestMoveTables(t *testing.T) { Keyspace: targetKs, Workflow: wf, Cells: tenv.cells, - TabletTypes: tabletTypes, MaxReplicationLagAllowed: &vttime.Duration{Seconds: 922337203}, EnableReverseReplication: true, InitializeTargetSequences: true, @@ -428,19 +430,25 @@ func TestMoveTables(t *testing.T) { }) require.NoError(t, err) - /* - // TODO: figure out why this fails with "one or more tables are already present in the denylist" - _, err = 
ws.WorkflowSwitchTraffic(ctx, &vtctldatapb.WorkflowSwitchTrafficRequest{ - Keyspace: targetKs, - Workflow: wf, - Cells: tenv.cells, - TabletTypes: tabletTypes, - MaxReplicationLagAllowed: &vttime.Duration{Seconds: 922337203}, - EnableReverseReplication: true, - Direction: int32(workflow.DirectionBackward), - }) - require.NoError(t, err) - */ + tenv.tmc.setVReplicationExecResults(sourceTablet.tablet, fmt.Sprintf(getWorkflowStatus, workflow.ReverseWorkflowName(wf), sourceKs), + sqltypes.MakeTestResult( + sqltypes.MakeTestFields( + "id|workflow|source|pos|stop_pos|max_replication_log|state|db_name|time_updated|transaction_timestamp|message|tags|workflow_type|workflow_sub_type", + "int64|varchar|blob|varchar|varchar|int64|varchar|varchar|int64|int64|varchar|varchar|int64|int64", + ), + fmt.Sprintf("1|%s|%s|%s|NULL|0|running|vt_%s|1686577659|0|||1|0", workflow.ReverseWorkflowName(wf), bls, position, sourceKs), + ), + ) + + _, err = ws.WorkflowSwitchTraffic(ctx, &vtctldatapb.WorkflowSwitchTrafficRequest{ + Keyspace: targetKs, + Workflow: wf, + Cells: tenv.cells, + MaxReplicationLagAllowed: &vttime.Duration{Seconds: 922337203}, + EnableReverseReplication: true, + Direction: int32(workflow.DirectionBackward), + }) + require.NoError(t, err) } func TestUpdateVReplicationWorkflow(t *testing.T) { From 8a77582e61d65994eef44323bf77da9981737e4e Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Thu, 3 Aug 2023 18:04:10 -0400 Subject: [PATCH 34/56] use correct ctx Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/traffic_switcher.go | 2 +- go/vt/wrangler/traffic_switcher.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index 94fe0a7ed6e..b8ddb34012b 100644 --- a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -1296,7 +1296,7 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s tablesFound := 0 // Used to 
short circuit the search searchCompleted := make(chan struct{}) // The search has completed searchKeyspace := func(sctx context.Context, keyspace string) error { // The function used to search each keyspace - kvs, kerr := ts.TopoServer().GetVSchema(ctx, keyspace) + kvs, kerr := ts.TopoServer().GetVSchema(sctx, keyspace) if kerr != nil { return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", keyspace, kerr) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 41995dc1f1b..0ab04f4088f 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -2040,7 +2040,7 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s tablesFound := 0 // Used to short circuit the search searchCompleted := make(chan struct{}) // The search has completed searchKeyspace := func(sctx context.Context, keyspace string) error { // The function used to search each keyspace - kvs, kerr := ts.TopoServer().GetVSchema(ctx, keyspace) + kvs, kerr := ts.TopoServer().GetVSchema(sctx, keyspace) if kerr != nil { return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "failed to get vschema for keyspace %s: %v", keyspace, kerr) From 28af2ee10ccfbc79891c29471f9ed540e7b44766 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Fri, 4 Aug 2023 00:50:17 -0400 Subject: [PATCH 35/56] Implement a comment suggestion Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/traffic_switcher.go | 6 ++---- go/vt/wrangler/traffic_switcher.go | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index b8ddb34012b..a0b416ee623 100644 --- a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -1271,10 +1271,8 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return nil, nil } - select { - case <-ctx.Done(): - return nil, ctx.Err() - default: 
+ if err := ctx.Err(); err != nil { + return nil, err } // If all of the sequence tables were defined using qualified table diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 0ab04f4088f..47b09d73bcf 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -2015,10 +2015,8 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return nil, nil } - select { - case <-ctx.Done(): - return nil, ctx.Err() - default: + if err := ctx.Err(); err != nil { + return nil, err } // If all of the sequence tables were defined using qualified table From 2ca52b27cd3a68533f39cd37c0ae5e48cd83e529 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Fri, 4 Aug 2023 01:08:57 -0400 Subject: [PATCH 36/56] Make same switch to Cut in wrangler impl Signed-off-by: Matt Lord --- go/vt/wrangler/traffic_switcher.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 47b09d73bcf..743e8cf825f 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -2000,13 +2000,13 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s // If the sequence table is fully qualified in the vschema then // we don't need to find it later. 
if strings.Contains(vs.AutoIncrement.Sequence, ".") { - parts := strings.Split(vs.AutoIncrement.Sequence, ".") - if len(parts) != 2 { + keyspace, tableName, found := strings.Cut(vs.AutoIncrement.Sequence, ".") + if !found { return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "invalid sequence table name %s defined in the %s keyspace", vs.AutoIncrement.Sequence, ts.targetKeyspace) } - sm.backingTableName = parts[1] - sm.backingTableKeyspace = parts[0] + sm.backingTableName = tableName + sm.backingTableKeyspace = keyspace } sequencesByBackingTable[sm.backingTableName] = sm } From eb667fcfb464f3e158783d41b532d407723164f8 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Fri, 4 Aug 2023 15:50:20 -0400 Subject: [PATCH 37/56] Align vtctlclient help output with reality Signed-off-by: Matt Lord --- go/vt/vtctl/vtctl.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index 44a7fa2f680..5303ea2105a 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -443,7 +443,7 @@ var commands = []commandGroup{ { name: "MoveTables", method: commandMoveTables, - params: "[--source=] [--tables=] [--cells=] [--tablet_types=] [--all] [--exclude=] [--auto_start] [--stop_after_copy] [--defer-secondary-keys] [--on-ddl=] [--source_shards=] [--initialize-target-sequences] 'action must be one of the following: Create, Complete, Cancel, SwitchTraffic, ReverseTrafffic, Show, or Progress' ", + params: "[--source=] [--tables=] [--cells=] [--tablet_types=] [--all] [--exclude=] [--auto_start] [--stop_after_copy] [--defer-secondary-keys] [--on-ddl=] [--source_shards=] [--source_time_zone=] [--initialize-target-sequences] 'action must be one of the following: Create, Complete, Cancel, SwitchTraffic, ReverseTrafffic, Show, or Progress' ", help: `Move table(s) to another keyspace, table_specs is a list of tables or the tables section of the vschema for the target keyspace. 
Example: '{"t1":{"column_vindexes": [{"column": "id1", "name": "hash"}]}, "t2":{"column_vindexes": [{"column": "id2", "name": "hash"}]}}'. In the case of an unsharded target keyspace the vschema for each table may be empty. Example: '{"t1":{}, "t2":{}}'.`, }, { From a82d853018106465c348ccca494ff7835cb107cd Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Fri, 4 Aug 2023 15:55:50 -0400 Subject: [PATCH 38/56] Fix after merging main Signed-off-by: Matt Lord --- go/vt/vttablet/tabletmanager/framework_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go/vt/vttablet/tabletmanager/framework_test.go b/go/vt/vttablet/tabletmanager/framework_test.go index 9b66f1389c3..32d1c7019c2 100644 --- a/go/vt/vttablet/tabletmanager/framework_test.go +++ b/go/vt/vttablet/tabletmanager/framework_test.go @@ -26,8 +26,8 @@ import ( "github.com/stretchr/testify/require" - "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/mysql/fakesqldb" + "vitess.io/vitess/go/mysql/replication" "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/vt/binlog/binlogplayer" "vitess.io/vitess/go/vt/dbconfigs" @@ -112,7 +112,7 @@ func newTestEnv(t *testing.T, sourceKeyspace string, sourceShards []string) *tes tenv.mysqld = mysqlctl.NewFakeMysqlDaemon(fakesqldb.New(t)) var err error - tenv.mysqld.CurrentPrimaryPosition, err = mysql.ParsePosition(gtidFlavor, gtidPosition) + tenv.mysqld.CurrentPrimaryPosition, err = replication.ParsePosition(gtidFlavor, gtidPosition) require.NoError(t, err) return tenv From a17bef6f2fba081e6cb1baf436fb262ce9328e45 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Sat, 5 Aug 2023 16:10:41 -0400 Subject: [PATCH 39/56] Address another review comment Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/traffic_switcher.go | 108 +++++++++++++---------- go/vt/wrangler/traffic_switcher.go | 108 +++++++++++++---------- 2 files changed, 120 insertions(+), 96 deletions(-) diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index 
a0b416ee623..39b8d8e1d4f 100644 --- a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -1234,62 +1234,19 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return nil, nil } - targets := maps.Values(ts.Targets()) - if len(targets) == 0 || targets[0].GetPrimary() == nil { // This should never happen - return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary tablet found for target keyspace %s", ts.targetKeyspace) - } - targetDBName := targets[0].GetPrimary().DbName() - sequencesByBackingTable := make(map[string]*sequenceMetadata) - smMu := sync.Mutex{} - for _, table := range ts.Tables() { - vs, ok := vschema.Tables[table] - if !ok || vs == nil { - continue - } - if vs.AutoIncrement != nil && vs.AutoIncrement.Sequence != "" { - sm := &sequenceMetadata{ - backingTableName: vs.AutoIncrement.Sequence, - usingTableName: table, - usingTableDefinition: vs, - usingTableDBName: targetDBName, - } - // If the sequence table is fully qualified in the vschema then - // we don't need to find it later. - if strings.Contains(vs.AutoIncrement.Sequence, ".") { - keyspace, tableName, found := strings.Cut(vs.AutoIncrement.Sequence, ".") - if !found { - return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "invalid sequence table name %s defined in the %s keyspace", - vs.AutoIncrement.Sequence, ts.targetKeyspace) - } - sm.backingTableName = tableName - sm.backingTableKeyspace = keyspace - } - sequencesByBackingTable[sm.backingTableName] = sm - } - } - if len(sequencesByBackingTable) == 0 { // Nothing to do - return nil, nil - } - - if err := ctx.Err(); err != nil { + sequencesByBackingTable, backingTablesFound, err := ts.findSequenceUsageInKeyspace(ctx, vschema) + if err != nil { return nil, err } - // If all of the sequence tables were defined using qualified table - // names in the vschema, then we don't need to look for them. 
- mustSearch := false - for _, sm := range sequencesByBackingTable { - if sm.backingTableKeyspace == "" { - mustSearch = true - break - } - } - if !mustSearch { + // names then we don't need to search for them in the other keyspaces. + if backingTablesFound { return sequencesByBackingTable, nil } // Now we need to locate the backing sequence table(s) which will // be in another unsharded keyspace. + smMu := sync.Mutex{} tableCount := len(sequencesByBackingTable) tablesFound := 0 // Used to short circuit the search searchCompleted := make(chan struct{}) // The search has completed @@ -1354,6 +1311,61 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return sequencesByBackingTable, nil } +// findSequenceUsageInKeyspace searches the keyspace's vschema for usage +// of sequence tables. It returns a map of sequence metadata keyed by the +// backing sequence table name -- if any usage is found -- along with a +// boolean to indicate if all of the backing sequence tables were defined +// using qualified table names (so we know where they all live and don't +// need to go looking) along with an error if any is seen. +func (ts *trafficSwitcher) findSequenceUsageInKeyspace(ctx context.Context, vschema *vschemapb.Keyspace) (map[string]*sequenceMetadata, bool, error) { + // If all of the sequence tables were defined using qualified table + // names in the vschema, then we don't need to look for them. 
+ allFullyQualified := true + targets := maps.Values(ts.Targets()) + if len(targets) == 0 || targets[0].GetPrimary() == nil { // This should never happen + return nil, false, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary tablet found for target keyspace %s", ts.targetKeyspace) + } + targetDBName := targets[0].GetPrimary().DbName() + sequencesByBackingTable := make(map[string]*sequenceMetadata) + for _, table := range ts.Tables() { + vs, ok := vschema.Tables[table] + if !ok || vs == nil { + continue + } + if vs.AutoIncrement != nil && vs.AutoIncrement.Sequence != "" { + sm := &sequenceMetadata{ + backingTableName: vs.AutoIncrement.Sequence, + usingTableName: table, + usingTableDefinition: vs, + usingTableDBName: targetDBName, + } + // If the sequence table is fully qualified in the vschema then + // we don't need to find it later. + if strings.Contains(vs.AutoIncrement.Sequence, ".") { + keyspace, tableName, found := strings.Cut(vs.AutoIncrement.Sequence, ".") + if !found { + return nil, false, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "invalid sequence table name %s defined in the %s keyspace", + vs.AutoIncrement.Sequence, ts.targetKeyspace) + } + sm.backingTableName = tableName + sm.backingTableKeyspace = keyspace + } else { + allFullyQualified = false + } + sequencesByBackingTable[sm.backingTableName] = sm + } + } + if len(sequencesByBackingTable) == 0 { // Nothing to do + return nil, false, nil + } + + if err := ctx.Err(); err != nil { + return nil, false, err + } + + return sequencesByBackingTable, allFullyQualified, nil +} + // initializeTargetSequences initializes the backing sequence tables // using a map keyed by the backing sequence table name. 
// diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 743e8cf825f..ea6108efb9d 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -1978,62 +1978,19 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return nil, nil } - targets := maps.Values(ts.Targets()) - if len(targets) == 0 || targets[0].GetPrimary() == nil { // This should never happen - return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary tablet found for target keyspace %s", ts.targetKeyspace) - } - targetDBName := targets[0].GetPrimary().DbName() - sequencesByBackingTable := make(map[string]*sequenceMetadata) - smMu := sync.Mutex{} - for _, table := range ts.Tables() { - vs, ok := vschema.Tables[table] - if !ok || vs == nil { - continue - } - if vs.AutoIncrement != nil && vs.AutoIncrement.Sequence != "" { - sm := &sequenceMetadata{ - backingTableName: vs.AutoIncrement.Sequence, - usingTableName: table, - usingTableDefinition: vs, - usingTableDBName: targetDBName, - } - // If the sequence table is fully qualified in the vschema then - // we don't need to find it later. - if strings.Contains(vs.AutoIncrement.Sequence, ".") { - keyspace, tableName, found := strings.Cut(vs.AutoIncrement.Sequence, ".") - if !found { - return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "invalid sequence table name %s defined in the %s keyspace", - vs.AutoIncrement.Sequence, ts.targetKeyspace) - } - sm.backingTableName = tableName - sm.backingTableKeyspace = keyspace - } - sequencesByBackingTable[sm.backingTableName] = sm - } - } - if len(sequencesByBackingTable) == 0 { // Nothing to do - return nil, nil - } - - if err := ctx.Err(); err != nil { + sequencesByBackingTable, backingTablesFound, err := ts.findSequenceUsageInKeyspace(ctx, vschema) + if err != nil { return nil, err } - // If all of the sequence tables were defined using qualified table - // names in the vschema, then we don't need to look for them. 
- mustSearch := false - for _, sm := range sequencesByBackingTable { - if sm.backingTableKeyspace == "" { - mustSearch = true - break - } - } - if !mustSearch { + // names then we don't need to search for them in the other keyspaces. + if backingTablesFound { return sequencesByBackingTable, nil } // Now we need to locate the backing sequence table(s) which will // be in another unsharded keyspace. + smMu := sync.Mutex{} tableCount := len(sequencesByBackingTable) tablesFound := 0 // Used to short circuit the search searchCompleted := make(chan struct{}) // The search has completed @@ -2098,6 +2055,61 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return sequencesByBackingTable, nil } +// findSequenceUsageInKeyspace searches the keyspace's vschema for usage +// of sequence tables. It returns a map of sequence metadata keyed by the +// backing sequence table name -- if any usage is found -- along with a +// boolean to indicate if all of the backing sequence tables were defined +// using qualified table names (so we know where they all live and don't +// need to go looking) along with an error if any is seen. +func (ts *trafficSwitcher) findSequenceUsageInKeyspace(ctx context.Context, vschema *vschemapb.Keyspace) (map[string]*sequenceMetadata, bool, error) { + // If all of the sequence tables were defined using qualified table + // names in the vschema, then we don't need to look for them. 
+ allFullyQualified := true + targets := maps.Values(ts.Targets()) + if len(targets) == 0 || targets[0].GetPrimary() == nil { // This should never happen + return nil, false, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no primary tablet found for target keyspace %s", ts.targetKeyspace) + } + targetDBName := targets[0].GetPrimary().DbName() + sequencesByBackingTable := make(map[string]*sequenceMetadata) + for _, table := range ts.Tables() { + vs, ok := vschema.Tables[table] + if !ok || vs == nil { + continue + } + if vs.AutoIncrement != nil && vs.AutoIncrement.Sequence != "" { + sm := &sequenceMetadata{ + backingTableName: vs.AutoIncrement.Sequence, + usingTableName: table, + usingTableDefinition: vs, + usingTableDBName: targetDBName, + } + // If the sequence table is fully qualified in the vschema then + // we don't need to find it later. + if strings.Contains(vs.AutoIncrement.Sequence, ".") { + keyspace, tableName, found := strings.Cut(vs.AutoIncrement.Sequence, ".") + if !found { + return nil, false, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "invalid sequence table name %s defined in the %s keyspace", + vs.AutoIncrement.Sequence, ts.targetKeyspace) + } + sm.backingTableName = tableName + sm.backingTableKeyspace = keyspace + } else { + allFullyQualified = false + } + sequencesByBackingTable[sm.backingTableName] = sm + } + } + if len(sequencesByBackingTable) == 0 { // Nothing to do + return nil, false, nil + } + + if err := ctx.Err(); err != nil { + return nil, false, err + } + + return sequencesByBackingTable, allFullyQualified, nil +} + // initializeTargetSequences initializes the backing sequence tables // using a map keyed by the backing sequence table name. 
// From 29f978d96e942f5898785942b0884a5576e035e8 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Sat, 5 Aug 2023 16:24:55 -0400 Subject: [PATCH 40/56] Set the backing table's default DB name when fully qualified Otherwise if they're all fully qualified we would not have it set as it was only being done during the search. Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/traffic_switcher.go | 1 + go/vt/wrangler/traffic_switcher.go | 1 + 2 files changed, 2 insertions(+) diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index 39b8d8e1d4f..bdfe7ec8598 100644 --- a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -1349,6 +1349,7 @@ func (ts *trafficSwitcher) findSequenceUsageInKeyspace(ctx context.Context, vsch } sm.backingTableName = tableName sm.backingTableKeyspace = keyspace + sm.backingTableDBName = "vt_" + keyspace } else { allFullyQualified = false } diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index ea6108efb9d..1e9328c7959 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -2093,6 +2093,7 @@ func (ts *trafficSwitcher) findSequenceUsageInKeyspace(ctx context.Context, vsch } sm.backingTableName = tableName sm.backingTableKeyspace = keyspace + sm.backingTableDBName = "vt_" + keyspace } else { allFullyQualified = false } From 71ff0249e0e71e59468619e4b303162345feeaa4 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Sat, 5 Aug 2023 23:07:18 -0400 Subject: [PATCH 41/56] Minor tweaks after self review of recent changes Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/traffic_switcher.go | 20 ++++++++------------ go/vt/wrangler/traffic_switcher.go | 20 ++++++++------------ 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index bdfe7ec8598..61c1c26b9f5 100644 --- a/go/vt/vtctl/workflow/traffic_switcher.go +++ 
b/go/vt/vtctl/workflow/traffic_switcher.go @@ -1239,8 +1239,8 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return nil, err } // If all of the sequence tables were defined using qualified table - // names then we don't need to search for them in the other keyspaces. - if backingTablesFound { + // names then we don't need to search for them in other keyspaces. + if len(sequencesByBackingTable) == 0 || backingTablesFound { return sequencesByBackingTable, nil } @@ -1312,14 +1312,12 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s } // findSequenceUsageInKeyspace searches the keyspace's vschema for usage -// of sequence tables. It returns a map of sequence metadata keyed by the -// backing sequence table name -- if any usage is found -- along with a -// boolean to indicate if all of the backing sequence tables were defined -// using qualified table names (so we know where they all live and don't -// need to go looking) along with an error if any is seen. +// of sequences. It returns a map of sequence metadata keyed by the backing +// sequence table name -- if any usage is found -- along with a boolean to +// indicate if all of the backing sequence tables were defined using +// qualified table names (so we know where they all live) along with an +// error if any is seen. func (ts *trafficSwitcher) findSequenceUsageInKeyspace(ctx context.Context, vschema *vschemapb.Keyspace) (map[string]*sequenceMetadata, bool, error) { - // If all of the sequence tables were defined using qualified table - // names in the vschema, then we don't need to look for them. 
allFullyQualified := true targets := maps.Values(ts.Targets()) if len(targets) == 0 || targets[0].GetPrimary() == nil { // This should never happen @@ -1327,6 +1325,7 @@ func (ts *trafficSwitcher) findSequenceUsageInKeyspace(ctx context.Context, vsch } targetDBName := targets[0].GetPrimary().DbName() sequencesByBackingTable := make(map[string]*sequenceMetadata) + for _, table := range ts.Tables() { vs, ok := vschema.Tables[table] if !ok || vs == nil { @@ -1356,9 +1355,6 @@ func (ts *trafficSwitcher) findSequenceUsageInKeyspace(ctx context.Context, vsch sequencesByBackingTable[sm.backingTableName] = sm } } - if len(sequencesByBackingTable) == 0 { // Nothing to do - return nil, false, nil - } if err := ctx.Err(); err != nil { return nil, false, err diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 1e9328c7959..97b21b8c59b 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -1983,8 +1983,8 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return nil, err } // If all of the sequence tables were defined using qualified table - // names then we don't need to search for them in the other keyspaces. - if backingTablesFound { + // names then we don't need to search for them in other keyspaces. + if len(sequencesByBackingTable) == 0 || backingTablesFound { return sequencesByBackingTable, nil } @@ -2056,14 +2056,12 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s } // findSequenceUsageInKeyspace searches the keyspace's vschema for usage -// of sequence tables. It returns a map of sequence metadata keyed by the -// backing sequence table name -- if any usage is found -- along with a -// boolean to indicate if all of the backing sequence tables were defined -// using qualified table names (so we know where they all live and don't -// need to go looking) along with an error if any is seen. +// of sequences. 
It returns a map of sequence metadata keyed by the backing +// sequence table name -- if any usage is found -- along with a boolean to +// indicate if all of the backing sequence tables were defined using +// qualified table names (so we know where they all live) along with an +// error if any is seen. func (ts *trafficSwitcher) findSequenceUsageInKeyspace(ctx context.Context, vschema *vschemapb.Keyspace) (map[string]*sequenceMetadata, bool, error) { - // If all of the sequence tables were defined using qualified table - // names in the vschema, then we don't need to look for them. allFullyQualified := true targets := maps.Values(ts.Targets()) if len(targets) == 0 || targets[0].GetPrimary() == nil { // This should never happen @@ -2071,6 +2069,7 @@ func (ts *trafficSwitcher) findSequenceUsageInKeyspace(ctx context.Context, vsch } targetDBName := targets[0].GetPrimary().DbName() sequencesByBackingTable := make(map[string]*sequenceMetadata) + for _, table := range ts.Tables() { vs, ok := vschema.Tables[table] if !ok || vs == nil { @@ -2100,9 +2099,6 @@ func (ts *trafficSwitcher) findSequenceUsageInKeyspace(ctx context.Context, vsch sequencesByBackingTable[sm.backingTableName] = sm } } - if len(sequencesByBackingTable) == 0 { // Nothing to do - return nil, false, nil - } if err := ctx.Err(); err != nil { return nil, false, err From 22e21d07cd783aed27245a668beba74a136c0cc0 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Tue, 8 Aug 2023 08:35:58 -0400 Subject: [PATCH 42/56] Deflake tests that use tabletconntest.SetProtocol() Signed-off-by: Matt Lord --- go/vt/servenv/servenv.go | 9 +++++++++ go/vt/vttablet/tabletconntest/tabletconntest.go | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/go/vt/servenv/servenv.go b/go/vt/servenv/servenv.go index 1944a39453d..e285b509c1f 100644 --- a/go/vt/servenv/servenv.go +++ b/go/vt/servenv/servenv.go @@ -351,6 +351,15 @@ func ParseFlags(cmd string) { logutil.PurgeLogs() } +// ParseFlagsForTests initializes flags but 
skips the filesystem args +// and go flag related work. +// Note: this should NOT be used outside of unit tests. +func ParseFlagsForTests(cmd string) { + fs := GetFlagSetFor(cmd) + viperutil.BindFlags(fs) + loadViper(cmd) +} + // GetFlagSetFor returns the flag set for a given command. // This has to exported for the Vitess-operator to use func GetFlagSetFor(cmd string) *pflag.FlagSet { diff --git a/go/vt/vttablet/tabletconntest/tabletconntest.go b/go/vt/vttablet/tabletconntest/tabletconntest.go index 23d4a3ce2e2..b279ac53726 100644 --- a/go/vt/vttablet/tabletconntest/tabletconntest.go +++ b/go/vt/vttablet/tabletconntest/tabletconntest.go @@ -1049,7 +1049,7 @@ func SetProtocol(name string, protocol string) { tabletconn.RegisterFlags(fs) }) - servenv.ParseFlags(name) + servenv.ParseFlagsForTests(name) if err := pflag.Set(tabletProtocolFlagName, protocol); err != nil { msg := "failed to set flag %q to %q: %v" From 4a1f2eb065456eb3ef77823625f76c480f112b4f Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Tue, 8 Aug 2023 09:09:11 -0400 Subject: [PATCH 43/56] Register grpc dialer for healthcheck test Signed-off-by: Matt Lord --- go/vt/discovery/healthcheck_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/go/vt/discovery/healthcheck_test.go b/go/vt/discovery/healthcheck_test.go index ea78b5c2647..20bb876b148 100644 --- a/go/vt/discovery/healthcheck_test.go +++ b/go/vt/discovery/healthcheck_test.go @@ -58,6 +58,7 @@ func testChecksum(t *testing.T, want, got int64) { func init() { tabletconn.RegisterDialer("fake_gateway", tabletDialer) + tabletconn.RegisterDialer("grpc", tabletDialer) tabletconntest.SetProtocol("go.vt.discovery.healthcheck_test", "fake_gateway") connMap = make(map[string]*fakeConn) refreshInterval = time.Minute From 4b53f667c4d976ebfab914311f673b76198ba3f8 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Tue, 8 Aug 2023 12:05:55 -0400 Subject: [PATCH 44/56] More deflaking Signed-off-by: Matt Lord --- go/vt/discovery/healthcheck_test.go | 1 - 
go/vt/servenv/servenv.go | 8 +++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/go/vt/discovery/healthcheck_test.go b/go/vt/discovery/healthcheck_test.go index 20bb876b148..ea78b5c2647 100644 --- a/go/vt/discovery/healthcheck_test.go +++ b/go/vt/discovery/healthcheck_test.go @@ -58,7 +58,6 @@ func testChecksum(t *testing.T, want, got int64) { func init() { tabletconn.RegisterDialer("fake_gateway", tabletDialer) - tabletconn.RegisterDialer("grpc", tabletDialer) tabletconntest.SetProtocol("go.vt.discovery.healthcheck_test", "fake_gateway") connMap = make(map[string]*fakeConn) refreshInterval = time.Minute diff --git a/go/vt/servenv/servenv.go b/go/vt/servenv/servenv.go index e285b509c1f..b8dee4233bc 100644 --- a/go/vt/servenv/servenv.go +++ b/go/vt/servenv/servenv.go @@ -351,11 +351,13 @@ func ParseFlags(cmd string) { logutil.PurgeLogs() } -// ParseFlagsForTests initializes flags but skips the filesystem args -// and go flag related work. -// Note: this should NOT be used outside of unit tests. +// ParseFlagsForTests initializes flags but skips the version, filesystem +// args and go flag related work. +// Note: this should not be used outside of unit tests. 
func ParseFlagsForTests(cmd string) { fs := GetFlagSetFor(cmd) + pflag.CommandLine = fs + pflag.Parse() viperutil.BindFlags(fs) loadViper(cmd) } From a04b965ae7798e389342f7a8d8a9a3f91c1c6e95 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Tue, 8 Aug 2023 12:35:39 -0400 Subject: [PATCH 45/56] Add another bounds safety check Signed-off-by: Matt Lord --- go/vt/wrangler/traffic_switcher.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 97b21b8c59b..8a557ed9f05 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -2157,6 +2157,10 @@ func (ts *trafficSwitcher) initializeTargetSequences(ctx context.Context, sequen return ictx.Err() default: } + if len(shardResults) == 0 { // This should never happen + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "did not get any results for the max used sequence value for target table %s.%s in order to initialize the backing sequence table", + ts.targetKeyspace, sequenceMetadata.usingTableName) + } // Sort the values to find the max value across all shards. sort.Slice(shardResults, func(i, j int) bool { return shardResults[i] < shardResults[j] From c00925c1c5a3c4e6914e6644fdf5c8c787885bc4 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Tue, 8 Aug 2023 13:09:38 -0400 Subject: [PATCH 46/56] Try to deflake the vtctldclient MoveTables unit test I have only seen this fail in the CI's MySQL57 unit test workflow. The order that the set foreign_key_checks=1 statement comes in is for some reason non-deterministic in that workflow specifically. 
Signed-off-by: Matt Lord --- go/vt/binlog/binlogplayer/mock_dbclient.go | 9 +++++++++ go/vt/vttablet/tabletmanager/rpc_vreplication_test.go | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/go/vt/binlog/binlogplayer/mock_dbclient.go b/go/vt/binlog/binlogplayer/mock_dbclient.go index 50df683976d..803ead258be 100644 --- a/go/vt/binlog/binlogplayer/mock_dbclient.go +++ b/go/vt/binlog/binlogplayer/mock_dbclient.go @@ -37,6 +37,7 @@ type MockDBClient struct { currentResult int done chan struct{} invariants map[string]*sqltypes.Result + ignored map[string]struct{} } type mockExpect struct { @@ -57,6 +58,10 @@ func NewMockDBClient(t *testing.T) *MockDBClient { "select id, type, state, message from _vt.vreplication_log": {}, "insert into _vt.vreplication_log": {}, }, + // For some reason the following queries are not sent in a deterministic order. + ignored: map[string]struct{}{ + "set foreign_key_checks=1": {}, + }, } } @@ -151,6 +156,10 @@ func (dc *MockDBClient) ExecuteFetch(query string, maxrows int) (qr *sqltypes.Re dc.t.Helper() dc.t.Logf("DBClient query: %v", query) + if _, ok := dc.ignored[query]; ok { + return qr, nil + } + for q, result := range dc.invariants { if strings.Contains(query, q) { return result, nil diff --git a/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go b/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go index a0d5b215ff0..6576fa6c906 100644 --- a/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go +++ b/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go @@ -62,7 +62,7 @@ const ( setStrictSQLMode = "SET @@session.sql_mode='ONLY_FULL_GROUP_BY,NO_AUTO_VALUE_ON_ZERO,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION'" getSQLMode = "SELECT @@session.sql_mode AS sql_mode" getFKChecks = "select @@foreign_key_checks;" - disableFKChecks = "set foreign_key_checks=1;" + enableFKChecks = "set foreign_key_checks=1;" sqlMode =
"ONLY_FULL_GROUP_BY,NO_AUTO_VALUE_ON_ZERO,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION" getBinlogRowImage = "select @@binlog_row_image" insertStreamsCreatedLog = "insert into _vt.vreplication_log(vrepl_id, type, state, message) values(1, 'Stream Created', '', '%s'" @@ -371,7 +371,7 @@ func TestMoveTables(t *testing.T) { ), "FULL", ), nil) - ftc.vrdbClient.ExpectRequest(disableFKChecks, &sqltypes.Result{}, nil) + ftc.vrdbClient.ExpectRequest(enableFKChecks, &sqltypes.Result{}, nil) ftc.vrdbClient.ExpectRequest(setStrictSQLMode, &sqltypes.Result{}, nil) ftc.vrdbClient.ExpectRequest(fmt.Sprintf(insertStreamsCreatedLog, bls), &sqltypes.Result{}, nil) From 89295b59d80a6f5ef57b06c5ff19cbf06651699d Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Wed, 9 Aug 2023 10:01:15 -0400 Subject: [PATCH 47/56] Improve and unify error handling when switching reads/writes Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/server.go | 114 ++++++++++++--------------- go/vt/wrangler/traffic_switcher.go | 122 ++++++++++++----------------- 2 files changed, 98 insertions(+), 138 deletions(-) diff --git a/go/vt/vtctl/workflow/server.go b/go/vt/vtctl/workflow/server.go index 5c1436dae6f..92e22e34f50 100644 --- a/go/vt/vtctl/workflow/server.go +++ b/go/vt/vtctl/workflow/server.go @@ -2323,16 +2323,23 @@ func (s *Server) switchReads(ctx context.Context, req *vtctldatapb.WorkflowSwitc } } + // Consistently handle errors by logging and returning them. 
+ handleError := func(message string, err error) (*[]string, error) { + werr := vterrors.Wrapf(err, message) + ts.Logger().Error(werr) + return nil, werr + } + log.Infof("Switching reads: %s.%s tablet types: %s, cells: %s, workflow state: %s", ts.targetKeyspace, ts.workflow, roTypesToSwitchStr, ts.optCells, state.String()) if !switchReplica && !switchRdonly { - return nil, fmt.Errorf("tablet types must be REPLICA or RDONLY: %s", roTypesToSwitchStr) + return handleError("invalid tablet types", fmt.Errorf("tablet types must be REPLICA or RDONLY: %s", roTypesToSwitchStr)) } if !ts.isPartialMigration { // shard level traffic switching is all or nothing if direction == DirectionBackward && switchReplica && len(state.ReplicaCellsSwitched) == 0 { - return nil, fmt.Errorf("requesting reversal of read traffic for REPLICAs but REPLICA reads have not been switched") + return handleError("invalid request", fmt.Errorf("requesting reversal of read traffic for REPLICAs but REPLICA reads have not been switched")) } if direction == DirectionBackward && switchRdonly && len(state.RdonlyCellsSwitched) == 0 { - return nil, fmt.Errorf("requesting reversal of SwitchReads for RDONLYs but RDONLY reads have not been switched") + return handleError("invalid request", fmt.Errorf("requesting reversal of SwitchReads for RDONLYs but RDONLY reads have not been switched")) } } var cells []string = req.Cells @@ -2359,8 +2366,7 @@ func (s *Server) switchReads(ctx context.Context, req *vtctldatapb.WorkflowSwitc // If journals exist notify user and fail. 
journalsExist, _, err := ts.checkJournals(ctx) if err != nil { - ts.Logger().Errorf("checkJournals failed: %v", err) - return nil, err + return handleError(fmt.Sprintf("failed to read journal in the %s keyspace", ts.SourceKeyspaceName()), err) } if journalsExist { log.Infof("Found a previous journal entry for %d", ts.id) @@ -2373,15 +2379,13 @@ func (s *Server) switchReads(ctx context.Context, req *vtctldatapb.WorkflowSwitc } if err := ts.validate(ctx); err != nil { - ts.Logger().Errorf("validate failed: %v", err) - return nil, err + return handleError("workflow validation failed", err) } // For reads, locking the source keyspace is sufficient. ctx, unlock, lockErr := sw.lockKeyspace(ctx, ts.SourceKeyspaceName(), "SwitchReads") if lockErr != nil { - ts.Logger().Errorf("LockKeyspace failed: %v", lockErr) - return nil, lockErr + return handleError(fmt.Sprintf("failed to lock the %s keyspace", ts.SourceKeyspaceName()), lockErr) } defer unlock(&err) @@ -2389,23 +2393,20 @@ func (s *Server) switchReads(ctx context.Context, req *vtctldatapb.WorkflowSwitc if ts.isPartialMigration { ts.Logger().Infof("Partial migration, skipping switchTableReads as traffic is all or nothing per shard and overridden for reads AND writes in the ShardRoutingRule created when switching writes.") } else if err := sw.switchTableReads(ctx, cells, req.TabletTypes, direction); err != nil { - ts.Logger().Errorf("switchTableReads failed: %v", err) - return nil, err + return handleError("failed to switch read traffic for the tables", err) } return sw.logs(), nil } ts.Logger().Infof("About to switchShardReads: %+v, %+s, %+v", cells, roTypesToSwitchStr, direction) if err := sw.switchShardReads(ctx, cells, req.TabletTypes, direction); err != nil { - ts.Logger().Errorf("switchShardReads failed: %v", err) - return nil, err + return handleError("failed to switch read traffic for the shards", err) } ts.Logger().Infof("switchShardReads Completed: %+v, %+s, %+v", cells, roTypesToSwitchStr, direction) if err := 
s.ts.ValidateSrvKeyspace(ctx, ts.targetKeyspace, strings.Join(cells, ",")); err != nil { - err2 := vterrors.Wrapf(err, "After switching shard reads, found SrvKeyspace for %s is corrupt in cell %s", + err2 := vterrors.Wrapf(err, "after switching shard reads, found SrvKeyspace for %s is corrupt in cell %s", ts.targetKeyspace, strings.Join(cells, ",")) - ts.Logger().Errorf("%w", err2) - return nil, err2 + return handleError("failed to validate SrvKeyspace record", err2) } return sw.logs(), nil } @@ -2421,36 +2422,39 @@ func (s *Server) switchWrites(ctx context.Context, req *vtctldatapb.WorkflowSwit sw = &switcher{ts: ts, s: s} } + // Consistently handle errors by logging and returning them. + handleError := func(message string, err error) (int64, *[]string, error) { + werr := vterrors.Wrapf(err, message) + ts.Logger().Error(werr) + return 0, nil, werr + } + if ts.frozen { ts.Logger().Warningf("Writes have already been switched for workflow %s, nothing to do here", ts.WorkflowName()) return 0, sw.logs(), nil } if err := ts.validate(ctx); err != nil { - ts.Logger().Errorf("validate failed: %v", err) - return 0, nil, err + return handleError("workflow validation failed", err) } if req.EnableReverseReplication { - err := areTabletsAvailableToStreamFrom(ctx, req, ts, ts.TargetKeyspaceName(), ts.TargetShards()) - if err != nil { - return 0, nil, err + if err := areTabletsAvailableToStreamFrom(ctx, req, ts, ts.TargetKeyspaceName(), ts.TargetShards()); err != nil { + return handleError(fmt.Sprintf("no tablets were available to stream from in the %s keyspace", ts.SourceKeyspaceName()), err) } } // Need to lock both source and target keyspaces. 
tctx, sourceUnlock, lockErr := sw.lockKeyspace(ctx, ts.SourceKeyspaceName(), "SwitchWrites") if lockErr != nil { - ts.Logger().Errorf("LockKeyspace failed: %v", lockErr) - return 0, nil, lockErr + return handleError(fmt.Sprintf("failed to lock the %s keyspace", ts.SourceKeyspaceName()), lockErr) } ctx = tctx defer sourceUnlock(&err) if ts.TargetKeyspaceName() != ts.SourceKeyspaceName() { tctx, targetUnlock, lockErr := sw.lockKeyspace(ctx, ts.TargetKeyspaceName(), "SwitchWrites") if lockErr != nil { - ts.Logger().Errorf("LockKeyspace failed: %v", lockErr) - return 0, nil, lockErr + return handleError(fmt.Sprintf("failed to lock the %s keyspace", ts.TargetKeyspaceName()), lockErr) } ctx = tctx defer targetUnlock(&err) @@ -2467,24 +2471,20 @@ func (s *Server) switchWrites(ctx context.Context, req *vtctldatapb.WorkflowSwit !ts.SourceKeyspaceSchema().Keyspace.Sharded { sequenceMetadata, err = ts.getTargetSequenceMetadata(ctx) if err != nil { - werr := vterrors.Wrapf(err, "getSequenceMetadata failed") - ts.Logger().Error(werr) - return 0, nil, werr + return handleError(fmt.Sprintf("failed to get the sequence information in the %s keyspace", ts.TargetKeyspaceName()), err) } } // If no journals exist, sourceWorkflows will be initialized by sm.MigrateStreams. journalsExist, sourceWorkflows, err := ts.checkJournals(ctx) if err != nil { - ts.Logger().Errorf("checkJournals failed: %v", err) - return 0, nil, err + return handleError(fmt.Sprintf("failed to read journal in the %s keyspace", ts.SourceKeyspaceName()), err) } if !journalsExist { ts.Logger().Infof("No previous journals were found. 
Proceeding normally.") sm, err := BuildStreamMigrator(ctx, ts, cancel) if err != nil { - ts.Logger().Errorf("buildStreamMigrater failed: %v", err) - return 0, nil, err + return handleError("failed to migrate the workflow streams", err) } if cancel { sw.cancelMigration(ctx, sm) @@ -2494,21 +2494,19 @@ func (s *Server) switchWrites(ctx context.Context, req *vtctldatapb.WorkflowSwit ts.Logger().Infof("Stopping streams") sourceWorkflows, err = sw.stopStreams(ctx, sm) if err != nil { - ts.Logger().Errorf("stopStreams failed: %v", err) for key, streams := range sm.Streams() { for _, stream := range streams { ts.Logger().Errorf("stream in stopStreams: key %s shard %s stream %+v", key, stream.BinlogSource.Shard, stream.BinlogSource) } } sw.cancelMigration(ctx, sm) - return 0, nil, err + return handleError("failed to stop the workflow streams", err) } ts.Logger().Infof("Stopping source writes") if err := sw.stopSourceWrites(ctx); err != nil { - ts.Logger().Errorf("stopSourceWrites failed: %v", err) sw.cancelMigration(ctx, sm) - return 0, nil, err + return handleError(fmt.Sprintf("failed to stop writes in the %s keyspace", ts.SourceKeyspaceName()), err) } if ts.MigrationType() == binlogdatapb.MigrationType_TABLES { @@ -2517,9 +2515,8 @@ func (s *Server) switchWrites(ctx context.Context, req *vtctldatapb.WorkflowSwit // the tablet's deny list check and the first mysqld side table lock. for cnt := 1; cnt <= lockTablesCycles; cnt++ { if err := ts.executeLockTablesOnSource(ctx); err != nil { - ts.Logger().Errorf("Failed to execute LOCK TABLES (attempt %d of %d) on sources: %v", cnt, lockTablesCycles, err) sw.cancelMigration(ctx, sm) - return 0, nil, err + return handleError(fmt.Sprintf("failed to execute LOCK TABLES (attempt %d of %d) on sources", cnt, lockTablesCycles), err) } // No need to UNLOCK the tables as the connection was closed once the locks were acquired // and thus the locks released. 
@@ -2529,50 +2526,42 @@ func (s *Server) switchWrites(ctx context.Context, req *vtctldatapb.WorkflowSwit ts.Logger().Infof("Waiting for streams to catchup") if err := sw.waitForCatchup(ctx, timeout); err != nil { - ts.Logger().Errorf("waitForCatchup failed: %v", err) sw.cancelMigration(ctx, sm) - return 0, nil, err + return handleError("failed to sync up replication between the source and target", err) } ts.Logger().Infof("Migrating streams") if err := sw.migrateStreams(ctx, sm); err != nil { - ts.Logger().Errorf("migrateStreams failed: %v", err) sw.cancelMigration(ctx, sm) - return 0, nil, err + return handleError("failed to migrate the workflow streams", err) } ts.Logger().Infof("Resetting sequences") if err := sw.resetSequences(ctx); err != nil { - ts.Logger().Errorf("resetSequences failed: %v", err) sw.cancelMigration(ctx, sm) - return 0, nil, err + return handleError("failed to reset the sequences", err) } ts.Logger().Infof("Creating reverse streams") if err := sw.createReverseVReplication(ctx); err != nil { - ts.Logger().Errorf("createReverseVReplication failed: %v", err) sw.cancelMigration(ctx, sm) - return 0, nil, err + return handleError("failed to create the reverse vreplication streams", err) } } else { if cancel { - err := fmt.Errorf("traffic switching has reached the point of no return, cannot cancel") - ts.Logger().Errorf("%v", err) - return 0, nil, err + return handleError("invalid cancel", fmt.Errorf("traffic switching has reached the point of no return, cannot cancel")) } ts.Logger().Infof("Journals were found. Completing the left over steps.") // Need to gather positions in case all journals were not created. if err := ts.gatherPositions(ctx); err != nil { - ts.Logger().Errorf("gatherPositions failed: %v", err) - return 0, nil, err + return handleError("failed to gather replication positions", err) } } // This is the point of no return. Once a journal is created, // traffic can be redirected to target shards. 
if err := sw.createJournals(ctx, sourceWorkflows); err != nil { - ts.Logger().Errorf("createJournals failed: %v", err) - return 0, nil, err + return handleError("failed to create the journal", err) } // Initialize any target sequences, if there are any, before allowing new writes. if req.InitializeTargetSequences && len(sequenceMetadata) > 0 { @@ -2582,33 +2571,26 @@ func (s *Server) switchWrites(ctx context.Context, req *vtctldatapb.WorkflowSwit initSeqCtx, cancel := context.WithTimeout(ctx, timeout/2) defer cancel() if err := sw.initializeTargetSequences(initSeqCtx, sequenceMetadata); err != nil { - werr := vterrors.Wrapf(err, "initializeTargetSequences failed") - ts.Logger().Error(werr) - return 0, nil, werr + return handleError(fmt.Sprintf("failed to initialize the sequences used in the %s keyspace", ts.TargetKeyspaceName()), err) } } if err := sw.allowTargetWrites(ctx); err != nil { - ts.Logger().Errorf("allowTargetWrites failed: %v", err) - return 0, nil, err + return handleError(fmt.Sprintf("failed to allow writes in the %s keyspace", ts.TargetKeyspaceName()), err) } if err := sw.changeRouting(ctx); err != nil { - ts.Logger().Errorf("changeRouting failed: %v", err) - return 0, nil, err + return handleError("failed to update the routing rules", err) } if err := sw.streamMigraterfinalize(ctx, ts, sourceWorkflows); err != nil { - ts.Logger().Errorf("finalize failed: %v", err) - return 0, nil, err + return handleError("failed to finalize the traffic switch", err) } if req.EnableReverseReplication { if err := sw.startReverseVReplication(ctx); err != nil { - ts.Logger().Errorf("startReverseVReplication failed: %v", err) - return 0, nil, err + return handleError("failed to start the reverse workflow", err) } } if err := sw.freezeTargetVReplication(ctx); err != nil { - ts.Logger().Errorf("deleteTargetVReplication failed: %v", err) - return 0, nil, err + return handleError(fmt.Sprintf("failed to freeze the workflow in the %s keyspace", ts.TargetKeyspaceName()), err) 
} return ts.id, sw.logs(), nil diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 8a557ed9f05..6ece05c3f7e 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -331,29 +331,32 @@ func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workfl // SwitchReads is a generic way of switching read traffic for a resharding workflow. func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflowName string, servedTypes []topodatapb.TabletType, cells []string, direction workflow.TrafficSwitchDirection, dryRun bool) (*[]string, error) { - ts, ws, err := wr.getWorkflowState(ctx, targetKeyspace, workflowName) + // Consistently handle errors by logging and returning them. + handleError := func(message string, err error) (*[]string, error) { + werr := vterrors.Wrapf(err, message) + ts.Logger().Error(werr) + return nil, werr + } if err != nil { - wr.Logger().Errorf("getWorkflowState failed: %v", err) - return nil, err + return handleError("failed to get the current state of the workflow", err) } if ts == nil { errorMsg := fmt.Sprintf("workflow %s not found in keyspace %s", workflowName, targetKeyspace) - wr.Logger().Errorf(errorMsg) - return nil, fmt.Errorf(errorMsg) + return handleError("failed to get the current state of the workflow", fmt.Errorf(errorMsg)) } log.Infof("Switching reads: %s.%s tt %+v, cells %+v, workflow state: %+v", targetKeyspace, workflowName, servedTypes, cells, ws) var switchReplicas, switchRdonly bool for _, servedType := range servedTypes { if servedType != topodatapb.TabletType_REPLICA && servedType != topodatapb.TabletType_RDONLY { - return nil, fmt.Errorf("tablet type must be REPLICA or RDONLY: %v", servedType) + return handleError("invalid tablet type", fmt.Errorf("tablet type must be REPLICA or RDONLY: %v", servedType)) } if !ts.isPartialMigration { // shard level traffic switching is all or nothing if direction == workflow.DirectionBackward 
&& servedType == topodatapb.TabletType_REPLICA && len(ws.ReplicaCellsSwitched) == 0 { - return nil, fmt.Errorf("requesting reversal of read traffic for REPLICAs but REPLICA reads have not been switched") + return handleError("invalid request", fmt.Errorf("requesting reversal of read traffic for REPLICAs but REPLICA reads have not been switched")) } if direction == workflow.DirectionBackward && servedType == topodatapb.TabletType_RDONLY && len(ws.RdonlyCellsSwitched) == 0 { - return nil, fmt.Errorf("requesting reversal of SwitchReads for RDONLYs but RDONLY reads have not been switched") + return handleError("invalid request", fmt.Errorf("requesting reversal of SwitchReads for RDONLYs but RDONLY reads have not been switched")) } } switch servedType { @@ -382,8 +385,7 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflowNam // If journals exist notify user and fail. journalsExist, _, err := ts.checkJournals(ctx) if err != nil { - wr.Logger().Errorf("checkJournals failed: %v", err) - return nil, err + return handleError(fmt.Sprintf("failed to read journal in the %s keyspace", ts.SourceKeyspaceName()), err) } if journalsExist { log.Infof("Found a previous journal entry for %d", ts.id) @@ -396,15 +398,13 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflowNam } if err := ts.validate(ctx); err != nil { - ts.Logger().Errorf("validate failed: %v", err) - return nil, err + return handleError("workflow validation failed", err) } // For reads, locking the source keyspace is sufficient. 
ctx, unlock, lockErr := sw.lockKeyspace(ctx, ts.SourceKeyspaceName(), "SwitchReads") if lockErr != nil { - ts.Logger().Errorf("LockKeyspace failed: %v", lockErr) - return nil, lockErr + return handleError(fmt.Sprintf("failed to lock the %s keyspace", ts.SourceKeyspaceName()), lockErr) } defer unlock(&err) @@ -412,23 +412,20 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflowNam if ts.isPartialMigration { ts.Logger().Infof("Partial migration, skipping switchTableReads as traffic is all or nothing per shard and overridden for reads AND writes in the ShardRoutingRule created when switching writes.") } else if err := sw.switchTableReads(ctx, cells, servedTypes, direction); err != nil { - ts.Logger().Errorf("switchTableReads failed: %v", err) - return nil, err + return handleError("failed to switch read traffic for the tables", err) } return sw.logs(), nil } wr.Logger().Infof("About to switchShardReads: %+v, %+v, %+v", cells, servedTypes, direction) if err := sw.switchShardReads(ctx, cells, servedTypes, direction); err != nil { - ts.Logger().Errorf("switchShardReads failed: %v", err) - return nil, err + return handleError("failed to switch read traffic for the shards", err) } wr.Logger().Infof("switchShardReads Completed: %+v, %+v, %+v", cells, servedTypes, direction) if err := wr.ts.ValidateSrvKeyspace(ctx, targetKeyspace, strings.Join(cells, ",")); err != nil { err2 := vterrors.Wrapf(err, "After switching shard reads, found SrvKeyspace for %s is corrupt in cell %s", targetKeyspace, strings.Join(cells, ",")) - log.Errorf("%w", err2) - return nil, err2 + return handleError("failed to validate SrvKeyspace record", err2) } return sw.logs(), nil } @@ -481,14 +478,20 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowNa cancel, reverse, reverseReplication bool, dryRun, initializeTargetSequences bool) (journalID int64, dryRunResults *[]string, err error) { ts, ws, err := wr.getWorkflowState(ctx, targetKeyspace, 
workflowName) _ = ws + + // Consistently handle errors by logging and returning them. + handleError := func(message string, err error) (int64, *[]string, error) { + werr := vterrors.Wrapf(err, message) + ts.Logger().Error(werr) + return 0, nil, werr + } + if err != nil { - wr.Logger().Errorf("getWorkflowState failed: %v", err) - return 0, nil, err + handleError("failed to get the current workflow state", err) } if ts == nil { errorMsg := fmt.Sprintf("workflow %s not found in keyspace %s", workflowName, targetKeyspace) - wr.Logger().Errorf(errorMsg) - return 0, nil, fmt.Errorf(errorMsg) + handleError("failed to get the current workflow state", fmt.Errorf(errorMsg)) } var sw iswitcher @@ -505,30 +508,27 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowNa ts.Logger().Infof("Built switching metadata: %+v", ts) if err := ts.validate(ctx); err != nil { - ts.Logger().Errorf("validate failed: %v", err) - return 0, nil, err + handleError("workflow validation failed", err) } if reverseReplication { err := wr.areTabletsAvailableToStreamFrom(ctx, ts, ts.TargetKeyspaceName(), ts.TargetShards()) if err != nil { - return 0, nil, err + return handleError(fmt.Sprintf("no tablets were available to stream from in the %s keyspace", ts.SourceKeyspaceName()), err) } } // Need to lock both source and target keyspaces. 
tctx, sourceUnlock, lockErr := sw.lockKeyspace(ctx, ts.SourceKeyspaceName(), "SwitchWrites") if lockErr != nil { - ts.Logger().Errorf("LockKeyspace failed: %v", lockErr) - return 0, nil, lockErr + return handleError(fmt.Sprintf("failed to lock the %s keyspace", ts.SourceKeyspaceName()), lockErr) } ctx = tctx defer sourceUnlock(&err) if ts.TargetKeyspaceName() != ts.SourceKeyspaceName() { tctx, targetUnlock, lockErr := sw.lockKeyspace(ctx, ts.TargetKeyspaceName(), "SwitchWrites") if lockErr != nil { - ts.Logger().Errorf("LockKeyspace failed: %v", lockErr) - return 0, nil, lockErr + return handleError(fmt.Sprintf("failed to lock the %s keyspace", ts.TargetKeyspaceName()), lockErr) } ctx = tctx defer targetUnlock(&err) @@ -545,24 +545,20 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowNa !ts.SourceKeyspaceSchema().Keyspace.Sharded { sequenceMetadata, err = ts.getTargetSequenceMetadata(ctx) if err != nil { - werr := vterrors.Wrapf(err, "getTargetSequenceMetadata failed") - ts.Logger().Error(werr) - return 0, nil, werr + return handleError(fmt.Sprintf("failed to get the sequence information in the %s keyspace", ts.TargetKeyspaceName()), err) } } // If no journals exist, sourceWorkflows will be initialized by sm.MigrateStreams. journalsExist, sourceWorkflows, err := ts.checkJournals(ctx) if err != nil { - ts.Logger().Errorf("checkJournals failed: %v", err) - return 0, nil, err + return handleError(fmt.Sprintf("failed to read journal in the %s keyspace", ts.SourceKeyspaceName()), err) } if !journalsExist { ts.Logger().Infof("No previous journals were found. 
Proceeding normally.") sm, err := workflow.BuildStreamMigrator(ctx, ts, cancel) if err != nil { - ts.Logger().Errorf("buildStreamMigrater failed: %v", err) - return 0, nil, err + return handleError("failed to migrate the workflow streams", err) } if cancel { sw.cancelMigration(ctx, sm) @@ -572,21 +568,19 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowNa ts.Logger().Infof("Stopping streams") sourceWorkflows, err = sw.stopStreams(ctx, sm) if err != nil { - ts.Logger().Errorf("stopStreams failed: %v", err) for key, streams := range sm.Streams() { for _, stream := range streams { ts.Logger().Errorf("stream in stopStreams: key %s shard %s stream %+v", key, stream.BinlogSource.Shard, stream.BinlogSource) } } sw.cancelMigration(ctx, sm) - return 0, nil, err + return handleError("failed to stop the workflow streams", err) } ts.Logger().Infof("Stopping source writes") if err := sw.stopSourceWrites(ctx); err != nil { - ts.Logger().Errorf("stopSourceWrites failed: %v", err) sw.cancelMigration(ctx, sm) - return 0, nil, err + return handleError(fmt.Sprintf("failed to stop writes in the %s keyspace", ts.SourceKeyspaceName()), err) } if ts.MigrationType() == binlogdatapb.MigrationType_TABLES { @@ -595,9 +589,8 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowNa // the tablet's deny list check and the first mysqld side table lock. for cnt := 1; cnt <= lockTablesCycles; cnt++ { if err := ts.executeLockTablesOnSource(ctx); err != nil { - ts.Logger().Errorf("Failed to execute LOCK TABLES (attempt %d of %d) on sources: %v", cnt, lockTablesCycles, err) sw.cancelMigration(ctx, sm) - return 0, nil, err + return handleError(fmt.Sprintf("failed to execute LOCK TABLES (attempt %d of %d) on sources", cnt, lockTablesCycles), err) } // No need to UNLOCK the tables as the connection was closed once the locks were acquired // and thus the locks released. 
@@ -607,50 +600,42 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowNa ts.Logger().Infof("Waiting for streams to catchup") if err := sw.waitForCatchup(ctx, timeout); err != nil { - ts.Logger().Errorf("waitForCatchup failed: %v", err) sw.cancelMigration(ctx, sm) - return 0, nil, err + return handleError("failed to sync up replication between the source and target", err) } ts.Logger().Infof("Migrating streams") if err := sw.migrateStreams(ctx, sm); err != nil { - ts.Logger().Errorf("migrateStreams failed: %v", err) sw.cancelMigration(ctx, sm) - return 0, nil, err + return handleError("failed to migrate the workflow streams", err) } ts.Logger().Infof("Resetting sequences") if err := sw.resetSequences(ctx); err != nil { - ts.Logger().Errorf("resetSequences failed: %v", err) sw.cancelMigration(ctx, sm) - return 0, nil, err + return handleError("failed to reset the sequences", err) } ts.Logger().Infof("Creating reverse streams") if err := sw.createReverseVReplication(ctx); err != nil { - ts.Logger().Errorf("createReverseVReplication failed: %v", err) sw.cancelMigration(ctx, sm) - return 0, nil, err + return handleError("failed to create the reverse vreplication streams", err) } } else { if cancel { - err := fmt.Errorf("traffic switching has reached the point of no return, cannot cancel") - ts.Logger().Errorf("%v", err) - return 0, nil, err + return handleError("invalid cancel", fmt.Errorf("traffic switching has reached the point of no return, cannot cancel")) } ts.Logger().Infof("Journals were found. Completing the left over steps.") // Need to gather positions in case all journals were not created. if err := ts.gatherPositions(ctx); err != nil { - ts.Logger().Errorf("gatherPositions failed: %v", err) - return 0, nil, err + return handleError("failed to gather replication positions", err) } } // This is the point of no return. Once a journal is created, // traffic can be redirected to target shards. 
if err := sw.createJournals(ctx, sourceWorkflows); err != nil { - ts.Logger().Errorf("createJournals failed: %v", err) - return 0, nil, err + return handleError("failed to create the journal", err) } // Initialize any target sequences, if there are any, before allowing new writes. if initializeTargetSequences && len(sequenceMetadata) > 0 { @@ -660,33 +645,26 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowNa initSeqCtx, cancel := context.WithTimeout(ctx, timeout/2) defer cancel() if err := sw.initializeTargetSequences(initSeqCtx, sequenceMetadata); err != nil { - werr := vterrors.Wrapf(err, "initializeTargetSequences failed") - ts.Logger().Error(werr) - return 0, nil, werr + return handleError(fmt.Sprintf("failed to initialize the sequences used in the %s keyspace", ts.TargetKeyspaceName()), err) } } if err := sw.allowTargetWrites(ctx); err != nil { - ts.Logger().Errorf("allowTargetWrites failed: %v", err) - return 0, nil, err + return handleError(fmt.Sprintf("failed to allow writes in the %s keyspace", ts.TargetKeyspaceName()), err) } if err := sw.changeRouting(ctx); err != nil { - ts.Logger().Errorf("changeRouting failed: %v", err) - return 0, nil, err + return handleError("failed to update the routing rules", err) } if err := sw.streamMigraterfinalize(ctx, ts, sourceWorkflows); err != nil { - ts.Logger().Errorf("finalize failed: %v", err) - return 0, nil, err + handleError("failed to finalize the traffic switch", err) } if reverseReplication { if err := sw.startReverseVReplication(ctx); err != nil { - ts.Logger().Errorf("startReverseVReplication failed: %v", err) - return 0, nil, err + return handleError("failed to start the reverse workflow", err) } } if err := sw.freezeTargetVReplication(ctx); err != nil { - ts.Logger().Errorf("deleteTargetVReplication failed: %v", err) - return 0, nil, err + return handleError(fmt.Sprintf("failed to freeze the workflow in the %s keyspace", ts.TargetKeyspaceName()), err) } return ts.id, 
sw.logs(), nil From 960056658d0b474d6dca591c6307aab3f7ab9197 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Wed, 9 Aug 2023 10:54:28 -0400 Subject: [PATCH 48/56] Address review comments Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/traffic_switcher.go | 54 ++++++++++++------------ go/vt/wrangler/traffic_switcher.go | 54 ++++++++++++------------ 2 files changed, 52 insertions(+), 56 deletions(-) diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index 61c1c26b9f5..a298043ce68 100644 --- a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -1234,7 +1234,7 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return nil, nil } - sequencesByBackingTable, backingTablesFound, err := ts.findSequenceUsageInKeyspace(ctx, vschema) + sequencesByBackingTable, backingTablesFound, err := ts.findSequenceUsageInKeyspace(vschema) if err != nil { return nil, err } @@ -1244,6 +1244,10 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return sequencesByBackingTable, nil } + if err := ctx.Err(); err != nil { + return nil, err + } + // Now we need to locate the backing sequence table(s) which will // be in another unsharded keyspace. smMu := sync.Mutex{} @@ -1317,7 +1321,7 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s // indicate if all of the backing sequence tables were defined using // qualified table names (so we know where they all live) along with an // error if any is seen. 
-func (ts *trafficSwitcher) findSequenceUsageInKeyspace(ctx context.Context, vschema *vschemapb.Keyspace) (map[string]*sequenceMetadata, bool, error) { +func (ts *trafficSwitcher) findSequenceUsageInKeyspace(vschema *vschemapb.Keyspace) (map[string]*sequenceMetadata, bool, error) { allFullyQualified := true targets := maps.Values(ts.Targets()) if len(targets) == 0 || targets[0].GetPrimary() == nil { // This should never happen @@ -1328,36 +1332,30 @@ func (ts *trafficSwitcher) findSequenceUsageInKeyspace(ctx context.Context, vsch for _, table := range ts.Tables() { vs, ok := vschema.Tables[table] - if !ok || vs == nil { + if !ok || vs == nil || vs.AutoIncrement == nil || vs.AutoIncrement.Sequence == "" { continue } - if vs.AutoIncrement != nil && vs.AutoIncrement.Sequence != "" { - sm := &sequenceMetadata{ - backingTableName: vs.AutoIncrement.Sequence, - usingTableName: table, - usingTableDefinition: vs, - usingTableDBName: targetDBName, + sm := &sequenceMetadata{ + backingTableName: vs.AutoIncrement.Sequence, + usingTableName: table, + usingTableDefinition: vs, + usingTableDBName: targetDBName, + } + // If the sequence table is fully qualified in the vschema then + // we don't need to find it later. + if strings.Contains(vs.AutoIncrement.Sequence, ".") { + keyspace, tableName, found := strings.Cut(vs.AutoIncrement.Sequence, ".") + if !found { + return nil, false, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "invalid sequence table name %s defined in the %s keyspace", + vs.AutoIncrement.Sequence, ts.targetKeyspace) } - // If the sequence table is fully qualified in the vschema then - // we don't need to find it later. 
- if strings.Contains(vs.AutoIncrement.Sequence, ".") { - keyspace, tableName, found := strings.Cut(vs.AutoIncrement.Sequence, ".") - if !found { - return nil, false, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "invalid sequence table name %s defined in the %s keyspace", - vs.AutoIncrement.Sequence, ts.targetKeyspace) - } - sm.backingTableName = tableName - sm.backingTableKeyspace = keyspace - sm.backingTableDBName = "vt_" + keyspace - } else { - allFullyQualified = false - } - sequencesByBackingTable[sm.backingTableName] = sm + sm.backingTableName = tableName + sm.backingTableKeyspace = keyspace + sm.backingTableDBName = "vt_" + keyspace + } else { + allFullyQualified = false } - } - - if err := ctx.Err(); err != nil { - return nil, false, err + sequencesByBackingTable[sm.backingTableName] = sm } return sequencesByBackingTable, allFullyQualified, nil diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 6ece05c3f7e..184d4a857e6 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -1956,7 +1956,7 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return nil, nil } - sequencesByBackingTable, backingTablesFound, err := ts.findSequenceUsageInKeyspace(ctx, vschema) + sequencesByBackingTable, backingTablesFound, err := ts.findSequenceUsageInKeyspace(vschema) if err != nil { return nil, err } @@ -1966,6 +1966,10 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return sequencesByBackingTable, nil } + if err := ctx.Err(); err != nil { + return nil, err + } + // Now we need to locate the backing sequence table(s) which will // be in another unsharded keyspace. 
smMu := sync.Mutex{} @@ -2039,7 +2043,7 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s // indicate if all of the backing sequence tables were defined using // qualified table names (so we know where they all live) along with an // error if any is seen. -func (ts *trafficSwitcher) findSequenceUsageInKeyspace(ctx context.Context, vschema *vschemapb.Keyspace) (map[string]*sequenceMetadata, bool, error) { +func (ts *trafficSwitcher) findSequenceUsageInKeyspace(vschema *vschemapb.Keyspace) (map[string]*sequenceMetadata, bool, error) { allFullyQualified := true targets := maps.Values(ts.Targets()) if len(targets) == 0 || targets[0].GetPrimary() == nil { // This should never happen @@ -2050,36 +2054,30 @@ func (ts *trafficSwitcher) findSequenceUsageInKeyspace(ctx context.Context, vsch for _, table := range ts.Tables() { vs, ok := vschema.Tables[table] - if !ok || vs == nil { + if !ok || vs == nil || vs.AutoIncrement == nil || vs.AutoIncrement.Sequence == "" { continue } - if vs.AutoIncrement != nil && vs.AutoIncrement.Sequence != "" { - sm := &sequenceMetadata{ - backingTableName: vs.AutoIncrement.Sequence, - usingTableName: table, - usingTableDefinition: vs, - usingTableDBName: targetDBName, - } - // If the sequence table is fully qualified in the vschema then - // we don't need to find it later. 
- if strings.Contains(vs.AutoIncrement.Sequence, ".") { - keyspace, tableName, found := strings.Cut(vs.AutoIncrement.Sequence, ".") - if !found { - return nil, false, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "invalid sequence table name %s defined in the %s keyspace", - vs.AutoIncrement.Sequence, ts.targetKeyspace) - } - sm.backingTableName = tableName - sm.backingTableKeyspace = keyspace - sm.backingTableDBName = "vt_" + keyspace - } else { - allFullyQualified = false + sm := &sequenceMetadata{ + backingTableName: vs.AutoIncrement.Sequence, + usingTableName: table, + usingTableDefinition: vs, + usingTableDBName: targetDBName, + } + // If the sequence table is fully qualified in the vschema then + // we don't need to find it later. + if strings.Contains(vs.AutoIncrement.Sequence, ".") { + keyspace, tableName, found := strings.Cut(vs.AutoIncrement.Sequence, ".") + if !found { + return nil, false, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "invalid sequence table name %s defined in the %s keyspace", + vs.AutoIncrement.Sequence, ts.targetKeyspace) } - sequencesByBackingTable[sm.backingTableName] = sm + sm.backingTableName = tableName + sm.backingTableKeyspace = keyspace + sm.backingTableDBName = "vt_" + keyspace + } else { + allFullyQualified = false } - } - - if err := ctx.Err(); err != nil { - return nil, false, err + sequencesByBackingTable[sm.backingTableName] = sm } return sequencesByBackingTable, allFullyQualified, nil From fb01b93ed838f94fee25b5fee0aee99fe09f6c0c Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Wed, 9 Aug 2023 11:00:16 -0400 Subject: [PATCH 49/56] Use closure to more safely manage mutex Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/traffic_switcher.go | 37 ++++++++++++++---------- go/vt/wrangler/traffic_switcher.go | 37 ++++++++++++++---------- 2 files changed, 42 insertions(+), 32 deletions(-) diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index a298043ce68..9745dea2393 100644 --- 
a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -1271,25 +1271,30 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return nil default: } - smMu.Lock() // Prevent concurrent access to the map - sm := sequencesByBackingTable[tableName] - if tableDef != nil && tableDef.Type == vindexes.TypeSequence && - sm != nil && tableName == sm.backingTableName { - tablesFound++ // This is also protected by the mutex - sm.backingTableKeyspace = keyspace - sm.backingTableDBName = "vt_" + keyspace - if tablesFound == tableCount { // Short circuit the search - smMu.Unlock() - select { - case <-searchCompleted: // It's already been closed - return nil - default: - close(searchCompleted) // Mark the search as completed - return nil + if complete := func() bool { + smMu.Lock() // Prevent concurrent access to the map + defer smMu.Unlock() + sm := sequencesByBackingTable[tableName] + if tableDef != nil && tableDef.Type == vindexes.TypeSequence && + sm != nil && tableName == sm.backingTableName { + tablesFound++ // This is also protected by the mutex + sm.backingTableKeyspace = keyspace + sm.backingTableDBName = "vt_" + keyspace + if tablesFound == tableCount { // Short circuit the search + smMu.Unlock() + select { + case <-searchCompleted: // It's already been closed + return true + default: + close(searchCompleted) // Mark the search as completed + return true + } } } + return false + }(); complete { + return nil } - smMu.Unlock() } return nil } diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 184d4a857e6..3d2aea64c08 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -1993,25 +1993,30 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s return nil default: } - smMu.Lock() // Prevent concurrent access to the map - sm := sequencesByBackingTable[tableName] - if tableDef != nil && tableDef.Type == 
vindexes.TypeSequence && - sm != nil && tableName == sm.backingTableName { - tablesFound++ // This is also protected by the mutex - sm.backingTableKeyspace = keyspace - sm.backingTableDBName = "vt_" + keyspace - if tablesFound == tableCount { // Short circuit the search - smMu.Unlock() - select { - case <-searchCompleted: // It's already been closed - return nil - default: - close(searchCompleted) // Mark the search as completed - return nil + if complete := func() bool { + smMu.Lock() // Prevent concurrent access to the map + defer smMu.Unlock() + sm := sequencesByBackingTable[tableName] + if tableDef != nil && tableDef.Type == vindexes.TypeSequence && + sm != nil && tableName == sm.backingTableName { + tablesFound++ // This is also protected by the mutex + sm.backingTableKeyspace = keyspace + sm.backingTableDBName = "vt_" + keyspace + if tablesFound == tableCount { // Short circuit the search + smMu.Unlock() + select { + case <-searchCompleted: // It's already been closed + return true + default: + close(searchCompleted) // Mark the search as completed + return true + } } } + return false + }(); complete { + return nil } - smMu.Unlock() } return nil } From 0663f6f14e2c20b0d38a79be2be76bb50ba8ee09 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Wed, 9 Aug 2023 11:06:05 -0400 Subject: [PATCH 50/56] Forgot to remove the unlock... 
Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/traffic_switcher.go | 1 - go/vt/wrangler/traffic_switcher.go | 1 - 2 files changed, 2 deletions(-) diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index 9745dea2393..3a6394543f1 100644 --- a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -1281,7 +1281,6 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s sm.backingTableKeyspace = keyspace sm.backingTableDBName = "vt_" + keyspace if tablesFound == tableCount { // Short circuit the search - smMu.Unlock() select { case <-searchCompleted: // It's already been closed return true diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 3d2aea64c08..f536f6830fc 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -2003,7 +2003,6 @@ func (ts *trafficSwitcher) getTargetSequenceMetadata(ctx context.Context) (map[s sm.backingTableKeyspace = keyspace sm.backingTableDBName = "vt_" + keyspace if tablesFound == tableCount { // Short circuit the search - smMu.Unlock() select { case <-searchCompleted: // It's already been closed return true From c1477486cf3570f5113e7a7da00403d392bf0585 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Wed, 9 Aug 2023 11:26:35 -0400 Subject: [PATCH 51/56] Improve error msg and update unit test expectations Signed-off-by: Matt Lord --- go/vt/vtctl/workflow/server.go | 4 ++-- go/vt/wrangler/stream_migrater_test.go | 8 ++++---- go/vt/wrangler/traffic_switcher.go | 16 ++++++++-------- go/vt/wrangler/traffic_switcher_test.go | 6 +++--- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/go/vt/vtctl/workflow/server.go b/go/vt/vtctl/workflow/server.go index 92e22e34f50..3a4f1765694 100644 --- a/go/vt/vtctl/workflow/server.go +++ b/go/vt/vtctl/workflow/server.go @@ -2325,7 +2325,7 @@ func (s *Server) switchReads(ctx context.Context, req 
*vtctldatapb.WorkflowSwitc // Consistently handle errors by logging and returning them. handleError := func(message string, err error) (*[]string, error) { - werr := vterrors.Wrapf(err, message) + werr := vterrors.Errorf(vtrpcpb.Code_INTERNAL, fmt.Sprintf("%s: %v", message, err)) ts.Logger().Error(werr) return nil, werr } @@ -2424,7 +2424,7 @@ func (s *Server) switchWrites(ctx context.Context, req *vtctldatapb.WorkflowSwit // Consistently handle errors by logging and returning them. handleError := func(message string, err error) (int64, *[]string, error) { - werr := vterrors.Wrapf(err, message) + werr := vterrors.Errorf(vtrpcpb.Code_INTERNAL, fmt.Sprintf("%s: %v", message, err)) ts.Logger().Error(werr) return 0, nil, werr } diff --git a/go/vt/wrangler/stream_migrater_test.go b/go/vt/wrangler/stream_migrater_test.go index d7b93e07f59..98828261b27 100644 --- a/go/vt/wrangler/stream_migrater_test.go +++ b/go/vt/wrangler/stream_migrater_test.go @@ -1108,7 +1108,7 @@ func TestStreamMigrateStoppedStreams(t *testing.T) { stopStreams() _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) - want := "cannot migrate until all streams are running: 0: 10" + want := "failed to migrate the workflow streams: cannot migrate until all streams are running: 0: 10" if err == nil || err.Error() != want { t.Errorf("SwitchWrites err: %v, want %v", err, want) } @@ -1238,7 +1238,7 @@ func TestStreamMigrateStillCopying(t *testing.T) { stopStreams() _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) - want := "cannot migrate while vreplication streams in source shards are still copying: 0" + want := "failed to migrate the workflow streams: cannot migrate while vreplication streams in source shards are still copying: 0" if err == nil || err.Error() != want { t.Errorf("SwitchWrites err: %v, want %v", err, want) } @@ -1300,7 +1300,7 @@ func TestStreamMigrateEmptyWorkflow(t 
*testing.T) { stopStreams() _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) - want := "VReplication streams must have named workflows for migration: shard: ks:0, stream: 1" + want := "failed to migrate the workflow streams: VReplication streams must have named workflows for migration: shard: ks:0, stream: 1" if err == nil || err.Error() != want { t.Errorf("SwitchWrites err: %v, want %v", err, want) } @@ -1362,7 +1362,7 @@ func TestStreamMigrateDupWorkflow(t *testing.T) { stopStreams() _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false, false) - want := "VReplication stream has the same workflow name as the resharding workflow: shard: ks:0, stream: 1" + want := "failed to migrate the workflow streams: VReplication stream has the same workflow name as the resharding workflow: shard: ks:0, stream: 1" if err == nil || err.Error() != want { t.Errorf("SwitchWrites err: %v, want %v", err, want) } diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index f536f6830fc..750540a855d 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -331,13 +331,14 @@ func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workfl // SwitchReads is a generic way of switching read traffic for a resharding workflow. func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflowName string, servedTypes []topodatapb.TabletType, cells []string, direction workflow.TrafficSwitchDirection, dryRun bool) (*[]string, error) { - ts, ws, err := wr.getWorkflowState(ctx, targetKeyspace, workflowName) // Consistently handle errors by logging and returning them. 
handleError := func(message string, err error) (*[]string, error) { - werr := vterrors.Wrapf(err, message) - ts.Logger().Error(werr) + werr := vterrors.Errorf(vtrpcpb.Code_INTERNAL, fmt.Sprintf("%s: %v", message, err)) + wr.Logger().Error(werr) return nil, werr } + + ts, ws, err := wr.getWorkflowState(ctx, targetKeyspace, workflowName) if err != nil { return handleError("failed to get the current state of the workflow", err) } @@ -476,16 +477,15 @@ func (wr *Wrangler) areTabletsAvailableToStreamFrom(ctx context.Context, ts *tra // SwitchWrites is a generic way of migrating write traffic for a resharding workflow. func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflowName string, timeout time.Duration, cancel, reverse, reverseReplication bool, dryRun, initializeTargetSequences bool) (journalID int64, dryRunResults *[]string, err error) { - ts, ws, err := wr.getWorkflowState(ctx, targetKeyspace, workflowName) - _ = ws - // Consistently handle errors by logging and returning them. handleError := func(message string, err error) (int64, *[]string, error) { - werr := vterrors.Wrapf(err, message) - ts.Logger().Error(werr) + werr := vterrors.Errorf(vtrpcpb.Code_INTERNAL, fmt.Sprintf("%s: %v", message, err)) + wr.Logger().Error(werr) return 0, nil, werr } + ts, ws, err := wr.getWorkflowState(ctx, targetKeyspace, workflowName) + _ = ws if err != nil { handleError("failed to get the current workflow state", err) } diff --git a/go/vt/wrangler/traffic_switcher_test.go b/go/vt/wrangler/traffic_switcher_test.go index 0fb8a786e4b..ef7495d13a0 100644 --- a/go/vt/wrangler/traffic_switcher_test.go +++ b/go/vt/wrangler/traffic_switcher_test.go @@ -277,7 +277,7 @@ func TestTableMigrateMainflow(t *testing.T) { //------------------------------------------------------------------------------------------------------------------- // Can't switch primary with SwitchReads. 
_, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_PRIMARY}, nil, workflow.DirectionForward, false) - want := "tablet type must be REPLICA or RDONLY: PRIMARY" + want := "invalid tablet type: tablet type must be REPLICA or RDONLY: PRIMARY" if err == nil || err.Error() != want { t.Errorf("SwitchReads(primary) err: %v, want %v", err, want) } @@ -598,7 +598,7 @@ func TestShardMigrateMainflow(t *testing.T) { //------------------------------------------------------------------------------------------------------------------- // Can't switch primary with SwitchReads. _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_PRIMARY}, nil, workflow.DirectionForward, false) - want := "tablet type must be REPLICA or RDONLY: PRIMARY" + want := "invalid tablet type: tablet type must be REPLICA or RDONLY: PRIMARY" if err == nil || err.Error() != want { t.Errorf("SwitchReads(primary) err: %v, want %v", err, want) } @@ -1901,7 +1901,7 @@ func TestShardMigrateNoAvailableTabletsForReverseReplication(t *testing.T) { //------------------------------------------------------------------------------------------------------------------- // Can't switch primary with SwitchReads. 
_, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_PRIMARY}, nil, workflow.DirectionForward, false) - want := "tablet type must be REPLICA or RDONLY: PRIMARY" + want := "invalid tablet type: tablet type must be REPLICA or RDONLY: PRIMARY" if err == nil || err.Error() != want { t.Errorf("SwitchReads(primary) err: %v, want %v", err, want) } From a8750096901e1b132e0de3faf6fc19cd2acf8fbb Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Wed, 9 Aug 2023 12:08:54 -0400 Subject: [PATCH 52/56] Tweaks for unit tests Signed-off-by: Matt Lord --- go/vt/binlog/binlogplayer/mock_dbclient.go | 4 +++- .../tabletmanager/rpc_vreplication_test.go | 4 ++-- .../tabletmanager/vreplication/vcopier_test.go | 18 +++++++++--------- .../tabletmanager/vreplication/vreplicator.go | 6 +++--- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/go/vt/binlog/binlogplayer/mock_dbclient.go b/go/vt/binlog/binlogplayer/mock_dbclient.go index 803ead258be..ac92608a63d 100644 --- a/go/vt/binlog/binlogplayer/mock_dbclient.go +++ b/go/vt/binlog/binlogplayer/mock_dbclient.go @@ -59,8 +59,10 @@ func NewMockDBClient(t *testing.T) *MockDBClient { "insert into _vt.vreplication_log": {}, }, // For some reason the following queries are not sent in a determinstic order. + // In the case of foreign_key_checks, it's likely because of a potential no-op + // done in a defer. 
ignored: map[string]struct{}{ - "set foreign_key_checks=1": {}, + //"set foreign_key_checks=1": {}, }, } } diff --git a/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go b/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go index 6576fa6c906..8c8e5c0e37b 100644 --- a/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go +++ b/go/vt/vttablet/tabletmanager/rpc_vreplication_test.go @@ -61,8 +61,8 @@ const ( setPermissiveSQLMode = "SET @@session.sql_mode='NO_AUTO_VALUE_ON_ZERO'" setStrictSQLMode = "SET @@session.sql_mode='ONLY_FULL_GROUP_BY,NO_AUTO_VALUE_ON_ZERO,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION'" getSQLMode = "SELECT @@session.sql_mode AS sql_mode" - getFKChecks = "select @@foreign_key_checks;" - enableFKChecks = "set foreign_key_checks=1;" + getFKChecks = "select @@foreign_key_checks" + enableFKChecks = "set foreign_key_checks=1" sqlMode = "ONLY_FULL_GROUP_BY,NO_AUTO_VALUE_ON_ZERO,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION" getBinlogRowImage = "select @@binlog_row_image" insertStreamsCreatedLog = "insert into _vt.vreplication_log(vrepl_id, type, state, message) values(1, 'Stream Created', '', '%s'" diff --git a/go/vt/vttablet/tabletmanager/vreplication/vcopier_test.go b/go/vt/vttablet/tabletmanager/vreplication/vcopier_test.go index ddb2b26e5f7..129827c5d47 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/vcopier_test.go +++ b/go/vt/vttablet/tabletmanager/vreplication/vcopier_test.go @@ -477,19 +477,19 @@ func testPlayerCopyTablesWithFK(t *testing.T) { expectDBClientQueries(t, qh.Expect( "/insert into _vt.vreplication", "/update _vt.vreplication set message='Picked source tablet.*", - "select @@foreign_key_checks;", + "select @@foreign_key_checks", // Create the list of tables to copy and transition to Copying state. 
"begin", "/insert into _vt.copy_state", "/update _vt.vreplication set state='Copying'", "commit", - "set foreign_key_checks=0;", + "set foreign_key_checks=0", // The first fast-forward has no starting point. So, it just saves the current position. "/update _vt.vreplication set pos=", ).Then(func(expect qh.ExpectationSequencer) qh.ExpectationSequencer { // With parallel inserts, new db client connects are created on-the-fly. if vreplicationParallelInsertWorkers > 1 { - return expect.Then(qh.Eventually("set foreign_key_checks=0;")) + return expect.Then(qh.Eventually("set foreign_key_checks=0")) } return expect }).Then(qh.Eventually( @@ -500,18 +500,18 @@ func testPlayerCopyTablesWithFK(t *testing.T) { `/insert into _vt.copy_state \(lastpk, vrepl_id, table_name\) values \('fields:{name:\\"id\\" type:INT32 charset:63 flags:53251} rows:{lengths:1 values:\\"2\\"}'.*`, "commit", )).Then(qh.Immediately( - "set foreign_key_checks=0;", + "set foreign_key_checks=0", // copy of dst1 is done: delete from copy_state. "/delete cs, pca from _vt.copy_state as cs left join _vt.post_copy_action as pca on cs.vrepl_id=pca.vrepl_id and cs.table_name=pca.table_name.*dst1", // The next FF executes and updates the position before copying. - "set foreign_key_checks=0;", + "set foreign_key_checks=0", "begin", "/update _vt.vreplication set pos=", "commit", )).Then(func(expect qh.ExpectationSequencer) qh.ExpectationSequencer { // With parallel inserts, new db client connects are created on-the-fly. 
if vreplicationParallelInsertWorkers > 1 { - return expect.Then(qh.Eventually("set foreign_key_checks=0;")) + return expect.Then(qh.Eventually("set foreign_key_checks=0")) } return expect }).Then(qh.Eventually( @@ -521,11 +521,11 @@ func testPlayerCopyTablesWithFK(t *testing.T) { `/insert into _vt.copy_state \(lastpk, vrepl_id, table_name\) values \('fields:{name:\\"id\\" type:INT32 charset:63 flags:53251} rows:{lengths:1 values:\\"2\\"}'.*`, "commit", )).Then(qh.Immediately( - "set foreign_key_checks=0;", + "set foreign_key_checks=0", // copy of dst1 is done: delete from copy_state. "/delete cs, pca from _vt.copy_state as cs left join _vt.post_copy_action as pca on cs.vrepl_id=pca.vrepl_id and cs.table_name=pca.table_name.*dst2", // All tables copied. Final catch up followed by Running state. - "set foreign_key_checks=1;", + "set foreign_key_checks=1", "/update _vt.vreplication set state='Running'", ))) @@ -545,7 +545,7 @@ func testPlayerCopyTablesWithFK(t *testing.T) { t.Fatal(err) } expectDBClientQueries(t, qh.Expect( - "set foreign_key_checks=1;", + "set foreign_key_checks=1", "begin", "/delete from _vt.vreplication", "/delete from _vt.copy_state", diff --git a/go/vt/vttablet/tabletmanager/vreplication/vreplicator.go b/go/vt/vttablet/tabletmanager/vreplication/vreplicator.go index daed5a58147..39bd8db2519 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/vreplicator.go +++ b/go/vt/vttablet/tabletmanager/vreplication/vreplicator.go @@ -484,7 +484,7 @@ func encodeString(in string) string { } func (vr *vreplicator) getSettingFKCheck() error { - qr, err := vr.dbClient.Execute("select @@foreign_key_checks;") + qr, err := vr.dbClient.Execute("select @@foreign_key_checks") if err != nil { return err } @@ -499,7 +499,7 @@ func (vr *vreplicator) getSettingFKCheck() error { } func (vr *vreplicator) resetFKCheckAfterCopy(dbClient *vdbClient) error { - _, err := dbClient.Execute(fmt.Sprintf("set foreign_key_checks=%d;", vr.originalFKCheckSetting)) + _, err := 
dbClient.Execute(fmt.Sprintf("set foreign_key_checks=%d", vr.originalFKCheckSetting)) return err } @@ -587,7 +587,7 @@ func (vr *vreplicator) updateHeartbeatTime(tm int64) error { } func (vr *vreplicator) clearFKCheck(dbClient *vdbClient) error { - _, err := dbClient.Execute("set foreign_key_checks=0;") + _, err := dbClient.Execute("set foreign_key_checks=0") return err } From 7de744fb845a6baf865c6ac50275162df9885558 Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Wed, 9 Aug 2023 12:58:29 -0400 Subject: [PATCH 53/56] We don't need the explicit ignore any more. Signed-off-by: Matt Lord --- go/vt/binlog/binlogplayer/mock_dbclient.go | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/go/vt/binlog/binlogplayer/mock_dbclient.go b/go/vt/binlog/binlogplayer/mock_dbclient.go index ac92608a63d..0c7b58b7916 100644 --- a/go/vt/binlog/binlogplayer/mock_dbclient.go +++ b/go/vt/binlog/binlogplayer/mock_dbclient.go @@ -58,12 +58,7 @@ func NewMockDBClient(t *testing.T) *MockDBClient { "select id, type, state, message from _vt.vreplication_log": {}, "insert into _vt.vreplication_log": {}, }, - // For some reason the following queries are not sent in a determinstic order. - // In the case of foreign_key_checks, it's likely because of a potential no-op - // done in a defer. 
- ignored: map[string]struct{}{ - //"set foreign_key_checks=1": {}, - }, + ignored: map[string]struct{}{}, } } From 9202a4c80aa5ac30a02bdbb7d747aeab85b7918d Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Wed, 9 Aug 2023 13:37:01 -0400 Subject: [PATCH 54/56] Attempts to do more deflaking Signed-off-by: Matt Lord --- go/vt/binlog/binlogplayer/mock_dbclient.go | 8 ++++++++ go/vt/vtctl/vdiff2_test.go | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/go/vt/binlog/binlogplayer/mock_dbclient.go b/go/vt/binlog/binlogplayer/mock_dbclient.go index 0c7b58b7916..a30ccdead7a 100644 --- a/go/vt/binlog/binlogplayer/mock_dbclient.go +++ b/go/vt/binlog/binlogplayer/mock_dbclient.go @@ -19,6 +19,7 @@ package binlogplayer import ( "regexp" "strings" + "sync" "testing" "time" @@ -34,6 +35,7 @@ type MockDBClient struct { t *testing.T UName string expect []*mockExpect + expectMu sync.Mutex currentResult int done chan struct{} invariants map[string]*sqltypes.Result @@ -79,6 +81,8 @@ func (dc *MockDBClient) ExpectRequest(query string, result *sqltypes.Result, err dc.done = make(chan struct{}) default: } + dc.expectMu.Lock() + defer dc.expectMu.Unlock() dc.expect = append(dc.expect, &mockExpect{ query: query, result: result, @@ -95,6 +99,8 @@ func (dc *MockDBClient) ExpectRequestRE(queryRE string, result *sqltypes.Result, dc.done = make(chan struct{}) default: } + dc.expectMu.Lock() + defer dc.expectMu.Unlock() dc.expect = append(dc.expect, &mockExpect{ query: queryRE, re: regexp.MustCompile(queryRE), @@ -163,6 +169,8 @@ func (dc *MockDBClient) ExecuteFetch(query string, maxrows int) (qr *sqltypes.Re } } + dc.expectMu.Lock() + defer dc.expectMu.Unlock() if dc.currentResult >= len(dc.expect) { dc.t.Fatalf("DBClientMock: query: %s, no more requests are expected", query) } diff --git a/go/vt/vtctl/vdiff2_test.go b/go/vt/vtctl/vdiff2_test.go index 368f21eb93b..cb19621be0c 100644 --- a/go/vt/vtctl/vdiff2_test.go +++ b/go/vt/vtctl/vdiff2_test.go @@ -473,7 +473,7 @@ func 
TestBuildProgressReport(t *testing.T) { t.Run(tt.name, func(t *testing.T) { buildProgressReport(tt.args.summary, tt.args.rowsToCompare) // We always check the percentage - require.Equal(t, tt.want.Percentage, tt.args.summary.Progress.Percentage) + require.Equal(t, int(tt.want.Percentage), int(tt.args.summary.Progress.Percentage)) // We only check the ETA if there is one if tt.want.ETA != "" { From 7f5d0a1affd7ee4bbce9279cc7bc165547224f5a Mon Sep 17 00:00:00 2001 From: Matt Lord Date: Wed, 9 Aug 2023 16:28:32 -0400 Subject: [PATCH 55/56] Squeeze in another vtctldclient fix This was just noticed when helping with an issue. Signed-off-by: Matt Lord --- go/cmd/vtctldclient/command/movetables.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/go/cmd/vtctldclient/command/movetables.go b/go/cmd/vtctldclient/command/movetables.go index 55b964a468a..4ac7f2b605c 100644 --- a/go/cmd/vtctldclient/command/movetables.go +++ b/go/cmd/vtctldclient/command/movetables.go @@ -272,6 +272,9 @@ func commandMoveTablesCreate(cmd *cobra.Command, args []string) error { Workflow: moveTablesOptions.Workflow, TargetKeyspace: moveTablesOptions.TargetKeyspace, SourceKeyspace: moveTablesCreateOptions.SourceKeyspace, + SourceShards: moveTablesCreateOptions.SourceShards, + SourceTimeZone: moveTablesCreateOptions.SourceTimeZone, + ExternalClusterName: moveTablesCreateOptions.ExternalClusterName, Cells: moveTablesCreateOptions.Cells, TabletTypes: moveTablesCreateOptions.TabletTypes, TabletSelectionPreference: tsp, @@ -279,6 +282,7 @@ func commandMoveTablesCreate(cmd *cobra.Command, args []string) error { IncludeTables: moveTablesCreateOptions.IncludeTables, ExcludeTables: moveTablesCreateOptions.ExcludeTables, OnDdl: moveTablesCreateOptions.OnDDL, + DeferSecondaryKeys: moveTablesCreateOptions.DeferSecondaryKeys, AutoStart: moveTablesCreateOptions.AutoStart, StopAfterCopy: moveTablesCreateOptions.StopAfterCopy, } From 4d6d05c68e8d297178d600a89c4f078de1d92aab Mon Sep 17 00:00:00 2001 From: Matt 
Lord Date: Wed, 9 Aug 2023 16:50:07 -0400 Subject: [PATCH 56/56] Building on the last commit to cleanup option handling Signed-off-by: Matt Lord --- go/cmd/vtctldclient/command/movetables.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go/cmd/vtctldclient/command/movetables.go b/go/cmd/vtctldclient/command/movetables.go index 4ac7f2b605c..31a7c2fe7d8 100644 --- a/go/cmd/vtctldclient/command/movetables.go +++ b/go/cmd/vtctldclient/command/movetables.go @@ -226,7 +226,6 @@ var ( TabletTypes []topodatapb.TabletType TabletTypesInPreferenceOrder bool SourceShards []string - ExternalClusterName string AllTables bool IncludeTables []string ExcludeTables []string @@ -274,7 +273,6 @@ func commandMoveTablesCreate(cmd *cobra.Command, args []string) error { SourceKeyspace: moveTablesCreateOptions.SourceKeyspace, SourceShards: moveTablesCreateOptions.SourceShards, SourceTimeZone: moveTablesCreateOptions.SourceTimeZone, - ExternalClusterName: moveTablesCreateOptions.ExternalClusterName, Cells: moveTablesCreateOptions.Cells, TabletTypes: moveTablesCreateOptions.TabletTypes, TabletSelectionPreference: tsp, @@ -516,12 +514,14 @@ func init() { MoveTablesCreate.MarkPersistentFlagRequired("source-keyspace") MoveTablesCreate.Flags().StringSliceVarP(&moveTablesCreateOptions.Cells, "cells", "c", nil, "Cells and/or CellAliases to copy table data from") MoveTablesCreate.Flags().StringSliceVar(&moveTablesCreateOptions.SourceShards, "source-shards", nil, "Source shards to copy data from when performing a partial MoveTables (experimental)") + MoveTablesCreate.Flags().StringVar(&moveTablesCreateOptions.SourceTimeZone, "source-time-zone", "", "Specifying this causes any DATETIME fields to be converted from the given time zone into UTC") MoveTablesCreate.Flags().Var((*topoproto.TabletTypeListFlag)(&moveTablesCreateOptions.TabletTypes), "tablet-types", "Source tablet types to replicate table data from (e.g. 
PRIMARY,REPLICA,RDONLY)") MoveTablesCreate.Flags().BoolVar(&moveTablesCreateOptions.TabletTypesInPreferenceOrder, "tablet-types-in-preference-order", true, "When performing source tablet selection, look for candidates in the type order as they are listed in the tablet-types flag") MoveTablesCreate.Flags().BoolVar(&moveTablesCreateOptions.AllTables, "all-tables", false, "Copy all tables from the source") MoveTablesCreate.Flags().StringSliceVar(&moveTablesCreateOptions.IncludeTables, "tables", nil, "Source tables to copy") MoveTablesCreate.Flags().StringSliceVar(&moveTablesCreateOptions.ExcludeTables, "exclude-tables", nil, "Source tables to exclude from copying") MoveTablesCreate.Flags().StringVar(&moveTablesCreateOptions.OnDDL, "on-ddl", onDDLDefault, "What to do when DDL is encountered in the VReplication stream. Possible values are IGNORE, STOP, EXEC, and EXEC_IGNORE") + MoveTablesCreate.Flags().BoolVar(&moveTablesCreateOptions.DeferSecondaryKeys, "defer-secondary-keys", false, "Defer secondary index creation for a table until after it has been copied") MoveTablesCreate.Flags().BoolVar(&moveTablesCreateOptions.AutoStart, "auto-start", true, "Start the MoveTables workflow after creating it") MoveTablesCreate.Flags().BoolVar(&moveTablesCreateOptions.StopAfterCopy, "stop-after-copy", false, "Stop the MoveTables workflow after it's finished copying the existing rows and before it starts replicating changes") MoveTables.AddCommand(MoveTablesCreate)