-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
OnlineDDL: "cancel-all" command to cancel all pending migrations in keyspace #7099
Changes from all commits
a2abb0a
ee79167
5d802c5
b250bb6
fef58b7
8fa7da8
197cf09
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -65,6 +65,7 @@ var ( | |
) | ||
|
||
var vexecUpdateTemplates = []string{ | ||
`update _vt.schema_migrations set migration_status='val' where mysql_schema='val'`, | ||
`update _vt.schema_migrations set migration_status='val' where migration_uuid='val' and mysql_schema='val'`, | ||
`update _vt.schema_migrations set migration_status='val' where migration_uuid='val' and mysql_schema='val' and shard='val'`, | ||
} | ||
|
@@ -894,6 +895,32 @@ func (e *Executor) cancelMigrations(ctx context.Context, uuids []string) (err er | |
return nil | ||
} | ||
|
||
// cancelPendingMigrations cancels all pending migrations (that are expected to run or are running) | ||
// for this keyspace | ||
func (e *Executor) cancelPendingMigrations(ctx context.Context) (result *sqltypes.Result, err error) { | ||
parsed := sqlparser.BuildParsedQuery(sqlSelectPendingMigrations, "_vt") | ||
r, err := e.execQuery(ctx, parsed.Query) | ||
if err != nil { | ||
return result, err | ||
} | ||
Comment on lines
+900
to
+905
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there can be race condition right between running and completed Migration. For my understanding, what will happen when a running migration just got completed after it is selected in pending migrations Or when cancelPendingMigration is in progress? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're correct, and this race condition is unavoidable. If the user chooses to "cancel" or "cancel-all" just when the migration is about to complete, it's possible that the migration will win the race and complete; in which case the "cancel" is a no-op and nothing happens (the migration is just complete) |
||
var uuids []string | ||
for _, row := range r.Named().Rows { | ||
uuid := row["migration_uuid"].ToString() | ||
uuids = append(uuids, uuid) | ||
} | ||
|
||
result = &sqltypes.Result{} | ||
for _, uuid := range uuids { | ||
log.Infof("cancelPendingMigrations: cancelling %s", uuid) | ||
res, err := e.cancelMigration(ctx, uuid, true) | ||
if err != nil { | ||
return result, err | ||
} | ||
result.AppendResult(res) | ||
} | ||
return result, nil | ||
} | ||
|
||
// scheduleNextMigration attemps to schedule a single migration to run next. | ||
// possibly there's no migrations to run. Possibly there's a migration running right now, | ||
// in which cases nothing happens. | ||
|
@@ -1443,7 +1470,7 @@ func (e *Executor) VExec(ctx context.Context, vx *vexec.TabletVExec) (qr *queryp | |
return nil, err | ||
} | ||
if !match { | ||
return nil, fmt.Errorf("Query must match one of these templates: %s", strings.Join(vexecUpdateTemplates, "; ")) | ||
return nil, fmt.Errorf("Query must match one of these templates: %s; query=%s", strings.Join(vexecUpdateTemplates, "; "), vx.Query) | ||
} | ||
if shard, _ := vx.ColumnStringVal(vx.WhereCols, "shard"); shard != "" { | ||
// shard is specified. | ||
|
@@ -1464,7 +1491,16 @@ func (e *Executor) VExec(ctx context.Context, vx *vexec.TabletVExec) (qr *queryp | |
if err != nil { | ||
return nil, err | ||
} | ||
if !schema.IsOnlineDDLUUID(uuid) { | ||
return nil, fmt.Errorf("Not an Online DDL UUID: %s", uuid) | ||
} | ||
return response(e.cancelMigration(ctx, uuid, true)) | ||
case cancelAllMigrationHint: | ||
uuid, _ := vx.ColumnStringVal(vx.WhereCols, "migration_uuid") | ||
if uuid != "" { | ||
return nil, fmt.Errorf("Unexpetced UUID: %s", uuid) | ||
} | ||
return response(e.cancelPendingMigrations(ctx)) | ||
default: | ||
return nil, fmt.Errorf("Unexpected value for migration_status: %v. Supported values are: %s, %s", | ||
statusVal, retryMigrationHint, cancelMigrationHint) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can this sleep time be reduced. It just delays the e2e test
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfortunately all the online-DDL flow works asynchronously, with check intervals. In this
endtoend
tests intervals are reduced to just a few secs, and still you have to give time for the check, forgh-ost
, for callback...This is why I extracted online-ddl to be on its own CI
shard
; it's still shorter than our otherendtoend
tests so right now I feel like it's not worth investing the optimization effort.