From 5a72e5e87e72130373cf19510b943529f2c599f7 Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Tue, 28 Mar 2023 14:54:26 +0200 Subject: [PATCH] auto-update: stop+start instead of restart systemd units It turns out the restart is _not_ a stop+start but keeps certain resources open and is subject to some timeouts that may differ across distributions' default settings. [NO NEW TESTS NEEDED] as I have absolutely no idea how to reliably cause the failure/flake/race. Fixes: #17607 Signed-off-by: Valentin Rothberg --- pkg/autoupdate/autoupdate.go | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/pkg/autoupdate/autoupdate.go b/pkg/autoupdate/autoupdate.go index 10258701f8d9..ccd81d8cdbce 100644 --- a/pkg/autoupdate/autoupdate.go +++ b/pkg/autoupdate/autoupdate.go @@ -334,8 +334,16 @@ func (t *task) rollbackImage() error { // restartSystemdUnit restarts the systemd unit the container is running in. func (u *updater) restartSystemdUnit(ctx context.Context, unit string) error { + if err := u.stopSystemdUnit(ctx, unit); err != nil { + return err + } + return u.startSystemdUnit(ctx, unit) +} + +// startSystemdUnit starts the systemd unit the container is running in. +func (u *updater) startSystemdUnit(ctx context.Context, unit string) error { restartChan := make(chan string) - if _, err := u.conn.RestartUnitContext(ctx, unit, "replace", restartChan); err != nil { + if _, err := u.conn.StartUnitContext(ctx, unit, "replace", restartChan); err != nil { return err } @@ -349,7 +357,28 @@ func (u *updater) restartSystemdUnit(ctx context.Context, unit string) error { return nil default: - return fmt.Errorf("expected %q but received %q", "done", result) + return fmt.Errorf("error starting systemd unit %q expected %q but received %q", unit, "done", result) + } +} + +// stopSystemdUnit stops the systemd unit the container is running in. 
+func (u *updater) stopSystemdUnit(ctx context.Context, unit string) error { + restartChan := make(chan string) + if _, err := u.conn.StopUnitContext(ctx, unit, "replace", restartChan); err != nil { + return err + } + + // Wait for the restart to finish and actually check if it was + // successful or not. + result := <-restartChan + + switch result { + case "done": + logrus.Infof("Successfully stopped systemd unit %q", unit) + return nil + + default: + return fmt.Errorf("error stopping systemd unit %q expected %q but received %q", unit, "done", result) } }