Skip to content

Commit

Permalink
Mark InProgress backup/restore as failed upon requeuing
Browse files Browse the repository at this point in the history
Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>

remove uuid, return err to requeue instead of requeue: true

Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>
  • Loading branch information
kaovilai committed Jun 6, 2024
1 parent a8d77ea commit 97b85e8
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 18 deletions.
18 changes: 17 additions & 1 deletion pkg/controller/backup_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,20 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
switch original.Status.Phase {
case "", velerov1api.BackupPhaseNew:
// only process new backups
case velerov1api.BackupPhaseInProgress:
// if backup is in progress, we should not process it again
// we want to mark it as failed to avoid it being stuck in progress
// if so, mark it as failed, last loop did not successfully complete the backup
log.Debug("Backup was processed by this backupReconciler and still has in progress status upon reconcile, marking it as failed")
failedCopy := original.DeepCopy()
failedCopy.Status.Phase = velerov1api.BackupPhaseFailed
failedCopy.Status.FailureReason = "Backup from previous reconcile still in progress"
if err := kubeutil.PatchResource(original, failedCopy, b.kbClient); err != nil {
// return the error so the status can be re-processed; it's currently still not completed or failed
return ctrl.Result{}, err

Check warning on line 243 in pkg/controller/backup_controller.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/backup_controller.go#L243

Added line #L243 was not covered by tests
}
// patch to mark it as failed succeeded, do not requeue
return ctrl.Result{}, nil
default:
b.logger.WithFields(logrus.Fields{
"backup": kubeutil.NamespaceAndName(original),
Expand All @@ -246,7 +260,6 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
request.Status.Phase = velerov1api.BackupPhaseInProgress
request.Status.StartTimestamp = &metav1.Time{Time: b.clock.Now()}
}

// update status
if err := kubeutil.PatchResource(original, request.Backup, b.kbClient); err != nil {
return ctrl.Result{}, errors.Wrapf(err, "error updating Backup status to %s", request.Status.Phase)
Expand Down Expand Up @@ -307,7 +320,10 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
log.Info("Updating backup's final status")
if err := kubeutil.PatchResource(original, request.Backup, b.kbClient); err != nil {
log.WithError(err).Error("error updating backup's final status")
// return the error so the status can be re-processed; it's currently still not completed or failed
return ctrl.Result{}, err

Check warning on line 324 in pkg/controller/backup_controller.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/backup_controller.go#L324

Added line #L324 was not covered by tests
}

return ctrl.Result{}, nil
}

Expand Down
45 changes: 28 additions & 17 deletions pkg/controller/restore_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
// the controller.
log := r.logger.WithField("Restore", req.NamespacedName.String())

restore := &api.Restore{}
err := r.kbClient.Get(ctx, client.ObjectKey{Namespace: req.Namespace, Name: req.Name}, restore)
original := &api.Restore{}
err := r.kbClient.Get(ctx, client.ObjectKey{Namespace: req.Namespace, Name: req.Name}, original)
if err != nil {
if apierrors.IsNotFound(err) {
log.Debugf("restore[%s] not found", req.Name)
Expand All @@ -181,15 +181,15 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
}

// deal with finalizer
if !restore.DeletionTimestamp.IsZero() {
if !original.DeletionTimestamp.IsZero() {
// check the finalizer and run clean-up
if controllerutil.ContainsFinalizer(restore, ExternalResourcesFinalizer) {
if err := r.deleteExternalResources(restore); err != nil {
if controllerutil.ContainsFinalizer(original, ExternalResourcesFinalizer) {
if err := r.deleteExternalResources(original); err != nil {
log.Errorf("fail to delete external resources: %s", err.Error())
return ctrl.Result{}, err
}
// once finish clean-up, remove the finalizer from the restore so that the restore will be unlocked and deleted.
original := restore.DeepCopy()
restore := original.DeepCopy()
controllerutil.RemoveFinalizer(restore, ExternalResourcesFinalizer)
if err := kubeutil.PatchResource(original, restore, r.kbClient); err != nil {
log.Errorf("fail to remove finalizer: %s", err.Error())
Expand All @@ -203,29 +203,40 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
}

// add finalizer
if restore.DeletionTimestamp.IsZero() && !controllerutil.ContainsFinalizer(restore, ExternalResourcesFinalizer) {
original := restore.DeepCopy()
if original.DeletionTimestamp.IsZero() && !controllerutil.ContainsFinalizer(original, ExternalResourcesFinalizer) {
restore := original.DeepCopy()
controllerutil.AddFinalizer(restore, ExternalResourcesFinalizer)
if err := kubeutil.PatchResource(original, restore, r.kbClient); err != nil {
log.Errorf("fail to add finalizer: %s", err.Error())
return ctrl.Result{}, err
}
}

switch restore.Status.Phase {
switch original.Status.Phase {
case "", api.RestorePhaseNew:
// only process new restores
case api.RestorePhaseInProgress:

Check warning on line 218 in pkg/controller/restore_controller.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/restore_controller.go#L218

Added line #L218 was not covered by tests
// if restore is in progress, we should not process it again
// we want to mark it as failed to avoid it being stuck in progress
// if so, mark it as failed, last loop did not successfully complete the restore
log.Debug("Restore was processed by this restoreReconciler and still has in progress status upon reconcile, marking it as failed")
failedCopy := original.DeepCopy()
failedCopy.Status.Phase = api.RestorePhaseFailed
failedCopy.Status.FailureReason = "Restore from previous reconcile still in progress"
if err := kubeutil.PatchResource(original, failedCopy, r.kbClient); err != nil {

Check warning on line 226 in pkg/controller/restore_controller.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/restore_controller.go#L222-L226

Added lines #L222 - L226 were not covered by tests
// return the error so the status can be re-processed; it's currently still not completed or failed
return ctrl.Result{}, err

Check warning on line 228 in pkg/controller/restore_controller.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/restore_controller.go#L228

Added line #L228 was not covered by tests
}
// patch to mark it as failed succeeded, do not requeue
return ctrl.Result{}, nil

Check warning on line 231 in pkg/controller/restore_controller.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/restore_controller.go#L231

Added line #L231 was not covered by tests
default:
r.logger.WithFields(logrus.Fields{
"restore": kubeutil.NamespaceAndName(restore),
"phase": restore.Status.Phase,
"restore": kubeutil.NamespaceAndName(original),
"phase": original.Status.Phase,
}).Debug("Restore is not handled")
return ctrl.Result{}, nil
}

// store a copy of the original restore for creating patch
original := restore.DeepCopy()

restore := original.DeepCopy()
// Validate the restore and fetch the backup
info, resourceModifiers := r.validateAndComplete(restore)

Expand Down Expand Up @@ -278,8 +289,8 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct

if err = kubeutil.PatchResource(original, restore, r.kbClient); err != nil {
log.WithError(errors.WithStack(err)).Info("Error updating restore's final status")
// No need to re-enqueue here, because restore's already set to InProgress before.
// Controller only handle New restore.
// return the error so the status can be re-processed; it's currently still not completed or failed
return ctrl.Result{}, err

Check warning on line 293 in pkg/controller/restore_controller.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/restore_controller.go#L293

Added line #L293 was not covered by tests
}

return ctrl.Result{}, nil
Expand Down

0 comments on commit 97b85e8

Please sign in to comment.