Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

oadp-1.3: OADP-4265: Reconcile To Fail: Add backup/restore trackers #324

Merged
9 changes: 8 additions & 1 deletion pkg/controller/backup_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,11 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
case "", velerov1api.BackupPhaseNew:
// only process new backups
case velerov1api.BackupPhaseInProgress:
// if backup is in progress, we should not process it again
if b.backupTracker.Contains(original.Namespace, original.Name) {
log.Debug("Backup is in progress, skipping")
return ctrl.Result{}, nil
}
// if backup phase is in progress, we should not process it again
// we want to mark it as failed to avoid it being stuck in progress
// if so, mark it as failed, last loop did not successfully complete the backup
log.Debug("Backup has in progress status from prior reconcile, marking it as failed")
Expand Down Expand Up @@ -266,6 +270,7 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr

// update status
if err := kubeutil.PatchResource(original, request.Backup, b.kbClient); err != nil {
// if we fail to patch to inprogress, we will try again in the next reconcile loop
return ctrl.Result{}, errors.Wrapf(err, "error updating Backup status to %s", request.Status.Phase)
}
// store ref to just-updated item for creating patch
Expand Down Expand Up @@ -323,6 +328,8 @@ func (b *backupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
log.Info("Updating backup's final status")
if err := kubeutil.PatchResource(original, request.Backup, b.kbClient); err != nil {
log.WithError(err).Error("error updating backup's final status")
// delete from tracker so next reconcile fails the backup
b.backupTracker.Delete(original.Namespace, original.Name)
// return the error so the status can be re-processed; it's currently still not completed or failed
return ctrl.Result{}, err
}
Expand Down
76 changes: 73 additions & 3 deletions pkg/controller/backup_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"reflect"
"sort"
"strings"
"syscall"
"testing"
"time"

Expand All @@ -43,6 +44,7 @@ import (
"k8s.io/utils/clock"
testclocks "k8s.io/utils/clock/testing"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
kbclient "sigs.k8s.io/controller-runtime/pkg/client"

kubeutil "github.com/vmware-tanzu/velero/pkg/util/kube"
Expand All @@ -63,6 +65,7 @@ import (
pluginmocks "github.com/vmware-tanzu/velero/pkg/plugin/mocks"
biav2 "github.com/vmware-tanzu/velero/pkg/plugin/velero/backupitemaction/v2"
velerotest "github.com/vmware-tanzu/velero/pkg/test"
velerotestmocks "github.com/vmware-tanzu/velero/pkg/test/mocks"
"github.com/vmware-tanzu/velero/pkg/util/boolptr"
"github.com/vmware-tanzu/velero/pkg/util/logging"
)
Expand Down Expand Up @@ -129,9 +132,10 @@ func TestProcessBackupNonProcessedItems(t *testing.T) {
)

c := &backupReconciler{
kbClient: velerotest.NewFakeControllerRuntimeClient(t),
formatFlag: formatFlag,
logger: logger,
kbClient: velerotest.NewFakeControllerRuntimeClient(t),
formatFlag: formatFlag,
logger: logger,
backupTracker: NewBackupTracker(),
}
if test.backup != nil {
require.NoError(t, c.kbClient.Create(context.Background(), test.backup))
Expand All @@ -148,6 +152,72 @@ func TestProcessBackupNonProcessedItems(t *testing.T) {
}
}

// OADP Carry: Test that backup that has status inProgress on reconcile is changed to failed if velero has no memory of it still in-progress.
func TestProcessBackupInProgressFailOnSecondReconcile(t *testing.T) {
tests := []struct {
name string
tracked bool
reconciledPhase velerov1api.BackupPhase
expectedErr error
kaovilai marked this conversation as resolved.
Show resolved Hide resolved
mockFailedPatch bool
}{
{
name: "InProgress backup tracked as being in-progress is not processed",
tracked: true,
reconciledPhase: velerov1api.BackupPhaseInProgress,
},
{
name: "InProgress backup untracked as being in-progress is marked as failed",
tracked: false,
reconciledPhase: velerov1api.BackupPhaseFailed,
},
{
name: "InProgress backup untracked is marked as failed, if patch fails, err is returned from reconcile to retry patch again, backup still inprogress",
tracked: false,
reconciledPhase: velerov1api.BackupPhaseInProgress,
mockFailedPatch: true,
expectedErr: syscall.ECONNREFUSED,
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
formatFlag := logging.FormatText
var (
logger = logging.DefaultLogger(logrus.DebugLevel, formatFlag)
)
backup := defaultBackup().Phase(velerov1api.BackupPhaseInProgress).Result()
var kclient kbclient.Client
fakeKclient := velerotest.NewFakeControllerRuntimeClient(t, backup)
if test.mockFailedPatch {
mockClient := velerotestmocks.NewClient(t)
mockClient.On("Patch", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(syscall.ECONNREFUSED).Once()
mockClient.On("Get", mock.Anything, mock.AnythingOfType("types.NamespacedName"), mock.AnythingOfType("*v1.Backup")).Return(func(ctx context.Context, key client.ObjectKey, obj client.Object) error {
return fakeKclient.Get(ctx, key, obj)
})
kclient = mockClient
} else {
kclient = fakeKclient
}
c := &backupReconciler{
kbClient: kclient,
formatFlag: formatFlag,
logger: logger,
backupTracker: NewBackupTracker(),
}
if test.tracked {
c.backupTracker.Add(backup.Namespace, backup.Name)
}
_, err := c.Reconcile(ctx, ctrl.Request{NamespacedName: types.NamespacedName{Namespace: backup.Namespace, Name: backup.Name}})
assert.Equal(t, test.expectedErr, err)
reconciledBackup := velerov1api.Backup{}
err = c.kbClient.Get(context.Background(), types.NamespacedName{Namespace: backup.Namespace, Name: backup.Name}, &reconciledBackup)
assert.Nil(t, err)
assert.Equal(t, test.reconciledPhase, reconciledBackup.Status.Phase)
})
}
}

func TestProcessBackupValidationFailures(t *testing.T) {
defaultBackupLocation := builder.ForBackupStorageLocation("velero", "loc-1").Result()

Expand Down
9 changes: 9 additions & 0 deletions pkg/controller/restore_controller.go
kaovilai marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ type restoreReconciler struct {

newPluginManager func(logger logrus.FieldLogger) clientmgmt.Manager
backupStoreGetter persistence.ObjectBackupStoreGetter
failingTracker RestoreTracker
}

type backupInfo struct {
Expand Down Expand Up @@ -134,6 +135,7 @@ func NewRestoreReconciler(
kbClient: kbClient,
logger: logger,
restoreLogLevel: restoreLogLevel,
failingTracker: NewRestoreTracker(),
metrics: metrics,
logFormat: logFormat,
clock: &clock.RealClock{},
Expand Down Expand Up @@ -210,6 +212,10 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
case "", api.RestorePhaseNew:
// only process new restores
case api.RestorePhaseInProgress:
if !r.failingTracker.Contains(restore.Namespace, restore.Name) {
log.Debug("Restore in progress, skipping")
return ctrl.Result{}, nil
}
// if restore is in progress, we should not process it again
// we want to mark it as failed to avoid it being stuck in progress
// if so, mark it as failed, last loop did not successfully complete the restore
Expand All @@ -222,6 +228,7 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
return ctrl.Result{}, err
}
// patch to mark it as failed succeeded, do not requeue
r.failingTracker.Delete(restore.Namespace, restore.Name)
return ctrl.Result{}, nil
default:
r.logger.WithFields(logrus.Fields{
Expand Down Expand Up @@ -283,6 +290,8 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct

if err = kubeutil.PatchResource(original, restore, r.kbClient); err != nil {
log.WithError(errors.WithStack(err)).Info("Error updating restore's final status")
// add to failureTracker so next reconcile fails the restore
r.failingTracker.Add(restore.Namespace, restore.Name)
// return the error so the status can be re-processed; it's currently still not completed or failed
return ctrl.Result{}, err
}
Expand Down
76 changes: 76 additions & 0 deletions pkg/controller/restore_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"bytes"
"context"
"io"
"syscall"
"testing"
"time"

Expand All @@ -34,6 +35,7 @@ import (
clocktesting "k8s.io/utils/clock/testing"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
kbclient "sigs.k8s.io/controller-runtime/pkg/client"

corev1 "k8s.io/api/core/v1"

Expand All @@ -48,6 +50,7 @@ import (
riav2 "github.com/vmware-tanzu/velero/pkg/plugin/velero/restoreitemaction/v2"
pkgrestore "github.com/vmware-tanzu/velero/pkg/restore"
velerotest "github.com/vmware-tanzu/velero/pkg/test"
velerotestmocks "github.com/vmware-tanzu/velero/pkg/test/mocks"
"github.com/vmware-tanzu/velero/pkg/util/logging"
"github.com/vmware-tanzu/velero/pkg/util/results"
"github.com/vmware-tanzu/velero/pkg/volume"
Expand Down Expand Up @@ -922,6 +925,79 @@ func TestMostRecentCompletedBackup(t *testing.T) {
assert.Equal(t, expected, mostRecentCompletedBackup(backups))
}

// OADP Carry: Test that restore that has status inProgress on reconcile is changed to failed if velero has memory of it failing.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sseago from a rebase perspective is this ok? would prefer another file or something different?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rebase perspective is ok from slack.

Adding cases where err is expected from Reconcile when patch fails again.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@weshayutin It's a separate test func, so it should be fine in the same file.

func TestProcessRestoreInProgressFailOnSecondReconcile(t *testing.T) {
tests := []struct {
name string
trackedAsFailed bool
reconciledPhase velerov1api.RestorePhase
expectedErr error
mockFailedPatch bool
}{
{
name: "InProgress restore not tracked as failing is not processed",
trackedAsFailed: false,
reconciledPhase: velerov1api.RestorePhaseInProgress,
},
{
name: "InProgress restore tracked as failing is marked as failed",
trackedAsFailed: true,
reconciledPhase: velerov1api.RestorePhaseFailed,
},
{
name: "InProgress restore tracked as failing is marked as failed, if patch fails, err is returned from reconcile to retry patch again, restore still inprogress",
trackedAsFailed: true,
reconciledPhase: velerov1api.RestorePhaseInProgress,
mockFailedPatch: true,
expectedErr: syscall.ECONNREFUSED,
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
formatFlag := logging.FormatText
var (
logger = logging.DefaultLogger(logrus.DebugLevel, formatFlag)
)
restore := &velerov1api.Restore{
ObjectMeta: metav1.ObjectMeta{
Namespace: velerov1api.DefaultNamespace,
Name: "restore-1",
},
Status: velerov1api.RestoreStatus{
Phase: velerov1api.RestorePhaseInProgress,
},
}
var kclient kbclient.Client
fakeKclient := velerotest.NewFakeControllerRuntimeClient(t, restore)
if test.mockFailedPatch {
mockClient := velerotestmocks.NewClient(t)
mockClient.On("Patch", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(syscall.ECONNREFUSED).Once()
mockClient.On("Get", mock.Anything, mock.AnythingOfType("types.NamespacedName"), mock.AnythingOfType("*v1.Restore")).Return(func(ctx context.Context, key client.ObjectKey, obj client.Object) error {
return fakeKclient.Get(ctx, key, obj)
})
kclient = mockClient
} else {
kclient = fakeKclient
}
c := &restoreReconciler{
kbClient: kclient,
logger: logger,
failingTracker: NewRestoreTracker(),
}
if test.trackedAsFailed {
c.failingTracker.Add(restore.Namespace, restore.Name)
}
_, err := c.Reconcile(ctx, ctrl.Request{NamespacedName: types.NamespacedName{Namespace: restore.Namespace, Name: restore.Name}})
assert.Equal(t, test.expectedErr, err)
reconciledRestore := velerov1api.Restore{}
err = c.kbClient.Get(context.Background(), types.NamespacedName{Namespace: restore.Namespace, Name: restore.Name}, &reconciledRestore)
assert.Nil(t, err)
assert.Equal(t, test.reconciledPhase, reconciledRestore.Status.Phase)
})
}
}

func NewRestore(ns, name, backup, includeNS, includeResource string, phase velerov1api.RestorePhase) *builder.RestoreBuilder {
restore := builder.ForRestore(ns, name).Phase(phase).Backup(backup).ItemOperationTimeout(60 * time.Minute)

Expand Down
10 changes: 10 additions & 0 deletions pkg/controller/restore_tracker.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package controller

type RestoreTracker interface {
BackupTracker
}

// NewRestoreTracker returns a new RestoreTracker.
func NewRestoreTracker() RestoreTracker {
return NewBackupTracker()
}
17 changes: 17 additions & 0 deletions pkg/test/mocks.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ package test
import (
snapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1"
snapshotv1listers "github.com/kubernetes-csi/external-snapshotter/client/v4/listers/volumesnapshot/v1"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// VolumeSnapshotLister helps list VolumeSnapshots.
Expand All @@ -18,3 +21,17 @@ type VolumeSnapshotLister interface {
VolumeSnapshots(namespace string) snapshotv1listers.VolumeSnapshotNamespaceLister
snapshotv1listers.VolumeSnapshotListerExpansion
}

// Client knows how to perform CRUD operations on Kubernetes objects.

//go:generate mockery --name Client
type Client interface {
client.Reader
client.Writer
client.StatusClient

// Scheme returns the scheme this client is using.
Scheme() *runtime.Scheme
// RESTMapper returns the rest this client is using.
RESTMapper() meta.RESTMapper
}
Loading