Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[release-1.6] 🐛 Watch external objects for machine before deleting #10177

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions internal/controllers/machine/machine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Clu
if err != nil {
switch err {
case errNoControlPlaneNodes, errLastControlPlaneNode, errNilNodeRef, errClusterIsBeingDeleted, errControlPlaneIsBeingDeleted:
var nodeName = ""
nodeName := ""
if m.Status.NodeRef != nil {
nodeName = m.Status.NodeRef.Name
}
Expand Down Expand Up @@ -427,7 +427,7 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Clu
return ctrl.Result{}, errors.Wrap(err, "failed to patch Machine")
}

infrastructureDeleted, err := r.reconcileDeleteInfrastructure(ctx, m)
infrastructureDeleted, err := r.reconcileDeleteInfrastructure(ctx, cluster, m)
if err != nil {
return ctrl.Result{}, err
}
Expand All @@ -436,7 +436,7 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Clu
return ctrl.Result{}, nil
}

bootstrapDeleted, err := r.reconcileDeleteBootstrap(ctx, m)
bootstrapDeleted, err := r.reconcileDeleteBootstrap(ctx, cluster, m)
if err != nil {
return ctrl.Result{}, err
}
Expand Down Expand Up @@ -723,8 +723,8 @@ func (r *Reconciler) deleteNode(ctx context.Context, cluster *clusterv1.Cluster,
return nil
}

func (r *Reconciler) reconcileDeleteBootstrap(ctx context.Context, m *clusterv1.Machine) (bool, error) {
obj, err := r.reconcileDeleteExternal(ctx, m, m.Spec.Bootstrap.ConfigRef)
func (r *Reconciler) reconcileDeleteBootstrap(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (bool, error) {
obj, err := r.reconcileDeleteExternal(ctx, cluster, m, m.Spec.Bootstrap.ConfigRef)
if err != nil {
return false, err
}
Expand All @@ -743,8 +743,8 @@ func (r *Reconciler) reconcileDeleteBootstrap(ctx context.Context, m *clusterv1.
return false, nil
}

func (r *Reconciler) reconcileDeleteInfrastructure(ctx context.Context, m *clusterv1.Machine) (bool, error) {
obj, err := r.reconcileDeleteExternal(ctx, m, &m.Spec.InfrastructureRef)
func (r *Reconciler) reconcileDeleteInfrastructure(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (bool, error) {
obj, err := r.reconcileDeleteExternal(ctx, cluster, m, &m.Spec.InfrastructureRef)
if err != nil {
return false, err
}
Expand All @@ -764,7 +764,7 @@ func (r *Reconciler) reconcileDeleteInfrastructure(ctx context.Context, m *clust
}

// reconcileDeleteExternal tries to delete external references.
func (r *Reconciler) reconcileDeleteExternal(ctx context.Context, m *clusterv1.Machine, ref *corev1.ObjectReference) (*unstructured.Unstructured, error) {
func (r *Reconciler) reconcileDeleteExternal(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine, ref *corev1.ObjectReference) (*unstructured.Unstructured, error) {
if ref == nil {
return nil, nil
}
Expand All @@ -777,6 +777,14 @@ func (r *Reconciler) reconcileDeleteExternal(ctx context.Context, m *clusterv1.M
}

if obj != nil {
// reconcileExternal ensures that we set the object's OwnerReferences correctly and watch the object.
// The machine delete logic depends on reconciling the machine when the external objects are deleted.
// This avoids a race condition where the machine is deleted before the external objects are ever reconciled
// by this controller.
if _, err := r.ensureExternalOwnershipAndWatch(ctx, cluster, m, ref); err != nil {
return nil, err
}

// Issue a delete request.
if err := r.Client.Delete(ctx, obj); err != nil && !apierrors.IsNotFound(err) {
return obj, errors.Wrapf(err,
Expand Down
56 changes: 35 additions & 21 deletions internal/controllers/machine/machine_controller_phases.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,7 @@ import (
"sigs.k8s.io/cluster-api/util/patch"
)

var (
externalReadyWait = 30 * time.Second
)
var externalReadyWait = 30 * time.Second

func (r *Reconciler) reconcilePhase(_ context.Context, m *clusterv1.Machine) {
originalPhase := m.Status.Phase
Expand Down Expand Up @@ -89,12 +87,44 @@ func (r *Reconciler) reconcilePhase(_ context.Context, m *clusterv1.Machine) {

// reconcileExternal handles generic unstructured objects referenced by a Machine.
func (r *Reconciler) reconcileExternal(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine, ref *corev1.ObjectReference) (external.ReconcileOutput, error) {
log := ctrl.LoggerFrom(ctx)

if err := utilconversion.UpdateReferenceAPIContract(ctx, r.Client, ref); err != nil {
return external.ReconcileOutput{}, err
}

result, err := r.ensureExternalOwnershipAndWatch(ctx, cluster, m, ref)
if err != nil {
return external.ReconcileOutput{}, err
}
if result.RequeueAfter > 0 || result.Paused {
return result, nil
}

obj := result.Result

// Set failure reason and message, if any.
failureReason, failureMessage, err := external.FailuresFrom(obj)
if err != nil {
return external.ReconcileOutput{}, err
}
if failureReason != "" {
machineStatusError := capierrors.MachineStatusError(failureReason)
m.Status.FailureReason = &machineStatusError
}
if failureMessage != "" {
m.Status.FailureMessage = pointer.String(
fmt.Sprintf("Failure detected from referenced resource %v with name %q: %s",
obj.GroupVersionKind(), obj.GetName(), failureMessage),
)
}

return external.ReconcileOutput{Result: obj}, nil
}

// ensureExternalOwnershipAndWatch ensures that only the Machine owns the external object,
// adds a watch to the external object if one does not already exist and adds the necessary labels.
func (r *Reconciler) ensureExternalOwnershipAndWatch(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine, ref *corev1.ObjectReference) (external.ReconcileOutput, error) {
log := ctrl.LoggerFrom(ctx)

obj, err := external.Get(ctx, r.UnstructuredCachingClient, ref, m.Namespace)
if err != nil {
if apierrors.IsNotFound(errors.Cause(err)) {
Expand Down Expand Up @@ -146,22 +176,6 @@ func (r *Reconciler) reconcileExternal(ctx context.Context, cluster *clusterv1.C
return external.ReconcileOutput{}, err
}

// Set failure reason and message, if any.
failureReason, failureMessage, err := external.FailuresFrom(obj)
if err != nil {
return external.ReconcileOutput{}, err
}
if failureReason != "" {
machineStatusError := capierrors.MachineStatusError(failureReason)
m.Status.FailureReason = &machineStatusError
}
if failureMessage != "" {
m.Status.FailureMessage = pointer.String(
fmt.Sprintf("Failure detected from referenced resource %v with name %q: %s",
obj.GroupVersionKind(), obj.GetName(), failureMessage),
)
}

return external.ReconcileOutput{Result: obj}, nil
}

Expand Down
Loading
Loading