From 463c545165b5414c5d4e9f4a9b5e7a17285ed18c Mon Sep 17 00:00:00 2001
From: Sedef
Date: Mon, 21 Sep 2020 09:05:29 -0700
Subject: [PATCH] Add Node related condition to Machine conditions

---
 api/v1alpha3/condition_consts.go              |  24 ++-
 api/v1alpha4/condition_consts.go              |  24 ++-
 controllers/machine_controller.go             |  13 +-
 controllers/machine_controller_noderef.go     | 115 ++++++++----
 .../machine_controller_noderef_test.go        |  77 +++++++-
 controllers/machine_controller_phases_test.go | 126 +++++++++----
 controllers/machine_controller_test.go        | 166 +++++++++++++++++-
 controllers/remote/cluster_cache.go           |  21 +++
 8 files changed, 479 insertions(+), 87 deletions(-)

diff --git a/api/v1alpha3/condition_consts.go b/api/v1alpha3/condition_consts.go
index d75d0fc78cea..2bbed0a35011 100644
--- a/api/v1alpha3/condition_consts.go
+++ b/api/v1alpha3/condition_consts.go
@@ -109,9 +109,6 @@ const (
 	// MachineHasFailureReason is the reason used when a machine has either a FailureReason or a FailureMessage set on its status.
 	MachineHasFailureReason = "MachineHasFailure"
 
-	// NodeNotFoundReason is the reason used when a machine's node has previously been observed but is now gone.
-	NodeNotFoundReason = "NodeNotFound"
-
 	// NodeStartupTimeoutReason is the reason used when a machine's node does not appear within the specified timeout.
 	NodeStartupTimeoutReason = "NodeStartupTimeout"
@@ -127,3 +124,24 @@ const (
 	// WaitingForRemediationReason is the reason used when a machine fails a health check and remediation is needed.
 	WaitingForRemediationReason = "WaitingForRemediation"
 )
+
+// Conditions and condition reasons for the Machine's Node object.
+const (
+	// MachineNodeHealthyCondition provides info about the operational state of the Kubernetes node hosted on the machine by summarizing node conditions.
+	// If the conditions defined in a Kubernetes node (i.e., NodeReady, NodeMemoryPressure, NodeDiskPressure, NodePIDPressure, and NodeNetworkUnavailable) are in a healthy state, it will be set to True.
+	MachineNodeHealthyCondition ConditionType = "NodeHealthy"
+
+	// WaitingForNodeRefReason (Severity=Info) documents that machine.spec.providerID is not assigned yet.
+	WaitingForNodeRefReason = "WaitingForNodeRef"
+
+	// NodeProvisioningReason (Severity=Info) documents that the machine is in the process of provisioning a node.
+	// NB. provisioning --> NodeRef == ""
+	NodeProvisioningReason = "NodeProvisioning"
+
+	// NodeNotFoundReason (Severity=Error) documents that a machine's node has previously been observed but is now gone.
+	// NB. provisioned --> NodeRef != ""
+	NodeNotFoundReason = "NodeNotFound"
+
+	// NodeConditionsFailedReason (Severity=Warning) documents that a node is not in a healthy state due to at least one failed node condition.
+	NodeConditionsFailedReason = "NodeConditionsFailed"
+)
diff --git a/api/v1alpha4/condition_consts.go b/api/v1alpha4/condition_consts.go
index 191460012150..b21f5d14bcb7 100644
--- a/api/v1alpha4/condition_consts.go
+++ b/api/v1alpha4/condition_consts.go
@@ -116,9 +116,6 @@ const (
 	// MachineHasFailureReason is the reason used when a machine has either a FailureReason or a FailureMessage set on its status.
 	MachineHasFailureReason = "MachineHasFailure"
 
-	// NodeNotFoundReason is the reason used when a machine's node has previously been observed but is now gone.
-	NodeNotFoundReason = "NodeNotFound"
-
 	// NodeStartupTimeoutReason is the reason used when a machine's node does not appear within the specified timeout.
 	NodeStartupTimeoutReason = "NodeStartupTimeout"
@@ -134,3 +131,24 @@ const (
 	// WaitingForRemediationReason is the reason used when a machine fails a health check and remediation is needed.
 	WaitingForRemediationReason = "WaitingForRemediation"
 )
+
+// Conditions and condition reasons for the Machine's Node object.
+const (
+	// MachineNodeHealthyCondition provides info about the operational state of the Kubernetes node hosted on the machine by summarizing node conditions.
+	// If the conditions defined in a Kubernetes node (i.e., NodeReady, NodeMemoryPressure, NodeDiskPressure, NodePIDPressure, and NodeNetworkUnavailable) are in a healthy state, it will be set to True.
+	MachineNodeHealthyCondition ConditionType = "NodeHealthy"
+
+	// WaitingForNodeRefReason (Severity=Info) documents that machine.spec.providerID is not assigned yet.
+	WaitingForNodeRefReason = "WaitingForNodeRef"
+
+	// NodeProvisioningReason (Severity=Info) documents that the machine is in the process of provisioning a node.
+	// NB. provisioning --> NodeRef == ""
+	NodeProvisioningReason = "NodeProvisioning"
+
+	// NodeNotFoundReason (Severity=Error) documents that a machine's node has previously been observed but is now gone.
+	// NB. provisioned --> NodeRef != ""
+	NodeNotFoundReason = "NodeNotFound"
+
+	// NodeConditionsFailedReason (Severity=Warning) documents that a node is not in a healthy state due to at least one failed node condition.
+	NodeConditionsFailedReason = "NodeConditionsFailed"
+)
diff --git a/controllers/machine_controller.go b/controllers/machine_controller.go
index a0a5fff11297..6d5ea03934a6 100644
--- a/controllers/machine_controller.go
+++ b/controllers/machine_controller.go
@@ -263,7 +263,7 @@ func (r *MachineReconciler) reconcile(ctx context.Context, cluster *clusterv1.Cl
 	phases := []func(context.Context, *clusterv1.Cluster, *clusterv1.Machine) (ctrl.Result, error){
 		r.reconcileBootstrap,
 		r.reconcileInfrastructure,
-		r.reconcileNodeRef,
+		r.reconcileNode,
 	}
 
 	res := ctrl.Result{}
@@ -346,6 +346,16 @@ func (r *MachineReconciler) reconcileDelete(ctx context.Context, cluster *cluste
 	// Return early and don't remove the finalizer if we got an error or
 	// the external reconciliation deletion isn't ready.
+ patchHelper, err := patch.NewHelper(m, r.Client) + if err != nil { + return ctrl.Result{}, err + } + conditions.MarkFalse(m, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "") + if err := patchMachine(ctx, patchHelper, m); err != nil { + conditions.MarkFalse(m, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletionFailedReason, clusterv1.ConditionSeverityInfo, "") + return ctrl.Result{}, errors.Wrap(err, "failed to patch Machine") + } + if ok, err := r.reconcileDeleteInfrastructure(ctx, m); !ok || err != nil { return ctrl.Result{}, err } @@ -368,6 +378,7 @@ func (r *MachineReconciler) reconcileDelete(ctx context.Context, cluster *cluste }) if waitErr != nil { log.Error(deleteNodeErr, "Timed out deleting node, moving on", "node", m.Status.NodeRef.Name) + conditions.MarkFalse(m, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletionFailedReason, clusterv1.ConditionSeverityWarning, "") r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedDeleteNode", "error deleting Machine's node: %v", deleteNodeErr) } } diff --git a/controllers/machine_controller_noderef.go b/controllers/machine_controller_noderef.go index dbbd6ce7ca46..e56dac682cf6 100644 --- a/controllers/machine_controller_noderef.go +++ b/controllers/machine_controller_noderef.go @@ -19,13 +19,13 @@ package controllers import ( "context" "fmt" - "time" "github.com/pkg/errors" - apicorev1 "k8s.io/api/core/v1" + corev1 "k8s.io/api/core/v1" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" "sigs.k8s.io/cluster-api/controllers/noderefutil" "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/conditions" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -34,24 +34,14 @@ var ( ErrNodeNotFound = errors.New("cannot find node with matching ProviderID") ) -func (r *MachineReconciler) reconcileNodeRef(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine) (ctrl.Result, error) { - log := ctrl.LoggerFrom(ctx, "cluster", cluster.Name) - - // Check that the Machine hasn't been deleted or in the process. - if !machine.DeletionTimestamp.IsZero() { - return ctrl.Result{}, nil - } - - // Check that the Machine doesn't already have a NodeRef. - if machine.Status.NodeRef != nil { - return ctrl.Result{}, nil - } - +func (r *MachineReconciler) reconcileNode(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine) (ctrl.Result, error) { + log := ctrl.LoggerFrom(ctx, "machine", machine.Name, "namespace", machine.Namespace) log = log.WithValues("cluster", cluster.Name) // Check that the Machine has a valid ProviderID. if machine.Spec.ProviderID == nil || *machine.Spec.ProviderID == "" { - log.Info("Machine doesn't have a valid ProviderID yet") + log.Info("Cannot reconcile Machine's Node, no valid ProviderID yet") + conditions.MarkFalse(machine, clusterv1.MachineNodeHealthyCondition, clusterv1.WaitingForNodeRefReason, clusterv1.ConditionSeverityInfo, "") return ctrl.Result{}, nil } @@ -65,29 +55,93 @@ func (r *MachineReconciler) reconcileNodeRef(ctx context.Context, cluster *clust return ctrl.Result{}, err } - // Get the Node reference. 
-	nodeRef, err := r.getNodeReference(ctx, remoteClient, providerID)
+	// Even if Status.NodeRef exists, continue with the following checks to make sure the Node is healthy.
+	node, err := r.getNode(ctx, remoteClient, providerID)
 	if err != nil {
 		if err == ErrNodeNotFound {
-			log.Info(fmt.Sprintf("Cannot assign NodeRef to Machine: %s, requeuing", ErrNodeNotFound.Error()))
-			return ctrl.Result{RequeueAfter: 20 * time.Second}, nil
+			// When a NodeRef is already set in the status, failing to get that node means it has been deleted.
+			// If Status.NodeRef has not been set yet, the node may still be provisioning.
+			if machine.Status.NodeRef != nil {
+				conditions.MarkFalse(machine, clusterv1.MachineNodeHealthyCondition, clusterv1.NodeNotFoundReason, clusterv1.ConditionSeverityError, "")
+				return ctrl.Result{}, errors.Wrapf(err, "no matching Node for Machine %q in namespace %q", machine.Name, machine.Namespace)
+			}
+			conditions.MarkFalse(machine, clusterv1.MachineNodeHealthyCondition, clusterv1.NodeProvisioningReason, clusterv1.ConditionSeverityWarning, "")
+			return ctrl.Result{Requeue: true}, nil
 		}
-		log.Error(err, "Failed to assign NodeRef")
-		r.recorder.Event(machine, apicorev1.EventTypeWarning, "FailedSetNodeRef", err.Error())
+		log.Error(err, "Failed to retrieve Node by ProviderID")
+		r.recorder.Event(machine, corev1.EventTypeWarning, "Failed to retrieve Node by ProviderID", err.Error())
 		return ctrl.Result{}, err
 	}
 
 	// Set the Machine NodeRef.
-	machine.Status.NodeRef = nodeRef
-	log.Info("Set Machine's NodeRef", "noderef", machine.Status.NodeRef.Name)
-	r.recorder.Event(machine, apicorev1.EventTypeNormal, "SuccessfulSetNodeRef", machine.Status.NodeRef.Name)
+	if machine.Status.NodeRef == nil {
+		machine.Status.NodeRef = &corev1.ObjectReference{
+			Kind:       node.Kind,
+			APIVersion: node.APIVersion,
+			Name:       node.Name,
+			UID:        node.UID,
+		}
+		log.Info("Set Machine's NodeRef", "noderef", machine.Status.NodeRef.Name)
+		r.recorder.Event(machine, corev1.EventTypeNormal, "SuccessfulSetNodeRef", machine.Status.NodeRef.Name)
+	}
+
+	// Do the remaining node health checks, then set the node health to true if all checks pass.
+	status, message := summarizeNodeConditions(node)
+	if status == corev1.ConditionFalse {
+		conditions.MarkFalse(machine, clusterv1.MachineNodeHealthyCondition, clusterv1.NodeConditionsFailedReason, clusterv1.ConditionSeverityWarning, message)
+		return ctrl.Result{}, nil
+	}
+
+	conditions.MarkTrue(machine, clusterv1.MachineNodeHealthyCondition)
 	return ctrl.Result{}, nil
 }
 
-func (r *MachineReconciler) getNodeReference(ctx context.Context, c client.Reader, providerID *noderefutil.ProviderID) (*apicorev1.ObjectReference, error) {
+// summarizeNodeConditions summarizes a Node's conditions and returns a summary status together with the concatenated messages of the failed conditions:
+// if there is at least 1 semantically-negative condition, the summarized status = False;
+// if there is at least 1 semantically-positive condition and no semantically-negative conditions, the summarized status = True;
+// if all conditions are unknown, the summarized status = Unknown.
+// (Semantically-positive conditions: NodeMemoryPressure/NodeDiskPressure/NodePIDPressure == false or Ready == true.)
+func summarizeNodeConditions(node *corev1.Node) (corev1.ConditionStatus, string) { + totalNumOfConditionsChecked := 4 + semanticallyFalseStatus := 0 + unknownStatus := 0 + + message := "" + for _, condition := range node.Status.Conditions { + switch condition.Type { + case corev1.NodeMemoryPressure, corev1.NodeDiskPressure, corev1.NodePIDPressure: + if condition.Status != corev1.ConditionFalse { + message += fmt.Sprintf("Node condition %s is %s", condition.Type, condition.Status) + ". " + if condition.Status == corev1.ConditionUnknown { + unknownStatus++ + continue + } + semanticallyFalseStatus++ + } + case corev1.NodeReady: + if condition.Status != corev1.ConditionTrue { + message += fmt.Sprintf("Node condition %s is %s", condition.Type, condition.Status) + ". " + if condition.Status == corev1.ConditionUnknown { + unknownStatus++ + continue + } + semanticallyFalseStatus++ + } + } + } + if semanticallyFalseStatus > 0 { + return corev1.ConditionFalse, message + } + if semanticallyFalseStatus+unknownStatus < totalNumOfConditionsChecked { + return corev1.ConditionTrue, message + } + return corev1.ConditionUnknown, message +} + +func (r *MachineReconciler) getNode(ctx context.Context, c client.Reader, providerID *noderefutil.ProviderID) (*corev1.Node, error) { log := ctrl.LoggerFrom(ctx, "providerID", providerID) - nodeList := apicorev1.NodeList{} + nodeList := corev1.NodeList{} for { if err := c.List(ctx, &nodeList, client.Continue(nodeList.Continue)); err != nil { return nil, err @@ -101,12 +155,7 @@ func (r *MachineReconciler) getNodeReference(ctx context.Context, c client.Reade } if providerID.Equals(nodeProviderID) { - return &apicorev1.ObjectReference{ - Kind: node.Kind, - APIVersion: node.APIVersion, - Name: node.Name, - UID: node.UID, - }, nil + return &node, nil } } diff --git a/controllers/machine_controller_noderef_test.go b/controllers/machine_controller_noderef_test.go index 29d03cb9c831..1c1be2f8ed53 100644 --- a/controllers/machine_controller_noderef_test.go +++ b/controllers/machine_controller_noderef_test.go @@ -25,11 +25,10 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/record" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/fake" - clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" "sigs.k8s.io/cluster-api/controllers/noderefutil" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" ) func TestGetNodeReference(t *testing.T) { @@ -106,7 +105,7 @@ func TestGetNodeReference(t *testing.T) { providerID, err := noderefutil.NewProviderID(test.providerID) gt.Expect(err).NotTo(HaveOccurred(), "Expected no error parsing provider id %q, got %v", test.providerID, err) - reference, err := r.getNodeReference(ctx, client, providerID) + node, err := r.getNode(ctx, client, providerID) if test.err == nil { g.Expect(err).To(BeNil()) } else { @@ -114,13 +113,77 @@ func TestGetNodeReference(t *testing.T) { gt.Expect(err).To(Equal(test.err), "Expected error %v, got %v", test.err, err) } - if test.expected == nil && reference == nil { + if test.expected == nil && node == nil { return } - gt.Expect(reference.Name).To(Equal(test.expected.Name), "Expected NodeRef's name to be %v, got %v", reference.Name, test.expected.Name) - gt.Expect(reference.Namespace).To(Equal(test.expected.Namespace), "Expected NodeRef's namespace to be %v, got %v", reference.Namespace, test.expected.Namespace) + gt.Expect(node.Name).To(Equal(test.expected.Name), "Expected 
NodeRef's name to be %v, got %v", node.Name, test.expected.Name) + gt.Expect(node.Namespace).To(Equal(test.expected.Namespace), "Expected NodeRef's namespace to be %v, got %v", node.Namespace, test.expected.Namespace) }) } } + +func TestSummarizeNodeConditions(t *testing.T) { + testCases := []struct { + name string + conditions []corev1.NodeCondition + status corev1.ConditionStatus + }{ + { + name: "node is healthy", + conditions: []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionTrue}, + {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionFalse}, + {Type: corev1.NodeDiskPressure, Status: corev1.ConditionFalse}, + {Type: corev1.NodePIDPressure, Status: corev1.ConditionFalse}, + }, + status: corev1.ConditionTrue, + }, + { + name: "all conditions are unknown", + conditions: []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionUnknown}, + {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionUnknown}, + {Type: corev1.NodeDiskPressure, Status: corev1.ConditionUnknown}, + {Type: corev1.NodePIDPressure, Status: corev1.ConditionUnknown}, + }, + status: corev1.ConditionUnknown, + }, + { + name: "multiple semantically failed condition", + conditions: []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionUnknown}, + {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionTrue}, + {Type: corev1.NodeDiskPressure, Status: corev1.ConditionTrue}, + {Type: corev1.NodePIDPressure, Status: corev1.ConditionTrue}, + }, + status: corev1.ConditionFalse, + }, + { + name: "one positive condition when the rest is unknown", + conditions: []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionTrue}, + {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionUnknown}, + {Type: corev1.NodeDiskPressure, Status: corev1.ConditionUnknown}, + {Type: corev1.NodePIDPressure, Status: corev1.ConditionUnknown}, + }, + status: corev1.ConditionTrue, + }, + } + for _, test := range testCases { + t.Run(test.name, func(t *testing.T) { + g := NewWithT(t) + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + }, + Status: corev1.NodeStatus{ + Conditions: test.conditions, + }, + } + status, _ := summarizeNodeConditions(node) + g.Expect(status).To(Equal(test.status)) + }) + } +} diff --git a/controllers/machine_controller_phases_test.go b/controllers/machine_controller_phases_test.go index 3854f7453194..c36b3228d572 100644 --- a/controllers/machine_controller_phases_test.go +++ b/controllers/machine_controller_phases_test.go @@ -29,12 +29,17 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" + "k8s.io/client-go/tools/record" "k8s.io/utils/pointer" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" "sigs.k8s.io/cluster-api/controllers/external" + "sigs.k8s.io/cluster-api/controllers/remote" + "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/kubeconfig" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/log" ) func init() { @@ -250,16 +255,26 @@ var _ = Describe("Reconcile Machine Phases", func() { lastUpdated := metav1.NewTime(time.Now().Add(-10 * time.Second)) machine.Status.LastUpdated = &lastUpdated + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test-node", + Namespace: metav1.NamespaceDefault, + }, + Spec: corev1.NodeSpec{ProviderID: "test://id-1"}, + } + cl := 
fake.NewFakeClientWithScheme(scheme.Scheme, + defaultCluster, + machine, + node, + external.TestGenericBootstrapCRD.DeepCopy(), + external.TestGenericInfrastructureCRD.DeepCopy(), + bootstrapConfig, + infraConfig, + defaultKubeconfigSecret, + ) r := &MachineReconciler{ - Client: fake.NewFakeClientWithScheme(scheme.Scheme, - defaultCluster, - defaultKubeconfigSecret, - machine, - external.TestGenericBootstrapCRD.DeepCopy(), - external.TestGenericInfrastructureCRD.DeepCopy(), - bootstrapConfig, - infraConfig, - ), + Client: cl, + Tracker: remote.NewTestClusterCacheTracker(log.NullLogger{}, cl, scheme.Scheme, client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}), } res, err := r.reconcile(ctx, defaultCluster, machine) @@ -302,16 +317,26 @@ var _ = Describe("Reconcile Machine Phases", func() { lastUpdated := metav1.NewTime(time.Now().Add(-10 * time.Second)) machine.Status.LastUpdated = &lastUpdated + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test-node", + Namespace: metav1.NamespaceDefault, + }, + Spec: corev1.NodeSpec{ProviderID: "test://id-1"}, + } + cl := fake.NewFakeClientWithScheme(scheme.Scheme, + defaultCluster, + machine, + node, + external.TestGenericBootstrapCRD.DeepCopy(), + external.TestGenericInfrastructureCRD.DeepCopy(), + bootstrapConfig, + infraConfig, + defaultKubeconfigSecret, + ) r := &MachineReconciler{ - Client: fake.NewFakeClientWithScheme(scheme.Scheme, - defaultCluster, - defaultKubeconfigSecret, - machine, - external.TestGenericBootstrapCRD.DeepCopy(), - external.TestGenericInfrastructureCRD.DeepCopy(), - bootstrapConfig, - infraConfig, - ), + Client: cl, + Tracker: remote.NewTestClusterCacheTracker(log.NullLogger{}, cl, scheme.Scheme, client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}), } res, err := r.reconcile(ctx, defaultCluster, machine) @@ -364,17 +389,26 @@ var _ = Describe("Reconcile Machine Phases", func() { // Set the LastUpdated to be able to verify it is updated when the phase changes lastUpdated := metav1.NewTime(time.Now().Add(-10 * time.Second)) machine.Status.LastUpdated = &lastUpdated - + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test-node", + Namespace: metav1.NamespaceDefault, + }, + Spec: corev1.NodeSpec{ProviderID: "test://id-1"}, + } + cl := fake.NewFakeClientWithScheme(scheme.Scheme, + defaultCluster, + machine, + node, + external.TestGenericBootstrapCRD.DeepCopy(), + external.TestGenericInfrastructureCRD.DeepCopy(), + bootstrapConfig, + infraConfig, + defaultKubeconfigSecret, + ) r := &MachineReconciler{ - Client: fake.NewFakeClientWithScheme(scheme.Scheme, - defaultCluster, - defaultKubeconfigSecret, - machine, - external.TestGenericBootstrapCRD.DeepCopy(), - external.TestGenericInfrastructureCRD.DeepCopy(), - bootstrapConfig, - infraConfig, - ), + Client: cl, + Tracker: remote.NewTestClusterCacheTracker(log.NullLogger{}, cl, scheme.Scheme, client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}), } res, err := r.reconcile(ctx, defaultCluster, machine) @@ -434,6 +468,9 @@ var _ = Describe("Reconcile Machine Phases", func() { It("Should set `Deleting` when Machine is being deleted", func() { machine := defaultMachine.DeepCopy() + // Need the second Machine to allow deletion of one. 
+ machineSecond := defaultMachine.DeepCopy() + bootstrapConfig := defaultBootstrap.DeepCopy() infraConfig := defaultInfra.DeepCopy() @@ -463,6 +500,11 @@ var _ = Describe("Reconcile Machine Phases", func() { }, "addresses") Expect(err).NotTo(HaveOccurred()) + // Set Cluster label. + machine.Labels[clusterv1.ClusterLabelName] = machine.Spec.ClusterName + machine.ResourceVersion = "1" + machineSecond.Labels[clusterv1.ClusterLabelName] = machine.Spec.ClusterName + machineSecond.Name = "SecondMachine" // Set NodeRef. machine.Status.NodeRef = &corev1.ObjectReference{Kind: "Node", Name: "machine-test-node"} @@ -473,25 +515,33 @@ var _ = Describe("Reconcile Machine Phases", func() { lastUpdated := metav1.NewTime(time.Now().Add(-10 * time.Second)) machine.Status.LastUpdated = &lastUpdated + cl := fake.NewFakeClientWithScheme(scheme.Scheme, + defaultCluster, + defaultKubeconfigSecret, + machine, + machineSecond, + external.TestGenericBootstrapCRD.DeepCopy(), + external.TestGenericInfrastructureCRD.DeepCopy(), + bootstrapConfig, + infraConfig, + ) r := &MachineReconciler{ - Client: fake.NewFakeClientWithScheme(scheme.Scheme, - defaultCluster, - defaultKubeconfigSecret, - machine, - external.TestGenericBootstrapCRD.DeepCopy(), - external.TestGenericInfrastructureCRD.DeepCopy(), - bootstrapConfig, - infraConfig, - ), + Client: cl, + Tracker: remote.NewTestClusterCacheTracker(log.NullLogger{}, cl, scheme.Scheme, client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}), + recorder: record.NewFakeRecorder(32), } - res, err := r.reconcile(ctx, defaultCluster, machine) + res, err := r.reconcileDelete(ctx, defaultCluster, machine) Expect(err).NotTo(HaveOccurred()) Expect(res.Requeue).To(BeFalse()) r.reconcilePhase(ctx, machine) Expect(machine.Status.GetTypedPhase()).To(Equal(clusterv1.MachinePhaseDeleting)) + nodeHealthyCondition := conditions.Get(machine, clusterv1.MachineNodeHealthyCondition) + Expect(nodeHealthyCondition.Status).To(Equal(corev1.ConditionFalse)) + Expect(nodeHealthyCondition.Reason).To(Equal(clusterv1.DeletingReason)) + // Verify that the LastUpdated timestamp was updated Expect(machine.Status.LastUpdated).ToNot(BeNil()) Expect(machine.Status.LastUpdated.After(lastUpdated.Time)).To(BeTrue()) diff --git a/controllers/machine_controller_test.go b/controllers/machine_controller_test.go index 3478c16ba4b0..ab9a25a80b40 100644 --- a/controllers/machine_controller_test.go +++ b/controllers/machine_controller_test.go @@ -23,18 +23,21 @@ import ( . 
"github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/client-go/kubernetes/scheme" "k8s.io/utils/pointer" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4" "sigs.k8s.io/cluster-api/controllers/external" + "sigs.k8s.io/cluster-api/controllers/remote" "sigs.k8s.io/cluster-api/test/helpers" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/patch" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) @@ -210,6 +213,144 @@ func TestIndexMachineByNodeName(t *testing.T) { } } +func TestMachine_Reconcile(t *testing.T) { + g := NewWithT(t) + infraMachine := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "kind": "InfrastructureMachine", + "apiVersion": "infrastructure.cluster.x-k8s.io/v1alpha4", + "metadata": map[string]interface{}{ + "name": "infra-config1", + "namespace": "default", + }, + "spec": map[string]interface{}{ + "providerID": "test://id-1", + }, + }, + } + + defaultBootstrap := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "kind": "BootstrapMachine", + "apiVersion": "bootstrap.cluster.x-k8s.io/v1alpha4", + "metadata": map[string]interface{}{ + "name": "bootstrap-config-machinereconcile", + "namespace": "default", + }, + "spec": map[string]interface{}{}, + "status": map[string]interface{}{}, + }, + } + + testCluster := &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "machine-reconcile-", + Namespace: "default", + }, + } + + g.Expect(testEnv.Create(ctx, testCluster)).To(BeNil()) + g.Expect(testEnv.Create(ctx, infraMachine)).To(BeNil()) + g.Expect(testEnv.Create(ctx, defaultBootstrap)).To(BeNil()) + + defer func(do ...client.Object) { + g.Expect(testEnv.Cleanup(ctx, do...)).To(Succeed()) + }(testCluster) + + machine := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "machine-created-", + Namespace: "default", + Finalizers: []string{clusterv1.MachineFinalizer}, + }, + Spec: clusterv1.MachineSpec{ + ClusterName: testCluster.Name, + InfrastructureRef: corev1.ObjectReference{ + APIVersion: "infrastructure.cluster.x-k8s.io/v1alpha4", + Kind: "InfrastructureMachine", + Name: "infra-config1", + }, + Bootstrap: clusterv1.Bootstrap{ + ConfigRef: &corev1.ObjectReference{ + APIVersion: "bootstrap.cluster.x-k8s.io/v1alpha4", + Kind: "BootstrapMachine", + Name: "bootstrap-config-machinereconcile", + }, + }}, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{ + Name: "test", + }, + }, + } + g.Expect(testEnv.Create(ctx, machine)).To(BeNil()) + + key := client.ObjectKey{Name: machine.Name, Namespace: machine.Namespace} + + // Wait for reconciliation to happen when infra and bootstrap objects are not ready. + g.Eventually(func() bool { + if err := testEnv.Get(ctx, key, machine); err != nil { + return false + } + return len(machine.Finalizers) > 0 + }, timeout).Should(BeTrue()) + + // Set bootstrap ready. + bootstrapPatch := client.MergeFrom(defaultBootstrap.DeepCopy()) + g.Expect(unstructured.SetNestedField(defaultBootstrap.Object, true, "status", "ready")).NotTo(HaveOccurred()) + g.Expect(testEnv.Status().Patch(ctx, defaultBootstrap, bootstrapPatch)).To(Succeed()) + + // Set infrastructure ready. 
+ infraMachinePatch := client.MergeFrom(infraMachine.DeepCopy()) + g.Expect(unstructured.SetNestedField(infraMachine.Object, true, "status", "ready")).To(Succeed()) + g.Expect(testEnv.Status().Patch(ctx, infraMachine, infraMachinePatch)).To(Succeed()) + + // Wait for Machine Ready Condition to become True. + g.Eventually(func() bool { + if err := testEnv.Get(ctx, key, machine); err != nil { + return false + } + if conditions.Has(machine, clusterv1.InfrastructureReadyCondition) != true { + return false + } + readyCondition := conditions.Get(machine, clusterv1.ReadyCondition) + return readyCondition.Status == corev1.ConditionTrue + }, timeout).Should(BeTrue()) + + g.Expect(testEnv.Delete(ctx, machine)).NotTo(HaveOccurred()) + // Wait for Machine to be deleted. + g.Eventually(func() bool { + if err := testEnv.Get(ctx, key, machine); err != nil { + if apierrors.IsNotFound(err) { + return true + } + } + return false + }, timeout).Should(BeTrue()) + + // Check if Machine deletion successfully deleted infrastructure external reference. + keyInfra := client.ObjectKey{Name: infraMachine.GetName(), Namespace: infraMachine.GetNamespace()} + g.Eventually(func() bool { + if err := testEnv.Get(ctx, keyInfra, infraMachine); err != nil { + if apierrors.IsNotFound(err) { + return true + } + } + return false + }, timeout).Should(BeTrue()) + + // Check if Machine deletion successfully deleted bootstrap external reference. + keyBootstrap := client.ObjectKey{Name: defaultBootstrap.GetName(), Namespace: defaultBootstrap.GetNamespace()} + g.Eventually(func() bool { + if err := testEnv.Get(ctx, keyBootstrap, defaultBootstrap); err != nil { + if apierrors.IsNotFound(err) { + return true + } + } + return false + }, timeout).Should(BeTrue()) +} + func TestMachineFinalizer(t *testing.T) { bootstrapData := "some valid data" clusterCorrectMeta := &clusterv1.Cluster{ @@ -500,6 +641,14 @@ func TestReconcileRequest(t *testing.T) { }, } + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "default", + }, + Spec: corev1.NodeSpec{ProviderID: "test://id-1"}, + } + type expected struct { result reconcile.Result err bool @@ -598,6 +747,7 @@ func TestReconcileRequest(t *testing.T) { clientFake := helpers.NewFakeClientWithScheme( scheme.Scheme, + node, &testCluster, &tc.machine, external.TestGenericInfrastructureCRD.DeepCopy(), @@ -605,7 +755,8 @@ func TestReconcileRequest(t *testing.T) { ) r := &MachineReconciler{ - Client: clientFake, + Client: clientFake, + Tracker: remote.NewTestClusterCacheTracker(log.NullLogger{}, clientFake, scheme.Scheme, client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), } result, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: util.ObjectKey(&tc.machine)}) @@ -683,6 +834,7 @@ func TestMachineConditions(t *testing.T) { Finalizers: []string{clusterv1.MachineFinalizer}, }, Spec: clusterv1.MachineSpec{ + ProviderID: pointer.StringPtr("test://id-1"), ClusterName: "test-cluster", InfrastructureRef: corev1.ObjectReference{ APIVersion: "infrastructure.cluster.x-k8s.io/v1alpha4", @@ -705,6 +857,14 @@ func TestMachineConditions(t *testing.T) { }, } + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: metav1.NamespaceDefault, + }, + Spec: corev1.NodeSpec{ProviderID: "test://id-1"}, + } + testcases := []struct { name string infraReady bool @@ -837,10 +997,12 @@ func TestMachineConditions(t *testing.T) { infra, external.TestGenericBootstrapCRD.DeepCopy(), bootstrap, + node, ) r := &MachineReconciler{ - Client: 
clientFake, + Client: clientFake, + Tracker: remote.NewTestClusterCacheTracker(log.NullLogger{}, clientFake, scheme.Scheme, client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), } _, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: util.ObjectKey(&machine)}) diff --git a/controllers/remote/cluster_cache.go b/controllers/remote/cluster_cache.go index ea1d63077e08..985d221e22f0 100644 --- a/controllers/remote/cluster_cache.go +++ b/controllers/remote/cluster_cache.go @@ -68,6 +68,27 @@ func NewClusterCacheTracker(log logr.Logger, manager ctrl.Manager) (*ClusterCach }, nil } +// NewTestClusterCacheTracker creates a new dummy ClusterCacheTracker that can be used by unit tests with fake client. +func NewTestClusterCacheTracker(log logr.Logger, cl client.Client, scheme *runtime.Scheme, objKey client.ObjectKey) *ClusterCacheTracker { + testCacheTracker := &ClusterCacheTracker{ + log: log, + client: cl, + scheme: scheme, + clusterAccessors: make(map[client.ObjectKey]*clusterAccessor), + } + + delegatingClient := client.NewDelegatingClient(client.NewDelegatingClientInput{ + CacheReader: cl, + Client: cl, + }) + testCacheTracker.clusterAccessors[objKey] = &clusterAccessor{ + cache: nil, + client: delegatingClient, + watches: nil, + } + return testCacheTracker +} + // GetClient returns a client for the given cluster. func (t *ClusterCacheTracker) GetClient(ctx context.Context, cluster client.ObjectKey) (client.Client, error) { t.lock.Lock()
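
Illustrative sketch (not part of the patch): how a consumer of the Machine API might read the new NodeHealthy condition that reconcileNode sets. The reportNodeHealth helper below is hypothetical; the conditions.Get call and the MachineNodeHealthyCondition, NodeNotFoundReason, and NodeConditionsFailedReason constants are the ones introduced above.

package example

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4"
	"sigs.k8s.io/cluster-api/util/conditions"
)

// reportNodeHealth prints a short summary of the Machine's NodeHealthy condition.
// Hypothetical helper, shown only to illustrate how the reasons added in this
// patch surface to consumers of the Machine object.
func reportNodeHealth(machine *clusterv1.Machine) {
	c := conditions.Get(machine, clusterv1.MachineNodeHealthyCondition)
	if c == nil {
		// reconcileNode has not reported on the node yet.
		fmt.Println("NodeHealthy condition not set yet")
		return
	}
	switch {
	case c.Status == corev1.ConditionTrue:
		fmt.Println("node is healthy")
	case c.Reason == clusterv1.NodeNotFoundReason:
		fmt.Println("node was previously observed but is now gone")
	case c.Reason == clusterv1.NodeConditionsFailedReason:
		fmt.Printf("node conditions failed: %s\n", c.Message)
	default:
		fmt.Printf("node not healthy yet: %s\n", c.Reason)
	}
}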