From 87dda471a25cf43ef395ba621747fdbcd0a76183 Mon Sep 17 00:00:00 2001 From: vportella Date: Fri, 23 Aug 2024 16:36:14 +1000 Subject: [PATCH 1/3] Change Wait method to select undisruptable pods using annotation --- docs/cycling/README.md | 22 +++-- docs/cycling/examples/README.md | 24 ++++- .../cyclenodestatus/transitioner/pod.go | 8 +- pkg/controller/pod.go | 24 ++++- pkg/controller/pod_test.go | 89 +++++++++++++++++++ pkg/k8s/pod.go | 20 +++++ 6 files changed, 166 insertions(+), 21 deletions(-) create mode 100644 pkg/controller/pod_test.go diff --git a/docs/cycling/README.md b/docs/cycling/README.md index 4f374e9..600e6a3 100644 --- a/docs/cycling/README.md +++ b/docs/cycling/README.md @@ -35,11 +35,11 @@ The CycleNodeRequest CRD handles a request to cycle nodes belonging to a specifi 5. In the **ScalingUp** phase, wait for the cloud provider to bring up the new nodes and then wait for the new nodes to be **Ready** in the Kubernetes API. Wait for the configured health checks on the node succeed. Transition the object to **CordoningNode**. 6. In the **CordoningNode** phase, cordon the selected nodes in the Kubernetes API then perform the pre-termination checks. Transition the object to **WaitingTermination**. - + 7. In the **WaitingTermination** phase, create a CycleNodeStatus CRD for every node that was cordoned. Each of these CycleNodeStatuses handles the termination of an individual node. The controller will wait for a number of them to enter the **Successful** or **Failed** phase before moving on. - + If any of them have **Failed** then the CycleNodeRequest will move to **Failed** and will not add any more nodes for cycling. If they are all **Successful** then the CycleNodeRequest will move back to **Initialised** to cycle more nodes. - + ### CycleNodeStatus The CycleNodeStatus CRD handles the draining of pods from, and termination of, an individual node. These should only be created by the controller. @@ -51,7 +51,7 @@ The CycleNodeStatus CRD handles the draining of pods from, and termination of, a 1. In the **Pending** phase, validate that the node still exists and store information about the node. Transition the object to **WaitingPods** if the Method is set to "Wait", otherwise transition to **RemovingLabelsFromPods**. - + 1. In the **WaitingPods** phase, wait for all pods that are not ignored by the `waitRules` to be removed from the node. Will wait for a long time before finally giving up if pods still remain. Transition the object to **Failed** if it times out waiting, or to **RemovingLabelsFromPods** once there are no pods left. 1. In the **RemovingLabelsFromPods** phase, remove any labels that are defined in the `labelsToRemove` option from any pod that is running on the target node. This is useful when you want to "detach" a pod from a service before draining it from a node to prevent requests in progress to the pod from being interrupted. Transition the object to **DrainingPods**. @@ -106,14 +106,10 @@ spec: - "node-name-A" - "node-name-B" - # Optional section - collection of validation options to define stricter or more lenient validation during cycling. - validationOptions: - # Optional field - Skip node names defined in the CNR that do not match any existing nodes in the Kubernetes API. - skipMissingNamedNodes: true|false - cycleNodeSettings: # Method can be "Wait" or "Drain", defaults to "Drain" if not provided - # "Wait" will wait for pods on the node to complete, while "Drain" will forcefully drain them off the node + # "Wait" will wait for pods with the "cyclops.atlassian.com/do-not-disrupt=true" + # annotation on the node to complete, while "Drain" will forcefully drain them off the node method: "Wait|Drain" # Optional field - use this to scale up by `concurrency` nodes at a time. The default is the current number @@ -128,18 +124,20 @@ spec: # if you want to remove them from existing services before draining the nodes labelsToRemove: - - + # Optional field - only used if method=Wait # ignorePodsLabels is a map of label names to a list of label values, where any value for the given # label name will cause a pod to not be waited for + # Takes precendence over selecting pods with the "cyclops.atlassian.com/do-not-disrupt=true" annotation. ignorePodsLabels: # This example ignores all pods where labelName=value1 or labelName=value2 labelName: - "value1" - "value2" - + # Optional field - only used if method=Wait # ignoreNamespaces is a list of namespaces from which to ignore pods when waiting for pods on a node to finish + # Takes precendence over selecting pods with the "cyclops.atlassian.com/do-not-disrupt=true" annotation. ignoreNamespaces: - "kube-system" ``` diff --git a/docs/cycling/examples/README.md b/docs/cycling/examples/README.md index 68e4e8a..93042c8 100644 --- a/docs/cycling/examples/README.md +++ b/docs/cycling/examples/README.md @@ -166,18 +166,34 @@ spec: - stickypod ``` -This example shows the usage of the `Wait` method which opposed to `Drain` which attempts to remove pods from the node before terminating, will wait for all pods to leave the node naturally by themselves. This is useful for situations where you cannot forcefully remove pods, such as high churn jobs which need to be run to completion. +This example shows the usage of the `Wait` method which as opposed to `Drain`, which attempts to remove pods from the node before terminating, will wait for pods with the `cyclops.atlassian.com/do-not-disrupt=true` annotation to leave the node naturally by themselves. This is useful for situations where you cannot forcefully remove pods, such as high churn jobs which need to be run to completion. + +```yaml +# Pod example +apiVersion: v1 +kind: Pod +metadata: + name: do-not-disrupt + annotations: + cyclops.atlassian.com/do-not-disrupt: "true" +spec: + containers: + - name: sleep + image: alpine + command: ["sleep", "3600"] +``` + +Cyclops provides an option to ignore pods with specific labels in order to support nodes that may run pods that will never exit themselves. In this example, the pod with label `name=stickypod` would be ignore when waiting for all other pods to terminate. The node will be terminated while `name=stickypod` is running, and all others have finished. ```yaml cycleSettings: method: "Wait" ignorePodsLabels: name: - - stickypod + - stickypod ``` -Cyclops provides an option to ignore pods with specific labels in order to support nodes that may run pods that will never exit themselves. In this example, the pod with label `name=stickypod` would be ignore when waiting for all other pods to terminate. The node will be terminated while `name=stickypod` is running, and all others have finished. - +The label selector to ignore pods will take precedence over selecting pods with the `cyclops.atlassian.com/do-not-disrupt=true` annotation, which means a pod with both the annotation and the label will be ignored. ## Example 5 - Concurrency within multiple cloud provider node groups diff --git a/pkg/controller/cyclenodestatus/transitioner/pod.go b/pkg/controller/cyclenodestatus/transitioner/pod.go index 5d1a58c..7195e81 100644 --- a/pkg/controller/cyclenodestatus/transitioner/pod.go +++ b/pkg/controller/cyclenodestatus/transitioner/pod.go @@ -3,8 +3,8 @@ package transitioner import ( "fmt" - corev1 "k8s.io/api/core/v1" "github.com/atlassian-labs/cyclops/pkg/k8s" + corev1 "k8s.io/api/core/v1" ) func (t *CycleNodeStatusTransitioner) removeLabelsFromPods() (finished bool, err error) { @@ -53,14 +53,14 @@ func (t *CycleNodeStatusTransitioner) removeLabelsFromPods() (finished bool, err // podsFinished returns true if all relevant pods on the node are finished. func (t *CycleNodeStatusTransitioner) podsFinished() (bool, error) { - // Get drainable pods - drainablePods, err := t.rm.GetDrainablePodsOnNode(t.cycleNodeStatus.Status.CurrentNode.Name) + // Get undisruptable pods + undisruptablePods, err := t.rm.GetUndisruptablePods(t.cycleNodeStatus.Status.CurrentNode.Name) if err != nil { return false, err } waitingOnPods, err := getRunningPods( - drainablePods, + undisruptablePods, t.cycleNodeStatus.Spec.CycleSettings.IgnoreNamespaces, t.cycleNodeStatus.Spec.CycleSettings.IgnorePodsLabels, ) diff --git a/pkg/controller/pod.go b/pkg/controller/pod.go index 455ec13..c09639e 100644 --- a/pkg/controller/pod.go +++ b/pkg/controller/pod.go @@ -3,10 +3,10 @@ package controller import ( "context" + "github.com/atlassian-labs/cyclops/pkg/k8s" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/fields" "sigs.k8s.io/controller-runtime/pkg/client" - "github.com/atlassian-labs/cyclops/pkg/k8s" ) // GetPodsOnNode gets a list of the pods running on the given node, optionally filtered by the given label selector. @@ -24,6 +24,28 @@ func (rm *ResourceManager) GetPodsOnNode(nodeName string) (pods []v1.Pod, err er return podList.Items, nil } +func getUndisruptablePods(pods []v1.Pod) []v1.Pod { + filteredPods := make([]v1.Pod, 0) + + for _, pod := range pods { + if k8s.PodCannotBeDisrupted(&pod) && pod.Status.Phase == v1.PodRunning { + filteredPods = append(filteredPods, pod) + } + } + + return filteredPods +} + +// GetUndisruptablePods gets a list of pods on a named node that cannot evicted or deleted from the node. +func (rm *ResourceManager) GetUndisruptablePods(nodeName string) (pods []v1.Pod, err error) { + allPods, err := rm.GetPodsOnNode(nodeName) + if err != nil { + return pods, err + } + + return getUndisruptablePods(allPods), nil +} + // GetDrainablePodsOnNode gets a list of pods on a named node that we can evict or delete from the node. func (rm *ResourceManager) GetDrainablePodsOnNode(nodeName string) (pods []v1.Pod, err error) { allPods, err := rm.GetPodsOnNode(nodeName) diff --git a/pkg/controller/pod_test.go b/pkg/controller/pod_test.go new file mode 100644 index 0000000..b8cf6b5 --- /dev/null +++ b/pkg/controller/pod_test.go @@ -0,0 +1,89 @@ +package controller + +import ( + "testing" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestGetUndisruptablePods(t *testing.T) { + pod1 := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-1", + Annotations: map[string]string{ + "cyclops.atlassian.com/do-not-disrupt": "true", + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + }, + } + + pod2 := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-2", + Annotations: map[string]string{ + "cyclops.atlassian.com/do-not-disrupt": "true", + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodSucceeded, + }, + } + + pod3 := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-3", + Annotations: map[string]string{ + "cyclops.atlassian.com/do-not-disrupt": "false", + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + }, + } + + pod4 := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-4", + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + }, + } + + tests := []struct { + name string + pods []corev1.Pod + want []corev1.Pod + }{ + { + "test with no pods with annotation", + []corev1.Pod{pod4}, + []corev1.Pod{}, + }, + { + "test with 1 pod with annotation", + []corev1.Pod{pod1}, + []corev1.Pod{pod1}, + }, + { + "test succeeded pod with annotation", + []corev1.Pod{pod2}, + []corev1.Pod{}, + }, + { + "test with 1 pod without correct annotation value", + []corev1.Pod{pod3}, + []corev1.Pod{}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + assert.ElementsMatch(t, tc.want, getUndisruptablePods(tc.pods)) + }) + } +} diff --git a/pkg/k8s/pod.go b/pkg/k8s/pod.go index f997523..7540235 100644 --- a/pkg/k8s/pod.go +++ b/pkg/k8s/pod.go @@ -16,6 +16,7 @@ import ( const ( podConditionTypeForUnhealthy = v1.PodReady + doNotDisruptAnnotation = "cyclops.atlassian.com/do-not-disrupt" ) var log = logf.Log.WithName("k8s.pod.go") @@ -84,6 +85,25 @@ func PodIsDaemonSet(pod *v1.Pod) bool { return true } } + + return false +} + +// PodCannotBeDisrupted returns true if the pod cannot be forcibly drained from +// a node. +func PodCannotBeDisrupted(pod *v1.Pod) bool { + for annotationName, annotationValue := range pod.ObjectMeta.Annotations { + if annotationName != doNotDisruptAnnotation { + continue + } + + if annotationValue != "true" { + continue + } + + return true + } + return false } From b83e6ef0c085fba8d128a5de0211e36be7acc952 Mon Sep 17 00:00:00 2001 From: vportella Date: Mon, 26 Aug 2024 13:13:06 +1000 Subject: [PATCH 2/3] Address comments --- docs/cycling/README.md | 5 +++++ pkg/k8s/pod.go | 15 +++++---------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/cycling/README.md b/docs/cycling/README.md index 600e6a3..0ad3ab2 100644 --- a/docs/cycling/README.md +++ b/docs/cycling/README.md @@ -106,6 +106,11 @@ spec: - "node-name-A" - "node-name-B" + # Optional section - collection of validation options to define stricter or more lenient validation during cycling. + validationOptions: + # Optional field - Skip node names defined in the CNR that do not match any existing nodes in the Kubernetes API. + skipMissingNamedNodes: true|false + cycleNodeSettings: # Method can be "Wait" or "Drain", defaults to "Drain" if not provided # "Wait" will wait for pods with the "cyclops.atlassian.com/do-not-disrupt=true" diff --git a/pkg/k8s/pod.go b/pkg/k8s/pod.go index 7540235..79b0d82 100644 --- a/pkg/k8s/pod.go +++ b/pkg/k8s/pod.go @@ -15,8 +15,9 @@ import ( ) const ( - podConditionTypeForUnhealthy = v1.PodReady - doNotDisruptAnnotation = "cyclops.atlassian.com/do-not-disrupt" + podConditionTypeForUnhealthy = v1.PodReady + doNotDisruptAnnotation = "cyclops.atlassian.com/do-not-disrupt" + doNotDisruptAnnotationRequredValue = "true" ) var log = logf.Log.WithName("k8s.pod.go") @@ -93,15 +94,9 @@ func PodIsDaemonSet(pod *v1.Pod) bool { // a node. func PodCannotBeDisrupted(pod *v1.Pod) bool { for annotationName, annotationValue := range pod.ObjectMeta.Annotations { - if annotationName != doNotDisruptAnnotation { - continue - } - - if annotationValue != "true" { - continue + if annotationName == doNotDisruptAnnotation && annotationValue == doNotDisruptAnnotationRequredValue { + return true } - - return true } return false From 4e0fafc887dca61b6c4f9fcfbb7c57812a90215e Mon Sep 17 00:00:00 2001 From: vportella Date: Mon, 26 Aug 2024 13:37:54 +1000 Subject: [PATCH 3/3] fix typo --- pkg/k8s/pod.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/k8s/pod.go b/pkg/k8s/pod.go index 79b0d82..f1fab93 100644 --- a/pkg/k8s/pod.go +++ b/pkg/k8s/pod.go @@ -15,9 +15,9 @@ import ( ) const ( - podConditionTypeForUnhealthy = v1.PodReady - doNotDisruptAnnotation = "cyclops.atlassian.com/do-not-disrupt" - doNotDisruptAnnotationRequredValue = "true" + podConditionTypeForUnhealthy = v1.PodReady + doNotDisruptAnnotation = "cyclops.atlassian.com/do-not-disrupt" + doNotDisruptAnnotationRequiredValue = "true" ) var log = logf.Log.WithName("k8s.pod.go") @@ -94,7 +94,7 @@ func PodIsDaemonSet(pod *v1.Pod) bool { // a node. func PodCannotBeDisrupted(pod *v1.Pod) bool { for annotationName, annotationValue := range pod.ObjectMeta.Annotations { - if annotationName == doNotDisruptAnnotation && annotationValue == doNotDisruptAnnotationRequredValue { + if annotationName == doNotDisruptAnnotation && annotationValue == doNotDisruptAnnotationRequiredValue { return true } }