From 38486bcb5a880399a9fa47ee4c15e3c640be269c Mon Sep 17 00:00:00 2001 From: skagalwala Date: Mon, 11 Nov 2024 13:22:26 -0600 Subject: [PATCH] implement observability for usage of deprecated AMIs --- .../karpenter.k8s.aws_ec2nodeclasses.yaml | 3 + .../karpenter.k8s.aws_ec2nodeclasses.yaml | 3 + pkg/apis/v1/ec2nodeclass_status.go | 4 + pkg/controllers/nodeclass/status/ami.go | 13 ++ pkg/controllers/nodeclass/status/ami_test.go | 210 ++++++++++++++++++ test/suites/ami/suite_test.go | 6 + test/suites/drift/suite_test.go | 13 ++ .../en/preview/concepts/nodeclasses.md | 6 +- 8 files changed, 256 insertions(+), 2 deletions(-) diff --git a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml index ff1bc9da2b40..210bdeca1421 100644 --- a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -592,6 +592,9 @@ spec: items: description: AMI contains resolved AMI selector values utilized for node launch properties: + deprecated: + description: Deprecation status of the AMI + type: boolean id: description: ID of the AMI type: string diff --git a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml index ff1bc9da2b40..210bdeca1421 100644 --- a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -592,6 +592,9 @@ spec: items: description: AMI contains resolved AMI selector values utilized for node launch properties: + deprecated: + description: Deprecation status of the AMI + type: boolean id: description: ID of the AMI type: string diff --git a/pkg/apis/v1/ec2nodeclass_status.go b/pkg/apis/v1/ec2nodeclass_status.go index 89afe2370e8c..f064e879fc97 100644 --- a/pkg/apis/v1/ec2nodeclass_status.go +++ b/pkg/apis/v1/ec2nodeclass_status.go @@ -23,6 +23,7 @@ const ( ConditionTypeSubnetsReady = "SubnetsReady" ConditionTypeSecurityGroupsReady = "SecurityGroupsReady" ConditionTypeAMIsReady = "AMIsReady" + ConditionTypeAMIsDeprecated = "AMIsDeprecated" ConditionTypeInstanceProfileReady = "InstanceProfileReady" ) @@ -54,6 +55,9 @@ type AMI struct { // ID of the AMI // +required ID string `json:"id"` + // Deprecation status of the AMI + // +optional + Deprecated bool `json:"deprecated,omitempty"` // Name of the AMI // +optional Name string `json:"name,omitempty"` diff --git a/pkg/controllers/nodeclass/status/ami.go b/pkg/controllers/nodeclass/status/ami.go index faf94d2dac08..52debaba9b75 100644 --- a/pkg/controllers/nodeclass/status/ami.go +++ b/pkg/controllers/nodeclass/status/ami.go @@ -58,9 +58,22 @@ func (a *AMI) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) (reconc return v1.AMI{ Name: ami.Name, ID: ami.AmiID, + Deprecated: ami.Deprecated, Requirements: reqs, } }) + + // If deprecated AMIs are discovered set the AMIsDeprecated status condition + // If no deprecated AMIs are present, and previous status condition for AMIsDeprecated exists, remove the condition + hasDeprecatedAMIs := lo.Filter(nodeClass.Status.AMIs, func(ami v1.AMI, _ int) bool { + return ami.Deprecated + }) + hasDeprecatedCondition := nodeClass.StatusConditions().Get(v1.ConditionTypeAMIsDeprecated) != nil + if len(hasDeprecatedAMIs) > 0 { + nodeClass.StatusConditions().SetTrue(v1.ConditionTypeAMIsDeprecated) + } else if hasDeprecatedCondition { + _ = nodeClass.StatusConditions().Clear(v1.ConditionTypeAMIsDeprecated) + } nodeClass.StatusConditions().SetTrue(v1.ConditionTypeAMIsReady) return reconcile.Result{RequeueAfter: 5 * time.Minute}, nil } diff --git a/pkg/controllers/nodeclass/status/ami_test.go b/pkg/controllers/nodeclass/status/ami_test.go index ee9ad4d7bf01..94fadca2eb53 100644 --- a/pkg/controllers/nodeclass/status/ami_test.go +++ b/pkg/controllers/nodeclass/status/ami_test.go @@ -534,4 +534,214 @@ var _ = Describe("NodeClass AMI Status Controller", func() { nodeClass = ExpectExists(ctx, env.Client, nodeClass) Expect(nodeClass.StatusConditions().IsTrue(v1.ConditionTypeAMIsReady)).To(BeFalse()) }) + Context("NodeClass AMI Status", func() { + BeforeEach(func() { + // Set time using the injectable/fake clock to now + awsEnv.Clock.SetTime(time.Now()) + nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{ + { + Tags: map[string]string{"*": "*"}, + }, + } + awsEnv.EC2API.DescribeImagesOutput.Set(&ec2.DescribeImagesOutput{ + Images: []ec2types.Image{ + { + Name: aws.String("test-ami-3"), + ImageId: aws.String("ami-id-789"), + CreationDate: aws.String("2021-08-31T00:12:42.000Z"), + DeprecationTime: aws.String(awsEnv.Clock.Now().Add(30 * time.Minute).Format(time.RFC3339)), + Architecture: "x86_64", + Tags: []ec2types.Tag{ + {Key: aws.String("Name"), Value: aws.String("test-ami-3")}, + {Key: aws.String("foo"), Value: aws.String("bar")}, + }, + }, + { + Name: aws.String("test-ami-2"), + ImageId: aws.String("ami-id-456"), + CreationDate: aws.String("2021-08-31T00:12:42.000Z"), + Architecture: "arm64", + Tags: []ec2types.Tag{ + {Key: aws.String("Name"), Value: aws.String("test-ami-2")}, + {Key: aws.String("foo"), Value: aws.String("bar")}, + }, + }, + }, + }) + }) + It("should update nodeclass AMI status with correct deprecation value and conditions", func() { + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + + Expect(len(nodeClass.Status.AMIs)).To(Equal(2)) + Expect(nodeClass.Status.AMIs).To(Equal( + []v1.AMI{ + { + Name: "test-ami-2", + ID: "ami-id-456", + Requirements: []corev1.NodeSelectorRequirement{ + { + Key: corev1.LabelArchStable, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.ArchitectureArm64}, + }, + }, + }, + { + Name: "test-ami-3", + ID: "ami-id-789", + Requirements: []corev1.NodeSelectorRequirement{ + { + Key: corev1.LabelArchStable, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.ArchitectureAmd64}, + }, + }, + }, + }, + )) + Expect(nodeClass.StatusConditions().IsTrue(v1.ConditionTypeAMIsReady)).To(BeTrue()) + + // Increment clock to simulate status updates on deprecated AMIs + awsEnv.Clock.Step(40 * time.Minute) + + // Flush Cache + awsEnv.EC2Cache.Flush() + + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(len(nodeClass.Status.AMIs)).To(Equal(2)) + Expect(nodeClass.Status.AMIs).To(Equal( + []v1.AMI{ + { + Name: "test-ami-2", + ID: "ami-id-456", + Requirements: []corev1.NodeSelectorRequirement{{ + Key: corev1.LabelArchStable, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.ArchitectureArm64}, + }, + }, + }, + { + Name: "test-ami-3", + ID: "ami-id-789", + // Adds deprecated field to the AMI status on the NodeClass + Deprecated: true, + Requirements: []corev1.NodeSelectorRequirement{{ + Key: corev1.LabelArchStable, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.ArchitectureAmd64}, + }, + }, + }, + }, + )) + Expect(nodeClass.StatusConditions().IsTrue(v1.ConditionTypeAMIsDeprecated)).To(BeTrue()) + Expect(nodeClass.StatusConditions().IsTrue(v1.ConditionTypeAMIsReady)).To(BeTrue()) + }) + It("should remove AMIDeprecated status condition when non deprecated AMIs are discovered", func() { + // Increment clock to simulate status updates on deprecated AMIs + awsEnv.Clock.Step(40 * time.Minute) + + // Initial reconcile discovers AMIs which are deprecated + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + + Expect(len(nodeClass.Status.AMIs)).To(Equal(2)) + Expect(nodeClass.Status.AMIs).To(Equal( + []v1.AMI{ + { + Name: "test-ami-2", + ID: "ami-id-456", + Requirements: []corev1.NodeSelectorRequirement{{ + Key: corev1.LabelArchStable, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.ArchitectureArm64}, + }, + }, + }, + { + Name: "test-ami-3", + ID: "ami-id-789", + // Adds deprecated field to the AMI status on the NodeClass + Deprecated: true, + Requirements: []corev1.NodeSelectorRequirement{{ + Key: corev1.LabelArchStable, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.ArchitectureAmd64}, + }, + }, + }, + }, + )) + // Checks if both AMIsReady and AMIsDeprecated status conditions are set + Expect(nodeClass.StatusConditions().IsTrue(v1.ConditionTypeAMIsDeprecated)).To(BeTrue()) + Expect(nodeClass.StatusConditions().IsTrue(v1.ConditionTypeAMIsReady)).To(BeTrue()) + + // rediscover AMIs again and reconcile + awsEnv.EC2API.DescribeImagesOutput.Set(&ec2.DescribeImagesOutput{ + Images: []ec2types.Image{ + { + Name: aws.String("test-ami-4"), + ImageId: aws.String("ami-id-123"), + CreationDate: aws.String("2021-08-31T00:12:42.000Z"), + DeprecationTime: aws.String(awsEnv.Clock.Now().Add(30 * time.Minute).Format(time.RFC3339)), + Architecture: "x86_64", + Tags: []ec2types.Tag{ + {Key: aws.String("Name"), Value: aws.String("test-ami-4")}, + {Key: aws.String("foo"), Value: aws.String("bar")}, + }, + }, + { + Name: aws.String("test-ami-2"), + ImageId: aws.String("ami-id-456"), + CreationDate: aws.String("2021-08-31T00:12:42.000Z"), + Architecture: "arm64", + Tags: []ec2types.Tag{ + {Key: aws.String("Name"), Value: aws.String("test-ami-2")}, + {Key: aws.String("foo"), Value: aws.String("bar")}, + }, + }, + }, + }) + + awsEnv.EC2Cache.Flush() + + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, statusController, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + + Expect(len(nodeClass.Status.AMIs)).To(Equal(2)) + Expect(nodeClass.Status.AMIs).To(Equal( + []v1.AMI{ + { + Name: "test-ami-4", + ID: "ami-id-123", + Requirements: []corev1.NodeSelectorRequirement{{ + Key: corev1.LabelArchStable, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.ArchitectureAmd64}, + }, + }, + }, + { + Name: "test-ami-2", + ID: "ami-id-456", + Requirements: []corev1.NodeSelectorRequirement{{ + Key: corev1.LabelArchStable, + Operator: corev1.NodeSelectorOpIn, + Values: []string{karpv1.ArchitectureArm64}, + }, + }, + }, + }, + )) + // Since all AMIs discovered are non deprecated, the status conditions should remove AMIsDeprecated and only set AMIsReady + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeAMIsDeprecated)).To(BeNil()) + Expect(nodeClass.StatusConditions().IsTrue(v1.ConditionTypeAMIsReady)).To(BeTrue()) + }) + }) }) diff --git a/test/suites/ami/suite_test.go b/test/suites/ami/suite_test.go index 7830c4a935c1..0af1df22f1fb 100644 --- a/test/suites/ami/suite_test.go +++ b/test/suites/ami/suite_test.go @@ -173,6 +173,12 @@ var _ = Describe("AMI", func() { env.ExpectCreatedNodeCount("==", 1) env.ExpectInstance(pod.Spec.NodeName).To(HaveField("ImageId", HaveValue(Equal(deprecatedAMI)))) + + nc := EventuallyExpectAMIsToExist(nodeClass) + Expect(len(nc.Status.AMIs)).To(BeNumerically("==", 1)) + Expect(nc.Status.AMIs[0].Deprecated).To(BeTrue()) + ExpectStatusConditions(env, env.Client, 1*time.Minute, nodeClass, status.Condition{Type: v1.ConditionTypeAMIsReady, Status: metav1.ConditionTrue}) + ExpectStatusConditions(env, env.Client, 1*time.Minute, nodeClass, status.Condition{Type: v1.ConditionTypeAMIsDeprecated, Status: metav1.ConditionTrue}) }) It("should prioritize launch with non-deprecated AMIs", func() { nodeClass.Spec.AMIFamily = lo.ToPtr(v1.AMIFamilyAL2023) diff --git a/test/suites/drift/suite_test.go b/test/suites/drift/suite_test.go index 53ea48599fcd..1de17b072b61 100644 --- a/test/suites/drift/suite_test.go +++ b/test/suites/drift/suite_test.go @@ -373,6 +373,13 @@ var _ = Describe("Drift", func() { env.ExpectCreated(dep, nodeClass, nodePool) pod := env.EventuallyExpectHealthyPodCount(selector, numPods)[0] env.ExpectCreatedNodeCount("==", 1) + env.ExpectUpdated(nodeClass) + + By("validating the deprecated status condition has propagated") + Eventually(func(g Gomega) { + g.Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeAMIsDeprecated).IsTrue()).To(BeTrue()) + g.Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeAMIsReady).IsTrue()).To(BeTrue()) + }).Should(Succeed()) nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] node := env.EventuallyExpectNodeCount("==", 1)[0] @@ -390,6 +397,12 @@ var _ = Describe("Drift", func() { pod = env.EventuallyExpectHealthyPodCount(selector, numPods)[0] env.ExpectInstance(pod.Spec.NodeName).To(HaveField("ImageId", HaveValue(Equal(amdAMI)))) + By("validating the deprecated status condition has been removed") + Eventually(func(g Gomega) { + g.Expect(env.Client.Get(env.Context, client.ObjectKeyFromObject(nodeClass), nodeClass)).Should(Succeed()) + g.Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeAMIsDeprecated)).To(BeNil()) + g.Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeAMIsReady).IsTrue()).To(BeTrue()) + }).Should(Succeed()) }) It("should return drifted if the AMI no longer matches the existing NodeClaims instance type", func() { armAMI := env.GetAMIBySSMPath(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/arm64/standard/recommended/image_id", env.K8sVersion())) diff --git a/website/content/en/preview/concepts/nodeclasses.md b/website/content/en/preview/concepts/nodeclasses.md index ce271720d024..c3741f3a50f1 100644 --- a/website/content/en/preview/concepts/nodeclasses.md +++ b/website/content/en/preview/concepts/nodeclasses.md @@ -1464,7 +1464,7 @@ status: ## status.amis -[`status.amis`]({{< ref "#statusamis" >}}) contains the resolved `id`, `name`, and `requirements` of either the default AMIs for the [`spec.amiFamily`]({{< ref "#specamifamily" >}}) or the AMIs selected by the [`spec.amiSelectorTerms`]({{< ref "#specamiselectorterms" >}}) if this field is specified. +[`status.amis`]({{< ref "#statusamis" >}}) contains the resolved `id`, `name`, `requirements`, and the `deprecated` status of either the default AMIs for the [`spec.amiFamily`]({{< ref "#specamifamily" >}}) or the AMIs selected by the [`spec.amiSelectorTerms`]({{< ref "#specamiselectorterms" >}}) if this field is specified. The `deprecated` status will be shown for resolved AMIs that are deprecated. #### Examples @@ -1529,6 +1529,7 @@ status: amis: - id: ami-01234567890123456 name: custom-ami-amd64 + deprecated: true requirements: - key: kubernetes.io/arch operator: In @@ -1565,7 +1566,8 @@ NodeClasses have the following status conditions: | SubnetsReady | Subnets are discovered. | | SecurityGroupsReady | Security Groups are discovered. | | InstanceProfileReady | Instance Profile is discovered. | -| AMIsReady | AMIs are discovered | +| AMIsReady | AMIs are discovered. | +| AMIsDeprecated | AMIs are discovered, but they are deprecated. Individual deprecated AMIs can be identified by reviewing the `status.amis`. | | Ready | Top level condition that indicates if the nodeClass is ready. If any of the underlying conditions is `False` then this condition is set to `False` and `Message` on the condition indicates the dependency that was not resolved. | If a NodeClass is not ready, NodePools that reference it through their `nodeClassRef` will not be considered for scheduling.