diff --git a/api/v1beta1/common_types.go b/api/v1beta1/common_types.go
index 218120b18375..1dd7c382cf22 100644
--- a/api/v1beta1/common_types.go
+++ b/api/v1beta1/common_types.go
@@ -128,6 +128,12 @@ const (
 	// any changes to the actual object because it is a dry run) and the topology controller
 	// will receive the resulting object.
 	TopologyDryRunAnnotation = "topology.cluster.x-k8s.io/dry-run"
+
+	// ReplicasManagedByAnnotation is an annotation that indicates external (non-Cluster API) management of infra scaling.
+	// The practical effect of this is that the capi "replica" count should be passively derived from the number of observed infra machines,
+	// instead of being a source of truth for eventual consistency.
+	// This annotation can be used to inform MachinePool status during in-progress scaling scenarios.
+	ReplicasManagedByAnnotation = "cluster.x-k8s.io/replicas-managed-by"
 )
 
 const (
diff --git a/docs/book/src/developer/architecture/controllers/machine-pool.md b/docs/book/src/developer/architecture/controllers/machine-pool.md
index 7dd3be60dc9c..04cd327e1144 100644
--- a/docs/book/src/developer/architecture/controllers/machine-pool.md
+++ b/docs/book/src/developer/architecture/controllers/machine-pool.md
@@ -96,6 +96,30 @@ The `status` object **may** define several fields that do not affect functionality:
 * `failureReason` - is a string that explains why a fatal error has occurred, if possible.
 * `failureMessage` - is a string that holds the message contained by the error.
 
+Example:
+```yaml
+kind: MyMachinePool
+apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+spec:
+  providerIDList:
+  - cloud:////my-cloud-provider-id-0
+  - cloud:////my-cloud-provider-id-1
+status:
+  ready: true
+```
+
+#### Externally Managed Autoscaler
+
+A provider may implement an InfrastructureMachinePool that is externally managed by an autoscaler. For example, if you are using a Managed Kubernetes provider, it may include its own autoscaler solution. To indicate this to Cluster API, you would decorate the MachinePool object with the following annotation:
+
+`"cluster.x-k8s.io/replicas-managed-by": ""`
+
+Cluster API treats the annotation as a "boolean", meaning that the presence of the annotation is sufficient to indicate external replica count management, with one exception: if the value is `"false"`, then that indicates to Cluster API that replica enforcement is nominal, and managed by Cluster API.
+
+Providers may choose to implement the `cluster.x-k8s.io/replicas-managed-by` annotation with different values (e.g., `external-autoscaler` or `karpenter`) that may inform different provider-specific behaviors, but those values will have no effect upon Cluster API.
+
+The effect of this annotation upon Cluster API is that, during autoscaling events (initiated externally, not by Cluster API), when more or fewer MachinePool replicas are observed compared to the `Spec.Replicas` configuration, Cluster API will update the MachinePool's `Status.Phase` property to `"Scaling"`.
+
 Example:
 ```yaml
 kind: MyMachinePool
@@ -104,10 +128,15 @@ spec:
   providerIDList:
   - cloud:////my-cloud-provider-id-0
   - cloud:////my-cloud-provider-id-1
+  - cloud:////my-cloud-provider-id-2
+  replicas: 1
 status:
   ready: true
+  phase: Scaling
 ```
 
+It is the provider's responsibility to update Cluster API's `Spec.Replicas` property to the value observed in the underlying infra environment as it changes in response to external autoscaling behaviors. Once that is done, and the number of providerID items is equal to the `Spec.Replicas` property, the MachinePool's `Status.Phase` property will be set to `Running` by Cluster API.
+
 ### Secrets
 
 The machine pool controller will use a secret in the following format:
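The doc above leaves the replica-sync step to the provider. For illustration only (not part of this diff), a provider's InfrastructureMachinePool reconciler might derive the replica count along these lines; `syncReplicas` and `observedProviderIDs` are hypothetical names, and a controller-runtime client is assumed:

```go
import (
	"context"

	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// syncReplicas passively derives Spec.Replicas from the number of observed
// provider IDs, instead of treating Spec.Replicas as the source of truth.
func syncReplicas(ctx context.Context, c client.Client, machinePool *expv1.MachinePool, observedProviderIDs []string) error {
	observed := int32(len(observedProviderIDs))
	if machinePool.Spec.Replicas != nil && *machinePool.Spec.Replicas == observed {
		return nil
	}
	machinePool.Spec.Replicas = &observed
	return c.Update(ctx, machinePool)
}
```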
diff --git a/docs/book/src/developer/providers/v1.2-to-v1.3.md b/docs/book/src/developer/providers/v1.2-to-v1.3.md
index 8c53861451b3..e46652d00dc1 100644
--- a/docs/book/src/developer/providers/v1.2-to-v1.3.md
+++ b/docs/book/src/developer/providers/v1.2-to-v1.3.md
@@ -41,6 +41,7 @@ in Cluster API are kept in sync with the versions used by `sigs.k8s.io/controller-runtime`.
 - A new timeout `nodeVolumeDetachTimeout` has been introduced that defines how long the controller will spend on waiting for all volumes to be detached.
 The default value is 0, meaning that the volume can be detached without any time limitations.
 - A new annotation `machine.cluster.x-k8s.io/exclude-wait-for-node-volume-detach` has been introduced that allows explicitly skip the waiting for node volume detaching.
+- A new annotation `"cluster.x-k8s.io/replicas-managed-by"` has been introduced to indicate that a MachinePool's replica enforcement is delegated to an external autoscaler (not managed by Cluster API). For more information see the documentation [here](../architecture/controllers/machine-pool.md#externally-managed-autoscaler).
 - The `Path` func in the `sigs.k8s.io/cluster-api/cmd/clusterctl/client/repository.Overrider` interface has been adjusted to also return an error.
 
 ### Other
@@ -55,8 +56,8 @@ The default value is 0, meaning that the volume can be detached without any time limitations.
   * the `--junit-report` argument [replaces JUnit custom reporter](https://onsi.github.io/ginkgo/MIGRATING_TO_V2#improved-reporting-infrastructure) code
   * see the ["Update tests to Ginkgo v2" PR](https://github.com/kubernetes-sigs/cluster-api/pull/6906) for a reference example
 - Cluster API introduced new [logging guidelines](../../developer/logging.md). All reconcilers in the core repository were updated
-  to [log the entire object hierarchy](../../developer/logging.md#keyvalue-pairs). It would be great if providers would be adjusted
-  as well to make it possible to cross-reference log entries across providers (please see CAPD for an infra provider reference implementation).
+  to [log the entire object hierarchy](../../developer/logging.md#keyvalue-pairs). It would be great if providers would be adjusted
+  as well to make it possible to cross-reference log entries across providers (please see CAPD for an infra provider reference implementation).
 - The `CreateLogFile` function and `CreateLogFileInput` struct in the E2E test framework for clusterctl has been renamed to `OpenLogFile` and `OpenLogFileInput` because the function will now append to the logfile instead of truncating the content.
 - The `Move` function in E2E test framework for clusterctl has been modified to:
   * print the `clusterctl move` command including the arguments similar to `Init`.
diff --git a/docs/book/src/reference/labels_and_annotations.md b/docs/book/src/reference/labels_and_annotations.md
index e9cdcad5d24f..4a2a98633d7c 100644
--- a/docs/book/src/reference/labels_and_annotations.md
+++ b/docs/book/src/reference/labels_and_annotations.md
@@ -34,6 +34,7 @@
 | cluster.x-k8s.io/cloned-from-groupkind | It is the infrastructure machine annotation that stores the group-kind of the infrastructure template resource that was cloned for the machine. This annotation is set only during cloning a template. Older/adopted machines will not have this annotation. |
 | cluster.x-k8s.io/skip-remediation | It is used to mark the machines that should not be considered for remediation by MachineHealthCheck reconciler. |
 | cluster.x-k8s.io/managed-by | It can be applied to InfraCluster resources to signify that some external system is managing the cluster infrastructure. Provider InfraCluster controllers will ignore resources with this annotation. An external controller must fulfill the contract of the InfraCluster resource. External infrastructure providers should ensure that the annotation, once set, cannot be removed. |
+| cluster.x-k8s.io/replicas-managed-by | It can be applied to MachinePool resources to signify that some external system is managing infrastructure scaling for that pool. See [the MachinePool documentation](../developer/architecture/controllers/machine-pool.md#externally-managed-autoscaler) for more details. |
 | topology.cluster.x-k8s.io/dry-run | It is an annotation that gets set on objects by the topology controller only during a server side dry run apply operation. It is used for validating update webhooks for objects which get updated by template rotation (e.g. InfrastructureMachineTemplate). When the annotation is set and the admission request is a dry run, the webhook should deny validation due to immutability. By that the request will succeed (without any changes to the actual object because it is a dry run) and the topology controller will receive the resulting object. |
 | machine.cluster.x-k8s.io/certificates-expiry | It captures the expiry date of the machine certificates in RFC3339 format. It is used to trigger rollout of control plane machines before certificates expire. It can be set on BootstrapConfig and Machine objects. The value set on Machine object takes precedence. The annotation is only used by control plane machines. |
 | machine.cluster.x-k8s.io/exclude-node-draining | It explicitly skips node draining if set. |
diff --git a/exp/api/v1beta1/machinepool_types.go b/exp/api/v1beta1/machinepool_types.go
index a20397097eda..2634affe1db3 100644
--- a/exp/api/v1beta1/machinepool_types.go
+++ b/exp/api/v1beta1/machinepool_types.go
@@ -163,6 +163,11 @@ const (
 	// MachinePool infrastructure is scaling down.
 	MachinePoolPhaseScalingDown = MachinePoolPhase("ScalingDown")
 
+	// MachinePoolPhaseScaling is the MachinePool state when the
+	// MachinePool infrastructure is scaling.
+	// This phase value is appropriate to indicate an active state of scaling by an external autoscaler.
+	MachinePoolPhaseScaling = MachinePoolPhase("Scaling")
+
 	// MachinePoolPhaseDeleting is the MachinePool state when a delete
 	// request has been sent to the API Server,
 	// but its infrastructure has not yet been fully deleted.
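As an aside (illustrative only, not part of this diff): a provider or operator could opt a MachinePool into the new phase behavior with the existing `AddAnnotations` util shown further below; `markExternallyManaged` is a hypothetical helper name.

```go
import (
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
	"sigs.k8s.io/cluster-api/util/annotations"
)

// markExternallyManaged flags the MachinePool so that replica drift is
// reported as "Scaling" instead of "ScalingUp"/"ScalingDown".
// An empty value is truthy; only the literal "false" disables the behavior.
func markExternallyManaged(mp *expv1.MachinePool) bool {
	return annotations.AddAnnotations(mp, map[string]string{
		clusterv1.ReplicasManagedByAnnotation: "",
	})
}
```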
diff --git a/exp/internal/controllers/machinepool_controller_phases.go b/exp/internal/controllers/machinepool_controller_phases.go
index 24a514049548..c6cb361aa884 100644
--- a/exp/internal/controllers/machinepool_controller_phases.go
+++ b/exp/internal/controllers/machinepool_controller_phases.go
@@ -67,14 +67,26 @@ func (r *MachinePoolReconciler) reconcilePhase(mp *expv1.MachinePool) {
 		mp.Status.SetTypedPhase(expv1.MachinePoolPhaseRunning)
 	}
 
-	// Set the phase to "scalingUp" if the infrastructure is scaling up.
+	// Set the appropriate phase in response to the MachinePool replica count being greater than the observed infrastructure replicas.
 	if mp.Status.InfrastructureReady && *mp.Spec.Replicas > mp.Status.ReadyReplicas {
-		mp.Status.SetTypedPhase(expv1.MachinePoolPhaseScalingUp)
+		// If we are being managed by an external autoscaler and can't predict scaling direction, set to "Scaling".
+		if annotations.ReplicasManagedByExternalAutoscaler(mp) {
+			mp.Status.SetTypedPhase(expv1.MachinePoolPhaseScaling)
+		} else {
+			// Set the phase to "ScalingUp" if we are actively scaling the infrastructure out.
+			mp.Status.SetTypedPhase(expv1.MachinePoolPhaseScalingUp)
+		}
 	}
 
-	// Set the phase to "scalingDown" if the infrastructure is scaling down.
+	// Set the appropriate phase in response to the MachinePool replica count being less than the observed infrastructure replicas.
 	if mp.Status.InfrastructureReady && *mp.Spec.Replicas < mp.Status.ReadyReplicas {
-		mp.Status.SetTypedPhase(expv1.MachinePoolPhaseScalingDown)
+		// If we are being managed by an external autoscaler and can't predict scaling direction, set to "Scaling".
+		if annotations.ReplicasManagedByExternalAutoscaler(mp) {
+			mp.Status.SetTypedPhase(expv1.MachinePoolPhaseScaling)
+		} else {
+			// Set the phase to "ScalingDown" if we are actively scaling the infrastructure in.
+			mp.Status.SetTypedPhase(expv1.MachinePoolPhaseScalingDown)
+		}
 	}
 
 	// Set the phase to "failed" if any of Status.FailureReason or Status.FailureMessage is not-nil.
diff --git a/util/annotations/helpers.go b/util/annotations/helpers.go
index de07f7a55b92..0ec9ef9388ac 100644
--- a/util/annotations/helpers.go
+++ b/util/annotations/helpers.go
@@ -58,6 +58,11 @@ func HasWithPrefix(prefix string, annotations map[string]string) bool {
 	return false
 }
 
+// ReplicasManagedByExternalAutoscaler returns true if the standard annotation for external autoscaler is present.
+func ReplicasManagedByExternalAutoscaler(o metav1.Object) bool {
+	return hasTruthyAnnotationValue(o, clusterv1.ReplicasManagedByAnnotation)
+}
+
 // AddAnnotations sets the desired annotations on the object and returns true if the annotations have changed.
 func AddAnnotations(o metav1.Object, desired map[string]string) bool {
 	if len(desired) == 0 {
@@ -87,3 +92,15 @@ func hasAnnotation(o metav1.Object, annotation string) bool {
 	_, ok := annotations[annotation]
 	return ok
 }
+
+// hasTruthyAnnotationValue returns true if the object has an annotation with a value that is not "false".
+func hasTruthyAnnotationValue(o metav1.Object, annotation string) bool {
+	annotations := o.GetAnnotations()
+	if annotations == nil {
+		return false
+	}
+	if val, ok := annotations[annotation]; ok {
+		return val != "false"
+	}
+	return false
+}
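A sketch of the provider-side counterpart (names are illustrative, not part of this diff): when replicas are externally managed they should be observed rather than enforced, so a provider can use the new helper to gate its own scaling logic.

```go
import (
	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
	"sigs.k8s.io/cluster-api/util/annotations"
	ctrl "sigs.k8s.io/controller-runtime"
)

// reconcileReplicas is a hypothetical provider hook that skips scale
// up/down operations when an external autoscaler owns the replica count.
func reconcileReplicas(machinePool *expv1.MachinePool) (ctrl.Result, error) {
	if annotations.ReplicasManagedByExternalAutoscaler(machinePool) {
		// Replicas are derived from observed infrastructure; nothing to enforce.
		return ctrl.Result{}, nil
	}
	// Provider-specific scaling of the underlying instance group would go here.
	return ctrl.Result{}, nil
}
```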
diff --git a/util/annotations/helpers_test.go b/util/annotations/helpers_test.go
index b6f7c119e860..9793fcf87369 100644
--- a/util/annotations/helpers_test.go
+++ b/util/annotations/helpers_test.go
@@ -151,3 +151,95 @@ func TestAddAnnotations(t *testing.T) {
 		})
 	}
 }
+
+func TestHasTruthyAnnotationValue(t *testing.T) {
+	tests := []struct {
+		name          string
+		obj           metav1.Object
+		annotationKey string
+		expected      bool
+	}{
+		{
+			name: "annotation does not exist",
+			obj: &corev1.Node{
+				ObjectMeta: metav1.ObjectMeta{
+					Annotations: map[string]string{
+						"cluster.x-k8s.io/some-other-annotation": "",
+					},
+				},
+				Spec:   corev1.NodeSpec{},
+				Status: corev1.NodeStatus{},
+			},
+			annotationKey: "cluster.x-k8s.io/replicas-managed-by",
+			expected:      false,
+		},
+		{
+			name: "no val",
+			obj: &corev1.Node{
+				ObjectMeta: metav1.ObjectMeta{
+					Annotations: map[string]string{
+						"cluster.x-k8s.io/replicas-managed-by": "",
+					},
+				},
+				Spec:   corev1.NodeSpec{},
+				Status: corev1.NodeStatus{},
+			},
+			annotationKey: "cluster.x-k8s.io/replicas-managed-by",
+			expected:      true,
+		},
+		{
+			name: "annotation exists, true value",
+			obj: &corev1.Node{
+				ObjectMeta: metav1.ObjectMeta{
+					Annotations: map[string]string{
+						"cluster.x-k8s.io/replicas-managed-by": "true",
+					},
+				},
+				Spec:   corev1.NodeSpec{},
+				Status: corev1.NodeStatus{},
+			},
+			annotationKey: "cluster.x-k8s.io/replicas-managed-by",
+			expected:      true,
+		},
+		{
+			name: "annotation exists, random string value",
+			obj: &corev1.Node{
+				ObjectMeta: metav1.ObjectMeta{
+					Annotations: map[string]string{
+						"cluster.x-k8s.io/replicas-managed-by": "foo",
+					},
+				},
+				Spec:   corev1.NodeSpec{},
+				Status: corev1.NodeStatus{},
+			},
+			annotationKey: "cluster.x-k8s.io/replicas-managed-by",
+			expected:      true,
+		},
+		{
+			name: "annotation exists, false value",
+			obj: &corev1.Node{
+				ObjectMeta: metav1.ObjectMeta{
+					Annotations: map[string]string{
+						"cluster.x-k8s.io/replicas-managed-by": "false",
+					},
+				},
+				Spec:   corev1.NodeSpec{},
+				Status: corev1.NodeStatus{},
+			},
+			annotationKey: "cluster.x-k8s.io/replicas-managed-by",
+			expected:      false,
+		},
+	}
+	for _, tt := range tests {
+		tt := tt
+		t.Run(tt.name, func(t *testing.T) {
+			g := NewWithT(t)
+			ret := hasTruthyAnnotationValue(tt.obj, tt.annotationKey)
+			if tt.expected {
+				g.Expect(ret).To(BeTrue())
+			} else {
+				g.Expect(ret).To(BeFalse())
+			}
+		})
+	}
+}
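The exported `ReplicasManagedByExternalAutoscaler` wrapper is only exercised indirectly by the table above; a direct test could follow the same pattern. A minimal sketch (not part of this diff), reusing the test file's existing imports:

```go
func TestReplicasManagedByExternalAutoscaler(t *testing.T) {
	g := NewWithT(t)

	node := &corev1.Node{
		ObjectMeta: metav1.ObjectMeta{
			Annotations: map[string]string{
				"cluster.x-k8s.io/replicas-managed-by": "",
			},
		},
	}
	// Presence of the annotation, even with an empty value, indicates external management.
	g.Expect(ReplicasManagedByExternalAutoscaler(node)).To(BeTrue())

	// The literal value "false" hands replica enforcement back to Cluster API.
	node.Annotations["cluster.x-k8s.io/replicas-managed-by"] = "false"
	g.Expect(ReplicasManagedByExternalAutoscaler(node)).To(BeFalse())
}
```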