From c1ef1d779998f3489889aa34be42e33c7c336e03 Mon Sep 17 00:00:00 2001 From: Stefan Bueringer Date: Thu, 25 May 2023 07:39:12 +0200 Subject: [PATCH 01/94] Remove unnecessary requeues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stefan Büringer buringerst@vmware.com --- controllers/external/tracker.go | 5 ++- .../controllers/machinepool_controller.go | 14 +++---- .../machinepool_controller_phases.go | 38 ++++++------------- .../machinepool_controller_phases_test.go | 8 +--- .../cluster/cluster_controller_phases.go | 10 ++--- .../controllers/machine/machine_controller.go | 1 - .../machine/machine_controller_phases.go | 16 ++++---- .../machine/machine_controller_phases_test.go | 4 +- 8 files changed, 39 insertions(+), 57 deletions(-) diff --git a/controllers/external/tracker.go b/controllers/external/tracker.go index 650690b77f3c..2dd88120d722 100644 --- a/controllers/external/tracker.go +++ b/controllers/external/tracker.go @@ -17,6 +17,7 @@ limitations under the License. package external import ( + "fmt" "sync" "github.com/go-logr/logr" @@ -56,7 +57,7 @@ func (o *ObjectTracker) Watch(log logr.Logger, obj runtime.Object, handler handl u := &unstructured.Unstructured{} u.SetGroupVersionKind(gvk) - log.Info("Adding watcher on external object", "groupVersionKind", gvk.String()) + log.Info(fmt.Sprintf("Adding watch on external object %q", gvk.String())) err := o.Controller.Watch( source.Kind(o.Cache, u), handler, @@ -64,7 +65,7 @@ func (o *ObjectTracker) Watch(log logr.Logger, obj runtime.Object, handler handl ) if err != nil { o.m.Delete(key) - return errors.Wrapf(err, "failed to add watcher on external object %q", gvk.String()) + return errors.Wrapf(err, "failed to add watch on external object %q", gvk.String()) } return nil } diff --git a/exp/internal/controllers/machinepool_controller.go b/exp/internal/controllers/machinepool_controller.go index d431206cc7a5..43dd84bc9465 100644 --- a/exp/internal/controllers/machinepool_controller.go +++ b/exp/internal/controllers/machinepool_controller.go @@ -19,7 +19,6 @@ package controllers import ( "context" "fmt" - "sync" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" @@ -31,7 +30,6 @@ import ( "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" - "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -70,10 +68,9 @@ type MachinePoolReconciler struct { // WatchFilterValue is the label value used to filter events prior to reconciliation. 
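The tracker.go hunk above only adjusts wording ("watcher" becomes "watch"), but ObjectTracker is the mechanism the rest of this series leans on: it starts at most one watch per GroupVersionKind, and it forgets the key again when starting the watch fails so a later reconcile can retry. A self-contained sketch of that bookkeeping, with a plain function standing in for the real controller.Controller.Watch call (which takes a source.Kind and an event handler):

```go
package main

import (
	"fmt"
	"sync"
)

// watchFunc stands in for controller.Controller.Watch in this sketch.
type watchFunc func(gvk string) error

// tracker deduplicates watches per GroupVersionKind, mirroring the
// LoadOrStore/Delete flow of external.ObjectTracker.
type tracker struct {
	m     sync.Map // keyed by GVK string, values are struct{}{}
	watch watchFunc
}

func (t *tracker) Watch(gvk string) error {
	if _, loaded := t.m.LoadOrStore(gvk, struct{}{}); loaded {
		return nil // a watch for this GVK already exists
	}
	if err := t.watch(gvk); err != nil {
		t.m.Delete(gvk) // forget the key so a later call can retry
		return fmt.Errorf("failed to add watch on external object %q: %w", gvk, err)
	}
	return nil
}

func main() {
	calls := 0
	tr := &tracker{watch: func(string) error { calls++; return nil }}
	_ = tr.Watch("infrastructure.cluster.x-k8s.io/v1beta1, Kind=DockerMachinePool")
	_ = tr.Watch("infrastructure.cluster.x-k8s.io/v1beta1, Kind=DockerMachinePool")
	fmt.Println(calls) // 1: the second call is deduplicated
}
```

The MachinePool controller below drops its ad-hoc sync.Map and cache fields in favor of exactly this tracker.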
WatchFilterValue string - controller controller.Controller - recorder record.EventRecorder - externalWatchers sync.Map - cache cache.Cache + controller controller.Controller + recorder record.EventRecorder + externalTracker external.ObjectTracker } func (r *MachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { @@ -104,7 +101,10 @@ func (r *MachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.M r.controller = c r.recorder = mgr.GetEventRecorderFor("machinepool-controller") - r.cache = mgr.GetCache() + r.externalTracker = external.ObjectTracker{ + Controller: c, + Cache: mgr.GetCache(), + } return nil } diff --git a/exp/internal/controllers/machinepool_controller_phases.go b/exp/internal/controllers/machinepool_controller_phases.go index 5290270a6e2d..a68527031dd5 100644 --- a/exp/internal/controllers/machinepool_controller_phases.go +++ b/exp/internal/controllers/machinepool_controller_phases.go @@ -20,17 +20,16 @@ import ( "context" "fmt" "reflect" - "time" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/klog/v2" "k8s.io/utils/pointer" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/handler" - "sigs.k8s.io/controller-runtime/pkg/source" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/controllers/external" @@ -43,10 +42,6 @@ import ( "sigs.k8s.io/cluster-api/util/patch" ) -var ( - externalReadyWait = 30 * time.Second -) - func (r *MachinePoolReconciler) reconcilePhase(mp *expv1.MachinePool) { // Set the phase to "pending" if nil. if mp.Status.Phase == "" { @@ -118,6 +113,11 @@ func (r *MachinePoolReconciler) reconcileExternal(ctx context.Context, cluster * return external.ReconcileOutput{}, err } + // Ensure we add a watch to the external object, if there isn't one already. + if err := r.externalTracker.Watch(log, obj, handler.EnqueueRequestForOwner(r.Client.Scheme(), r.Client.RESTMapper(), &expv1.MachinePool{})); err != nil { + return external.ReconcileOutput{}, err + } + // if external ref is paused, return error. if annotations.IsPaused(cluster, obj) { log.V(3).Info("External object referenced is paused") @@ -148,20 +148,6 @@ func (r *MachinePoolReconciler) reconcileExternal(ctx context.Context, cluster * return external.ReconcileOutput{}, err } - // Add watcher for external object, if there isn't one already. - _, loaded := r.externalWatchers.LoadOrStore(obj.GroupVersionKind().String(), struct{}{}) - if !loaded && r.controller != nil { - log.Info("Adding watcher on external object", "groupVersionKind", obj.GroupVersionKind()) - err := r.controller.Watch( - source.Kind(r.cache, obj), - handler.EnqueueRequestForOwner(r.Client.Scheme(), r.Client.RESTMapper(), &expv1.MachinePool{}), - ) - if err != nil { - r.externalWatchers.Delete(obj.GroupVersionKind().String()) - return external.ReconcileOutput{}, errors.Wrapf(err, "failed to add watcher on external object %q", obj.GroupVersionKind()) - } - } - // Set failure reason and message, if any. 
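external.FailuresFrom, called on the next line, reads the provider-agnostic failure fields off the unstructured external object. A minimal sketch of that convention, assuming providers surface terminal failures via status.failureReason and status.failureMessage:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
)

// failuresFrom approximates external.FailuresFrom: it pulls the optional
// failure fields out of an arbitrary provider object's status.
func failuresFrom(obj *unstructured.Unstructured) (reason, message string, err error) {
	reason, _, err = unstructured.NestedString(obj.Object, "status", "failureReason")
	if err != nil {
		return "", "", fmt.Errorf("failed to determine failureReason: %w", err)
	}
	message, _, err = unstructured.NestedString(obj.Object, "status", "failureMessage")
	if err != nil {
		return "", "", fmt.Errorf("failed to determine failureMessage: %w", err)
	}
	return reason, message, nil
}

func main() {
	u := &unstructured.Unstructured{Object: map[string]interface{}{
		"status": map[string]interface{}{
			"failureReason":  "InvalidConfiguration",
			"failureMessage": "instance type not available in zone",
		},
	}}
	fmt.Println(failuresFrom(u))
}
```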
failureReason, failureMessage, err := external.FailuresFrom(obj) if err != nil { @@ -216,9 +202,9 @@ func (r *MachinePoolReconciler) reconcileBootstrap(ctx context.Context, cluster ) if !ready { - log.V(2).Info("Bootstrap provider is not ready, requeuing") + log.Info("Waiting for bootstrap provider to generate data secret and report status.ready", bootstrapConfig.GetKind(), klog.KObj(bootstrapConfig)) m.Status.BootstrapReady = ready - return ctrl.Result{RequeueAfter: externalReadyWait}, nil + return ctrl.Result{}, nil } // Get and set the name of the secret containing the bootstrap data. @@ -289,8 +275,8 @@ func (r *MachinePoolReconciler) reconcileInfrastructure(ctx context.Context, clu ) if !mp.Status.InfrastructureReady { - log.Info("Infrastructure provider is not ready, requeuing") - return ctrl.Result{RequeueAfter: externalReadyWait}, nil + log.Info("Infrastructure provider is not yet ready", infraConfig.GetKind(), klog.KObj(infraConfig)) + return ctrl.Result{}, nil } var providerIDList []string @@ -308,8 +294,8 @@ func (r *MachinePoolReconciler) reconcileInfrastructure(ctx context.Context, clu } if len(providerIDList) == 0 && mp.Status.Replicas != 0 { - log.Info("Retrieved empty Spec.ProviderIDList from infrastructure provider but Status.Replicas is not zero.", "replicas", mp.Status.Replicas) - return ctrl.Result{RequeueAfter: externalReadyWait}, nil + log.Info("Retrieved empty spec.providerIDList from infrastructure provider but status.replicas is not zero.", "replicas", mp.Status.Replicas) + return ctrl.Result{}, nil } if !reflect.DeepEqual(mp.Spec.ProviderIDList, providerIDList) { diff --git a/exp/internal/controllers/machinepool_controller_phases_test.go b/exp/internal/controllers/machinepool_controller_phases_test.go index a7236d99d5a8..08a813f6d321 100644 --- a/exp/internal/controllers/machinepool_controller_phases_test.go +++ b/exp/internal/controllers/machinepool_controller_phases_test.go @@ -43,10 +43,6 @@ const ( wrongNamespace = "wrong-namespace" ) -func init() { - externalReadyWait = 1 * time.Second -} - func TestReconcileMachinePoolPhases(t *testing.T) { deletionTimestamp := metav1.Now() @@ -569,7 +565,7 @@ func TestReconcileMachinePoolBootstrap(t *testing.T) { "status": map[string]interface{}{}, }, expectError: false, - expectResult: ctrl.Result{RequeueAfter: externalReadyWait}, + expectResult: ctrl.Result{}, expected: func(g *WithT, m *expv1.MachinePool) { g.Expect(m.Status.BootstrapReady).To(BeFalse()) }, @@ -727,7 +723,7 @@ func TestReconcileMachinePoolBootstrap(t *testing.T) { }, }, expectError: false, - expectResult: ctrl.Result{RequeueAfter: externalReadyWait}, + expectResult: ctrl.Result{}, expected: func(g *WithT, m *expv1.MachinePool) { g.Expect(m.Status.BootstrapReady).To(BeFalse()) }, diff --git a/internal/controllers/cluster/cluster_controller_phases.go b/internal/controllers/cluster/cluster_controller_phases.go index e1bd4c31c9a3..cd0d62b27c94 100644 --- a/internal/controllers/cluster/cluster_controller_phases.go +++ b/internal/controllers/cluster/cluster_controller_phases.go @@ -92,6 +92,11 @@ func (r *Reconciler) reconcileExternal(ctx context.Context, cluster *clusterv1.C return external.ReconcileOutput{}, err } + // Ensure we add a watcher to the external object. + if err := r.externalTracker.Watch(log, obj, handler.EnqueueRequestForOwner(r.Client.Scheme(), r.Client.RESTMapper(), &clusterv1.Cluster{})); err != nil { + return external.ReconcileOutput{}, err + } + // if external ref is paused, return error. 
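annotations.IsPaused, used on the next line, treats the external object as paused when either the owning Cluster is paused or the object itself carries the paused annotation. Distilled into a stand-alone sketch (the annotation key matches clusterv1.PausedAnnotation; the cluster struct is a local stand-in):

```go
package main

import "fmt"

const pausedAnnotation = "cluster.x-k8s.io/paused"

type cluster struct{ paused bool }

// isPaused mirrors util/annotations.IsPaused: a cluster-wide pause wins,
// otherwise the object's own annotations decide.
func isPaused(c cluster, objAnnotations map[string]string) bool {
	if c.paused {
		return true
	}
	_, ok := objAnnotations[pausedAnnotation]
	return ok
}

func main() {
	fmt.Println(isPaused(cluster{}, map[string]string{pausedAnnotation: ""})) // true
	fmt.Println(isPaused(cluster{paused: true}, nil))                         // true
	fmt.Println(isPaused(cluster{}, nil))                                     // false
}
```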
if annotations.IsPaused(cluster, obj) { log.V(3).Info("External object referenced is paused") @@ -122,11 +127,6 @@ func (r *Reconciler) reconcileExternal(ctx context.Context, cluster *clusterv1.C return external.ReconcileOutput{}, err } - // Ensure we add a watcher to the external object. - if err := r.externalTracker.Watch(log, obj, handler.EnqueueRequestForOwner(r.Client.Scheme(), r.Client.RESTMapper(), &clusterv1.Cluster{})); err != nil { - return external.ReconcileOutput{}, err - } - // Set failure reason and message, if any. failureReason, failureMessage, err := external.FailuresFrom(obj) if err != nil { diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go index 3ec241add227..5b62bbb9a9d5 100644 --- a/internal/controllers/machine/machine_controller.go +++ b/internal/controllers/machine/machine_controller.go @@ -128,7 +128,6 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt } r.controller = c - r.recorder = mgr.GetEventRecorderFor("machine-controller") r.externalTracker = external.ObjectTracker{ Controller: c, diff --git a/internal/controllers/machine/machine_controller_phases.go b/internal/controllers/machine/machine_controller_phases.go index a0e21ac4f649..083a3b3ff4a4 100644 --- a/internal/controllers/machine/machine_controller_phases.go +++ b/internal/controllers/machine/machine_controller_phases.go @@ -98,12 +98,17 @@ func (r *Reconciler) reconcileExternal(ctx context.Context, cluster *clusterv1.C obj, err := external.Get(ctx, r.Client, ref, m.Namespace) if err != nil { if apierrors.IsNotFound(errors.Cause(err)) { - log.Info("could not find external ref, requeuing", ref.Kind, klog.KRef(m.Namespace, ref.Name)) + log.Info("could not find external ref, requeuing", ref.Kind, klog.KRef(ref.Namespace, ref.Name)) return external.ReconcileOutput{RequeueAfter: externalReadyWait}, nil } return external.ReconcileOutput{}, err } + // Ensure we add a watch to the external object, if there isn't one already. + if err := r.externalTracker.Watch(log, obj, handler.EnqueueRequestForOwner(r.Client.Scheme(), r.Client.RESTMapper(), &clusterv1.Machine{})); err != nil { + return external.ReconcileOutput{}, err + } + // if external ref is paused, return error. if annotations.IsPaused(cluster, obj) { log.V(3).Info("External object referenced is paused") @@ -141,11 +146,6 @@ func (r *Reconciler) reconcileExternal(ctx context.Context, cluster *clusterv1.C return external.ReconcileOutput{}, err } - // Ensure we add a watcher to the external object. - if err := r.externalTracker.Watch(log, obj, handler.EnqueueRequestForOwner(r.Client.Scheme(), r.Client.RESTMapper(), &clusterv1.Machine{})); err != nil { - return external.ReconcileOutput{}, err - } - // Set failure reason and message, if any. failureReason, failureMessage, err := external.FailuresFrom(obj) if err != nil { @@ -217,7 +217,7 @@ func (r *Reconciler) reconcileBootstrap(ctx context.Context, cluster *clusterv1. // If the bootstrap provider is not ready, requeue. if !ready { log.Info("Waiting for bootstrap provider to generate data secret and report status.ready", bootstrapConfig.GetKind(), klog.KObj(bootstrapConfig)) - return ctrl.Result{RequeueAfter: externalReadyWait}, nil + return ctrl.Result{}, nil } // Get and set the name of the secret containing the bootstrap data. @@ -284,7 +284,7 @@ func (r *Reconciler) reconcileInfrastructure(ctx context.Context, cluster *clust // If the infrastructure provider is not ready, return early. 
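This early return is the heart of the series: once reconcileExternal has guaranteed a watch on the external object, a timed requeue buys nothing, because reconciliation re-runs the moment the provider object actually changes. The shape of the change, with a local stand-in for ctrl.Result:

```go
package main

import (
	"fmt"
	"time"
)

// result mirrors ctrl.Result for this sketch.
type result struct{ RequeueAfter time.Duration }

// reconcileInfrastructure shows the before/after: when the provider is not
// ready we now return an empty result and let the watch re-trigger
// reconciliation, instead of polling on a fixed 30s timer.
func reconcileInfrastructure(ready bool) (result, error) {
	if !ready {
		// before: return result{RequeueAfter: 30 * time.Second}, nil
		return result{}, nil
	}
	// ...read spec.providerID, set status.InfrastructureReady, etc.
	return result{}, nil
}

func main() {
	res, _ := reconcileInfrastructure(false)
	fmt.Printf("requeueAfter=%s\n", res.RequeueAfter) // requeueAfter=0s
}
```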
if !ready { log.Info("Waiting for infrastructure provider to create machine infrastructure and report status.ready", infraConfig.GetKind(), klog.KObj(infraConfig)) - return ctrl.Result{RequeueAfter: externalReadyWait}, nil + return ctrl.Result{}, nil } // Get Spec.ProviderID from the infrastructure provider. diff --git a/internal/controllers/machine/machine_controller_phases_test.go b/internal/controllers/machine/machine_controller_phases_test.go index 85f49d55be34..b049196da124 100644 --- a/internal/controllers/machine/machine_controller_phases_test.go +++ b/internal/controllers/machine/machine_controller_phases_test.go @@ -715,7 +715,7 @@ func TestReconcileBootstrap(t *testing.T) { "spec": map[string]interface{}{}, "status": map[string]interface{}{}, }, - expectResult: ctrl.Result{RequeueAfter: externalReadyWait}, + expectResult: ctrl.Result{}, expectError: false, expected: func(g *WithT, m *clusterv1.Machine) { g.Expect(m.Status.BootstrapReady).To(BeFalse()) @@ -836,7 +836,7 @@ func TestReconcileBootstrap(t *testing.T) { BootstrapReady: true, }, }, - expectResult: ctrl.Result{RequeueAfter: externalReadyWait}, + expectResult: ctrl.Result{}, expectError: false, expected: func(g *WithT, m *clusterv1.Machine) { g.Expect(m.GetOwnerReferences()).NotTo(ContainRefOfGroupKind("cluster.x-k8s.io", "MachineSet")) From 1ac058c49ffd9311e69c2247c7fd6013a0717e60 Mon Sep 17 00:00:00 2001 From: Yuvaraj Kakaraparthi Date: Fri, 2 Jun 2023 01:09:11 -0700 Subject: [PATCH 02/94] update lifecycle hooks documentation with new upgrade rules --- .../runtime-sdk/implement-lifecycle-hooks.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/book/src/tasks/experimental-features/runtime-sdk/implement-lifecycle-hooks.md b/docs/book/src/tasks/experimental-features/runtime-sdk/implement-lifecycle-hooks.md index 5c82207972e5..0bb0805ca5f1 100644 --- a/docs/book/src/tasks/experimental-features/runtime-sdk/implement-lifecycle-hooks.md +++ b/docs/book/src/tasks/experimental-features/runtime-sdk/implement-lifecycle-hooks.md @@ -115,6 +115,12 @@ This hook is called after the Cluster object has been updated with a new `spec.t immediately before the new version is going to be propagated to the control plane (*). Runtime Extension implementers can use this hook to execute pre-upgrade add-on tasks and block upgrades of the ControlPlane and Workers. +Note: While the upgrade is blocked changes made to the Cluster Topology will be delayed propagating to the underlying +objects while the object is waiting for upgrade. Example: modifying ControlPlane/MachineDeployments (think scale up), +or creating new MachineDeployments will be delayed until the target ControlPlane/MachineDeployment is ready to pick up the upgrade. +This ensures that the ControlPlane and MachineDeployments do not perform a rollout prematurely while waiting to be rolled out again for the version upgrade (no double rollouts). +This also ensures that any version specific changes are only pushed to the underlying objects also at the correct version. + #### Example Request: ```yaml @@ -158,6 +164,12 @@ and immediately before the new version is going to be propagated to the MachineD Runtime Extension implementers can use this hook to execute post-upgrade add-on tasks and block upgrades to workers until everything is ready. +Note: While the MachineDeployments upgrade is blocked changes made to existing MachineDeployments and creating new MachineDeployments +will be delayed while the object is waiting for upgrade. 
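A blocking response is what triggers this holding behavior in the first place. A minimal sketch of a blocking handler, assuming the Runtime SDK types in `sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1` (the `workersReady` check is hypothetical):

```go
package handlers

import (
	"context"

	runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1"
)

// doAfterControlPlaneUpgrade blocks propagation of the new version to
// MachineDeployments for as long as it keeps returning a non-zero
// RetryAfterSeconds; the hook is simply called again later.
func doAfterControlPlaneUpgrade(ctx context.Context, request *runtimehooksv1.AfterControlPlaneUpgradeRequest, response *runtimehooksv1.AfterControlPlaneUpgradeResponse) {
	response.Status = runtimehooksv1.ResponseStatusSuccess
	if !workersReady(request.Cluster.Name) {
		response.RetryAfterSeconds = 30
		response.Message = "post-upgrade add-on tasks still running"
	}
}

// workersReady is a hypothetical readiness check for this sketch.
func workersReady(clusterName string) bool { return false }
```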
Example: modifying MachineDeployments (think scale up), +or creating new MachineDeployments will be delayed until the target MachineDeployment is ready to pick up the upgrade. +This ensures that the MachineDeployments do not perform a rollout prematurely while waiting to be rolled out again for the version upgrade (no double rollouts). +This also ensures that any version specific changes are only pushed to the underlying objects also at the correct version. + #### Example Request: ```yaml From a75ff050b9a8cdbba06fcd4711314940817784a2 Mon Sep 17 00:00:00 2001 From: Dhairya-Arora01 Date: Thu, 1 Jun 2023 18:07:01 +0530 Subject: [PATCH 03/94] Validation for warnings in unit tests --- api/v1beta1/machine_webhook_test.go | 40 ++++++++++----- api/v1beta1/machinedeployment_webhook_test.go | 36 ++++++++----- .../machinehealthcheck_webhook_test.go | 51 ++++++++++++------- api/v1beta1/machineset_webhook_test.go | 27 ++++++---- .../api/v1beta1/kubeadmconfig_webhook_test.go | 12 +++-- .../kubeadmconfigtemplate_webhook_test.go | 6 ++- .../kubeadm_control_plane_webhook_test.go | 9 ++-- ...ubeadmcontrolplanetemplate_webhook_test.go | 6 ++- .../clusterresourceset_webhook_test.go | 20 +++++--- .../clusterresourcesetbinding_webhook_test.go | 9 ++-- exp/api/v1beta1/machinepool_webhook_test.go | 39 +++++++++----- exp/ipam/internal/webhooks/ipaddress_test.go | 3 +- .../internal/webhooks/ipaddressclaim_test.go | 6 ++- internal/webhooks/cluster_test.go | 16 ++++-- .../runtime/extensionconfig_webhook_test.go | 8 ++- .../dockerclustertemplate_webhook_test.go | 6 ++- .../dockermachinetemplate_webhook_test.go | 11 ++-- util/defaulting/defaulting.go | 9 ++-- 18 files changed, 212 insertions(+), 102 deletions(-) diff --git a/api/v1beta1/machine_webhook_test.go b/api/v1beta1/machine_webhook_test.go index 079ecb1e78eb..880db070ea8b 100644 --- a/api/v1beta1/machine_webhook_test.go +++ b/api/v1beta1/machine_webhook_test.go @@ -79,15 +79,19 @@ func TestMachineBootstrapValidation(t *testing.T) { Spec: MachineSpec{Bootstrap: tt.bootstrap}, } if tt.expectErr { - _, err := m.ValidateCreate() + warnings, err := m.ValidateCreate() g.Expect(err).To(HaveOccurred()) - _, err = m.ValidateUpdate(m) + g.Expect(warnings).To(BeEmpty()) + warnings, err = m.ValidateUpdate(m) g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } else { - _, err := m.ValidateCreate() + warnings, err := m.ValidateCreate() g.Expect(err).ToNot(HaveOccurred()) - _, err = m.ValidateUpdate(m) + g.Expect(warnings).To(BeEmpty()) + warnings, err = m.ValidateUpdate(m) g.Expect(err).ToNot(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } }) } @@ -141,15 +145,19 @@ func TestMachineNamespaceValidation(t *testing.T) { } if tt.expectErr { - _, err := m.ValidateCreate() + warnings, err := m.ValidateCreate() g.Expect(err).To(HaveOccurred()) - _, err = m.ValidateUpdate(m) + g.Expect(warnings).To(BeEmpty()) + warnings, err = m.ValidateUpdate(m) g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } else { - _, err := m.ValidateCreate() + warnings, err := m.ValidateCreate() g.Expect(err).ToNot(HaveOccurred()) - _, err = m.ValidateUpdate(m) + g.Expect(warnings).To(BeEmpty()) + warnings, err = m.ValidateUpdate(m) g.Expect(err).ToNot(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } }) } @@ -193,11 +201,13 @@ func TestMachineClusterNameImmutable(t *testing.T) { }, } - _, err := newMachine.ValidateUpdate(oldMachine) + warnings, err := newMachine.ValidateUpdate(oldMachine) if tt.expectErr { g.Expect(err).To(HaveOccurred()) + 
g.Expect(warnings).To(BeEmpty()) } else { g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } }) } @@ -248,15 +258,19 @@ func TestMachineVersionValidation(t *testing.T) { } if tt.expectErr { - _, err := m.ValidateCreate() + warnings, err := m.ValidateCreate() g.Expect(err).To(HaveOccurred()) - _, err = m.ValidateUpdate(m) + g.Expect(warnings).To(BeEmpty()) + warnings, err = m.ValidateUpdate(m) g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } else { - _, err := m.ValidateCreate() + warnings, err := m.ValidateCreate() g.Expect(err).ToNot(HaveOccurred()) - _, err = m.ValidateUpdate(m) + g.Expect(warnings).To(BeEmpty()) + warnings, err = m.ValidateUpdate(m) g.Expect(err).ToNot(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } }) } diff --git a/api/v1beta1/machinedeployment_webhook_test.go b/api/v1beta1/machinedeployment_webhook_test.go index e5059601766f..fe4717d5be54 100644 --- a/api/v1beta1/machinedeployment_webhook_test.go +++ b/api/v1beta1/machinedeployment_webhook_test.go @@ -393,15 +393,19 @@ func TestMachineDeploymentValidation(t *testing.T) { }, } if tt.expectErr { - _, err := md.ValidateCreate() + warnings, err := md.ValidateCreate() g.Expect(err).To(HaveOccurred()) - _, err = md.ValidateUpdate(md) + g.Expect(warnings).To(BeEmpty()) + warnings, err = md.ValidateUpdate(md) g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } else { - _, err := md.ValidateCreate() + warnings, err := md.ValidateCreate() g.Expect(err).NotTo(HaveOccurred()) - _, err = md.ValidateUpdate(md) + g.Expect(warnings).To(BeEmpty()) + warnings, err = md.ValidateUpdate(md) g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } }) } @@ -456,15 +460,19 @@ func TestMachineDeploymentVersionValidation(t *testing.T) { } if tt.expectErr { - _, err := md.ValidateCreate() + warnings, err := md.ValidateCreate() g.Expect(err).To(HaveOccurred()) - _, err = md.ValidateUpdate(md) + g.Expect(warnings).To(BeEmpty()) + warnings, err = md.ValidateUpdate(md) g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } else { - _, err := md.ValidateCreate() + warnings, err := md.ValidateCreate() g.Expect(err).ToNot(HaveOccurred()) - _, err = md.ValidateUpdate(md) + g.Expect(warnings).To(BeEmpty()) + warnings, err = md.ValidateUpdate(md) g.Expect(err).ToNot(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } }) } @@ -507,12 +515,13 @@ func TestMachineDeploymentClusterNameImmutable(t *testing.T) { }, } - _, err := newMD.ValidateUpdate(oldMD) + warnings, err := newMD.ValidateUpdate(oldMD) if tt.expectErr { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).NotTo(HaveOccurred()) } + g.Expect(warnings).To(BeEmpty()) }) } } @@ -539,21 +548,24 @@ func defaultValidateTestCustomDefaulter(object admission.Validator, customDefaul t.Run("validate-on-create", func(t *testing.T) { g := NewWithT(t) g.Expect(customDefaulter.Default(ctx, createCopy)).To(Succeed()) - _, err := createCopy.ValidateCreate() + warnings, err := createCopy.ValidateCreate() g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) }) t.Run("validate-on-update", func(t *testing.T) { g := NewWithT(t) g.Expect(customDefaulter.Default(ctx, defaultingUpdateCopy)).To(Succeed()) g.Expect(customDefaulter.Default(ctx, updateCopy)).To(Succeed()) - _, err := defaultingUpdateCopy.ValidateUpdate(updateCopy) + warnings, err := defaultingUpdateCopy.ValidateUpdate(updateCopy) g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) }) t.Run("validate-on-delete", func(t 
*testing.T) { g := NewWithT(t) g.Expect(customDefaulter.Default(ctx, deleteCopy)).To(Succeed()) - _, err := deleteCopy.ValidateDelete() + warnings, err := deleteCopy.ValidateDelete() g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) }) } } diff --git a/api/v1beta1/machinehealthcheck_webhook_test.go b/api/v1beta1/machinehealthcheck_webhook_test.go index c1d5f69bc3ac..9738d1f9bdf8 100644 --- a/api/v1beta1/machinehealthcheck_webhook_test.go +++ b/api/v1beta1/machinehealthcheck_webhook_test.go @@ -92,15 +92,19 @@ func TestMachineHealthCheckLabelSelectorAsSelectorValidation(t *testing.T) { }, } if tt.expectErr { - _, err := mhc.ValidateCreate() + warnings, err := mhc.ValidateCreate() g.Expect(err).To(HaveOccurred()) - _, err = mhc.ValidateUpdate(mhc) + g.Expect(warnings).To(BeEmpty()) + warnings, err = mhc.ValidateUpdate(mhc) g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } else { - _, err := mhc.ValidateCreate() + warnings, err := mhc.ValidateCreate() g.Expect(err).NotTo(HaveOccurred()) - _, err = mhc.ValidateUpdate(mhc) + g.Expect(warnings).To(BeEmpty()) + warnings, err = mhc.ValidateUpdate(mhc) g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } }) } @@ -164,12 +168,13 @@ func TestMachineHealthCheckClusterNameImmutable(t *testing.T) { }, } - _, err := newMHC.ValidateUpdate(oldMHC) + warnings, err := newMHC.ValidateUpdate(oldMHC) if tt.expectErr { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).NotTo(HaveOccurred()) } + g.Expect(warnings).To(BeEmpty()) }) } } @@ -216,15 +221,19 @@ func TestMachineHealthCheckUnhealthyConditions(t *testing.T) { }, } if tt.expectErr { - _, err := mhc.ValidateCreate() + warnings, err := mhc.ValidateCreate() g.Expect(err).To(HaveOccurred()) - _, err = mhc.ValidateUpdate(mhc) + g.Expect(warnings).To(BeEmpty()) + warnings, err = mhc.ValidateUpdate(mhc) g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } else { - _, err := mhc.ValidateCreate() + warnings, err := mhc.ValidateCreate() g.Expect(err).NotTo(HaveOccurred()) - _, err = mhc.ValidateUpdate(mhc) + g.Expect(warnings).To(BeEmpty()) + warnings, err = mhc.ValidateUpdate(mhc) g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } }) } @@ -295,15 +304,19 @@ func TestMachineHealthCheckNodeStartupTimeout(t *testing.T) { } if tt.expectErr { - _, err := mhc.ValidateCreate() + warnings, err := mhc.ValidateCreate() g.Expect(err).To(HaveOccurred()) - _, err = mhc.ValidateUpdate(mhc) + g.Expect(warnings).To(BeEmpty()) + warnings, err = mhc.ValidateUpdate(mhc) g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } else { - _, err := mhc.ValidateCreate() + warnings, err := mhc.ValidateCreate() g.Expect(err).NotTo(HaveOccurred()) - _, err = mhc.ValidateUpdate(mhc) + g.Expect(warnings).To(BeEmpty()) + warnings, err = mhc.ValidateUpdate(mhc) g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } } } @@ -358,15 +371,19 @@ func TestMachineHealthCheckMaxUnhealthy(t *testing.T) { } if tt.expectErr { - _, err := mhc.ValidateCreate() + warnings, err := mhc.ValidateCreate() g.Expect(err).To(HaveOccurred()) - _, err = mhc.ValidateUpdate(mhc) + g.Expect(warnings).To(BeEmpty()) + warnings, err = mhc.ValidateUpdate(mhc) g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } else { - _, err := mhc.ValidateCreate() + warnings, err := mhc.ValidateCreate() g.Expect(err).NotTo(HaveOccurred()) - _, err = mhc.ValidateUpdate(mhc) + g.Expect(warnings).To(BeEmpty()) + warnings, err = 
mhc.ValidateUpdate(mhc) g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } } } diff --git a/api/v1beta1/machineset_webhook_test.go b/api/v1beta1/machineset_webhook_test.go index 32bcfc60b952..9753ee2d4ebf 100644 --- a/api/v1beta1/machineset_webhook_test.go +++ b/api/v1beta1/machineset_webhook_test.go @@ -106,15 +106,19 @@ func TestMachineSetLabelSelectorMatchValidation(t *testing.T) { } if tt.expectErr { - _, err := ms.ValidateCreate() + warnings, err := ms.ValidateCreate() g.Expect(err).To(HaveOccurred()) - _, err = ms.ValidateUpdate(ms) + g.Expect(warnings).To(BeEmpty()) + warnings, err = ms.ValidateUpdate(ms) g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } else { - _, err := ms.ValidateCreate() + warnings, err := ms.ValidateCreate() g.Expect(err).NotTo(HaveOccurred()) - _, err = ms.ValidateUpdate(ms) + g.Expect(warnings).To(BeEmpty()) + warnings, err = ms.ValidateUpdate(ms) g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } }) } @@ -157,12 +161,13 @@ func TestMachineSetClusterNameImmutable(t *testing.T) { }, } - _, err := newMS.ValidateUpdate(oldMS) + warnings, err := newMS.ValidateUpdate(oldMS) if tt.expectErr { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).NotTo(HaveOccurred()) } + g.Expect(warnings).To(BeEmpty()) }) } } @@ -215,15 +220,19 @@ func TestMachineSetVersionValidation(t *testing.T) { } if tt.expectErr { - _, err := md.ValidateCreate() + warnings, err := md.ValidateCreate() g.Expect(err).To(HaveOccurred()) - _, err = md.ValidateUpdate(md) + g.Expect(warnings).To(BeEmpty()) + warnings, err = md.ValidateUpdate(md) g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } else { - _, err := md.ValidateCreate() + warnings, err := md.ValidateCreate() g.Expect(err).NotTo(HaveOccurred()) - _, err = md.ValidateUpdate(md) + g.Expect(warnings).To(BeEmpty()) + warnings, err = md.ValidateUpdate(md) g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } }) } diff --git a/bootstrap/kubeadm/api/v1beta1/kubeadmconfig_webhook_test.go b/bootstrap/kubeadm/api/v1beta1/kubeadmconfig_webhook_test.go index f89b87a6ac82..eafa6d92812a 100644 --- a/bootstrap/kubeadm/api/v1beta1/kubeadmconfig_webhook_test.go +++ b/bootstrap/kubeadm/api/v1beta1/kubeadmconfig_webhook_test.go @@ -464,15 +464,19 @@ func TestKubeadmConfigValidate(t *testing.T) { g := NewWithT(t) if tt.expectErr { - _, err := tt.in.ValidateCreate() + warnings, err := tt.in.ValidateCreate() g.Expect(err).To(HaveOccurred()) - _, err = tt.in.ValidateUpdate(nil) + g.Expect(warnings).To(BeEmpty()) + warnings, err = tt.in.ValidateUpdate(nil) g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } else { - _, err := tt.in.ValidateCreate() + warnings, err := tt.in.ValidateCreate() g.Expect(err).NotTo(HaveOccurred()) - _, err = tt.in.ValidateUpdate(nil) + g.Expect(warnings).To(BeEmpty()) + warnings, err = tt.in.ValidateUpdate(nil) g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } }) } diff --git a/bootstrap/kubeadm/api/v1beta1/kubeadmconfigtemplate_webhook_test.go b/bootstrap/kubeadm/api/v1beta1/kubeadmconfigtemplate_webhook_test.go index 897858a10434..48629df6b7c7 100644 --- a/bootstrap/kubeadm/api/v1beta1/kubeadmconfigtemplate_webhook_test.go +++ b/bootstrap/kubeadm/api/v1beta1/kubeadmconfigtemplate_webhook_test.go @@ -68,10 +68,12 @@ func TestKubeadmConfigTemplateValidation(t *testing.T) { t.Run(name, func(t *testing.T) { g := NewWithT(t) - _, err := tt.in.ValidateCreate() + warnings, err := 
tt.in.ValidateCreate() g.Expect(err).NotTo(HaveOccurred()) - _, err = tt.in.ValidateUpdate(nil) + g.Expect(warnings).To(BeEmpty()) + warnings, err = tt.in.ValidateUpdate(nil) g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) }) } } diff --git a/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook_test.go b/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook_test.go index 86a4b62c1381..a0a03d1a87b6 100644 --- a/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook_test.go +++ b/controlplane/kubeadm/api/v1beta1/kubeadm_control_plane_webhook_test.go @@ -248,12 +248,13 @@ func TestKubeadmControlPlaneValidateCreate(t *testing.T) { g := NewWithT(t) - _, err := tt.kcp.ValidateCreate() + warnings, err := tt.kcp.ValidateCreate() if tt.expectErr { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).NotTo(HaveOccurred()) } + g.Expect(warnings).To(BeEmpty()) }) } } @@ -1027,12 +1028,13 @@ func TestKubeadmControlPlaneValidateUpdate(t *testing.T) { g := NewWithT(t) - _, err := tt.kcp.ValidateUpdate(tt.before.DeepCopy()) + warnings, err := tt.kcp.ValidateUpdate(tt.before.DeepCopy()) if tt.expectErr { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).To(Succeed()) } + g.Expect(warnings).To(BeEmpty()) }) } } @@ -1232,7 +1234,7 @@ func TestKubeadmControlPlaneValidateUpdateAfterDefaulting(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) - _, err := tt.kcp.ValidateUpdate(tt.before.DeepCopy()) + warnings, err := tt.kcp.ValidateUpdate(tt.before.DeepCopy()) if tt.expectErr { g.Expect(err).To(HaveOccurred()) } else { @@ -1243,6 +1245,7 @@ func TestKubeadmControlPlaneValidateUpdateAfterDefaulting(t *testing.T) { g.Expect(tt.kcp.Spec.RolloutStrategy.RollingUpdate.MaxSurge.IntVal).To(Equal(int32(1))) g.Expect(tt.kcp.Spec.Replicas).To(Equal(pointer.Int32(1))) } + g.Expect(warnings).To(BeEmpty()) }) } } diff --git a/controlplane/kubeadm/api/v1beta1/kubeadmcontrolplanetemplate_webhook_test.go b/controlplane/kubeadm/api/v1beta1/kubeadmcontrolplanetemplate_webhook_test.go index b6b5c4e7f616..50abc7743a4d 100644 --- a/controlplane/kubeadm/api/v1beta1/kubeadmcontrolplanetemplate_webhook_test.go +++ b/controlplane/kubeadm/api/v1beta1/kubeadmcontrolplanetemplate_webhook_test.go @@ -79,8 +79,9 @@ func TestKubeadmControlPlaneTemplateValidationFeatureGateEnabled(t *testing.T) { }, }, } - _, err := kcpTemplate.ValidateCreate() + warnings, err := kcpTemplate.ValidateCreate() g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) }) } @@ -104,7 +105,8 @@ func TestKubeadmControlPlaneTemplateValidationFeatureGateDisabled(t *testing.T) }, }, } - _, err := kcpTemplate.ValidateCreate() + warnings, err := kcpTemplate.ValidateCreate() g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) }) } diff --git a/exp/addons/api/v1beta1/clusterresourceset_webhook_test.go b/exp/addons/api/v1beta1/clusterresourceset_webhook_test.go index 77f0fbcd5a53..b98ff5be1f6e 100644 --- a/exp/addons/api/v1beta1/clusterresourceset_webhook_test.go +++ b/exp/addons/api/v1beta1/clusterresourceset_webhook_test.go @@ -67,15 +67,19 @@ func TestClusterResourceSetLabelSelectorAsSelectorValidation(t *testing.T) { }, } if tt.expectErr { - _, err := clusterResourceSet.ValidateCreate() + warnings, err := clusterResourceSet.ValidateCreate() g.Expect(err).To(HaveOccurred()) - _, err = clusterResourceSet.ValidateUpdate(clusterResourceSet) + g.Expect(warnings).To(BeEmpty()) + warnings, err = 
clusterResourceSet.ValidateUpdate(clusterResourceSet) g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } else { - _, err := clusterResourceSet.ValidateCreate() + warnings, err := clusterResourceSet.ValidateCreate() g.Expect(err).NotTo(HaveOccurred()) - _, err = clusterResourceSet.ValidateUpdate(clusterResourceSet) + g.Expect(warnings).To(BeEmpty()) + warnings, err = clusterResourceSet.ValidateUpdate(clusterResourceSet) g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } }) } @@ -128,12 +132,14 @@ func TestClusterResourceSetStrategyImmutable(t *testing.T) { }, } - _, err := newClusterResourceSet.ValidateUpdate(oldClusterResourceSet) + warnings, err := newClusterResourceSet.ValidateUpdate(oldClusterResourceSet) if tt.expectErr { g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) return } g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) }) } } @@ -179,12 +185,14 @@ func TestClusterResourceSetClusterSelectorImmutable(t *testing.T) { }, } - _, err := newClusterResourceSet.ValidateUpdate(oldClusterResourceSet) + warnings, err := newClusterResourceSet.ValidateUpdate(oldClusterResourceSet) if tt.expectErr { g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) return } g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) }) } } diff --git a/exp/addons/api/v1beta1/clusterresourcesetbinding_webhook_test.go b/exp/addons/api/v1beta1/clusterresourcesetbinding_webhook_test.go index eacaf608e05a..e34ca5261c27 100644 --- a/exp/addons/api/v1beta1/clusterresourcesetbinding_webhook_test.go +++ b/exp/addons/api/v1beta1/clusterresourcesetbinding_webhook_test.go @@ -77,14 +77,17 @@ func TestClusterResourceSetBindingClusterNameImmutable(t *testing.T) { }, } - _, err := newClusterResourceSetBinding.ValidateCreate() + warnings, err := newClusterResourceSetBinding.ValidateCreate() g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) if tt.expectErr { - _, err = newClusterResourceSetBinding.ValidateUpdate(oldClusterResourceSetBinding) + warnings, err = newClusterResourceSetBinding.ValidateUpdate(oldClusterResourceSetBinding) g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } else { - _, err = newClusterResourceSetBinding.ValidateUpdate(oldClusterResourceSetBinding) + warnings, err = newClusterResourceSetBinding.ValidateUpdate(oldClusterResourceSetBinding) g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } }) } diff --git a/exp/api/v1beta1/machinepool_webhook_test.go b/exp/api/v1beta1/machinepool_webhook_test.go index 987750ee532d..c53172ed0948 100644 --- a/exp/api/v1beta1/machinepool_webhook_test.go +++ b/exp/api/v1beta1/machinepool_webhook_test.go @@ -101,15 +101,19 @@ func TestMachinePoolBootstrapValidation(t *testing.T) { } if tt.expectErr { - _, err := m.ValidateCreate() + warnings, err := m.ValidateCreate() g.Expect(err).To(HaveOccurred()) - _, err = m.ValidateUpdate(m) + g.Expect(warnings).To(BeEmpty()) + warnings, err = m.ValidateUpdate(m) g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } else { - _, err := m.ValidateCreate() + warnings, err := m.ValidateCreate() g.Expect(err).NotTo(HaveOccurred()) - _, err = m.ValidateUpdate(m) + g.Expect(warnings).To(BeEmpty()) + warnings, err = m.ValidateUpdate(m) g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } }) } @@ -173,15 +177,19 @@ func TestMachinePoolNamespaceValidation(t *testing.T) { } if tt.expectErr { - _, err := m.ValidateCreate() + warnings, err := 
m.ValidateCreate() g.Expect(err).To(HaveOccurred()) - _, err = m.ValidateUpdate(m) + g.Expect(warnings).To(BeEmpty()) + warnings, err = m.ValidateUpdate(m) g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } else { - _, err := m.ValidateCreate() + warnings, err := m.ValidateCreate() g.Expect(err).NotTo(HaveOccurred()) - _, err = m.ValidateUpdate(m) + g.Expect(warnings).To(BeEmpty()) + warnings, err = m.ValidateUpdate(m) g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } }) } @@ -237,12 +245,13 @@ func TestMachinePoolClusterNameImmutable(t *testing.T) { }, } - _, err := newMP.ValidateUpdate(oldMP) + warnings, err := newMP.ValidateUpdate(oldMP) if tt.expectErr { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).NotTo(HaveOccurred()) } + g.Expect(warnings).To(BeEmpty()) }) } } @@ -294,15 +303,19 @@ func TestMachinePoolVersionValidation(t *testing.T) { } if tt.expectErr { - _, err := m.ValidateCreate() + warnings, err := m.ValidateCreate() g.Expect(err).To(HaveOccurred()) - _, err = m.ValidateUpdate(m) + g.Expect(warnings).To(BeEmpty()) + warnings, err = m.ValidateUpdate(m) g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } else { - _, err := m.ValidateCreate() + warnings, err := m.ValidateCreate() g.Expect(err).NotTo(HaveOccurred()) - _, err = m.ValidateUpdate(m) + g.Expect(warnings).To(BeEmpty()) + warnings, err = m.ValidateUpdate(m) g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) } }) } diff --git a/exp/ipam/internal/webhooks/ipaddress_test.go b/exp/ipam/internal/webhooks/ipaddress_test.go index 9052181118e6..2e8e2e7399ef 100644 --- a/exp/ipam/internal/webhooks/ipaddress_test.go +++ b/exp/ipam/internal/webhooks/ipaddress_test.go @@ -222,12 +222,13 @@ func TestIPAddressValidateUpdate(t *testing.T) { wh := IPAddress{ Client: fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.extraObjs...).Build(), } - _, err := wh.ValidateUpdate(context.Background(), &tt.oldIP, &tt.newIP) + warnings, err := wh.ValidateUpdate(context.Background(), &tt.oldIP, &tt.newIP) if tt.expectErr { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).NotTo(HaveOccurred()) } + g.Expect(warnings).To(BeEmpty()) }) } } diff --git a/exp/ipam/internal/webhooks/ipaddressclaim_test.go b/exp/ipam/internal/webhooks/ipaddressclaim_test.go index dc338a64af60..9a3ddd91ff71 100644 --- a/exp/ipam/internal/webhooks/ipaddressclaim_test.go +++ b/exp/ipam/internal/webhooks/ipaddressclaim_test.go @@ -65,12 +65,13 @@ func TestIPAddressClaimValidateCreate(t *testing.T) { t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) wh := IPAddressClaim{} - _, err := wh.ValidateCreate(context.Background(), &tt.claim) + warnings, err := wh.ValidateCreate(context.Background(), &tt.claim) if tt.expectErr { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).NotTo(HaveOccurred()) } + g.Expect(warnings).To(BeEmpty()) }) } } @@ -114,12 +115,13 @@ func TestIPAddressClaimValidateUpdate(t *testing.T) { t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) wh := IPAddressClaim{} - _, err := wh.ValidateUpdate(context.Background(), &tt.oldClaim, &tt.newClaim) + warnings, err := wh.ValidateUpdate(context.Background(), &tt.oldClaim, &tt.newClaim) if tt.expectErr { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).NotTo(HaveOccurred()) } + g.Expect(warnings).To(BeEmpty()) }) } } diff --git a/internal/webhooks/cluster_test.go b/internal/webhooks/cluster_test.go index 9f0874509c5f..7218f9f70043 100644 --- a/internal/webhooks/cluster_test.go +++ 
b/internal/webhooks/cluster_test.go @@ -1268,12 +1268,14 @@ func TestClusterTopologyValidation(t *testing.T) { // Create the webhook and add the fakeClient as its client. This is required because the test uses a Managed Topology. webhook := &Cluster{Client: fakeClient} - _, err := webhook.validate(ctx, tt.old, tt.in) + warnings, err := webhook.validate(ctx, tt.old, tt.in) if tt.expectErr { g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) return } g.Expect(err).ToNot(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) }) } } @@ -1549,12 +1551,13 @@ func TestClusterTopologyValidationWithClient(t *testing.T) { c := &Cluster{Client: fakeClient} // Checks the return error. - _, err := c.ValidateCreate(ctx, tt.cluster) + warnings, err := c.ValidateCreate(ctx, tt.cluster) if tt.wantErr { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).NotTo(HaveOccurred()) } + g.Expect(warnings).To(BeEmpty()) }) } } @@ -1971,12 +1974,13 @@ func TestClusterTopologyValidationForTopologyClassChange(t *testing.T) { secondCluster.Spec.Topology.Class = tt.secondClass.Name // Checks the return error. - _, err := c.ValidateUpdate(ctx, cluster, secondCluster) + warnings, err := c.ValidateUpdate(ctx, cluster, secondCluster) if tt.wantErr { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).NotTo(HaveOccurred()) } + g.Expect(warnings).To(BeEmpty()) }) } } @@ -2093,12 +2097,13 @@ func TestMovingBetweenManagedAndUnmanaged(t *testing.T) { updatedCluster.Spec.Topology = tt.updatedTopology // Checks the return error. - _, err := c.ValidateUpdate(ctx, tt.cluster, updatedCluster) + warnings, err := c.ValidateUpdate(ctx, tt.cluster, updatedCluster) if tt.wantErr { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).NotTo(HaveOccurred()) } + g.Expect(warnings).To(BeEmpty()) }) } } @@ -2217,12 +2222,13 @@ func TestClusterClassPollingErrors(t *testing.T) { Build()} // Checks the return error. 
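The mechanical change repeated across this patch: with controller-runtime v0.15 the webhook validation methods return `(admission.Warnings, error)`, where `Warnings` is just a `[]string` delivered to API clients as HTTP warning headers, so every test now captures both values and asserts the warning list is empty. The pattern reduced to a runnable sketch with local stand-ins:

```go
package main

import (
	"errors"
	"fmt"
)

// warnings stands in for admission.Warnings, which is a []string.
type warnings []string

type machine struct{ clusterName string }

// validateCreate mirrors the new signature: warnings are advisory and
// travel independently of the allow/deny error.
func (m machine) validateCreate() (warnings, error) {
	if m.clusterName == "" {
		return nil, errors.New("spec.clusterName: Required value")
	}
	return nil, nil
}

func main() {
	tests := []struct {
		m         machine
		expectErr bool
	}{
		{machine{clusterName: "c1"}, false},
		{machine{}, true},
	}
	for _, tt := range tests {
		warns, err := tt.m.validateCreate()
		// The tests in this patch assert both halves in every branch, since
		// none of these validators emit warnings yet.
		fmt.Println(len(warns) == 0, (err != nil) == tt.expectErr) // true true
	}
}
```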
- _, err := c.validate(ctx, tt.oldCluster, tt.cluster) + warnings, err := c.validate(ctx, tt.oldCluster, tt.cluster) if tt.wantErr { g.Expect(err).To(HaveOccurred()) } else { g.Expect(err).ToNot(HaveOccurred()) } + g.Expect(warnings).To(BeEmpty()) }) } } diff --git a/internal/webhooks/runtime/extensionconfig_webhook_test.go b/internal/webhooks/runtime/extensionconfig_webhook_test.go index f0536aaf91e6..b6fbd0afefb6 100644 --- a/internal/webhooks/runtime/extensionconfig_webhook_test.go +++ b/internal/webhooks/runtime/extensionconfig_webhook_test.go @@ -95,12 +95,14 @@ func TestExtensionConfigValidationFeatureGated(t *testing.T) { defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.RuntimeSDK, tt.featureGate)() webhook := ExtensionConfig{} g := NewWithT(t) - _, err := webhook.validate(context.TODO(), tt.old, tt.new) + warnings, err := webhook.validate(context.TODO(), tt.old, tt.new) if tt.expectErr { g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) return } g.Expect(err).ToNot(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) }) } } @@ -340,12 +342,14 @@ func TestExtensionConfigValidate(t *testing.T) { g.Expect(webhook.Default(ctx, tt.old)).To(Succeed()) } - _, err := webhook.validate(ctx, tt.old, tt.in) + warnings, err := webhook.validate(ctx, tt.old, tt.in) if tt.expectErr { g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) return } g.Expect(err).ToNot(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) }) } } diff --git a/test/infrastructure/docker/api/v1beta1/dockerclustertemplate_webhook_test.go b/test/infrastructure/docker/api/v1beta1/dockerclustertemplate_webhook_test.go index 95e38aecba59..bc5b148cbbca 100644 --- a/test/infrastructure/docker/api/v1beta1/dockerclustertemplate_webhook_test.go +++ b/test/infrastructure/docker/api/v1beta1/dockerclustertemplate_webhook_test.go @@ -42,8 +42,9 @@ func TestDockerClusterTemplateValidationFeatureGateEnabled(t *testing.T) { }, }, } - _, err := dct.ValidateCreate() + warnings, err := dct.ValidateCreate() g.Expect(err).NotTo(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) }) } @@ -62,7 +63,8 @@ func TestDockerClusterTemplateValidationFeatureGateDisabled(t *testing.T) { }, }, } - _, err := dct.ValidateCreate() + warnings, err := dct.ValidateCreate() g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) }) } diff --git a/test/infrastructure/docker/api/v1beta1/dockermachinetemplate_webhook_test.go b/test/infrastructure/docker/api/v1beta1/dockermachinetemplate_webhook_test.go index 0e5d6422d3f8..b70fbfbd6ec0 100644 --- a/test/infrastructure/docker/api/v1beta1/dockermachinetemplate_webhook_test.go +++ b/test/infrastructure/docker/api/v1beta1/dockermachinetemplate_webhook_test.go @@ -20,6 +20,7 @@ import ( "context" "testing" + . 
"github.com/onsi/gomega" admissionv1 "k8s.io/api/admission/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/pointer" @@ -86,15 +87,19 @@ func TestDockerMachineTemplateInvalid(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) wh := &DockerMachineTemplateWebhook{} ctx := context.Background() if tt.req != nil { ctx = admission.NewContextWithRequest(ctx, *tt.req) } - _, err := wh.ValidateUpdate(ctx, tt.oldTemplate, tt.newTemplate) - if (err != nil) != tt.wantError { - t.Errorf("unexpected result - wanted %+v, got %+v", tt.wantError, err) + warnings, err := wh.ValidateUpdate(ctx, tt.oldTemplate, tt.newTemplate) + if tt.wantError { + g.Expect(err).To(HaveOccurred()) + } else { + g.Expect(err).ToNot(HaveOccurred()) } + g.Expect(warnings).To(BeEmpty()) }) } } diff --git a/util/defaulting/defaulting.go b/util/defaulting/defaulting.go index 7436dc3ac9f6..6bfc8a0e119a 100644 --- a/util/defaulting/defaulting.go +++ b/util/defaulting/defaulting.go @@ -45,21 +45,24 @@ func DefaultValidateTest(object DefaultingValidator) func(*testing.T) { t.Run("validate-on-create", func(t *testing.T) { g := NewWithT(t) createCopy.Default() - _, err := createCopy.ValidateCreate() + warnings, err := createCopy.ValidateCreate() g.Expect(err).ToNot(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) }) t.Run("validate-on-update", func(t *testing.T) { g := NewWithT(t) defaultingUpdateCopy.Default() updateCopy.Default() - _, err := defaultingUpdateCopy.ValidateUpdate(updateCopy) + warnings, err := defaultingUpdateCopy.ValidateUpdate(updateCopy) g.Expect(err).ToNot(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) }) t.Run("validate-on-delete", func(t *testing.T) { g := NewWithT(t) deleteCopy.Default() - _, err := deleteCopy.ValidateDelete() + warnings, err := deleteCopy.ValidateDelete() g.Expect(err).ToNot(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) }) } } From 0be8e6e082d82b7256a069a4690f8ab03339a5dd Mon Sep 17 00:00:00 2001 From: Yuvaraj Kakaraparthi Date: Tue, 9 May 2023 17:36:56 -0700 Subject: [PATCH 04/94] topology controller should avoid unnecessary rollouts during upgrades --- api/v1beta1/condition_consts.go | 5 + .../topology/cluster/conditions.go | 46 ++-- .../topology/cluster/conditions_test.go | 72 ++++-- .../topology/cluster/desired_state.go | 131 +++++----- .../topology/cluster/desired_state_test.go | 236 +++++++++--------- .../topology/cluster/reconcile_state.go | 103 ++++---- .../topology/cluster/reconcile_state_test.go | 217 +++++++++++----- .../cluster/scope/hookresponsetracker.go | 20 ++ .../cluster/scope/hookresponsetracker_test.go | 57 +++++ .../topology/cluster/scope/upgradetracker.go | 138 +++++++--- 10 files changed, 650 insertions(+), 375 deletions(-) diff --git a/api/v1beta1/condition_consts.go b/api/v1beta1/condition_consts.go index c0c5de162efa..c33a38f793e6 100644 --- a/api/v1beta1/condition_consts.go +++ b/api/v1beta1/condition_consts.go @@ -282,6 +282,11 @@ const ( // not yet completed because Control Plane is not yet updated to match the desired topology spec. TopologyReconciledControlPlaneUpgradePendingReason = "ControlPlaneUpgradePending" + // TopologyReconciledMachineDeploymentsCreatePendingReason (Severity=Info) documents reconciliation of a Cluster topology + // not yet completed because at least one of the MachineDeployments is yet to be created. + // This generally happens because new MachineDeployment creations are held off while the ControlPlane is not stable. 
+ TopologyReconciledMachineDeploymentsCreatePendingReason = "MachineDeploymentsCreatePending" + // TopologyReconciledMachineDeploymentsUpgradePendingReason (Severity=Info) documents reconciliation of a Cluster topology // not yet completed because at least one of the MachineDeployments is not yet updated to match the desired topology spec. TopologyReconciledMachineDeploymentsUpgradePendingReason = "MachineDeploymentsUpgradePending" diff --git a/internal/controllers/topology/cluster/conditions.go b/internal/controllers/topology/cluster/conditions.go index 20066d133268..ed0140ed66d6 100644 --- a/internal/controllers/topology/cluster/conditions.go +++ b/internal/controllers/topology/cluster/conditions.go @@ -56,6 +56,7 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste ) return nil } + // If an error occurred during reconciliation set the TopologyReconciled condition to false. // Add the error message from the reconcile function to the message of the condition. if reconcileErr != nil { @@ -108,25 +109,37 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste // * either the Control Plane or any of the MachineDeployments are still pending to pick up the new version // (generally happens when upgrading the cluster) // * when there are MachineDeployments for which the upgrade has been deferred - if s.UpgradeTracker.ControlPlane.PendingUpgrade || - s.UpgradeTracker.MachineDeployments.PendingUpgrade() || + // * when new MachineDeployments are pending to be created + // (generally happens when upgrading the cluster) + if s.UpgradeTracker.ControlPlane.IsPendingUpgrade || + s.UpgradeTracker.MachineDeployments.IsAnyPendingCreate() || + s.UpgradeTracker.MachineDeployments.IsAnyPendingUpgrade() || s.UpgradeTracker.MachineDeployments.DeferredUpgrade() { msgBuilder := &strings.Builder{} var reason string + // TODO(ykakarap): Evaluate potential improvements to building the condition. Multiple causes can trigger the + // condition to be false at the same time (Example: ControlPlane.IsPendingUpgrade and MachineDeployments.IsAnyPendingCreate can + // occur at the same time). Find better wording and `Reason` for the condition so that the condition can be rich + // with all the relevant information. 
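The limitation the TODO describes is easy to see in miniature: the tracker can hold several causes at once, but the switch that follows surfaces only the highest-precedence one. A reduced model:

```go
package main

import "fmt"

// reason picks the first matching cause in the same precedence order as
// the switch below: control plane hold, then MD upgrade hold, then MD
// create hold, then deferred upgrades. Later causes are masked.
func reason(cpPending, mdUpgradePending, mdCreatePending, mdDeferred bool) string {
	switch {
	case cpPending:
		return "ControlPlaneUpgradePending"
	case mdUpgradePending:
		return "MachineDeploymentsUpgradePending"
	case mdCreatePending:
		return "MachineDeploymentsCreatePending"
	case mdDeferred:
		return "MachineDeploymentsUpgradeDeferred"
	}
	return ""
}

func main() {
	// A control plane hold and a pending MD create are active at the same
	// time, but only the control plane cause is reported.
	fmt.Println(reason(true, false, true, false)) // ControlPlaneUpgradePending
}
```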
switch { - case s.UpgradeTracker.ControlPlane.PendingUpgrade: - fmt.Fprintf(msgBuilder, "Control plane upgrade to %s on hold.", s.Blueprint.Topology.Version) + case s.UpgradeTracker.ControlPlane.IsPendingUpgrade: + fmt.Fprintf(msgBuilder, "Control plane rollout and upgrade to version %s on hold.", s.Blueprint.Topology.Version) reason = clusterv1.TopologyReconciledControlPlaneUpgradePendingReason - case s.UpgradeTracker.MachineDeployments.PendingUpgrade(): - fmt.Fprintf(msgBuilder, "MachineDeployment(s) %s upgrade to version %s on hold.", - computeMachineDeploymentNameList(s.UpgradeTracker.MachineDeployments.PendingUpgradeNames()), + case s.UpgradeTracker.MachineDeployments.IsAnyPendingUpgrade(): + fmt.Fprintf(msgBuilder, "MachineDeployment(s) %s rollout and upgrade to version %s on hold.", + computeNameList(s.UpgradeTracker.MachineDeployments.PendingUpgradeNames()), s.Blueprint.Topology.Version, ) reason = clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason + case s.UpgradeTracker.MachineDeployments.IsAnyPendingCreate(): + fmt.Fprintf(msgBuilder, "MachineDeployment(s) for Topologies %s creation on hold.", + computeNameList(s.UpgradeTracker.MachineDeployments.PendingCreateTopologyNames()), + ) + reason = clusterv1.TopologyReconciledMachineDeploymentsCreatePendingReason case s.UpgradeTracker.MachineDeployments.DeferredUpgrade(): - fmt.Fprintf(msgBuilder, "MachineDeployment(s) %s upgrade to version %s deferred.", - computeMachineDeploymentNameList(s.UpgradeTracker.MachineDeployments.DeferredUpgradeNames()), + fmt.Fprintf(msgBuilder, "MachineDeployment(s) %s rollout and upgrade to version %s deferred.", + computeNameList(s.UpgradeTracker.MachineDeployments.DeferredUpgradeNames()), s.Blueprint.Topology.Version, ) reason = clusterv1.TopologyReconciledMachineDeploymentsUpgradeDeferredReason @@ -148,7 +161,7 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste case len(s.UpgradeTracker.MachineDeployments.UpgradingNames()) > 0: fmt.Fprintf(msgBuilder, " MachineDeployment(s) %s are upgrading", - computeMachineDeploymentNameList(s.UpgradeTracker.MachineDeployments.UpgradingNames()), + computeNameList(s.UpgradeTracker.MachineDeployments.UpgradingNames()), ) } @@ -175,12 +188,13 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste return nil } -// computeMachineDeploymentNameList computes the MD name list to be shown in conditions. -// It shortens the list to at most 5 MachineDeployment names. -func computeMachineDeploymentNameList(mdList []string) any { - if len(mdList) > 5 { - mdList = append(mdList[:5], "...") +// computeNameList computes list of names form the given list to be shown in conditions. +// It shortens the list to at most 5 names and adds an ellipsis at the end if the list +// has more than 5 elements. 
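That truncation is easy to pin down (a self-contained copy of the helper, returning string rather than any for simplicity):

```go
package main

import (
	"fmt"
	"strings"
)

// computeNameList keeps at most five names and appends an ellipsis marker.
func computeNameList(list []string) string {
	if len(list) > 5 {
		list = append(list[:5], "...")
	}
	return strings.Join(list, ", ")
}

func main() {
	fmt.Println(computeNameList([]string{"md0", "md1"}))
	// md0, md1
	fmt.Println(computeNameList([]string{"md0", "md1", "md2", "md3", "md4", "md5", "md6"}))
	// md0, md1, md2, md3, md4, ...
}
```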
+func computeNameList(list []string) any { + if len(list) > 5 { + list = append(list[:5], "...") } - return strings.Join(mdList, ", ") + return strings.Join(list, ", ") } diff --git a/internal/controllers/topology/cluster/conditions_test.go b/internal/controllers/topology/cluster/conditions_test.go index 5b18fbe90b13..d2b21aa3af55 100644 --- a/internal/controllers/topology/cluster/conditions_test.go +++ b/internal/controllers/topology/cluster/conditions_test.go @@ -123,7 +123,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }, UpgradeTracker: func() *scope.UpgradeTracker { ut := scope.NewUpgradeTracker() - ut.ControlPlane.PendingUpgrade = true + ut.ControlPlane.IsPendingUpgrade = true ut.ControlPlane.IsProvisioning = true return ut }(), @@ -131,7 +131,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }, wantConditionStatus: corev1.ConditionFalse, wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason, - wantConditionMessage: "Control plane upgrade to v1.22.0 on hold. Control plane is completing initial provisioning", + wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. Control plane is completing initial provisioning", }, { name: "should set the condition to false if new version is not picked up because control plane is upgrading", @@ -154,7 +154,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }, UpgradeTracker: func() *scope.UpgradeTracker { ut := scope.NewUpgradeTracker() - ut.ControlPlane.PendingUpgrade = true + ut.ControlPlane.IsPendingUpgrade = true ut.ControlPlane.IsUpgrading = true return ut }(), @@ -162,7 +162,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }, wantConditionStatus: corev1.ConditionFalse, wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason, - wantConditionMessage: "Control plane upgrade to v1.22.0 on hold. Control plane is upgrading to version v1.21.2", + wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. Control plane is upgrading to version v1.21.2", }, { name: "should set the condition to false if new version is not picked up because control plane is scaling", @@ -185,7 +185,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }, UpgradeTracker: func() *scope.UpgradeTracker { ut := scope.NewUpgradeTracker() - ut.ControlPlane.PendingUpgrade = true + ut.ControlPlane.IsPendingUpgrade = true ut.ControlPlane.IsScaling = true return ut }(), @@ -193,7 +193,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }, wantConditionStatus: corev1.ConditionFalse, wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason, - wantConditionMessage: "Control plane upgrade to v1.22.0 on hold. Control plane is reconciling desired replicas", + wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. 
Control plane is reconciling desired replicas", }, { name: "should set the condition to false if new version is not picked up because at least one of the machine deployments is upgrading", reconcileErr: nil, cluster: &clusterv1.Cluster{}, s: &scope.Scope{ Blueprint: &scope.ClusterBlueprint{ Topology: &clusterv1.Topology{ Version: "v1.22.0", }, }, Current: &scope.ClusterState{ Cluster: &clusterv1.Cluster{}, ControlPlane: &scope.ControlPlaneState{ Object: builder.ControlPlane("ns1", "controlplane1"). WithVersion("v1.21.2"). WithReplicas(3). Build(), }, }, @@ -230,7 +230,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }, UpgradeTracker: func() *scope.UpgradeTracker { ut := scope.NewUpgradeTracker() - ut.ControlPlane.PendingUpgrade = true + ut.ControlPlane.IsPendingUpgrade = true ut.MachineDeployments.MarkUpgrading("md0-abc123") return ut }(), @@ -238,7 +238,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }, wantConditionStatus: corev1.ConditionFalse, wantConditionReason: clusterv1.TopologyReconciledControlPlaneUpgradePendingReason, - wantConditionMessage: "Control plane upgrade to v1.22.0 on hold. MachineDeployment(s) md0-abc123 are upgrading", + wantConditionMessage: "Control plane rollout and upgrade to version v1.22.0 on hold. MachineDeployment(s) md0-abc123 are upgrading", }, { name: "should set the condition to false if control plane picked the new version but machine deployments did not because control plane is upgrading", reconcileErr: nil, cluster: &clusterv1.Cluster{}, s: &scope.Scope{ Blueprint: &scope.ClusterBlueprint{ Topology: &clusterv1.Topology{ Version: "v1.22.0", }, }, Current: &scope.ClusterState{ Cluster: &clusterv1.Cluster{}, ControlPlane: &scope.ControlPlaneState{ Object: builder.ControlPlane("ns1", "controlplane1"). WithVersion("v1.21.2"). WithReplicas(3). Build(), }, }, @@ -275,7 +275,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }, UpgradeTracker: func() *scope.UpgradeTracker { ut := scope.NewUpgradeTracker() - ut.ControlPlane.PendingUpgrade = false + ut.ControlPlane.IsPendingUpgrade = false ut.ControlPlane.IsUpgrading = true ut.MachineDeployments.MarkPendingUpgrade("md0-abc123") return ut @@ -284,7 +284,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }, wantConditionStatus: corev1.ConditionFalse, wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason, - wantConditionMessage: "MachineDeployment(s) md0-abc123 upgrade to version v1.22.0 on hold. Control plane is upgrading to version v1.22.0", + wantConditionMessage: "MachineDeployment(s) md0-abc123 rollout and upgrade to version v1.22.0 on hold. Control plane is upgrading to version v1.22.0", }, { name: "should set the condition to false if control plane picked the new version but machine deployments did not because control plane is scaling", reconcileErr: nil, cluster: &clusterv1.Cluster{}, s: &scope.Scope{ Blueprint: &scope.ClusterBlueprint{ Topology: &clusterv1.Topology{ Version: "v1.22.0", }, }, Current: &scope.ClusterState{ Cluster: &clusterv1.Cluster{}, ControlPlane: &scope.ControlPlaneState{ Object: builder.ControlPlane("ns1", "controlplane1"). WithVersion("v1.21.2"). WithReplicas(3). Build(), }, }, @@ -321,7 +321,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }, UpgradeTracker: func() *scope.UpgradeTracker { ut := scope.NewUpgradeTracker() - ut.ControlPlane.PendingUpgrade = false + ut.ControlPlane.IsPendingUpgrade = false ut.ControlPlane.IsScaling = true ut.MachineDeployments.MarkPendingUpgrade("md0-abc123") return ut @@ -330,7 +330,39 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }, wantConditionStatus: corev1.ConditionFalse, wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason, - wantConditionMessage: "MachineDeployment(s) md0-abc123 upgrade to version v1.22.0 on hold. Control plane is reconciling desired replicas", + wantConditionMessage: "MachineDeployment(s) md0-abc123 rollout and upgrade to version v1.22.0 on hold. Control plane is reconciling desired replicas", + }, + { + name: "should set the condition to false if control plane picked the new version but there are machine deployments pending create because control plane is scaling", + reconcileErr: nil, + cluster: &clusterv1.Cluster{}, + s: &scope.Scope{ + Blueprint: &scope.ClusterBlueprint{ + Topology: &clusterv1.Topology{ + Version: "v1.22.0", + }, + }, + Current: &scope.ClusterState{ + Cluster: &clusterv1.Cluster{}, + ControlPlane: &scope.ControlPlaneState{ + Object: builder.ControlPlane("ns1", "controlplane1"). + WithVersion("v1.22.0"). + WithReplicas(3). 
+ Build(), + }, + }, + UpgradeTracker: func() *scope.UpgradeTracker { + ut := scope.NewUpgradeTracker() + ut.ControlPlane.IsPendingUpgrade = false + ut.ControlPlane.IsScaling = true + ut.MachineDeployments.MarkPendingCreate("md0") + return ut + }(), + HookResponseTracker: scope.NewHookResponseTracker(), + }, + wantConditionStatus: corev1.ConditionFalse, + wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsCreatePendingReason, + wantConditionMessage: "MachineDeployment(s) for Topologies md0 creation on hold. Control plane is reconciling desired replicas", }, { name: "should set the condition to true if control plane picked the new version and is upgrading but there are no machine deployments", @@ -353,7 +385,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }, UpgradeTracker: func() *scope.UpgradeTracker { ut := scope.NewUpgradeTracker() - ut.ControlPlane.PendingUpgrade = false + ut.ControlPlane.IsPendingUpgrade = false ut.ControlPlane.IsUpgrading = true return ut }(), @@ -382,7 +414,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }, UpgradeTracker: func() *scope.UpgradeTracker { ut := scope.NewUpgradeTracker() - ut.ControlPlane.PendingUpgrade = false + ut.ControlPlane.IsPendingUpgrade = false ut.ControlPlane.IsScaling = true return ut }(), @@ -450,7 +482,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }, UpgradeTracker: func() *scope.UpgradeTracker { ut := scope.NewUpgradeTracker() - ut.ControlPlane.PendingUpgrade = false + ut.ControlPlane.IsPendingUpgrade = false ut.MachineDeployments.MarkUpgrading("md0-abc123") ut.MachineDeployments.MarkPendingUpgrade("md1-abc123") return ut @@ -469,7 +501,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }, wantConditionStatus: corev1.ConditionFalse, wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradePendingReason, - wantConditionMessage: "MachineDeployment(s) md1-abc123 upgrade to version v1.22.0 on hold. MachineDeployment(s) md0-abc123 are upgrading", + wantConditionMessage: "MachineDeployment(s) md1-abc123 rollout and upgrade to version v1.22.0 on hold. 
MachineDeployment(s) md0-abc123 are upgrading", }, { name: "should set the condition to false if some machine deployments have not picked the new version because their upgrade has been deferred", @@ -520,7 +552,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }, UpgradeTracker: func() *scope.UpgradeTracker { ut := scope.NewUpgradeTracker() - ut.ControlPlane.PendingUpgrade = false + ut.ControlPlane.IsPendingUpgrade = false ut.MachineDeployments.MarkDeferredUpgrade("md1-abc123") return ut }(), @@ -528,7 +560,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }, wantConditionStatus: corev1.ConditionFalse, wantConditionReason: clusterv1.TopologyReconciledMachineDeploymentsUpgradeDeferredReason, - wantConditionMessage: "MachineDeployment(s) md1-abc123 upgrade to version v1.22.0 deferred.", + wantConditionMessage: "MachineDeployment(s) md1-abc123 rollout and upgrade to version v1.22.0 deferred.", }, { name: "should set the condition to true if there are no reconcile errors and control plane and all machine deployments picked up the new version", @@ -579,7 +611,7 @@ func TestReconcileTopologyReconciledCondition(t *testing.T) { }, UpgradeTracker: func() *scope.UpgradeTracker { ut := scope.NewUpgradeTracker() - ut.ControlPlane.PendingUpgrade = false + ut.ControlPlane.IsPendingUpgrade = false return ut }(), HookResponseTracker: scope.NewHookResponseTracker(), @@ -656,7 +688,7 @@ func TestComputeMachineDeploymentNameList(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) - g.Expect(computeMachineDeploymentNameList(tt.mdList)).To(Equal(tt.expected)) + g.Expect(computeNameList(tt.mdList)).To(Equal(tt.expected)) }) } } diff --git a/internal/controllers/topology/cluster/desired_state.go b/internal/controllers/topology/cluster/desired_state.go index 821d42ef0cbb..ce222170c28a 100644 --- a/internal/controllers/topology/cluster/desired_state.go +++ b/internal/controllers/topology/cluster/desired_state.go @@ -100,7 +100,7 @@ func (r *Reconciler) computeDesiredState(ctx context.Context, s *scope.Scope) (* // If required, compute the desired state of the MachineDeployments from the list of MachineDeploymentTopologies // defined in the cluster. if s.Blueprint.HasMachineDeployments() { - desiredState.MachineDeployments, err = r.computeMachineDeployments(ctx, s, desiredState.ControlPlane) + desiredState.MachineDeployments, err = r.computeMachineDeployments(ctx, s) if err != nil { return nil, errors.Wrapf(err, "failed to compute MachineDeployments") } @@ -343,12 +343,12 @@ func (r *Reconciler) computeControlPlaneVersion(ctx context.Context, s *scope.Sc return "", errors.Wrap(err, "failed to get the version from control plane spec") } - s.UpgradeTracker.ControlPlane.PendingUpgrade = true + s.UpgradeTracker.ControlPlane.IsPendingUpgrade = true if *currentVersion == desiredVersion { // Mark that the control plane spec is already at the desired version. // This information is used to show the appropriate message for the TopologyReconciled // condition. - s.UpgradeTracker.ControlPlane.PendingUpgrade = false + s.UpgradeTracker.ControlPlane.IsPendingUpgrade = false } // Check if the control plane is being created for the first time. 
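Aside: the condition messages earlier in this patch are assembled with the renamed computeNameList helper. The following standalone sketch mirrors its truncation behavior for illustration only (the in-tree helper returns any rather than string; the rest is copied from the patch):

package main

import (
	"fmt"
	"strings"
)

// computeNameList joins at most five names and appends an ellipsis for longer
// lists, matching the helper introduced in this patch.
func computeNameList(list []string) string {
	if len(list) > 5 {
		list = append(list[:5], "...")
	}
	return strings.Join(list, ", ")
}

func main() {
	mds := []string{"md-1", "md-2", "md-3", "md-4", "md-5", "md-6"}
	// Prints: md-1, md-2, md-3, md-4, md-5, ...
	fmt.Println(computeNameList(mds))
}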
@@ -380,7 +380,7 @@ func (r *Reconciler) computeControlPlaneVersion(ctx context.Context, s *scope.Sc } // Return here if the control plane is already at the desired version - if !s.UpgradeTracker.ControlPlane.PendingUpgrade { + if !s.UpgradeTracker.ControlPlane.IsPendingUpgrade { // At this stage the control plane is not upgrading and is already at the desired version. // We can return. // Nb. We do not return early in the function if the control plane is already at the desired version so as @@ -409,7 +409,6 @@ func (r *Reconciler) computeControlPlaneVersion(ctx context.Context, s *scope.Sc // can remove this hook from the list of pending-hooks. if hookResponse.RetryAfterSeconds != 0 { log.Infof("MachineDeployments upgrade to version %q are blocked by %q hook", desiredVersion, runtimecatalog.HookName(runtimehooksv1.AfterControlPlaneUpgrade)) - s.UpgradeTracker.MachineDeployments.HoldUpgrades(true) } else { if err := hooks.MarkAsDone(ctx, r.Client, s.Current.Cluster, runtimehooksv1.AfterControlPlaneUpgrade); err != nil { return "", err @@ -471,6 +470,8 @@ func (r *Reconciler) computeControlPlaneVersion(ctx context.Context, s *scope.Sc // Control plane and machine deployments are stable. All the required hooks are called. // Ready to pick up the topology version. + s.UpgradeTracker.ControlPlane.IsPendingUpgrade = false + s.UpgradeTracker.ControlPlane.IsStartingUpgrade = true return desiredVersion, nil } @@ -534,10 +535,10 @@ func calculateRefDesiredAPIVersion(currentRef *corev1.ObjectReference, desiredRe } // computeMachineDeployments computes the desired state of the list of MachineDeployments. -func (r *Reconciler) computeMachineDeployments(ctx context.Context, s *scope.Scope, desiredControlPlaneState *scope.ControlPlaneState) (scope.MachineDeploymentsStateMap, error) { +func (r *Reconciler) computeMachineDeployments(ctx context.Context, s *scope.Scope) (scope.MachineDeploymentsStateMap, error) { machineDeploymentsStateMap := make(scope.MachineDeploymentsStateMap) for _, mdTopology := range s.Blueprint.Topology.Workers.MachineDeployments { - desiredMachineDeployment, err := computeMachineDeployment(ctx, s, desiredControlPlaneState, mdTopology) + desiredMachineDeployment, err := computeMachineDeployment(ctx, s, mdTopology) if err != nil { return nil, errors.Wrapf(err, "failed to compute MachineDeployment for topology %q", mdTopology.Name) } @@ -549,7 +550,7 @@ func (r *Reconciler) computeMachineDeployments(ctx context.Context, s *scope.Sco // computeMachineDeployment computes the desired state for a MachineDeploymentTopology. // The generated machineDeployment object is calculated using the values from the machineDeploymentTopology and // the machineDeployment class. -func computeMachineDeployment(_ context.Context, s *scope.Scope, desiredControlPlaneState *scope.ControlPlaneState, machineDeploymentTopology clusterv1.MachineDeploymentTopology) (*scope.MachineDeploymentState, error) { +func computeMachineDeployment(_ context.Context, s *scope.Scope, machineDeploymentTopology clusterv1.MachineDeploymentTopology) (*scope.MachineDeploymentState, error) { desiredMachineDeployment := &scope.MachineDeploymentState{} // Gets the blueprint for the MachineDeployment class. 
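The MachineDeployment version computation below keys off whether the control plane is considered stable. As a compact illustration, this sketch models the UpgradeTracker flags the new isControlPlaneStable helper consults (the struct here is a hypothetical stand-in; the real flags live on scope.UpgradeTracker.ControlPlane):

package main

import "fmt"

// controlPlaneFlags is an illustrative stand-in for the tracker fields used by
// the isControlPlaneStable helper introduced further below in this patch.
type controlPlaneFlags struct {
	IsUpgrading       bool // an upgrade is in progress
	IsScaling         bool // a scale operation is in progress
	IsStartingUpgrade bool // an upgrade is about to start
	IsPendingUpgrade  bool // the topology version has not been picked up yet
}

// isStable mirrors the decision in the patch: any in-flight or imminent change
// on the control plane blocks MachineDeployment upgrades and creates.
func isStable(cp controlPlaneFlags) bool {
	return !cp.IsUpgrading && !cp.IsScaling && !cp.IsStartingUpgrade && !cp.IsPendingUpgrade
}

func main() {
	fmt.Println(isStable(controlPlaneFlags{}))                // true: safe to roll out MDs
	fmt.Println(isStable(controlPlaneFlags{IsScaling: true})) // false: MDs stay pending
}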
@@ -621,10 +622,7 @@ func computeMachineDeployment(_ context.Context, s *scope.Scope, desiredControlP // Add ClusterTopologyMachineDeploymentLabel to the generated InfrastructureMachine template infraMachineTemplateLabels[clusterv1.ClusterTopologyMachineDeploymentNameLabel] = machineDeploymentTopology.Name desiredMachineDeployment.InfrastructureMachineTemplate.SetLabels(infraMachineTemplateLabels) - version, err := computeMachineDeploymentVersion(s, machineDeploymentTopology, desiredControlPlaneState, currentMachineDeployment) - if err != nil { - return nil, errors.Wrapf(err, "failed to compute version for %s", machineDeploymentTopology.Name) - } + version := computeMachineDeploymentVersion(s, machineDeploymentTopology, currentMachineDeployment) // Compute values that can be set both in the MachineDeploymentClass and in the MachineDeploymentTopology minReadySeconds := machineDeploymentClass.MinReadySeconds @@ -754,16 +752,17 @@ func computeMachineDeployment(_ context.Context, s *scope.Scope, desiredControlP // computeMachineDeploymentVersion calculates the version of the desired machine deployment. // The version is calculated using the state of the current machine deployments, // the current control plane and the version defined in the topology. -// Nb: No MachineDeployment upgrades will be triggered while any MachineDeployment is in the middle -// of an upgrade. Even if the number of MachineDeployments that are being upgraded is less -// than the number of allowed concurrent upgrades. -func computeMachineDeploymentVersion(s *scope.Scope, machineDeploymentTopology clusterv1.MachineDeploymentTopology, desiredControlPlaneState *scope.ControlPlaneState, currentMDState *scope.MachineDeploymentState) (string, error) { +func computeMachineDeploymentVersion(s *scope.Scope, machineDeploymentTopology clusterv1.MachineDeploymentTopology, currentMDState *scope.MachineDeploymentState) string { desiredVersion := s.Blueprint.Topology.Version - // If creating a new machine deployment, we can pick up the desired version - // Note: We are not blocking the creation of new machine deployments when - // the control plane or any of the machine deployments are upgrading/scaling. + // If creating a new machine deployment, mark it as pending if the control plane is not + // yet stable. Creating a new MD while the control plane is upgrading can lead to unexpected race conditions. + // Example: join could fail if the load balancers are slow in detecting when CP machines are + // being deleted. if currentMDState == nil || currentMDState.Object == nil { - return desiredVersion, nil + if !isControlPlaneStable(s) || s.HookResponseTracker.IsBlocking(runtimehooksv1.AfterControlPlaneUpgrade) { + s.UpgradeTracker.MachineDeployments.MarkPendingCreate(machineDeploymentTopology.Name) + } + return desiredVersion } // Get the current version of the machine deployment. @@ -772,86 +771,68 @@ func computeMachineDeploymentVersion(s *scope.Scope, machineDeploymentTopology c // Return early if the currentVersion is already equal to the desiredVersion // no further checks required. if currentVersion == desiredVersion { - return currentVersion, nil + return currentVersion } // Return early if the upgrade for the MachineDeployment is deferred. 
if isMachineDeploymentDeferred(s.Blueprint.Topology, machineDeploymentTopology) { s.UpgradeTracker.MachineDeployments.MarkDeferredUpgrade(currentMDState.Object.Name) - return currentVersion, nil + s.UpgradeTracker.MachineDeployments.MarkPendingUpgrade(currentMDState.Object.Name) + return currentVersion } - // Return early if we are not allowed to upgrade the machine deployment. - if !s.UpgradeTracker.MachineDeployments.AllowUpgrade() { + // Return early if the AfterControlPlaneUpgrade hook returns a blocking response. + if s.HookResponseTracker.IsBlocking(runtimehooksv1.AfterControlPlaneUpgrade) { s.UpgradeTracker.MachineDeployments.MarkPendingUpgrade(currentMDState.Object.Name) - return currentVersion, nil + return currentVersion } - // If the control plane is being created (current control plane is nil), do not perform - // any machine deployment upgrade in this case. - // Return the current version of the machine deployment. - // NOTE: this case should never happen (upgrading a MachineDeployment) before creating a CP, - // but we are implementing this check for extra safety. - if s.Current.ControlPlane == nil || s.Current.ControlPlane.Object == nil { + // Return early if the upgrade concurrency is reached. + if s.UpgradeTracker.MachineDeployments.UpgradeConcurrencyReached() { s.UpgradeTracker.MachineDeployments.MarkPendingUpgrade(currentMDState.Object.Name) - return currentVersion, nil + return currentVersion } - // If the current control plane is upgrading, then do not pick up the desiredVersion yet. + // Return early if the Control Plane is not stable. Do not pick up the desiredVersion yet. // Return the current version of the machine deployment. We will pick up the new version after the control // plane is stable. - cpUpgrading, err := contract.ControlPlane().IsUpgrading(s.Current.ControlPlane.Object) - if err != nil { - return "", errors.Wrap(err, "failed to check if control plane is upgrading") - } - if cpUpgrading { + if !isControlPlaneStable(s) { s.UpgradeTracker.MachineDeployments.MarkPendingUpgrade(currentMDState.Object.Name) - return currentVersion, nil + return currentVersion } - // If control plane supports replicas, check if the control plane is in the middle of a scale operation. - // If the current control plane is scaling, then do not pick up the desiredVersion yet. - // Return the current version of the machine deployment. We will pick up the new version after the control - // plane is stable. - if s.Blueprint.Topology.ControlPlane.Replicas != nil { - cpScaling, err := contract.ControlPlane().IsScaling(s.Current.ControlPlane.Object) - if err != nil { - return "", errors.Wrap(err, "failed to check if the control plane is scaling") - } - if cpScaling { - s.UpgradeTracker.MachineDeployments.MarkPendingUpgrade(currentMDState.Object.Name) - return currentVersion, nil - } - } + // Control plane and machine deployments are stable. + // Ready to pick up the topology version. + s.UpgradeTracker.MachineDeployments.MarkUpgrading(currentMDState.Object.Name) + return desiredVersion +} - // Check if we are about to upgrade the control plane. In that case, do not upgrade the machine deployment yet. - // Wait for the new upgrade operation on the control plane to finish before picking up the new version for the - // machine deployment. 
- currentCPVersion, err := contract.ControlPlane().Version().Get(s.Current.ControlPlane.Object) - if err != nil { - return "", errors.Wrap(err, "failed to get version of current control plane") +// isControlPlaneStable returns true if the ControlPlane is stable. +func isControlPlaneStable(s *scope.Scope) bool { + // If the current control plane is upgrading it is not considered stable. + if s.UpgradeTracker.ControlPlane.IsUpgrading { + return false } - desiredCPVersion, err := contract.ControlPlane().Version().Get(desiredControlPlaneState.Object) - if err != nil { - return "", errors.Wrap(err, "failed to get version of desired control plane") + + // If control plane supports replicas, check if the control plane is in the middle of a scale operation. + // If the current control plane is scaling then it is not considered stable. + if s.UpgradeTracker.ControlPlane.IsScaling { + return false } - if *currentCPVersion != *desiredCPVersion { - // The versions of the current and desired control planes do no match, - // implies we are about to upgrade the control plane. - s.UpgradeTracker.MachineDeployments.MarkPendingUpgrade(currentMDState.Object.Name) - return currentVersion, nil + + // Check if we are about to upgrade the control plane. Since the control plane is about to start its upgrade process + // it cannot be considered stable. + if s.UpgradeTracker.ControlPlane.IsStartingUpgrade { + return false } - // If the ControlPlane is pending picking up an upgrade then do not pick up the new version yet. - if s.UpgradeTracker.ControlPlane.PendingUpgrade { - s.UpgradeTracker.MachineDeployments.MarkPendingUpgrade(currentMDState.Object.Name) - return currentVersion, nil + // If the ControlPlane is pending picking up an upgrade then it is not yet at the desired state and + // cannot be considered stable. + if s.UpgradeTracker.ControlPlane.IsPendingUpgrade { + return false } - // Control plane and machine deployments are stable. - // Ready to pick up the topology version. - s.UpgradeTracker.MachineDeployments.MarkUpgrading(currentMDState.Object.Name) - return desiredVersion, nil + return true } // isMachineDeploymentDeferred returns true if the upgrade for the mdTopology is deferred. diff --git a/internal/controllers/topology/cluster/desired_state_test.go b/internal/controllers/topology/cluster/desired_state_test.go index 57fc5f167de5..020d5c8adec2 100644 --- a/internal/controllers/topology/cluster/desired_state_test.go +++ b/internal/controllers/topology/cluster/desired_state_test.go @@ -867,6 +867,9 @@ func TestComputeControlPlaneVersion(t *testing.T) { } else { g.Expect(err).To(BeNil()) g.Expect(version).To(Equal(tt.expectedVersion)) + // Verify that if the upgrade is pending it is captured in the upgrade tracker. 
+ upgradePending := tt.expectedVersion != tt.topologyVersion + g.Expect(s.UpgradeTracker.ControlPlane.IsPendingUpgrade).To(Equal(upgradePending)) } }) } @@ -946,13 +949,13 @@ func TestComputeControlPlaneVersion(t *testing.T) { Build() tests := []struct { - name string - s *scope.Scope - hookResponse *runtimehooksv1.AfterControlPlaneUpgradeResponse - wantIntentToCall bool - wantHookToBeCalled bool - wantAllowMDUpgrades bool - wantErr bool + name string + s *scope.Scope + hookResponse *runtimehooksv1.AfterControlPlaneUpgradeResponse + wantIntentToCall bool + wantHookToBeCalled bool + wantHookToBlock bool + wantErr bool }{ { name: "should not call hook if it is not marked", @@ -1071,11 +1074,11 @@ func TestComputeControlPlaneVersion(t *testing.T) { UpgradeTracker: scope.NewUpgradeTracker(), HookResponseTracker: scope.NewHookResponseTracker(), }, - hookResponse: nonBlockingResponse, - wantIntentToCall: false, - wantHookToBeCalled: true, - wantAllowMDUpgrades: true, - wantErr: false, + hookResponse: nonBlockingResponse, + wantIntentToCall: false, + wantHookToBeCalled: true, + wantHookToBlock: false, + wantErr: false, }, { name: "should call hook if the control plane is at desired version - blocking response should leave the hook in pending hooks list and block MD upgrades", @@ -1104,11 +1107,11 @@ func TestComputeControlPlaneVersion(t *testing.T) { UpgradeTracker: scope.NewUpgradeTracker(), HookResponseTracker: scope.NewHookResponseTracker(), }, - hookResponse: blockingResponse, - wantIntentToCall: true, - wantHookToBeCalled: true, - wantAllowMDUpgrades: false, - wantErr: false, + hookResponse: blockingResponse, + wantIntentToCall: true, + wantHookToBeCalled: true, + wantHookToBlock: true, + wantErr: false, }, { name: "should call hook if the control plane is at desired version - failure response should leave the hook in pending hooks list", @@ -1168,7 +1171,7 @@ func TestComputeControlPlaneVersion(t *testing.T) { g.Expect(hooks.IsPending(runtimehooksv1.AfterControlPlaneUpgrade, tt.s.Current.Cluster)).To(Equal(tt.wantIntentToCall)) g.Expect(err != nil).To(Equal(tt.wantErr)) if tt.wantHookToBeCalled && !tt.wantErr { - g.Expect(tt.s.UpgradeTracker.MachineDeployments.AllowUpgrade()).To(Equal(tt.wantAllowMDUpgrades)) + g.Expect(tt.s.HookResponseTracker.IsBlocking(runtimehooksv1.AfterControlPlaneUpgrade)).To(Equal(tt.wantHookToBlock)) } }) } @@ -1408,7 +1411,7 @@ func TestComputeMachineDeployment(t *testing.T) { scope := scope.New(cluster) scope.Blueprint = blueprint - actual, err := computeMachineDeployment(ctx, scope, nil, mdTopology) + actual, err := computeMachineDeployment(ctx, scope, mdTopology) g.Expect(err).ToNot(HaveOccurred()) g.Expect(actual.BootstrapTemplate.GetLabels()).To(HaveKeyWithValue(clusterv1.ClusterTopologyMachineDeploymentNameLabel, "big-pool-of-machines")) @@ -1477,7 +1480,7 @@ func TestComputeMachineDeployment(t *testing.T) { // missing FailureDomain, NodeDrainTimeout, NodeVolumeDetachTimeout, NodeDeletionTimeout, MinReadySeconds, Strategy } - actual, err := computeMachineDeployment(ctx, scope, nil, mdTopology) + actual, err := computeMachineDeployment(ctx, scope, mdTopology) g.Expect(err).ToNot(HaveOccurred()) // checking only values from CC defaults @@ -1521,7 +1524,7 @@ func TestComputeMachineDeployment(t *testing.T) { }, } - actual, err := computeMachineDeployment(ctx, s, nil, mdTopology) + actual, err := computeMachineDeployment(ctx, s, mdTopology) g.Expect(err).ToNot(HaveOccurred()) actualMd := actual.Object @@ -1569,7 +1572,7 @@ func TestComputeMachineDeployment(t 
*testing.T) { Name: "big-pool-of-machines", } - _, err := computeMachineDeployment(ctx, scope, nil, mdTopology) + _, err := computeMachineDeployment(ctx, scope, mdTopology) g.Expect(err).To(HaveOccurred()) }) @@ -1674,9 +1677,6 @@ func TestComputeMachineDeployment(t *testing.T) { s.Current.ControlPlane = &scope.ControlPlaneState{ Object: controlPlaneStable123, } - desiredControlPlaneState := &scope.ControlPlaneState{ - Object: controlPlaneStable123, - } mdTopology := clusterv1.MachineDeploymentTopology{ Class: "linux-worker", @@ -1684,7 +1684,7 @@ func TestComputeMachineDeployment(t *testing.T) { Replicas: pointer.Int32(2), } s.UpgradeTracker.MachineDeployments.MarkUpgrading(tt.upgradingMachineDeployments...) - obj, err := computeMachineDeployment(ctx, s, desiredControlPlaneState, mdTopology) + obj, err := computeMachineDeployment(ctx, s, mdTopology) g.Expect(err).NotTo(HaveOccurred()) g.Expect(*obj.Object.Spec.Template.Spec.Version).To(Equal(tt.expectedVersion)) }) @@ -1700,7 +1700,7 @@ func TestComputeMachineDeployment(t *testing.T) { Name: "big-pool-of-machines", } - actual, err := computeMachineDeployment(ctx, scope, nil, mdTopology) + actual, err := computeMachineDeployment(ctx, scope, mdTopology) g.Expect(err).To(BeNil()) // Check that the ClusterName and selector are set properly for the MachineHealthCheck. g.Expect(actual.MachineHealthCheck.Spec.ClusterName).To(Equal(cluster.Name)) @@ -1718,75 +1718,47 @@ func TestComputeMachineDeployment(t *testing.T) { } func TestComputeMachineDeploymentVersion(t *testing.T) { - controlPlaneStable122 := builder.ControlPlane("test1", "cp1"). - WithSpecFields(map[string]interface{}{ - "spec.version": "v1.2.2", - "spec.replicas": int64(2), - }). - WithStatusFields(map[string]interface{}{ - "status.version": "v1.2.2", - "status.replicas": int64(2), - "status.updatedReplicas": int64(2), - "status.readyReplicas": int64(2), - "status.unavailableReplicas": int64(0), - }). - Build() - controlPlaneStable123 := builder.ControlPlane("test1", "cp1"). - WithSpecFields(map[string]interface{}{ - "spec.version": "v1.2.3", - "spec.replicas": int64(2), - }). - WithStatusFields(map[string]interface{}{ - "status.version": "v1.2.3", - "status.replicas": int64(2), - "status.updatedReplicas": int64(2), - "status.readyReplicas": int64(2), - "status.unavailableReplicas": int64(0), - }). - Build() - controlPlaneUpgrading := builder.ControlPlane("test1", "cp1"). - WithSpecFields(map[string]interface{}{ - "spec.version": "v1.2.3", - }). - WithStatusFields(map[string]interface{}{ - "status.version": "v1.2.1", - }). - Build() - controlPlaneScaling := builder.ControlPlane("test1", "cp1"). - WithSpecFields(map[string]interface{}{ - "spec.version": "v1.2.3", - "spec.replicas": int64(2), - }). - WithStatusFields(map[string]interface{}{ - "status.version": "v1.2.3", - "status.replicas": int64(1), - "status.updatedReplicas": int64(1), - "status.readyReplicas": int64(1), - "status.unavailableReplicas": int64(0), - }). - Build() - controlPlaneDesired := builder.ControlPlane("test1", "cp1"). - WithSpecFields(map[string]interface{}{ - "spec.version": "v1.2.3", - }). + controlPlaneObj := builder.ControlPlane("test1", "cp1"). 
Build() + mdName := "md-1" + currentMachineDeploymentState := &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", mdName).WithVersion("v1.2.2").Build()} + tests := []struct { - name string - machineDeploymentTopology clusterv1.MachineDeploymentTopology - currentMachineDeploymentState *scope.MachineDeploymentState - upgradingMachineDeployments []string - upgradeConcurrency int - currentControlPlane *unstructured.Unstructured - desiredControlPlane *unstructured.Unstructured - topologyVersion string - expectedVersion string + name string + machineDeploymentTopology clusterv1.MachineDeploymentTopology + currentMachineDeploymentState *scope.MachineDeploymentState + upgradingMachineDeployments []string + upgradeConcurrency int + controlPlaneStartingUpgrade bool + controlPlaneUpgrading bool + controlPlaneScaling bool + controlPlaneProvisioning bool + afterControlPlaneUpgradeHookBlocking bool + topologyVersion string + expectedVersion string + expectPendingCreate bool + expectPendingUpgrade bool }{ { - name: "should return cluster.spec.topology.version if creating a new machine deployment", + name: "should return cluster.spec.topology.version if creating a new machine deployment and if control plane is stable - not marked as pending create", currentMachineDeploymentState: nil, - topologyVersion: "v1.2.3", - expectedVersion: "v1.2.3", + machineDeploymentTopology: clusterv1.MachineDeploymentTopology{ + Name: "md-topology-1", + }, + topologyVersion: "v1.2.3", + expectedVersion: "v1.2.3", + expectPendingCreate: false, + }, + { + name: "should return cluster.spec.topology.version if creating a new machine deployment and if control plane is not stable - marked as pending create", + controlPlaneScaling: true, + machineDeploymentTopology: clusterv1.MachineDeploymentTopology{ + Name: "md-topology-1", + }, + topologyVersion: "v1.2.3", + expectedVersion: "v1.2.3", + expectPendingCreate: true, }, { name: "should return machine deployment's spec.template.spec.version if upgrade is deferred", @@ -1797,69 +1769,77 @@ func TestComputeMachineDeploymentVersion(t *testing.T) { }, }, }, - currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()}, + currentMachineDeploymentState: currentMachineDeploymentState, upgradingMachineDeployments: []string{}, - currentControlPlane: controlPlaneStable123, - desiredControlPlane: controlPlaneDesired, topologyVersion: "v1.2.3", expectedVersion: "v1.2.2", + expectPendingUpgrade: true, }, { // Control plane is considered upgrading if the control plane's spec.version and status.version are not equal. name: "should return machine deployment's spec.template.spec.version if control plane is upgrading", - currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()}, + currentMachineDeploymentState: currentMachineDeploymentState, upgradingMachineDeployments: []string{}, - currentControlPlane: controlPlaneUpgrading, + controlPlaneUpgrading: true, topologyVersion: "v1.2.3", expectedVersion: "v1.2.2", + expectPendingUpgrade: true, }, { // Control plane is considered ready to upgrade if spec.version of current and desired control planes are not equal. 
- name: "should return machine deployment's spec.template.spec.version if control plane is ready to upgrade", - currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()}, + name: "should return machine deployment's spec.template.spec.version if control plane is starting upgrade", + currentMachineDeploymentState: currentMachineDeploymentState, upgradingMachineDeployments: []string{}, - currentControlPlane: controlPlaneStable122, - desiredControlPlane: controlPlaneDesired, + controlPlaneStartingUpgrade: true, topologyVersion: "v1.2.3", expectedVersion: "v1.2.2", + expectPendingUpgrade: true, }, { // Control plane is considered scaling if its spec.replicas is not equal to any of status.replicas, status.readyReplicas or status.updatedReplicas. name: "should return machine deployment's spec.template.spec.version if control plane is scaling", - currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()}, + currentMachineDeploymentState: currentMachineDeploymentState, upgradingMachineDeployments: []string{}, - currentControlPlane: controlPlaneScaling, + controlPlaneScaling: true, topologyVersion: "v1.2.3", expectedVersion: "v1.2.2", + expectPendingUpgrade: true, }, { name: "should return cluster.spec.topology.version if the control plane is not upgrading, not scaling, not ready to upgrade and none of the machine deployments are upgrading", - currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()}, + currentMachineDeploymentState: currentMachineDeploymentState, upgradingMachineDeployments: []string{}, - currentControlPlane: controlPlaneStable123, - desiredControlPlane: controlPlaneDesired, topologyVersion: "v1.2.3", expectedVersion: "v1.2.3", + expectPendingUpgrade: false, + }, + { + name: "should return machine deployment's spec.template.spec.version if control plane is stable, other machine deployments are upgrading, concurrency limit not reached but AfterControlPlaneUpgrade hook is blocking", + currentMachineDeploymentState: currentMachineDeploymentState, + upgradingMachineDeployments: []string{"upgrading-md1"}, + upgradeConcurrency: 2, + afterControlPlaneUpgradeHookBlocking: true, + topologyVersion: "v1.2.3", + expectedVersion: "v1.2.2", + expectPendingUpgrade: true, }, { name: "should return cluster.spec.topology.version if control plane is stable, other machine deployments are upgrading, concurrency limit not reached", - currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()}, + currentMachineDeploymentState: currentMachineDeploymentState, upgradingMachineDeployments: []string{"upgrading-md1"}, upgradeConcurrency: 2, - currentControlPlane: controlPlaneStable123, - desiredControlPlane: controlPlaneDesired, topologyVersion: "v1.2.3", expectedVersion: "v1.2.3", + expectPendingUpgrade: false, }, { name: "should return machine deployment's spec.template.spec.version if control plane is stable, other machine deployments are upgrading, concurrency limit reached", - currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()}, + currentMachineDeploymentState: currentMachineDeploymentState, upgradingMachineDeployments: []string{"upgrading-md1", 
"upgrading-md2"}, upgradeConcurrency: 2, - currentControlPlane: controlPlaneStable123, - desiredControlPlane: controlPlaneDesired, topologyVersion: "v1.2.3", expectedVersion: "v1.2.2", + expectPendingUpgrade: true, }, } @@ -1876,15 +1856,41 @@ func TestComputeMachineDeploymentVersion(t *testing.T) { Workers: &clusterv1.WorkersTopology{}, }}, Current: &scope.ClusterState{ - ControlPlane: &scope.ControlPlaneState{Object: tt.currentControlPlane}, + ControlPlane: &scope.ControlPlaneState{Object: controlPlaneObj}, }, - UpgradeTracker: scope.NewUpgradeTracker(scope.MaxMDUpgradeConcurrency(tt.upgradeConcurrency)), + UpgradeTracker: scope.NewUpgradeTracker(scope.MaxMDUpgradeConcurrency(tt.upgradeConcurrency)), + HookResponseTracker: scope.NewHookResponseTracker(), } - desiredControlPlaneState := &scope.ControlPlaneState{Object: tt.desiredControlPlane} + if tt.afterControlPlaneUpgradeHookBlocking { + s.HookResponseTracker.Add(runtimehooksv1.AfterControlPlaneUpgrade, &runtimehooksv1.AfterControlPlaneUpgradeResponse{ + CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ + RetryAfterSeconds: 10, + }, + }) + } + s.UpgradeTracker.ControlPlane.IsStartingUpgrade = tt.controlPlaneStartingUpgrade + s.UpgradeTracker.ControlPlane.IsUpgrading = tt.controlPlaneUpgrading + s.UpgradeTracker.ControlPlane.IsScaling = tt.controlPlaneScaling + s.UpgradeTracker.ControlPlane.IsProvisioning = tt.controlPlaneProvisioning s.UpgradeTracker.MachineDeployments.MarkUpgrading(tt.upgradingMachineDeployments...) - version, err := computeMachineDeploymentVersion(s, tt.machineDeploymentTopology, desiredControlPlaneState, tt.currentMachineDeploymentState) - g.Expect(err).NotTo(HaveOccurred()) + version := computeMachineDeploymentVersion(s, tt.machineDeploymentTopology, tt.currentMachineDeploymentState) g.Expect(version).To(Equal(tt.expectedVersion)) + + if tt.currentMachineDeploymentState != nil { + // Verify that if the upgrade is pending it is captured in the upgrade tracker. + if tt.expectPendingUpgrade { + g.Expect(s.UpgradeTracker.MachineDeployments.IsPendingUpgrade(mdName)).To(BeTrue(), "MachineDeployment should be marked as pending upgrade") + } else { + g.Expect(s.UpgradeTracker.MachineDeployments.IsPendingUpgrade(mdName)).To(BeFalse(), "MachineDeployment should not be marked as pending upgrade") + } + } else { + // Verify that if create the pending it is capture in the tracker. + if tt.expectPendingCreate { + g.Expect(s.UpgradeTracker.MachineDeployments.IsPendingCreate(tt.machineDeploymentTopology.Name)).To(BeTrue(), "MachineDeployment topology should be marked as pending create") + } else { + g.Expect(s.UpgradeTracker.MachineDeployments.IsPendingCreate(tt.machineDeploymentTopology.Name)).To(BeFalse(), "MachineDeployment topology should not be marked as pending create") + } + } }) } } diff --git a/internal/controllers/topology/cluster/reconcile_state.go b/internal/controllers/topology/cluster/reconcile_state.go index b6dc1e8cc8a1..ef4b11a39d14 100644 --- a/internal/controllers/topology/cluster/reconcile_state.go +++ b/internal/controllers/topology/cluster/reconcile_state.go @@ -240,33 +240,15 @@ func (r *Reconciler) callAfterClusterUpgrade(ctx context.Context, s *scope.Scope if hooks.IsPending(runtimehooksv1.AfterClusterUpgrade, s.Current.Cluster) { // Call the registered extensions for the hook after the cluster is fully upgraded. 
// A cluster is considered fully upgraded if: - // - Control plane is not upgrading - // - Control plane is not scaling - // - Control plane is not pending an upgrade - // - MachineDeployments are not currently rolling out - // - MAchineDeployments are not about to roll out + // - Control plane is stable (not upgrading, not scaling, not about to upgrade) + // - MachineDeployments are not currently upgrading // - MachineDeployments are not pending an upgrade - - // Check if the control plane is upgrading. - cpUpgrading, err := contract.ControlPlane().IsUpgrading(s.Current.ControlPlane.Object) - if err != nil { - return errors.Wrap(err, "failed to check if control plane is upgrading") - } - - // Check if the control plane is scaling. If the control plane does not support replicas - // it will be considered as not scaling. - var cpScaling bool - if s.Blueprint.Topology.ControlPlane.Replicas != nil { - cpScaling, err = contract.ControlPlane().IsScaling(s.Current.ControlPlane.Object) - if err != nil { - return errors.Wrap(err, "failed to check if the control plane is scaling") - } - } - - if !cpUpgrading && !cpScaling && !s.UpgradeTracker.ControlPlane.PendingUpgrade && // Control Plane checks + // - MachineDeployments are not pending create + if isControlPlaneStable(s) && // Control Plane stable checks len(s.UpgradeTracker.MachineDeployments.UpgradingNames()) == 0 && // Machine deployments are not upgrading or not about to upgrade - !s.UpgradeTracker.MachineDeployments.PendingUpgrade() && // No MachineDeployments have an upgrade pending - !s.UpgradeTracker.MachineDeployments.DeferredUpgrade() { // No MachineDeployments have an upgrade deferred + !s.UpgradeTracker.MachineDeployments.IsAnyPendingCreate() && // No MachineDeployments are pending create + !s.UpgradeTracker.MachineDeployments.IsAnyPendingUpgrade() && // No MachineDeployments are pending an upgrade + !s.UpgradeTracker.MachineDeployments.DeferredUpgrade() { // No MachineDeployments have deferred an upgrade // Everything is stable and the cluster can be considered fully upgraded. hookRequest := &runtimehooksv1.AfterClusterUpgradeRequest{ Cluster: *s.Current.Cluster, @@ -307,6 +289,22 @@ func (r *Reconciler) reconcileInfrastructureCluster(ctx context.Context, s *scop // reconcileControlPlane works to bring the current state of a managed topology in line with the desired state. This involves // updating the cluster where needed. func (r *Reconciler) reconcileControlPlane(ctx context.Context, s *scope.Scope) error { + // If the ControlPlane has defined a current or desired MachineHealthCheck, attempt to reconcile it. + // MHC changes are not Kubernetes version dependent, therefore proceed with MHC reconciliation + // even if the Control Plane is pending an upgrade. + if s.Desired.ControlPlane.MachineHealthCheck != nil || s.Current.ControlPlane.MachineHealthCheck != nil { + // Reconcile the current and desired state of the MachineHealthCheck. + if err := r.reconcileMachineHealthCheck(ctx, s.Current.ControlPlane.MachineHealthCheck, s.Desired.ControlPlane.MachineHealthCheck); err != nil { + return err + } + } + + // Return early if the control plane is pending an upgrade. + // Do not reconcile the control plane yet to avoid updating the control plane while it is still pending a + // version upgrade. This will prevent the control plane from performing a double rollout. + if s.UpgradeTracker.ControlPlane.IsPendingUpgrade { + return nil + } // If the clusterClass mandates the controlPlane has infrastructureMachines, reconcile it. 
if s.Blueprint.HasControlPlaneInfrastructureMachine() { ctx, _ := tlog.LoggerFrom(ctx).WithObject(s.Desired.ControlPlane.InfrastructureMachineTemplate).Into(ctx) @@ -361,13 +359,6 @@ func (r *Reconciler) reconcileControlPlane(ctx context.Context, s *scope.Scope) } } - // If the ControlPlane has defined a current or desired MachineHealthCheck attempt to reconcile it. - if s.Desired.ControlPlane.MachineHealthCheck != nil || s.Current.ControlPlane.MachineHealthCheck != nil { - // Reconcile the current and desired state of the MachineHealthCheck. - if err := r.reconcileMachineHealthCheck(ctx, s.Current.ControlPlane.MachineHealthCheck, s.Desired.ControlPlane.MachineHealthCheck); err != nil { - return err - } - } return nil } @@ -457,7 +448,7 @@ func (r *Reconciler) reconcileMachineDeployments(ctx context.Context, s *scope.S // Create MachineDeployments. for _, mdTopologyName := range diff.toCreate { md := s.Desired.MachineDeployments[mdTopologyName] - if err := r.createMachineDeployment(ctx, s.Current.Cluster, md); err != nil { + if err := r.createMachineDeployment(ctx, s, md); err != nil { return err } } @@ -466,7 +457,7 @@ func (r *Reconciler) reconcileMachineDeployments(ctx context.Context, s *scope.S for _, mdTopologyName := range diff.toUpdate { currentMD := s.Current.MachineDeployments[mdTopologyName] desiredMD := s.Desired.MachineDeployments[mdTopologyName] - if err := r.updateMachineDeployment(ctx, s.Current.Cluster, mdTopologyName, currentMD, desiredMD); err != nil { + if err := r.updateMachineDeployment(ctx, s, mdTopologyName, currentMD, desiredMD); err != nil { return err } } @@ -482,9 +473,23 @@ func (r *Reconciler) reconcileMachineDeployments(ctx context.Context, s *scope.S } // createMachineDeployment creates a MachineDeployment and the corresponding Templates. -func (r *Reconciler) createMachineDeployment(ctx context.Context, cluster *clusterv1.Cluster, md *scope.MachineDeploymentState) error { - log := tlog.LoggerFrom(ctx).WithMachineDeployment(md.Object) +func (r *Reconciler) createMachineDeployment(ctx context.Context, s *scope.Scope, md *scope.MachineDeploymentState) error { + // Do not create the MachineDeployment if it is marked as pending create. + // This will also block MHC creation because creating the MHC without the corresponding + // MachineDeployment is unnecessary. + mdTopologyName, ok := md.Object.Labels[clusterv1.ClusterTopologyMachineDeploymentNameLabel] + if !ok || mdTopologyName == "" { + // Note: This is only an additional safety check and should not happen. The label will always be added when computing + // the desired MachineDeployment. + return errors.Errorf("new MachineDeployment is missing the %q label", clusterv1.ClusterTopologyMachineDeploymentNameLabel) + } + // Return early if the MachineDeployment is pending create. + if s.UpgradeTracker.MachineDeployments.IsPendingCreate(mdTopologyName) { + return nil + } + log := tlog.LoggerFrom(ctx).WithMachineDeployment(md.Object) + cluster := s.Current.Cluster infraCtx, _ := log.WithObject(md.InfrastructureMachineTemplate).Into(ctx) if err := r.reconcileReferencedTemplate(infraCtx, reconcileReferencedTemplateInput{ cluster: cluster, @@ -522,9 +527,26 @@ func (r *Reconciler) createMachineDeployment(ctx context.Context, cluster *clust } // updateMachineDeployment updates a MachineDeployment. Also rotates the corresponding Templates if necessary. 
-func (r *Reconciler) updateMachineDeployment(ctx context.Context, cluster *clusterv1.Cluster, mdTopologyName string, currentMD, desiredMD *scope.MachineDeploymentState) error { +func (r *Reconciler) updateMachineDeployment(ctx context.Context, s *scope.Scope, mdTopologyName string, currentMD, desiredMD *scope.MachineDeploymentState) error { log := tlog.LoggerFrom(ctx).WithMachineDeployment(desiredMD.Object) + // Patch MachineHealthCheck for the MachineDeployment. + // MHC changes are not Kubernetes version dependent, therefore proceed with MHC reconciliation + // even if the MachineDeployment is pending an upgrade. + if desiredMD.MachineHealthCheck != nil || currentMD.MachineHealthCheck != nil { + if err := r.reconcileMachineHealthCheck(ctx, currentMD.MachineHealthCheck, desiredMD.MachineHealthCheck); err != nil { + return err + } + } + + // Return early if the MachineDeployment is pending an upgrade. + // Do not reconcile the MachineDeployment yet to avoid updating the MachineDeployment while it is still pending a + // version upgrade. This will prevent the MachineDeployment from performing a double rollout. + if s.UpgradeTracker.MachineDeployments.IsPendingUpgrade(currentMD.Object.Name) { + return nil + } + + cluster := s.Current.Cluster infraCtx, _ := log.WithObject(desiredMD.InfrastructureMachineTemplate).Into(ctx) if err := r.reconcileReferencedTemplate(infraCtx, reconcileReferencedTemplateInput{ cluster: cluster, @@ -549,13 +571,6 @@ func (r *Reconciler) updateMachineDeployment(ctx context.Context, cluster *clust return errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: currentMD.Object}) } - // Patch MachineHealthCheck for the MachineDeployment. - if desiredMD.MachineHealthCheck != nil || currentMD.MachineHealthCheck != nil { - if err := r.reconcileMachineHealthCheck(ctx, currentMD.MachineHealthCheck, desiredMD.MachineHealthCheck); err != nil { - return err - } - } - // Check differences between current and desired MachineDeployment, and eventually patch the current object. log = log.WithObject(desiredMD.Object) patchHelper, err := r.patchHelperFactory(ctx, currentMD.Object, desiredMD.Object) diff --git a/internal/controllers/topology/cluster/reconcile_state_test.go b/internal/controllers/topology/cluster/reconcile_state_test.go index b1e9cc830bd1..32363fc62abd 100644 --- a/internal/controllers/topology/cluster/reconcile_state_test.go +++ b/internal/controllers/topology/cluster/reconcile_state_test.go @@ -492,58 +492,7 @@ func TestReconcile_callAfterClusterUpgrade(t *testing.T) { } topologyVersion := "v1.2.3" - lowerVersion := "v1.2.2" - controlPlaneStableAtTopologyVersion := builder.ControlPlane("test1", "cp1"). - WithSpecFields(map[string]interface{}{ - "spec.version": topologyVersion, - "spec.replicas": int64(2), - }). - WithStatusFields(map[string]interface{}{ - "status.version": topologyVersion, - "status.replicas": int64(2), - "status.updatedReplicas": int64(2), - "status.readyReplicas": int64(2), - "status.unavailableReplicas": int64(0), - }). - Build() - controlPlaneStableAtLowerVersion := builder.ControlPlane("test1", "cp1"). - WithSpecFields(map[string]interface{}{ - "spec.version": lowerVersion, - "spec.replicas": int64(2), - }). - WithStatusFields(map[string]interface{}{ - "status.version": lowerVersion, - "status.replicas": int64(2), - "status.updatedReplicas": int64(2), - "status.readyReplicas": int64(2), - "status.unavailableReplicas": int64(0), - }). - Build() - controlPlaneUpgrading := builder.ControlPlane("test1", "cp1"). 
- WithSpecFields(map[string]interface{}{ - "spec.version": topologyVersion, - "spec.replicas": int64(2), - }). - WithStatusFields(map[string]interface{}{ - "status.version": lowerVersion, - "status.replicas": int64(2), - "status.updatedReplicas": int64(2), - "status.readyReplicas": int64(2), - "status.unavailableReplicas": int64(0), - }). - Build() - controlPlaneScaling := builder.ControlPlane("test1", "cp1"). - WithSpecFields(map[string]interface{}{ - "spec.version": topologyVersion, - "spec.replicas": int64(2), - }). - WithStatusFields(map[string]interface{}{ - "status.version": topologyVersion, - "status.replicas": int64(1), - "status.updatedReplicas": int64(1), - "status.readyReplicas": int64(1), - "status.unavailableReplicas": int64(0), - }). + controlPlaneObj := builder.ControlPlane("test1", "cp1"). Build() tests := []struct { @@ -572,6 +521,9 @@ func TestReconcile_callAfterClusterUpgrade(t *testing.T) { }, Spec: clusterv1.ClusterSpec{}, }, + ControlPlane: &scope.ControlPlaneState{ + Object: controlPlaneObj, + }, }, HookResponseTracker: scope.NewHookResponseTracker(), UpgradeTracker: scope.NewUpgradeTracker(), @@ -581,6 +533,43 @@ func TestReconcile_callAfterClusterUpgrade(t *testing.T) { wantHookToBeCalled: false, wantError: false, }, + { + name: "hook should not be called if the control plane is starting a new upgrade - hook is marked", + s: &scope.Scope{ + Blueprint: &scope.ClusterBlueprint{ + Topology: &clusterv1.Topology{ + ControlPlane: clusterv1.ControlPlaneTopology{ + Replicas: pointer.Int32(2), + }, + }, + }, + Current: &scope.ClusterState{ + Cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test-ns", + Annotations: map[string]string{ + runtimev1.PendingHooksAnnotation: "AfterClusterUpgrade", + }, + }, + Spec: clusterv1.ClusterSpec{}, + }, + ControlPlane: &scope.ControlPlaneState{ + Object: controlPlaneObj, + }, + }, + HookResponseTracker: scope.NewHookResponseTracker(), + UpgradeTracker: func() *scope.UpgradeTracker { + ut := scope.NewUpgradeTracker() + ut.ControlPlane.IsStartingUpgrade = true + return ut + }(), + }, + wantMarked: true, + hookResponse: successResponse, + wantHookToBeCalled: false, + wantError: false, + }, { name: "hook should not be called if the control plane is upgrading - hook is marked", s: &scope.Scope{ @@ -603,11 +592,15 @@ func TestReconcile_callAfterClusterUpgrade(t *testing.T) { Spec: clusterv1.ClusterSpec{}, }, ControlPlane: &scope.ControlPlaneState{ - Object: controlPlaneUpgrading, + Object: controlPlaneObj, }, }, HookResponseTracker: scope.NewHookResponseTracker(), - UpgradeTracker: scope.NewUpgradeTracker(), + UpgradeTracker: func() *scope.UpgradeTracker { + ut := scope.NewUpgradeTracker() + ut.ControlPlane.IsUpgrading = true + return ut + }(), }, wantMarked: true, hookResponse: successResponse, @@ -636,11 +629,15 @@ func TestReconcile_callAfterClusterUpgrade(t *testing.T) { Spec: clusterv1.ClusterSpec{}, }, ControlPlane: &scope.ControlPlaneState{ - Object: controlPlaneScaling, + Object: controlPlaneObj, }, }, HookResponseTracker: scope.NewHookResponseTracker(), - UpgradeTracker: scope.NewUpgradeTracker(), + UpgradeTracker: func() *scope.UpgradeTracker { + ut := scope.NewUpgradeTracker() + ut.ControlPlane.IsScaling = true + return ut + }(), }, wantMarked: true, hookResponse: successResponse, @@ -648,7 +645,7 @@ func TestReconcile_callAfterClusterUpgrade(t *testing.T) { wantError: false, }, { - name: "hook should not be called if the control plane is stable at a lower version and is pending an 
upgrade - hook is marked", + name: "hook should not be called if the control plane is pending an upgrade - hook is marked", s: &scope.Scope{ Blueprint: &scope.ClusterBlueprint{ Topology: &clusterv1.Topology{ @@ -669,13 +666,13 @@ func TestReconcile_callAfterClusterUpgrade(t *testing.T) { Spec: clusterv1.ClusterSpec{}, }, ControlPlane: &scope.ControlPlaneState{ - Object: controlPlaneStableAtLowerVersion, + Object: controlPlaneObj, }, }, HookResponseTracker: scope.NewHookResponseTracker(), UpgradeTracker: func() *scope.UpgradeTracker { ut := scope.NewUpgradeTracker() - ut.ControlPlane.PendingUpgrade = true + ut.ControlPlane.IsPendingUpgrade = true return ut }(), }, @@ -706,13 +703,13 @@ func TestReconcile_callAfterClusterUpgrade(t *testing.T) { Spec: clusterv1.ClusterSpec{}, }, ControlPlane: &scope.ControlPlaneState{ - Object: controlPlaneStableAtTopologyVersion, + Object: controlPlaneObj, }, }, HookResponseTracker: scope.NewHookResponseTracker(), UpgradeTracker: func() *scope.UpgradeTracker { ut := scope.NewUpgradeTracker() - ut.ControlPlane.PendingUpgrade = false + ut.ControlPlane.IsPendingUpgrade = false ut.MachineDeployments.MarkUpgrading("md1") return ut }(), @@ -723,7 +720,7 @@ func TestReconcile_callAfterClusterUpgrade(t *testing.T) { wantError: false, }, { - name: "hook should not be called if the control plane is stable at desired version but MDs are pending upgrade - hook is marked", + name: "hook should not be called if the control plane is stable at desired version but MDs are pending create - hook is marked", s: &scope.Scope{ Blueprint: &scope.ClusterBlueprint{ Topology: &clusterv1.Topology{ @@ -744,13 +741,49 @@ func TestReconcile_callAfterClusterUpgrade(t *testing.T) { Spec: clusterv1.ClusterSpec{}, }, ControlPlane: &scope.ControlPlaneState{ - Object: controlPlaneStableAtTopologyVersion, + Object: controlPlaneObj, + }}, + HookResponseTracker: scope.NewHookResponseTracker(), + UpgradeTracker: func() *scope.UpgradeTracker { + ut := scope.NewUpgradeTracker() + ut.ControlPlane.IsPendingUpgrade = false + ut.MachineDeployments.MarkPendingCreate("md-topology-1") + return ut + }(), + }, + wantMarked: true, + hookResponse: successResponse, + wantHookToBeCalled: false, + wantError: false, + }, + { + name: "hook should not be called if the control plane is stable at desired version but MDs are pending upgrade - hook is marked", + s: &scope.Scope{ + Blueprint: &scope.ClusterBlueprint{ + Topology: &clusterv1.Topology{ + ControlPlane: clusterv1.ControlPlaneTopology{ + Replicas: pointer.Int32(2), + }, }, }, + Current: &scope.ClusterState{ + Cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test-ns", + Annotations: map[string]string{ + runtimev1.PendingHooksAnnotation: "AfterClusterUpgrade", + }, + }, + Spec: clusterv1.ClusterSpec{}, + }, + ControlPlane: &scope.ControlPlaneState{ + Object: controlPlaneObj, + }}, HookResponseTracker: scope.NewHookResponseTracker(), UpgradeTracker: func() *scope.UpgradeTracker { ut := scope.NewUpgradeTracker() - ut.ControlPlane.PendingUpgrade = false + ut.ControlPlane.IsPendingUpgrade = false ut.MachineDeployments.MarkPendingUpgrade("md1") return ut }(), @@ -782,13 +815,13 @@ func TestReconcile_callAfterClusterUpgrade(t *testing.T) { Spec: clusterv1.ClusterSpec{}, }, ControlPlane: &scope.ControlPlaneState{ - Object: controlPlaneStableAtTopologyVersion, + Object: controlPlaneObj, }, }, HookResponseTracker: scope.NewHookResponseTracker(), UpgradeTracker: func() *scope.UpgradeTracker { ut := 
scope.NewUpgradeTracker() - ut.ControlPlane.PendingUpgrade = false + ut.ControlPlane.IsPendingUpgrade = false ut.MachineDeployments.MarkDeferredUpgrade("md1") return ut }(), @@ -824,7 +857,7 @@ func TestReconcile_callAfterClusterUpgrade(t *testing.T) { }, }, ControlPlane: &scope.ControlPlaneState{ - Object: controlPlaneStableAtTopologyVersion, + Object: controlPlaneObj, }, }, HookResponseTracker: scope.NewHookResponseTracker(), @@ -861,7 +894,7 @@ func TestReconcile_callAfterClusterUpgrade(t *testing.T) { }, }, ControlPlane: &scope.ControlPlaneState{ - Object: controlPlaneStableAtTopologyVersion, + Object: controlPlaneObj, }, }, HookResponseTracker: scope.NewHookResponseTracker(), @@ -1182,12 +1215,16 @@ func TestReconcileControlPlane(t *testing.T) { gvk.Kind = "KindChanged" infrastructureMachineTemplateWithIncompatibleChanges.SetGroupVersionKind(gvk) + upgradeTrackerWithControlPlanePendingUpgrade := scope.NewUpgradeTracker() + upgradeTrackerWithControlPlanePendingUpgrade.ControlPlane.IsPendingUpgrade = true + tests := []struct { name string class *scope.ControlPlaneBlueprint original *scope.ControlPlaneState controlPlaneExternalChanges string machineInfrastructureExternalChanges string + upgradeTracker *scope.UpgradeTracker desired *scope.ControlPlaneState want *scope.ControlPlaneState wantRotation bool @@ -1210,6 +1247,15 @@ func TestReconcileControlPlane(t *testing.T) { want: &scope.ControlPlaneState{Object: controlPlaneWithoutInfrastructureWithChanges.DeepCopy()}, wantErr: false, }, + { + name: "Should not update the ControlPlane if ControlPlane is pending upgrade", + class: ccWithoutControlPlaneInfrastructure, + upgradeTracker: upgradeTrackerWithControlPlanePendingUpgrade, + original: &scope.ControlPlaneState{Object: controlPlaneWithoutInfrastructure.DeepCopy()}, + desired: &scope.ControlPlaneState{Object: controlPlaneWithoutInfrastructureWithChanges.DeepCopy()}, + want: &scope.ControlPlaneState{Object: controlPlaneWithoutInfrastructure.DeepCopy()}, + wantErr: false, + }, { name: "Should preserve external changes to ControlPlane without machine infrastructure", class: ccWithoutControlPlaneInfrastructure, @@ -1311,6 +1357,9 @@ func TestReconcileControlPlane(t *testing.T) { Ref: contract.ObjToRef(tt.class.InfrastructureMachineTemplate), } } + if tt.upgradeTracker != nil { + s.UpgradeTracker = tt.upgradeTracker + } s.Current.ControlPlane = &scope.ControlPlaneState{} if tt.original != nil { @@ -1644,12 +1693,17 @@ func TestReconcileMachineDeployments(t *testing.T) { bootstrapTemplate1 := builder.TestBootstrapTemplate(metav1.NamespaceDefault, "bootstrap-config-1").Build() md1 := newFakeMachineDeploymentTopologyState("md-1", infrastructureMachineTemplate1, bootstrapTemplate1, nil) + upgradeTrackerWithMD1PendingCreate := scope.NewUpgradeTracker() + upgradeTrackerWithMD1PendingCreate.MachineDeployments.MarkPendingCreate("md-1-topology") + infrastructureMachineTemplate2 := builder.TestInfrastructureMachineTemplate(metav1.NamespaceDefault, "infrastructure-machine-2").Build() bootstrapTemplate2 := builder.TestBootstrapTemplate(metav1.NamespaceDefault, "bootstrap-config-2").Build() md2 := newFakeMachineDeploymentTopologyState("md-2", infrastructureMachineTemplate2, bootstrapTemplate2, nil) infrastructureMachineTemplate2WithChanges := infrastructureMachineTemplate2.DeepCopy() g.Expect(unstructured.SetNestedField(infrastructureMachineTemplate2WithChanges.Object, "foo", "spec", "template", "spec", "foo")).To(Succeed()) md2WithRotatedInfrastructureMachineTemplate := 
newFakeMachineDeploymentTopologyState("md-2", infrastructureMachineTemplate2WithChanges, bootstrapTemplate2, nil) + upgradeTrackerWithMD2PendingUpgrade := scope.NewUpgradeTracker() + upgradeTrackerWithMD2PendingUpgrade.MachineDeployments.MarkPendingUpgrade("md-2") infrastructureMachineTemplate3 := builder.TestInfrastructureMachineTemplate(metav1.NamespaceDefault, "infrastructure-machine-3").Build() bootstrapTemplate3 := builder.TestBootstrapTemplate(metav1.NamespaceDefault, "bootstrap-config-3").Build() @@ -1725,6 +1779,7 @@ func TestReconcileMachineDeployments(t *testing.T) { name string current []*scope.MachineDeploymentState desired []*scope.MachineDeploymentState + upgradeTracker *scope.UpgradeTracker want []*scope.MachineDeploymentState wantInfrastructureMachineTemplateRotation map[string]bool wantBootstrapTemplateRotation map[string]bool @@ -1737,6 +1792,14 @@ func TestReconcileMachineDeployments(t *testing.T) { want: []*scope.MachineDeploymentState{md1}, wantErr: false, }, + { + name: "Should not create desired MachineDeployment if the current does not exist yet and it is marked as pending create", + current: nil, + upgradeTracker: upgradeTrackerWithMD1PendingCreate, + desired: []*scope.MachineDeploymentState{md1}, + want: nil, + wantErr: false, + }, { name: "No-op if current MachineDeployment is equal to desired", current: []*scope.MachineDeploymentState{md1}, @@ -1752,6 +1815,15 @@ func TestReconcileMachineDeployments(t *testing.T) { wantInfrastructureMachineTemplateRotation: map[string]bool{"md-2": true}, wantErr: false, }, + { + name: "Should not update MachineDeployment if MachineDeployment is pending upgrade", + current: []*scope.MachineDeploymentState{md2}, + desired: []*scope.MachineDeploymentState{md2WithRotatedInfrastructureMachineTemplate}, + upgradeTracker: upgradeTrackerWithMD2PendingUpgrade, + want: []*scope.MachineDeploymentState{md2}, + wantInfrastructureMachineTemplateRotation: map[string]bool{"md-2": false}, + wantErr: false, + }, { name: "Should update MachineDeployment with BootstrapTemplate rotation", current: []*scope.MachineDeploymentState{md3}, @@ -1849,6 +1921,10 @@ func TestReconcileMachineDeployments(t *testing.T) { s.Desired = &scope.ClusterState{MachineDeployments: toMachineDeploymentTopologyStateMap(tt.desired)} + if tt.upgradeTracker != nil { + s.UpgradeTracker = tt.upgradeTracker + } + r := Reconciler{ Client: env, patchHelperFactory: serverSideApplyPatchHelperFactory(env, ssa.NewCache()), @@ -1865,6 +1941,11 @@ func TestReconcileMachineDeployments(t *testing.T) { g.Expect(env.GetAPIReader().List(ctx, &gotMachineDeploymentList, &client.ListOptions{Namespace: namespace.GetName()})).To(Succeed()) g.Expect(gotMachineDeploymentList.Items).To(HaveLen(len(tt.want))) + if tt.want == nil { + // No machine deployments should exist.
+ g.Expect(gotMachineDeploymentList.Items).To(BeEmpty()) + } + for _, wantMachineDeploymentState := range tt.want { for _, gotMachineDeployment := range gotMachineDeploymentList.Items { if wantMachineDeploymentState.Object.Name != gotMachineDeployment.Name { diff --git a/internal/controllers/topology/cluster/scope/hookresponsetracker.go b/internal/controllers/topology/cluster/scope/hookresponsetracker.go index 92e26115ed4c..e66a3ac718ec 100644 --- a/internal/controllers/topology/cluster/scope/hookresponsetracker.go +++ b/internal/controllers/topology/cluster/scope/hookresponsetracker.go @@ -44,6 +44,26 @@ func (h *HookResponseTracker) Add(hook runtimecatalog.Hook, response runtimehook h.responses[hookName] = response } +// IsBlocking returns true if the hook returned a blocking response. +// If the hook is not called or did not return a blocking response it returns false. +func (h *HookResponseTracker) IsBlocking(hook runtimecatalog.Hook) bool { + hookName := runtimecatalog.HookName(hook) + response, ok := h.responses[hookName] + if !ok { + return false + } + retryableResponse, ok := response.(runtimehooksv1.RetryResponseObject) + if !ok { + // Not a retryable response. Cannot be blocking. + return false + } + if retryableResponse.GetRetryAfterSeconds() == 0 { + // Not a blocking response. + return false + } + return true +} + // AggregateRetryAfter calculates the lowest non-zero retryAfterSeconds time from all the tracked responses. func (h *HookResponseTracker) AggregateRetryAfter() time.Duration { res := int32(0) diff --git a/internal/controllers/topology/cluster/scope/hookresponsetracker_test.go b/internal/controllers/topology/cluster/scope/hookresponsetracker_test.go index c4e5e06d3667..6ced86ba9189 100644 --- a/internal/controllers/topology/cluster/scope/hookresponsetracker_test.go +++ b/internal/controllers/topology/cluster/scope/hookresponsetracker_test.go @@ -21,6 +21,7 @@ import ( "time" . 
"github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtimecatalog "sigs.k8s.io/cluster-api/exp/runtime/catalog" runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1" @@ -145,3 +146,59 @@ func TestHookResponseTracker_AggregateMessage(t *testing.T) { g.Expect(hrt.AggregateMessage()).To(Equal("")) }) } + +func TestHookResponseTracker_IsBlocking(t *testing.T) { + nonBlockingBeforeClusterCreateResponse := &runtimehooksv1.BeforeClusterCreateResponse{ + CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ + RetryAfterSeconds: int32(0), + CommonResponse: runtimehooksv1.CommonResponse{ + Status: runtimehooksv1.ResponseStatusSuccess, + }, + }, + } + blockingBeforeClusterCreateResponse := &runtimehooksv1.BeforeClusterCreateResponse{ + CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ + RetryAfterSeconds: int32(10), + CommonResponse: runtimehooksv1.CommonResponse{ + Status: runtimehooksv1.ResponseStatusSuccess, + }, + }, + } + + afterClusterUpgradeResponse := &runtimehooksv1.AfterClusterUpgradeResponse{ + TypeMeta: metav1.TypeMeta{}, + CommonResponse: runtimehooksv1.CommonResponse{}, + } + + t.Run("should return true if the tracker received a blocking response for the hook", func(t *testing.T) { + g := NewWithT(t) + + hrt := NewHookResponseTracker() + hrt.Add(runtimehooksv1.BeforeClusterCreate, blockingBeforeClusterCreateResponse) + + g.Expect(hrt.IsBlocking(runtimehooksv1.BeforeClusterCreate)).To(BeTrue()) + }) + + t.Run("should return false if the tracker received a non blocking response for the hook", func(t *testing.T) { + g := NewWithT(t) + + hrt := NewHookResponseTracker() + hrt.Add(runtimehooksv1.BeforeClusterCreate, nonBlockingBeforeClusterCreateResponse) + + g.Expect(hrt.IsBlocking(runtimehooksv1.BeforeClusterCreate)).To(BeFalse()) + }) + + t.Run("should return false if the tracker did not receive a response for the hook", func(t *testing.T) { + g := NewWithT(t) + hrt := NewHookResponseTracker() + g.Expect(hrt.IsBlocking(runtimehooksv1.BeforeClusterCreate)).To(BeFalse()) + }) + + t.Run("should return false if the hook is non-blocking", func(t *testing.T) { + g := NewWithT(t) + hrt := NewHookResponseTracker() + // AfterClusterUpgradeHook is non-blocking. + hrt.Add(runtimehooksv1.AfterClusterUpgrade, afterClusterUpgradeResponse) + g.Expect(hrt.IsBlocking(runtimehooksv1.AfterClusterUpgrade)).To(BeFalse()) + }) +} diff --git a/internal/controllers/topology/cluster/scope/upgradetracker.go b/internal/controllers/topology/cluster/scope/upgradetracker.go index dae15e67c101..3ac1590365a9 100644 --- a/internal/controllers/topology/cluster/scope/upgradetracker.go +++ b/internal/controllers/topology/cluster/scope/upgradetracker.go @@ -26,28 +26,76 @@ type UpgradeTracker struct { // ControlPlaneUpgradeTracker holds the current upgrade status of the Control Plane. type ControlPlaneUpgradeTracker struct { - // PendingUpgrade is true if the control plane version needs to be updated. False otherwise. - PendingUpgrade bool - - // IsProvisioning is true if the control plane is being provisioned for the first time. False otherwise. + // IsPendingUpgrade is true if the Control Plane version needs to be updated. False otherwise. + // If IsPendingUpgrade is true it also means the Control Plane is not going to pick up the new version + // in the current reconcile loop. 
+ // Example cases when IsPendingUpgrade is set to true: + // - Upgrade is blocked by BeforeClusterUpgrade hook + // - Upgrade is blocked because the current ControlPlane is not stable (provisioning OR scaling OR upgrading) + // - Upgrade is blocked because any of the current MachineDeployments are upgrading. + IsPendingUpgrade bool + + // IsProvisioning is true if the current Control Plane is being provisioned for the first time. False otherwise. IsProvisioning bool - // IsUpgrading is true if the control plane is in the middle of an upgrade. - // Note: Refer to control plane contract for definition of upgrading. + // IsUpgrading is true if the Control Plane is in the middle of an upgrade. + // Note: Refer to Control Plane contract for definition of upgrading. + // IsUpgrading is set to true if the current ControlPlane (ControlPlane at the beginning of the reconcile) + // is upgrading. + // Note: IsUpgrading only represents the current ControlPlane state. If the Control Plane is about to pick up the + // version in the reconcile loop IsUpgrading will not be true, because the current ControlPlane is not upgrading, + // the desired ControlPlane is. + // Also look at: IsStartingUpgrade. IsUpgrading bool - // IsScaling is true if the control plane is in the middle of a scale operation. + // IsStartingUpgrade is true if the Control Plane is picking up the new version in the current reconcile loop. + // If IsStartingUpgrade is true it implies that the desired Control Plane version and the current Control Plane + // versions are different. + IsStartingUpgrade bool + + // IsScaling is true if the current Control Plane is scaling. False otherwise. + // IsScaling only represents the state of the current Control Plane. IsScaling does not represent the state + // of the desired Control Plane. + // Example: + // - IsScaling will be true if the current ControlPlane is scaling. + // - IsScaling will not be true if the current Control Plane is stable and the reconcile loop is going to scale the Control Plane. // Note: Refer to control plane contract for definition of scaling. + // Note: IsScaling will be false if the Control Plane does not support replicas. IsScaling bool } -// MachineDeploymentUpgradeTracker holds the current upgrade status and makes upgrade -// decisions for MachineDeployments. +// MachineDeploymentUpgradeTracker holds the current upgrade status of MachineDeployments. type MachineDeploymentUpgradeTracker struct { - pendingNames sets.Set[string] - deferredNames sets.Set[string] - upgradingNames sets.Set[string] - holdUpgrades bool + // pendingCreateTopologyNames is the set of MachineDeployment topology names that are newly added to the + // Cluster Topology but will not be created in the current reconcile loop. + // By marking a MachineDeployment topology as pendingCreate we skip creating the MachineDeployment. + // Nb. We use MachineDeployment topology names instead of MachineDeployment names because the new MachineDeployment + // names can keep changing for each reconcile loop leading to continuous updates to the TopologyReconciled condition. + pendingCreateTopologyNames sets.Set[string] + + // pendingUpgradeNames is the set of MachineDeployment names that are not going to pick up the new version + // in the current reconcile loop. + // By marking a MachineDeployment as pendingUpgrade we skip reconciling the MachineDeployment. 
+ pendingUpgradeNames sets.Set[string] + + // deferredNames is the set of MachineDeployment names that are not going to pick up the new version + // in the current reconcile loop because they are deferred by the user. + // Note: If a MachineDeployment is marked as deferred it should also be marked as pendingUpgrade. + deferredNames sets.Set[string] + + // upgradingNames is the set of MachineDeployment names that are upgrading. This set contains the names of + // MachineDeployments that are currently upgrading and the names of MachineDeployments that will pick up the upgrade + // in the current reconcile loop. + // Note: This information is used to: + // - decide if ControlPlane can be upgraded. + // - calculate MachineDeployment upgrade concurrency. + // - update TopologyReconciled Condition. + // - decide if the AfterClusterUpgrade hook can be called. + upgradingNames sets.Set[string] + + // maxMachineDeploymentUpgradeConcurrency defines the maximum number of MachineDeployments that should be in an + // upgrading state. This includes the MachineDeployments that are currently upgrading and the MachineDeployments that + // will start the upgrade after the current reconcile loop. maxMachineDeploymentUpgradeConcurrency int } @@ -82,7 +130,8 @@ func NewUpgradeTracker(opts ...UpgradeTrackerOption) *UpgradeTracker { } return &UpgradeTracker{ MachineDeployments: MachineDeploymentUpgradeTracker{ - pendingNames: sets.Set[string]{}, + pendingCreateTopologyNames: sets.Set[string]{}, + pendingUpgradeNames: sets.Set[string]{}, deferredNames: sets.Set[string]{}, upgradingNames: sets.Set[string]{}, maxMachineDeploymentUpgradeConcurrency: options.maxMDUpgradeConcurrency, @@ -103,42 +152,57 @@ func (m *MachineDeploymentUpgradeTracker) UpgradingNames() []string { return sets.List(m.upgradingNames) } -// HoldUpgrades is used to set if any subsequent upgrade operations should be paused, -// e.g. because a AfterControlPlaneUpgrade hook response asked to do so. -// If HoldUpgrades is called with `true` then AllowUpgrade would return false. -func (m *MachineDeploymentUpgradeTracker) HoldUpgrades(val bool) { - m.holdUpgrades = val +// UpgradeConcurrencyReached returns true if the number of MachineDeployments upgrading is at the concurrency limit. +func (m *MachineDeploymentUpgradeTracker) UpgradeConcurrencyReached() bool { + return m.upgradingNames.Len() >= m.maxMachineDeploymentUpgradeConcurrency } -// AllowUpgrade returns true if a MachineDeployment is allowed to upgrade, -// returns false otherwise. -// Note: If AllowUpgrade returns true the machine deployment will pick up -// the topology version. This will eventually trigger a machine deployment -// rollout. -func (m *MachineDeploymentUpgradeTracker) AllowUpgrade() bool { - if m.holdUpgrades { - return false - } - return m.upgradingNames.Len() < m.maxMachineDeploymentUpgradeConcurrency +// MarkPendingCreate marks a machine deployment topology that is pending to be created. +// This is generally used to capture machine deployments that are yet to be created +// because the control plane is not yet stable. +func (m *MachineDeploymentUpgradeTracker) MarkPendingCreate(mdTopologyName string) { + m.pendingCreateTopologyNames.Insert(mdTopologyName) +} + +// IsPendingCreate returns true if the MachineDeployment topology is marked as pending create.
+func (m *MachineDeploymentUpgradeTracker) IsPendingCreate(mdTopologyName string) bool { + return m.pendingCreateTopologyNames.Has(mdTopologyName) +} + +// IsAnyPendingCreate returns true if any of the machine deployments are pending +// to be created. Returns false, otherwise. +func (m *MachineDeploymentUpgradeTracker) IsAnyPendingCreate() bool { + return len(m.pendingCreateTopologyNames) != 0 +} + +// PendingCreateTopologyNames returns the list of machine deployment topology names that +// are pending create. +func (m *MachineDeploymentUpgradeTracker) PendingCreateTopologyNames() []string { + return sets.List(m.pendingCreateTopologyNames) } // MarkPendingUpgrade marks a machine deployment as in need of an upgrade. // This is generally used to capture machine deployments that have not yet // picked up the topology version. func (m *MachineDeploymentUpgradeTracker) MarkPendingUpgrade(name string) { - m.pendingNames.Insert(name) + m.pendingUpgradeNames.Insert(name) } -// PendingUpgradeNames returns the list of machine deployment names that -// are pending an upgrade. -func (m *MachineDeploymentUpgradeTracker) PendingUpgradeNames() []string { - return sets.List(m.pendingNames) +// IsPendingUpgrade returns true if the MachineDeployment is marked as pending upgrade. +func (m *MachineDeploymentUpgradeTracker) IsPendingUpgrade(name string) bool { + return m.pendingUpgradeNames.Has(name) } -// PendingUpgrade returns true if any of the machine deployments are pending +// IsAnyPendingUpgrade returns true if any of the machine deployments are pending // an upgrade. Returns false, otherwise. -func (m *MachineDeploymentUpgradeTracker) PendingUpgrade() bool { - return len(m.pendingNames) != 0 +func (m *MachineDeploymentUpgradeTracker) IsAnyPendingUpgrade() bool { + return len(m.pendingUpgradeNames) != 0 +} + +// PendingUpgradeNames returns the list of machine deployment names that +// are pending an upgrade. +func (m *MachineDeploymentUpgradeTracker) PendingUpgradeNames() []string { + return sets.List(m.pendingUpgradeNames) } // MarkDeferredUpgrade marks that the upgrade for a MachineDeployment From d191a4b2a77e26d34e902871249904886b8654cf Mon Sep 17 00:00:00 2001 From: Jan Roehrich Date: Tue, 6 Jun 2023 21:41:07 +0200 Subject: [PATCH 05/94] Remove use of deprecated clusterctl flag in docs --- docs/book/src/clusterctl/commands/describe-cluster.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/book/src/clusterctl/commands/describe-cluster.md b/docs/book/src/clusterctl/commands/describe-cluster.md index d90348f9c948..6d89914f21b0 100644 --- a/docs/book/src/clusterctl/commands/describe-cluster.md +++ b/docs/book/src/clusterctl/commands/describe-cluster.md @@ -26,7 +26,7 @@ bootstrap object linked to a machine, unless their state differs from the machin By default, the visualization generated by `clusterctl describe cluster` hides details for the sake of simplicity and shortness.
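For example, a minimal invocation producing the default (grouped) view could look like the following; the cluster name and namespace are placeholders:

```bash
clusterctl describe cluster my-cluster --namespace my-namespace
```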
However, if required, the user can ask for showing all the detail: -By using the `--disable-grouping` flag, the user can force the visualization to show all the machines +By using `--grouping=false`, the user can force the visualization to show all the machines on separated lines, no matter if they have the same state or not: ![](../../images/describe-cluster-disable-grouping.png) From 8763299e1e42a952db8231fb57a1c0323c1af8e2 Mon Sep 17 00:00:00 2001 From: Stefan Bueringer Date: Wed, 7 Jun 2023 09:37:31 +0200 Subject: [PATCH 06/94] test/e2e: Fixup dump kube-system pods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stefan Büringer buringerst@vmware.com --- test/e2e/common.go | 6 ++- test/framework/alltypes_helpers.go | 62 ++++++++++++------------------ test/framework/cluster_proxy.go | 2 +- 3 files changed, 30 insertions(+), 40 deletions(-) diff --git a/test/e2e/common.go b/test/e2e/common.go index 8cf6c669a4cd..c0c46ae23c01 100644 --- a/test/e2e/common.go +++ b/test/e2e/common.go @@ -79,10 +79,12 @@ func dumpSpecResourcesAndCleanup(ctx context.Context, specName string, clusterPr LogPath: filepath.Join(artifactFolder, "clusters", clusterProxy.GetName(), "resources"), }) - // If the cluster still exists, dump kube-system pods in the workload cluster before deleting the cluster. + // If the cluster still exists, dump kube-system pods of the workload cluster before deleting the cluster. if err := clusterProxy.GetClient().Get(ctx, client.ObjectKeyFromObject(cluster), &clusterv1.Cluster{}); err == nil { - framework.DumpKubeSystemPods(ctx, framework.DumpKubeSystemPodsInput{ + Byf("Dumping kube-system Pods of Cluster %s", klog.KObj(cluster)) + framework.DumpKubeSystemPodsForCluster(ctx, framework.DumpKubeSystemPodsForClusterInput{ Lister: clusterProxy.GetWorkloadCluster(ctx, cluster.Namespace, cluster.Name).GetClient(), + Cluster: cluster, LogPath: filepath.Join(artifactFolder, "clusters", cluster.Name, "resources"), }) } diff --git a/test/framework/alltypes_helpers.go b/test/framework/alltypes_helpers.go index cd88ae94d799..d5ce38bb5f5c 100644 --- a/test/framework/alltypes_helpers.go +++ b/test/framework/alltypes_helpers.go @@ -33,6 +33,8 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/yaml" @@ -77,34 +79,6 @@ func GetCAPIResources(ctx context.Context, input GetCAPIResourcesInput) []*unstr return objList } -// GetKubeSystemPodsInput is the input for GetKubeSystemPods. -type GetKubeSystemPodsInput struct { - Lister Lister -} - -// GetKubeSystemPods reads all pods in the kube-system namespace. -// Note: This function intentionally retrieves Pods as Unstructured, because we need the Pods -// as Unstructured eventually. 
-func GetKubeSystemPods(ctx context.Context, input GetKubeSystemPodsInput) []*unstructured.Unstructured { - Expect(ctx).NotTo(BeNil(), "ctx is required for GetKubeSystemPods") - Expect(input.Lister).NotTo(BeNil(), "input.Lister is required for GetKubeSystemPods") - - podList := new(unstructured.UnstructuredList) - podList.SetAPIVersion(corev1.SchemeGroupVersion.String()) - podList.SetKind("Pod") - if err := input.Lister.List(ctx, podList, client.InNamespace(metav1.NamespaceSystem)); err != nil { - Fail(fmt.Sprintf("failed to list Pods in kube-system: %v", err)) - } - - objList := []*unstructured.Unstructured{} - for i := range podList.Items { - obj := podList.Items[i] - objList = append(objList, &obj) - } - - return objList -} - // getClusterAPITypes returns the list of TypeMeta to be considered for the move discovery phase. // This list includes all the types belonging to CAPI providers. func getClusterAPITypes(ctx context.Context, lister Lister) []metav1.TypeMeta { @@ -158,24 +132,38 @@ func DumpAllResources(ctx context.Context, input DumpAllResourcesInput) { } } -// DumpKubeSystemPodsInput is the input for DumpKubeSystemPods. -type DumpKubeSystemPodsInput struct { +// DumpKubeSystemPodsForClusterInput is the input for DumpKubeSystemPodsForCluster. +type DumpKubeSystemPodsForClusterInput struct { Lister Lister LogPath string + Cluster *clusterv1.Cluster } -// DumpKubeSystemPods dumps kube-system Pods to YAML. -func DumpKubeSystemPods(ctx context.Context, input DumpKubeSystemPodsInput) { +// DumpKubeSystemPodsForCluster dumps kube-system Pods to YAML. +func DumpKubeSystemPodsForCluster(ctx context.Context, input DumpKubeSystemPodsForClusterInput) { Expect(ctx).NotTo(BeNil(), "ctx is required for DumpAllResources") Expect(input.Lister).NotTo(BeNil(), "input.Lister is required for DumpAllResources") + Expect(input.Cluster).NotTo(BeNil(), "input.Cluster is required for DumpAllResources") - resources := GetKubeSystemPods(ctx, GetKubeSystemPodsInput{ - Lister: input.Lister, + // Note: We intentionally retrieve Pods as Unstructured because we need the Pods as Unstructured for dumpObject. + podList := new(unstructured.UnstructuredList) + podList.SetAPIVersion(corev1.SchemeGroupVersion.String()) + podList.SetKind("Pod") + var listErr error + _ = wait.PollUntilContextTimeout(ctx, retryableOperationInterval, retryableOperationTimeout, true, func(ctx context.Context) (bool, error) { + if listErr = input.Lister.List(ctx, podList, client.InNamespace(metav1.NamespaceSystem)); listErr != nil { + return false, nil //nolint:nilerr + } + return true, nil }) + if listErr != nil { + // NB. we are treating failures in collecting kube-system pods as a non-blocking operation (best effort) + fmt.Printf("Failed to list Pods in kube-system for Cluster %s: %v\n", klog.KObj(input.Cluster), listErr) + return + } - for i := range resources { - r := resources[i] - dumpObject(r, input.LogPath) + for i := range podList.Items { + dumpObject(&podList.Items[i], input.LogPath) } } diff --git a/test/framework/cluster_proxy.go b/test/framework/cluster_proxy.go index 1b690c9497ff..1b840723037d 100644 --- a/test/framework/cluster_proxy.go +++ b/test/framework/cluster_proxy.go @@ -315,7 +315,7 @@ func (p *clusterProxy) CollectWorkloadClusterLogs(ctx context.Context, namespace mp := &machinePools.Items[i] err := p.logCollector.CollectMachinePoolLog(ctx, p.GetClient(), mp, path.Join(outputPath, "machine-pools", mp.GetName())) if err != nil { - // NB. 
we are treating failures in collecting logs as a non blocking operation (best effort) + // NB. we are treating failures in collecting logs as a non-blocking operation (best effort) fmt.Printf("Failed to get logs for MachinePool %s, Cluster %s: %v\n", mp.GetName(), klog.KRef(namespace, name), err) } } From 08b4d2997486240d971aaf1fa3cbec7b151357ae Mon Sep 17 00:00:00 2001 From: Jakob Schrettenbrunner Date: Wed, 7 Jun 2023 12:40:12 +0200 Subject: [PATCH 07/94] book: add in-cluster ipam and helm addon providers --- docs/book/src/reference/providers.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/book/src/reference/providers.md b/docs/book/src/reference/providers.md index d233738eb058..571f5ccfa161 100644 --- a/docs/book/src/reference/providers.md +++ b/docs/book/src/reference/providers.md @@ -46,6 +46,12 @@ updated info about which API version they are supporting. - [VMware Cloud Director](https://github.com/vmware/cluster-api-provider-cloud-director) - [vSphere](https://github.com/kubernetes-sigs/cluster-api-provider-vsphere) +## IP Address Management (IPAM) +- [In Cluster](https://github.com/kubernetes-sigs/cluster-api-ipam-provider-in-cluster) + +## Addon +- [Helm](https://github.com/kubernetes-sigs/cluster-api-addon-provider-helm/) + ## API Adopters Following are the implementations managed by third-parties adopting the standard cluster-api and/or machine-api being developed here. From 7b21bd181636f6cb107c5c621e09cf10de4fdb6c Mon Sep 17 00:00:00 2001 From: Guillermo Gaston Date: Wed, 22 Mar 2023 22:22:41 +0100 Subject: [PATCH 08/94] Add area label lookup for prefixes to release notes tool Co-authored-by: Oscar Utbult --- hack/tools/release/notes.go | 91 ++++++++++++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 1 deletion(-) diff --git a/hack/tools/release/notes.go b/hack/tools/release/notes.go index e852bd0e1f0d..f5812502e7e8 100644 --- a/hack/tools/release/notes.go +++ b/hack/tools/release/notes.go @@ -22,11 +22,14 @@ package main import ( "bytes" + "encoding/json" + "errors" "flag" "fmt" "os" "os/exec" "regexp" + "sort" "strings" "time" ) @@ -104,7 +107,70 @@ func increaseDateByOneDay(date string) (string, error) { return datetime.Format(layout), nil } +const ( + missingAreaLabelPrefix = "MISSING_AREA" + areaLabelPrefix = "area/" + multipleAreaLabelsPrefix = "MULTIPLE_AREAS[" +) + +type githubPullRequest struct { + Labels []githubLabel `json:"labels"` +} + +type githubLabel struct { + Name string `json:"name"` +} + +func getAreaLabel(merge string) (string, error) { + // Get pr id from merge commit + prID := strings.Replace(strings.TrimSpace(strings.Split(merge, " ")[3]), "#", "", -1) + + cmd := exec.Command("gh", "api", "repos/kubernetes-sigs/cluster-api/pulls/"+prID) //nolint:gosec + + out, err := cmd.CombinedOutput() + if err != nil { + return "", err + } + + pr := &githubPullRequest{} + if err := json.Unmarshal(out, pr); err != nil { + return "", err + } + + var areaLabels []string + for _, label := range pr.Labels { + if area, ok := trimAreaLabel(label.Name); ok { + areaLabels = append(areaLabels, area) + } + } + + switch len(areaLabels) { + case 0: + return missingAreaLabelPrefix, nil + case 1: + return areaLabels[0], nil + default: + return multipleAreaLabelsPrefix + strings.Join(areaLabels, "|") + "]", nil + } +} + +// trimAreaLabel removes the "area/" prefix from area labels and returns it. +// If the label is an area label, the second return value is true, otherwise false. 
+func trimAreaLabel(label string) (string, bool) { + trimmed := strings.TrimPrefix(label, areaLabelPrefix) + if len(trimmed) < len(label) { + return trimmed, true + } + + return label, false +} + func run() int { + if err := ensureInstalledDependencies(); err != nil { + fmt.Println(err) + return 1 + } + var commitRange string var cmd *exec.Cmd @@ -160,6 +226,11 @@ func run() int { for _, c := range commits { body := trimTitle(c.body) var key, prNumber, fork string + prefix, err := getAreaLabel(c.merge) + if err != nil { + fmt.Println(err) + os.Exit(1) + } switch { case strings.HasPrefix(body, ":sparkles:"), strings.HasPrefix(body, "✨"): key = features @@ -192,7 +263,7 @@ func run() int { if body == "" { continue } - body = fmt.Sprintf("- %s", body) + body = fmt.Sprintf("- %s: %s", prefix, body) _, _ = fmt.Sscanf(c.merge, "Merge pull request %s from %s", &prNumber, &fork) if key == documentation { merges[key] = append(merges[key], prNumber) @@ -226,6 +297,7 @@ func run() int { ) default: fmt.Println("## " + key) + sort.Strings(mergeslice) for _, merge := range mergeslice { fmt.Println(merge) } @@ -257,3 +329,20 @@ func formatMerge(line, prNumber string) string { } return fmt.Sprintf("%s (%s)", line, prNumber) } + +func ensureInstalledDependencies() error { + if !commandExists("git") { + return errors.New("git not available. Git is required to be present in the PATH") + } + + if !commandExists("gh") { + return errors.New("gh GitHub CLI not available. GitHub CLI is required to be present in the PATH. Refer to https://cli.github.com/ for installation") + } + + return nil +} + +func commandExists(cmd string) bool { + _, err := exec.LookPath(cmd) + return err == nil +} From fa1cd34285b23798df7931f6f68ddf6c232f0c0c Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Wed, 7 Jun 2023 21:12:21 +0200 Subject: [PATCH 09/94] fix cluster deletion in the in-memory API server --- .../controllers/goofycluster_controller.go | 18 ++-- .../controllers/goofymachine_controller.go | 89 +++++++++++++++++-- .../inmemory/internal/server/api/handler.go | 8 +- .../inmemory/internal/server/mux.go | 63 ++++++++++++- .../inmemory/internal/server/mux_test.go | 55 ++++++++++++ 5 files changed, 217 insertions(+), 16 deletions(-) diff --git a/test/infrastructure/inmemory/internal/controllers/goofycluster_controller.go b/test/infrastructure/inmemory/internal/controllers/goofycluster_controller.go index 3ae587aa5a23..30cd9d66c1a6 100644 --- a/test/infrastructure/inmemory/internal/controllers/goofycluster_controller.go +++ b/test/infrastructure/inmemory/internal/controllers/goofycluster_controller.go @@ -114,7 +114,7 @@ func (r *InMemoryClusterReconciler) Reconcile(ctx context.Context, req ctrl.Requ // Handle deleted clusters if !inMemoryCluster.DeletionTimestamp.IsZero() { - return r.reconcileDelete(ctx, inMemoryCluster) + return r.reconcileDelete(ctx, cluster, inMemoryCluster) } // Handle non-deleted clusters @@ -187,11 +187,19 @@ func (r *InMemoryClusterReconciler) reconcileNormal(_ context.Context, cluster * return ctrl.Result{}, nil } -//nolint:unparam // once we implemented this func we will also return errors -func (r *InMemoryClusterReconciler) reconcileDelete(_ context.Context, inMemoryCluster *infrav1.InMemoryCluster) (ctrl.Result, error) { - // TODO: implement - controllerutil.RemoveFinalizer(inMemoryCluster, infrav1.ClusterFinalizer) +func (r *InMemoryClusterReconciler) reconcileDelete(_ context.Context, cluster *clusterv1.Cluster, inMemoryCluster *infrav1.InMemoryCluster) (ctrl.Result, error) { + // Compute the 
resource group unique name. + resourceGroup := klog.KObj(cluster).String() + + // Delete the resource group hosting all the cloud resources belonging to the workload cluster; + r.CloudManager.DeleteResourceGroup(resourceGroup) + // Delete the listener for the workload cluster; + if err := r.APIServerMux.DeleteWorkloadClusterListener(resourceGroup); err != nil { + return ctrl.Result{}, err + } + + controllerutil.RemoveFinalizer(inMemoryCluster, infrav1.ClusterFinalizer) return ctrl.Result{}, nil } diff --git a/test/infrastructure/inmemory/internal/controllers/goofymachine_controller.go b/test/infrastructure/inmemory/internal/controllers/goofymachine_controller.go index e13bd9da90aa..80ee58010a38 100644 --- a/test/infrastructure/inmemory/internal/controllers/goofymachine_controller.go +++ b/test/infrastructure/inmemory/internal/controllers/goofymachine_controller.go @@ -155,7 +155,7 @@ func (r *InMemoryMachineReconciler) Reconcile(ctx context.Context, req ctrl.Requ // Handle deleted machines if !inMemoryMachine.DeletionTimestamp.IsZero() { - return r.reconcileDelete(ctx, inMemoryMachine) + return r.reconcileDelete(ctx, cluster, machine, inMemoryMachine) } // Handle non-deleted machines @@ -324,7 +324,7 @@ func (r *InMemoryMachineReconciler) reconcileNormal(ctx context.Context, cluster } if err := cloudClient.Get(ctx, client.ObjectKeyFromObject(etcdPod), etcdPod); err != nil { if !apierrors.IsNotFound(err) { - return ctrl.Result{}, errors.Wrapf(err, "failed to get etcdPod Pod") + return ctrl.Result{}, errors.Wrapf(err, "failed to get etcd Pod") } etcdPod.Labels = map[string]string{ @@ -468,11 +468,88 @@ func (r *InMemoryMachineReconciler) reconcileNormal(ctx context.Context, cluster return ctrl.Result{}, nil } -//nolint:unparam // once we implemented this func we will also return errors -func (r *InMemoryMachineReconciler) reconcileDelete(_ context.Context, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) { - // TODO: implement - controllerutil.RemoveFinalizer(inMemoryMachine, infrav1.MachineFinalizer) +func (r *InMemoryMachineReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) { + // Compute the resource group unique name. + // NOTE: We are using resourceGroup also as a name for the listener for the sake of simplicity.
+ resourceGroup := klog.KObj(cluster).String() + cloudClient := r.CloudManager.GetResourceGroup(resourceGroup).GetClient() + + // Delete VM + cloudMachine := &cloudv1.CloudMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: inMemoryMachine.Name, + }, + } + if err := cloudClient.Delete(ctx, cloudMachine); err != nil && !apierrors.IsNotFound(err) { + return ctrl.Result{}, errors.Wrapf(err, "failed to delete CloudMachine") + } + + // Delete Node + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: inMemoryMachine.Name, + }, + } + if err := cloudClient.Delete(ctx, node); err != nil && !apierrors.IsNotFound(err) { + return ctrl.Result{}, errors.Wrapf(err, "failed to delete Node") + } + + if util.IsControlPlaneMachine(machine) { + controllerManagerPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: metav1.NamespaceSystem, + Name: fmt.Sprintf("kube-controller-manager-%s", inMemoryMachine.Name), + }, + } + if err := cloudClient.Delete(ctx, controllerManagerPod); err != nil && !apierrors.IsNotFound(err) { + return ctrl.Result{}, errors.Wrapf(err, "failed to delete controller manager Pod") + } + + schedulerPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: metav1.NamespaceSystem, + Name: fmt.Sprintf("kube-scheduler-%s", inMemoryMachine.Name), + }, + } + if err := cloudClient.Delete(ctx, schedulerPod); err != nil && !apierrors.IsNotFound(err) { + return ctrl.Result{}, errors.Wrapf(err, "failed to delete scheduler Pod") + } + + apiServer := fmt.Sprintf("kube-apiserver-%s", inMemoryMachine.Name) + apiServerPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: metav1.NamespaceSystem, + Name: apiServer, + }, + } + if err := cloudClient.Delete(ctx, apiServerPod); err != nil && !apierrors.IsNotFound(err) { + return ctrl.Result{}, errors.Wrapf(err, "failed to delete apiServer Pod") + } + if err := r.APIServerMux.DeleteAPIServer(resourceGroup, apiServer); err != nil { + return ctrl.Result{}, err + } + + // TODO: if all the API servers are gone, cleanup all the k8s objects from the resource group. + // note: it is not possible to delete the resource group, because cloud resources should be preserved. + // given that, in order to implement this it is required to find a way to identify all the k8s resources (might be via gvk); + // also, deletion must happen suddenly, without respecting finalizers or owner references links.
+ + etcdMember := fmt.Sprintf("etcd-%s", inMemoryMachine.Name) + etcdPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: metav1.NamespaceSystem, + Name: etcdMember, + }, + } + if err := cloudClient.Delete(ctx, etcdPod); err != nil && !apierrors.IsNotFound(err) { + return ctrl.Result{}, errors.Wrapf(err, "failed to delete etcd Pod") + } + if err := r.APIServerMux.DeleteEtcdMember(resourceGroup, etcdMember); err != nil { + return ctrl.Result{}, err + } + } + + controllerutil.RemoveFinalizer(inMemoryMachine, infrav1.MachineFinalizer) return ctrl.Result{}, nil } diff --git a/test/infrastructure/inmemory/internal/server/api/handler.go b/test/infrastructure/inmemory/internal/server/api/handler.go index e9691ea3b895..8a6cf32b390c 100644 --- a/test/infrastructure/inmemory/internal/server/api/handler.go +++ b/test/infrastructure/inmemory/internal/server/api/handler.go @@ -77,14 +77,14 @@ func NewAPIServerHandler(manager cmanager.Manager, log logr.Logger, resolver Res ws.Route(ws.GET("/api/v1/{resource}/{name}").To(apiServer.apiV1Get)) ws.Route(ws.PUT("/api/v1/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Update)) ws.Route(ws.PATCH("/api/v1/{resource}/{name}").Consumes(string(types.MergePatchType), string(types.StrategicMergePatchType)).To(apiServer.apiV1Patch)) - ws.Route(ws.DELETE("/api/v1/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Delete)) + ws.Route(ws.DELETE("/api/v1/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf, runtime.ContentTypeJSON).To(apiServer.apiV1Delete)) ws.Route(ws.POST("/apis/{group}/{version}/{resource}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Create)) ws.Route(ws.GET("/apis/{group}/{version}/{resource}").To(apiServer.apiV1List)) ws.Route(ws.GET("/apis/{group}/{version}/{resource}/{name}").To(apiServer.apiV1Get)) ws.Route(ws.PUT("/apis/{group}/{version}/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Update)) ws.Route(ws.PATCH("/apis/{group}/{version}/{resource}/{name}").Consumes(string(types.MergePatchType), string(types.StrategicMergePatchType)).To(apiServer.apiV1Patch)) - ws.Route(ws.DELETE("/apis/{group}/{version}/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Delete)) + ws.Route(ws.DELETE("/apis/{group}/{version}/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf, runtime.ContentTypeJSON).To(apiServer.apiV1Delete)) // CRUD endpoints (namespaced objects) ws.Route(ws.POST("/api/v1/namespaces/{namespace}/{resource}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Create)) @@ -92,14 +92,14 @@ func NewAPIServerHandler(manager cmanager.Manager, log logr.Logger, resolver Res ws.Route(ws.GET("/api/v1/namespaces/{namespace}/{resource}").To(apiServer.apiV1List)) ws.Route(ws.GET("/api/v1/namespaces/{namespace}/{resource}/{name}").To(apiServer.apiV1Get)) ws.Route(ws.PUT("/api/v1/namespaces/{namespace}/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Update)) ws.Route(ws.PATCH("/api/v1/namespaces/{namespace}/{resource}/{name}").Consumes(string(types.MergePatchType), string(types.StrategicMergePatchType)).To(apiServer.apiV1Patch)) - ws.Route(ws.DELETE("/api/v1/namespaces/{namespace}/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Delete)) + ws.Route(ws.DELETE("/api/v1/namespaces/{namespace}/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf, runtime.ContentTypeJSON).To(apiServer.apiV1Delete)) ws.Route(ws.POST("/apis/{group}/{version}/namespaces/{namespace}/{resource}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Create))
ws.Route(ws.GET("/apis/{group}/{version}/namespaces/{namespace}/{resource}").To(apiServer.apiV1List)) ws.Route(ws.GET("/apis/{group}/{version}/namespaces/{namespace}/{resource}/{name}").To(apiServer.apiV1Get)) ws.Route(ws.PUT("/apis/{group}/{version}/namespaces/{namespace}/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Update)) ws.Route(ws.PATCH("/apis/{group}/{version}/namespaces/{namespace}/{resource}/{name}").Consumes(string(types.MergePatchType), string(types.StrategicMergePatchType)).To(apiServer.apiV1Patch)) - ws.Route(ws.DELETE("/apis/{group}/{version}/namespaces/{namespace}/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Delete)) + ws.Route(ws.DELETE("/apis/{group}/{version}/namespaces/{namespace}/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf, runtime.ContentTypeJSON).To(apiServer.apiV1Delete)) // Port forward endpoints ws.Route(ws.GET("/api/v1/namespaces/{namespace}/pods/{name}/portforward").To(apiServer.apiV1PortForward)) diff --git a/test/infrastructure/inmemory/internal/server/mux.go b/test/infrastructure/inmemory/internal/server/mux.go index 98048a4a4312..c691db7e3652 100644 --- a/test/infrastructure/inmemory/internal/server/mux.go +++ b/test/infrastructure/inmemory/internal/server/mux.go @@ -350,6 +350,28 @@ func (m *WorkloadClustersMux) AddAPIServer(wclName, podName string, caCert *x509 return nil } +// DeleteAPIServer removes an API server instance from the WorkloadClusterListener. +func (m *WorkloadClustersMux) DeleteAPIServer(wclName, podName string) error { + m.lock.Lock() + defer m.lock.Unlock() + + wcl, ok := m.workloadClusterListeners[wclName] + if !ok { + return errors.Errorf("workloadClusterListener with name %s must be initialized before removing an APIserver", wclName) + } + wcl.apiServers.Delete(podName) + m.log.Info("APIServer instance removed from the workloadClusterListener", "listenerName", wclName, "address", wcl.Address(), "podName", podName) + + if wcl.apiServers.Len() < 1 && wcl.listener != nil { + if err := wcl.listener.Close(); err != nil { + return errors.Wrapf(err, "failed to stop WorkloadClusterListener %s, %s", wclName, wcl.HostPort()) + } + wcl.listener = nil + m.log.Info("WorkloadClusterListener stopped because there are no APIServer left", "listenerName", wclName, "address", wcl.Address()) + } + return nil +} + // HasAPIServer returns true if the workload cluster already has an apiserver with podName. func (m *WorkloadClustersMux) HasAPIServer(wclName, podName string) bool { m.lock.RLock() @@ -371,7 +393,7 @@ func (m *WorkloadClustersMux) AddEtcdMember(wclName, podName string, caCert *x50 wcl, ok := m.workloadClusterListeners[wclName] if !ok { - return errors.Errorf("workloadClusterListener with name %s must be initialized before adding an APIserver", wclName) + return errors.Errorf("workloadClusterListener with name %s must be initialized before adding an etcd member", wclName) } wcl.etcdMembers.Insert(podName) m.log.Info("Etcd member added to WorkloadClusterListener", "listenerName", wclName, "address", wcl.Address(), "podName", podName) @@ -406,6 +428,22 @@ func (m *WorkloadClustersMux) HasEtcdMember(wclName, podName string) bool { return wcl.etcdMembers.Has(podName) } +// DeleteEtcdMember removes an etcd Member from the WorkloadClusterListener. 
+func (m *WorkloadClustersMux) DeleteEtcdMember(wclName, podName string) error { + m.lock.Lock() + defer m.lock.Unlock() + + wcl, ok := m.workloadClusterListeners[wclName] + if !ok { + return errors.Errorf("workloadClusterListener with name %s must be initialized before removing an etcd member", wclName) + } + wcl.etcdMembers.Delete(podName) + delete(wcl.etcdServingCertificates, podName) + m.log.Info("Etcd member removed from WorkloadClusterListener", "listenerName", wclName, "address", wcl.Address(), "podName", podName) + + return nil +} + // ListListeners implements api.DebugInfoProvider. func (m *WorkloadClustersMux) ListListeners() map[string]string { m.lock.RLock() @@ -418,6 +456,29 @@ func (m *WorkloadClustersMux) ListListeners() map[string]string { return ret } +// DeleteWorkloadClusterListener deletes a WorkloadClusterListener. +func (m *WorkloadClustersMux) DeleteWorkloadClusterListener(wclName string) error { + m.lock.Lock() + defer m.lock.Unlock() + + wcl, ok := m.workloadClusterListeners[wclName] + if !ok { + return nil + } + + if wcl.listener != nil { + if err := wcl.listener.Close(); err != nil { + return errors.Wrapf(err, "failed to stop WorkloadClusterListener %s, %s", wclName, wcl.HostPort()) + } + } + + delete(m.workloadClusterListeners, wclName) + delete(m.workloadClusterNameByHost, wcl.HostPort()) + + m.log.Info("Workload cluster listener deleted", "listenerName", wclName, "address", wcl.Address()) + return nil +} + // Shutdown shuts down the workload cluster mux. func (m *WorkloadClustersMux) Shutdown(ctx context.Context) error { m.lock.Lock() diff --git a/test/infrastructure/inmemory/internal/server/mux_test.go b/test/infrastructure/inmemory/internal/server/mux_test.go index 8f6e6e9b5807..fab13f1a6d63 100644 --- a/test/infrastructure/inmemory/internal/server/mux_test.go +++ b/test/infrastructure/inmemory/internal/server/mux_test.go @@ -59,6 +59,61 @@ func init() { ctrl.SetLogger(klog.Background()) } +func TestMux(t *testing.T) { + g := NewWithT(t) + + manager := cmanager.New(scheme) + + wcl := "workload-cluster" + host := "127.0.0.1" //nolint:goconst + wcmux := NewWorkloadClustersMux(manager, host) + + listener, err := wcmux.InitWorkloadClusterListener(wcl) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(listener.Host()).To(Equal(host)) + g.Expect(listener.Port()).ToNot(BeZero()) + + caCert, caKey, err := newCertificateAuthority() + g.Expect(err).ToNot(HaveOccurred()) + + etcdCert, etcdKey, err := newCertificateAuthority() + g.Expect(err).ToNot(HaveOccurred()) + + apiServerPod1 := "apiserver1" + err = wcmux.AddAPIServer(wcl, apiServerPod1, caCert, caKey) + g.Expect(err).ToNot(HaveOccurred()) + + etcdPodMember1 := "etcd1" + err = wcmux.AddEtcdMember(wcl, etcdPodMember1, etcdCert, etcdKey) + g.Expect(err).ToNot(HaveOccurred()) + + apiServerPod2 := "apiserver2" + err = wcmux.AddAPIServer(wcl, apiServerPod2, caCert, caKey) + g.Expect(err).ToNot(HaveOccurred()) + + etcdPodMember2 := "etcd2" + err = wcmux.AddEtcdMember(wcl, etcdPodMember2, etcdCert, etcdKey) + g.Expect(err).ToNot(HaveOccurred()) + + err = wcmux.DeleteAPIServer(wcl, apiServerPod2) + g.Expect(err).ToNot(HaveOccurred()) + + err = wcmux.DeleteEtcdMember(wcl, etcdPodMember2) + g.Expect(err).ToNot(HaveOccurred()) + + err = wcmux.DeleteAPIServer(wcl, apiServerPod1) + g.Expect(err).ToNot(HaveOccurred()) + + err = wcmux.DeleteEtcdMember(wcl, etcdPodMember1) + g.Expect(err).ToNot(HaveOccurred()) + + err = wcmux.DeleteWorkloadClusterListener(wcl) + g.Expect(err).ToNot(HaveOccurred()) + + err = wcmux.Shutdown(ctx) + 
g.Expect(err).ToNot(HaveOccurred()) +} + func TestAPI_corev1_CRUD(t *testing.T) { g := NewWithT(t) From 76f02861f4d3fefedd8f5360d79b5440898a5c4b Mon Sep 17 00:00:00 2001 From: Stefan Bueringer Date: Fri, 9 Jun 2023 10:06:50 +0200 Subject: [PATCH 10/94] test/infra/inmemory: rename controller files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stefan Büringer buringerst@vmware.com --- .../{goofycluster_controller.go => inmemorycluster_controller.go} | 0 .../{goofymachine_controller.go => inmemorymachine_controller.go} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename test/infrastructure/inmemory/internal/controllers/{goofycluster_controller.go => inmemorycluster_controller.go} (100%) rename test/infrastructure/inmemory/internal/controllers/{goofymachine_controller.go => inmemorymachine_controller.go} (100%) diff --git a/test/infrastructure/inmemory/internal/controllers/goofycluster_controller.go b/test/infrastructure/inmemory/internal/controllers/inmemorycluster_controller.go similarity index 100% rename from test/infrastructure/inmemory/internal/controllers/goofycluster_controller.go rename to test/infrastructure/inmemory/internal/controllers/inmemorycluster_controller.go diff --git a/test/infrastructure/inmemory/internal/controllers/goofymachine_controller.go b/test/infrastructure/inmemory/internal/controllers/inmemorymachine_controller.go similarity index 100% rename from test/infrastructure/inmemory/internal/controllers/goofymachine_controller.go rename to test/infrastructure/inmemory/internal/controllers/inmemorymachine_controller.go From 5adeb347b60ed921b0eb5de985328cdba46bff8a Mon Sep 17 00:00:00 2001 From: Mario Constanti Date: Tue, 23 May 2023 13:01:42 +0200 Subject: [PATCH 11/94] handle nil pointer in clusterctl describe if a cluster has an empty bootstrap.configRef in the machinedeployment (which is allowed), a nil pointer dereference will occur if clusterctl describe cluster is called with --show-templates. Also add a second type of a fake machinedeployment which has a secret reference in the bootstrap object instead of a config ref Signed-off-by: Mario Constanti --- cmd/clusterctl/client/cluster/mover_test.go | 72 ++++++-- .../client/cluster/objectgraph_test.go | 79 +++++++- cmd/clusterctl/client/tree/discovery.go | 7 +- cmd/clusterctl/client/tree/discovery_test.go | 23 +++ cmd/clusterctl/internal/test/fake_objects.go | 168 ++++++++++++------ 5 files changed, 283 insertions(+), 66 deletions(-) diff --git a/cmd/clusterctl/client/cluster/mover_test.go b/cmd/clusterctl/client/cluster/mover_test.go index ade86d12733c..23180b5b153d 100644 --- a/cmd/clusterctl/client/cluster/mover_test.go +++ b/cmd/clusterctl/client/cluster/mover_test.go @@ -248,6 +248,54 @@ var moveTests = []struct { }, wantErr: false, }, + { + name: "Cluster with MachineDeployment with a static bootstrap config", + fields: moveTestsFields{ + objs: test.NewFakeCluster("ns1", "cluster1"). + WithMachineDeployments( + test.NewFakeMachineDeployment("md1"). + WithStaticBootstrapConfig(). + WithMachineSets( + test.NewFakeMachineSet("ms1"). + WithStaticBootstrapConfig(). + WithMachines( + test.NewFakeMachine("m1"). + WithStaticBootstrapConfig(), + test.NewFakeMachine("m2").
+ WithStaticBootstrapConfig(), + ), + ), + ).Objs(), + }, + wantMoveGroups: [][]string{ + { // group 1 + "cluster.x-k8s.io/v1beta1, Kind=Cluster, ns1/cluster1", + }, + { // group 2 (objects with ownerReferences in group 1) + // owned by Clusters + "/v1, Kind=Secret, ns1/cluster1-ca", + "/v1, Kind=Secret, ns1/cluster1-kubeconfig", + "cluster.x-k8s.io/v1beta1, Kind=MachineDeployment, ns1/md1", + "infrastructure.cluster.x-k8s.io/v1beta1, Kind=GenericInfrastructureCluster, ns1/cluster1", + "infrastructure.cluster.x-k8s.io/v1beta1, Kind=GenericInfrastructureMachineTemplate, ns1/md1", + }, + { // group 3 (objects with ownerReferences in group 1,2) + // owned by MachineDeployments + "cluster.x-k8s.io/v1beta1, Kind=MachineSet, ns1/ms1", + }, + { // group 4 (objects with ownerReferences in group 1,2,3) + // owned by MachineSets + "cluster.x-k8s.io/v1beta1, Kind=Machine, ns1/m1", + "cluster.x-k8s.io/v1beta1, Kind=Machine, ns1/m2", + }, + { // group 5 (objects with ownerReferences in group 1,2,3,4) + // owned by Machines + "infrastructure.cluster.x-k8s.io/v1beta1, Kind=GenericInfrastructureMachine, ns1/m1", + "infrastructure.cluster.x-k8s.io/v1beta1, Kind=GenericInfrastructureMachine, ns1/m2", + }, + }, + wantErr: false, + }, { name: "Cluster with Control Plane", fields: moveTestsFields{ @@ -825,7 +873,7 @@ func Test_objectMover_restoreTargetObject(t *testing.T) { oTo.SetKind(node.identity.Kind) if err := csTo.Get(ctx, key, oTo); err != nil { - t.Errorf("error = %v when checking for %v created in target cluster", err, key) + t.Errorf("error = %v when checking for %s %v created in target cluster", err, oTo.GetKind(), key) continue } @@ -853,7 +901,7 @@ func Test_objectMover_restoreTargetObject(t *testing.T) { oAfter.SetKind(node.identity.Kind) if err := csAfter.Get(ctx, keyAfter, oAfter); err != nil { - t.Errorf("error = %v when checking for %v created in target cluster", err, key) + t.Errorf("error = %v when checking for %s %v created in target cluster", err, oAfter.GetKind(), key) continue } @@ -1076,7 +1124,7 @@ func Test_objectMover_fromDirectory(t *testing.T) { oTo.SetKind(node.identity.Kind) if err := csTo.Get(ctx, key, oTo); err != nil { - t.Errorf("error = %v when checking for %v created in target cluster", err, key) + t.Errorf("error = %v when checking for %s %v created in target cluster", err, oTo.GetKind(), key) continue } } @@ -1164,7 +1212,7 @@ func Test_objectMover_move_dryRun(t *testing.T) { oFrom.SetKind(node.identity.Kind) if err := csFrom.Get(ctx, key, oFrom); err != nil { - t.Errorf("error = %v when checking for %v kept in source cluster", err, key) + t.Errorf("error = %v when checking for %s %v kept in source cluster", err, oFrom.GetKind(), key) continue } @@ -1176,11 +1224,11 @@ func Test_objectMover_move_dryRun(t *testing.T) { err := csTo.Get(ctx, key, oTo) if err == nil { if oFrom.GetNamespace() != "" { - t.Errorf("%v created in target cluster which should not", key) + t.Errorf("%s %v created in target cluster which should not", oFrom.GetKind(), key) continue } } else if !apierrors.IsNotFound(err) { - t.Errorf("error = %v when checking for %v should not created ojects in target cluster", err, key) + t.Errorf("error = %v when checking for %s %v should not create objects in target cluster", err, oFrom.GetKind(), key) continue } } @@ -1240,11 +1288,11 @@ func Test_objectMover_move(t *testing.T) { err := csFrom.Get(ctx, key, oFrom) if err == nil { if !node.isGlobal && !node.isGlobalHierarchy { - t.Errorf("%v not deleted in source cluster", key) + t.Errorf("%s %v not deleted in 
source cluster", oFrom.GetKind(), key) continue } } else if !apierrors.IsNotFound(err) { - t.Errorf("error = %v when checking for %v deleted in source cluster", err, key) + t.Errorf("error = %v when checking for %s %v deleted in source cluster", err, oFrom.GetKind(), key) continue } @@ -1254,7 +1302,7 @@ func Test_objectMover_move(t *testing.T) { oTo.SetKind(node.identity.Kind) if err := csTo.Get(ctx, key, oTo); err != nil { - t.Errorf("error = %v when checking for %v created in target cluster", err, key) + t.Errorf("error = %v when checking for %s %v created in target cluster", err, oFrom.GetKind(), key) continue } } @@ -1349,11 +1397,11 @@ func Test_objectMover_move_with_Mutator(t *testing.T) { err := csFrom.Get(ctx, key, oFrom) if err == nil { if !node.isGlobal && !node.isGlobalHierarchy { - t.Errorf("%v not deleted in source cluster", key) + t.Errorf("%s %v not deleted in source cluster", oFrom.GetKind(), key) continue } } else if !apierrors.IsNotFound(err) { - t.Errorf("error = %v when checking for %v deleted in source cluster", err, key) + t.Errorf("error = %v when checking for %s %v deleted in source cluster", err, oFrom.GetKind(), key) continue } @@ -1366,7 +1414,7 @@ func Test_objectMover_move_with_Mutator(t *testing.T) { } if err := csTo.Get(ctx, key, oTo); err != nil { - t.Errorf("error = %v when checking for %v created in target cluster", err, key) + t.Errorf("error = %v when checking for %s %v created in target cluster", err, oFrom.GetKind(), key) continue } if fields, knownKind := updateKnownKinds[oTo.GetKind()]; knownKind { diff --git a/cmd/clusterctl/client/cluster/objectgraph_test.go b/cmd/clusterctl/client/cluster/objectgraph_test.go index dcd856f48521..314760da0aba 100644 --- a/cmd/clusterctl/client/cluster/objectgraph_test.go +++ b/cmd/clusterctl/client/cluster/objectgraph_test.go @@ -252,7 +252,7 @@ func assertGraph(t *testing.T, got *objectGraph, want wantGraph) { g := NewWithT(t) - g.Expect(got.uidToNode).To(HaveLen(len(want.nodes)), "the number of nodes in the objectGraph doesn't match the number of expected nodes") + g.Expect(got.uidToNode).To(HaveLen(len(want.nodes)), "the number of nodes in the objectGraph doesn't match the number of expected nodes - got: %d expected: %d", len(got.uidToNode), len(want.nodes)) for uid, wantNode := range want.nodes { gotNode, ok := got.uidToNode[types.UID(uid)] @@ -810,6 +810,83 @@ var objectGraphsTests = []struct { }, }, }, + { + name: "Cluster with MachineDeployment without a BootstrapConfigRef", + args: objectGraphTestArgs{ + objs: test.NewFakeCluster("ns1", "cluster1"). + WithMachineDeployments( + test.NewFakeMachineDeployment("md1"). + WithStaticBootstrapConfig(). + WithMachineSets( + test.NewFakeMachineSet("ms1"). + WithMachines( + test.NewFakeMachine("m1"), + ), + ), + ).Objs(), + }, + want: wantGraph{ + nodes: map[string]wantGraphItem{ + "cluster.x-k8s.io/v1beta1, Kind=Cluster, ns1/cluster1": { + forceMove: true, + forceMoveHierarchy: true, + }, + "infrastructure.cluster.x-k8s.io/v1beta1, Kind=GenericInfrastructureCluster, ns1/cluster1": { + owners: []string{ + "cluster.x-k8s.io/v1beta1, Kind=Cluster, ns1/cluster1", + }, + }, + "/v1, Kind=Secret, ns1/cluster1-ca": { + softOwners: []string{ + "cluster.x-k8s.io/v1beta1, Kind=Cluster, ns1/cluster1", // NB. 
this secret is not linked to the cluster through owner ref + }, + }, + "/v1, Kind=Secret, ns1/cluster1-kubeconfig": { + owners: []string{ + "cluster.x-k8s.io/v1beta1, Kind=Cluster, ns1/cluster1", + }, + }, + + "cluster.x-k8s.io/v1beta1, Kind=MachineDeployment, ns1/md1": { + owners: []string{ + "cluster.x-k8s.io/v1beta1, Kind=Cluster, ns1/cluster1", + }, + }, + "infrastructure.cluster.x-k8s.io/v1beta1, Kind=GenericInfrastructureMachineTemplate, ns1/md1": { + owners: []string{ + "cluster.x-k8s.io/v1beta1, Kind=Cluster, ns1/cluster1", + }, + }, + + "cluster.x-k8s.io/v1beta1, Kind=MachineSet, ns1/ms1": { + owners: []string{ + "cluster.x-k8s.io/v1beta1, Kind=MachineDeployment, ns1/md1", + }, + }, + + "cluster.x-k8s.io/v1beta1, Kind=Machine, ns1/m1": { + owners: []string{ + "cluster.x-k8s.io/v1beta1, Kind=MachineSet, ns1/ms1", + }, + }, + "infrastructure.cluster.x-k8s.io/v1beta1, Kind=GenericInfrastructureMachine, ns1/m1": { + owners: []string{ + "cluster.x-k8s.io/v1beta1, Kind=Machine, ns1/m1", + }, + }, + "bootstrap.cluster.x-k8s.io/v1beta1, Kind=GenericBootstrapConfig, ns1/m1": { + owners: []string{ + "cluster.x-k8s.io/v1beta1, Kind=Machine, ns1/m1", + }, + }, + "/v1, Kind=Secret, ns1/m1": { + owners: []string{ + "bootstrap.cluster.x-k8s.io/v1beta1, Kind=GenericBootstrapConfig, ns1/m1", + }, + }, + }, + }, + }, { name: "Cluster with Control Plane", args: objectGraphTestArgs{ diff --git a/cmd/clusterctl/client/tree/discovery.go b/cmd/clusterctl/client/tree/discovery.go index e8ea53cdc462..ab9f9e2feb82 100644 --- a/cmd/clusterctl/client/tree/discovery.go +++ b/cmd/clusterctl/client/tree/discovery.go @@ -241,8 +241,11 @@ func addMachineDeploymentToObjectTree(ctx context.Context, c client.Client, clus templateParent = md } - bootstrapTemplateRefObject := ObjectReferenceObject(md.Spec.Template.Spec.Bootstrap.ConfigRef) - tree.Add(templateParent, bootstrapTemplateRefObject, ObjectMetaName("BootstrapConfigTemplate")) + // md.Spec.Template.Spec.Bootstrap.ConfigRef is optional + if md.Spec.Template.Spec.Bootstrap.ConfigRef != nil { + bootstrapTemplateRefObject := ObjectReferenceObject(md.Spec.Template.Spec.Bootstrap.ConfigRef) + tree.Add(templateParent, bootstrapTemplateRefObject, ObjectMetaName("BootstrapConfigTemplate")) + } machineTemplateRefObject := ObjectReferenceObject(&md.Spec.Template.Spec.InfrastructureRef) tree.Add(templateParent, machineTemplateRefObject, ObjectMetaName("MachineInfrastructureTemplate")) diff --git a/cmd/clusterctl/client/tree/discovery_test.go b/cmd/clusterctl/client/tree/discovery_test.go index 7509aec38f65..1542681a5c28 100644 --- a/cmd/clusterctl/client/tree/discovery_test.go +++ b/cmd/clusterctl/client/tree/discovery_test.go @@ -397,6 +397,17 @@ func Test_Discovery(t *testing.T) { test.NewFakeInfrastructureTemplate("md1"), ), ). + WithMachineDeployments( + test.NewFakeMachineDeployment("md2"). + WithStaticBootstrapConfig(). + WithMachineSets( + test.NewFakeMachineSet("ms2"). + WithMachines( + test.NewFakeMachine("m3"), + test.NewFakeMachine("m4"), + ), + ), + ). 
Objs(), }, wantTree: map[string][]string{ @@ -418,6 +429,7 @@ func Test_Discovery(t *testing.T) { // Workers should have a machine deployment "virtual.cluster.x-k8s.io/v1beta1, Kind=WorkerGroup, ns1/Workers": { "cluster.x-k8s.io/v1beta1, Kind=MachineDeployment, ns1/md1", + "cluster.x-k8s.io/v1beta1, Kind=MachineDeployment, ns1/md2", }, // Machine deployment should have a group of machines (grouping) and templates group "cluster.x-k8s.io/v1beta1, Kind=MachineDeployment, ns1/md1": { @@ -433,6 +445,17 @@ func Test_Discovery(t *testing.T) { "infrastructure.cluster.x-k8s.io/v1beta1, Kind=GenericInfrastructureMachineTemplate, ns1/md1": {}, // MachineDeployment BootstrapConfigRef should be a leaf "bootstrap.cluster.x-k8s.io/v1beta1, Kind=GenericBootstrapConfigTemplate, ns1/md1": {}, + // Machine deployment should have a group of machines (grouping) and templates group + "cluster.x-k8s.io/v1beta1, Kind=MachineDeployment, ns1/md2": { + "virtual.cluster.x-k8s.io/v1beta1, Kind=MachineGroup, ns1/zzz_", + "virtual.cluster.x-k8s.io/v1beta1, Kind=TemplateGroup, ns1/md2", + }, + // MachineDeployment TemplateGroup using static bootstrap will only have InfrastructureRef + "virtual.cluster.x-k8s.io/v1beta1, Kind=TemplateGroup, ns1/md2": { + "infrastructure.cluster.x-k8s.io/v1beta1, Kind=GenericInfrastructureMachineTemplate, ns1/md2", + }, + // MachineDeployment InfrastructureRef should be a leaf + "infrastructure.cluster.x-k8s.io/v1beta1, Kind=GenericInfrastructureMachineTemplate, ns1/md2": {}, // ControlPlane TemplateGroup should have a InfrastructureRef "virtual.cluster.x-k8s.io/v1beta1, Kind=TemplateGroup, ns1/cp": { "infrastructure.cluster.x-k8s.io/v1beta1, Kind=GenericInfrastructureMachineTemplate, ns1/cp", diff --git a/cmd/clusterctl/internal/test/fake_objects.go b/cmd/clusterctl/internal/test/fake_objects.go index 71c9661bff3f..55c382b4b223 100644 --- a/cmd/clusterctl/internal/test/fake_objects.go +++ b/cmd/clusterctl/internal/test/fake_objects.go @@ -27,6 +27,7 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/types" "k8s.io/klog/v2" + "k8s.io/utils/pointer" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -409,7 +410,8 @@ func (f *FakeControlPlane) Objs(cluster *clusterv1.Cluster) []client.Object { } type FakeMachinePool struct { - name string + name string + bootstrapConfig *clusterv1.Bootstrap } // NewFakeMachinePool return a FakeMachinePool that can generate a MachinePool object, all its own ancillary objects: @@ -421,6 +423,11 @@ func NewFakeMachinePool(name string) *FakeMachinePool { } } +func (f *FakeMachinePool) WithStaticBootstrapConfig() *FakeMachinePool { + f.bootstrapConfig = NewStaticBootstrapConfig(f.name) + return f +} + func (f *FakeMachinePool) Objs(cluster *clusterv1.Cluster) []client.Object { machinePoolInfrastructure := &fakeinfrastructure.GenericInfrastructureMachineTemplate{ TypeMeta: metav1.TypeMeta{ @@ -462,6 +469,11 @@ func (f *FakeMachinePool) Objs(cluster *clusterv1.Cluster) []client.Object { }, } + bootstrapConfig := f.bootstrapConfig + if bootstrapConfig == nil { + bootstrapConfig = NewBootstrapConfigTemplate(machinePoolBootstrap) + } + machinePool := &expv1.MachinePool{ TypeMeta: metav1.TypeMeta{ Kind: "MachinePool", @@ -491,14 +503,7 @@ func (f *FakeMachinePool) Objs(cluster *clusterv1.Cluster) []client.Object { Name: machinePoolInfrastructure.Name, Namespace: machinePoolInfrastructure.Namespace, }, - Bootstrap: clusterv1.Bootstrap{ - ConfigRef: &corev1.ObjectReference{ - 
APIVersion: machinePoolBootstrap.APIVersion, - Kind: machinePoolBootstrap.Kind, - Name: machinePoolBootstrap.Name, - Namespace: machinePoolBootstrap.Namespace, - }, - }, + Bootstrap: *bootstrapConfig, }, }, ClusterName: cluster.Name, @@ -511,7 +516,11 @@ func (f *FakeMachinePool) Objs(cluster *clusterv1.Cluster) []client.Object { objs := []client.Object{ machinePool, machinePoolInfrastructure, - machinePoolBootstrap, + } + + // if the bootstrapConfig doesn't use a static secret, add the GenericBootstrapConfigTemplate to the object list + if bootstrapConfig.ConfigRef != nil { + objs = append(objs, machinePoolBootstrap) } return objs @@ -531,10 +540,42 @@ func NewFakeInfrastructureTemplate(name string) *fakeinfrastructure.GenericInfra } } +// NewStaticBootstrapConfig return a clusterv1.Bootstrap where +// - the ConfigRef is nil +// - the DataSecretName contains the name of the static data secret. +func NewStaticBootstrapConfig(name string) *clusterv1.Bootstrap { + return &clusterv1.Bootstrap{ + DataSecretName: pointer.String(name + "-bootstrap-secret"), + } +} + +func NewBootstrapConfigTemplate(machineBootstrapTemplate *fakebootstrap.GenericBootstrapConfigTemplate) *clusterv1.Bootstrap { + return &clusterv1.Bootstrap{ + ConfigRef: &corev1.ObjectReference{ + APIVersion: machineBootstrapTemplate.APIVersion, + Kind: machineBootstrapTemplate.Kind, + Name: machineBootstrapTemplate.Name, + Namespace: machineBootstrapTemplate.Namespace, + }, + } +} + +func NewBootstrapConfig(machineBootstrap *fakebootstrap.GenericBootstrapConfig) *clusterv1.Bootstrap { + return &clusterv1.Bootstrap{ + ConfigRef: &corev1.ObjectReference{ + APIVersion: machineBootstrap.APIVersion, + Kind: machineBootstrap.Kind, + Name: machineBootstrap.Name, + Namespace: machineBootstrap.Namespace, + }, + } +} + type FakeMachineDeployment struct { name string machineSets []*FakeMachineSet sharedInfrastructureTemplate *fakeinfrastructure.GenericInfrastructureMachineTemplate + bootstrapConfig *clusterv1.Bootstrap } // NewFakeMachineDeployment return a FakeMachineDeployment that can generate a MachineDeployment object, all its own ancillary objects: @@ -552,6 +593,11 @@ func (f *FakeMachineDeployment) WithMachineSets(fakeMachineSet ...*FakeMachineSe return f } +func (f *FakeMachineDeployment) WithStaticBootstrapConfig() *FakeMachineDeployment { + f.bootstrapConfig = NewStaticBootstrapConfig(f.name) + return f +} + func (f *FakeMachineDeployment) WithInfrastructureTemplate(infrastructureTemplate *fakeinfrastructure.GenericInfrastructureMachineTemplate) *FakeMachineDeployment { f.sharedInfrastructureTemplate = infrastructureTemplate return f @@ -594,6 +640,11 @@ func (f *FakeMachineDeployment) Objs(cluster *clusterv1.Cluster) []client.Object }, } + bootstrapConfig := f.bootstrapConfig + if bootstrapConfig == nil { + bootstrapConfig = NewBootstrapConfigTemplate(machineDeploymentBootstrap) + } + machineDeployment := &clusterv1.MachineDeployment{ TypeMeta: metav1.TypeMeta{ Kind: "MachineDeployment", @@ -623,14 +674,7 @@ func (f *FakeMachineDeployment) Objs(cluster *clusterv1.Cluster) []client.Object Name: machineDeploymentInfrastructure.Name, Namespace: machineDeploymentInfrastructure.Namespace, }, - Bootstrap: clusterv1.Bootstrap{ - ConfigRef: &corev1.ObjectReference{ - APIVersion: machineDeploymentBootstrap.APIVersion, - Kind: machineDeploymentBootstrap.Kind, - Name: machineDeploymentBootstrap.Name, - Namespace: machineDeploymentBootstrap.Namespace, - }, - }, + Bootstrap: *bootstrapConfig, }, }, ClusterName: cluster.Name, @@ -642,8 +686,13 
@@ func (f *FakeMachineDeployment) Objs(cluster *clusterv1.Cluster) []client.Object objs := []client.Object{ machineDeployment, - machineDeploymentBootstrap, } + + // if the bootstrapConfig doesn't use a static secret, add the GenericBootstrapConfigTemplate to the object list + if bootstrapConfig.ConfigRef != nil { + objs = append(objs, machineDeploymentBootstrap) + } + // if the infra template is specific to the machine deployment, add it to the object list if f.sharedInfrastructureTemplate == nil { objs = append(objs, machineDeploymentInfrastructure) @@ -661,6 +710,7 @@ type FakeMachineSet struct { name string machines []*FakeMachine sharedInfrastructureTemplate *fakeinfrastructure.GenericInfrastructureMachineTemplate + bootstrapConfig *clusterv1.Bootstrap } // NewFakeMachineSet return a FakeMachineSet that can generate a MachineSet object, all its own ancillary objects: @@ -678,6 +728,11 @@ func (f *FakeMachineSet) WithMachines(fakeMachine ...*FakeMachine) *FakeMachineS return f } +func (f *FakeMachineSet) WithStaticBootstrapConfig() *FakeMachineSet { + f.bootstrapConfig = NewStaticBootstrapConfig(f.name) + return f +} + func (f *FakeMachineSet) WithInfrastructureTemplate(infrastructureTemplate *fakeinfrastructure.GenericInfrastructureMachineTemplate) *FakeMachineSet { f.sharedInfrastructureTemplate = infrastructureTemplate return f @@ -750,6 +805,8 @@ func (f *FakeMachineSet) Objs(cluster *clusterv1.Cluster, machineDeployment *clu Namespace: machineSetInfrastructure.Namespace, } + objs = append(objs, machineSet) + machineSetBootstrap := &fakebootstrap.GenericBootstrapConfigTemplate{ TypeMeta: metav1.TypeMeta{ APIVersion: fakebootstrap.GroupVersion.String(), @@ -770,16 +827,18 @@ func (f *FakeMachineSet) Objs(cluster *clusterv1.Cluster, machineDeployment *clu }, } - machineSet.Spec.Template.Spec.Bootstrap = clusterv1.Bootstrap{ - ConfigRef: &corev1.ObjectReference{ - APIVersion: machineSetBootstrap.APIVersion, - Kind: machineSetBootstrap.Kind, - Name: machineSetBootstrap.Name, - Namespace: machineSetBootstrap.Namespace, - }, + bootstrapConfig := f.bootstrapConfig + if bootstrapConfig == nil { + bootstrapConfig = NewBootstrapConfigTemplate(machineSetBootstrap) + } + + machineSet.Spec.Template.Spec.Bootstrap = *bootstrapConfig + + // if the bootstrapConfig doesn't use a static secret, add the GenericBootstrapConfigTemplate to the object list + if bootstrapConfig.ConfigRef != nil { + objs = append(objs, machineSetBootstrap) } - objs = append(objs, machineSet, machineSetBootstrap) // if the infra template is specific to the machine set, add it to the object list if f.sharedInfrastructureTemplate == nil { objs = append(objs, machineSetInfrastructure) @@ -795,7 +854,8 @@ func (f *FakeMachineSet) Objs(cluster *clusterv1.Cluster, machineDeployment *clu } type FakeMachine struct { - name string + name string + bootstrapConfig *clusterv1.Bootstrap } // NewFakeMachine return a FakeMachine that can generate a Machine object, all its own ancillary objects: @@ -808,6 +868,11 @@ func NewFakeMachine(name string) *FakeMachine { } } +func (f *FakeMachine) WithStaticBootstrapConfig() *FakeMachine { + f.bootstrapConfig = NewStaticBootstrapConfig(f.name) + return f +} + func (f *FakeMachine) Objs(cluster *clusterv1.Cluster, generateCerts bool, machineSet *clusterv1.MachineSet, controlPlane *fakecontrolplane.GenericControlPlane) []client.Object { machineInfrastructure := &fakeinfrastructure.GenericInfrastructureMachine{ TypeMeta: metav1.TypeMeta{ @@ -840,6 +905,12 @@ func (f *FakeMachine) Objs(cluster 
*clusterv1.Cluster, generateCerts bool, machi }, } + bootstrapConfig := f.bootstrapConfig + if bootstrapConfig == nil { + bootstrapConfig = NewBootstrapConfig(machineBootstrap) + bootstrapConfig.DataSecretName = &bootstrapDataSecretName + } + // Ensure the machineBootstrap gets a UID to be used by dependant objects for creating OwnerReferences. setUID(machineBootstrap) @@ -880,19 +951,12 @@ func (f *FakeMachine) Objs(cluster *clusterv1.Cluster, generateCerts bool, machi Name: machineInfrastructure.Name, Namespace: cluster.Namespace, }, - Bootstrap: clusterv1.Bootstrap{ - ConfigRef: &corev1.ObjectReference{ - APIVersion: machineBootstrap.APIVersion, - Kind: machineBootstrap.Kind, - Name: machineBootstrap.Name, - Namespace: cluster.Namespace, - }, - DataSecretName: &bootstrapDataSecretName, - }, ClusterName: cluster.Name, }, } + machine.Spec.Bootstrap = *bootstrapConfig + // Ensure the machine gets a UID to be used by dependant objects for creating OwnerReferences. setUID(machine) @@ -948,23 +1012,25 @@ func (f *FakeMachine) Objs(cluster *clusterv1.Cluster, generateCerts bool, machi clusterv1.ClusterNameLabel: machine.Spec.ClusterName, }) - machineBootstrap.SetOwnerReferences([]metav1.OwnerReference{ - { - APIVersion: machine.APIVersion, - Kind: machine.Kind, - Name: machine.Name, - UID: machine.UID, - }, - }) - machineBootstrap.SetLabels(map[string]string{ - clusterv1.ClusterNameLabel: machine.Spec.ClusterName, - }) - objs := []client.Object{ machine, machineInfrastructure, - machineBootstrap, - bootstrapDataSecret, + } + + if machine.Spec.Bootstrap.ConfigRef != nil { + machineBootstrap.SetOwnerReferences([]metav1.OwnerReference{ + { + APIVersion: machine.APIVersion, + Kind: machine.Kind, + Name: machine.Name, + UID: machine.UID, + }, + }) + machineBootstrap.SetLabels(map[string]string{ + clusterv1.ClusterNameLabel: machine.Spec.ClusterName, + }) + + objs = append(objs, bootstrapDataSecret, machineBootstrap) } objs = append(objs, additionalObjs...) 
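A minimal usage sketch of the new builder option, assuming only the fake-object helpers introduced in this patch (the objs variable is illustrative, not part of the change):

	// Build a Cluster whose MachineDeployment uses a static bootstrap config:
	// Bootstrap.DataSecretName is set and Bootstrap.ConfigRef stays nil, so no
	// GenericBootstrapConfigTemplate is appended to the returned object list.
	objs := test.NewFakeCluster("ns1", "cluster1").
		WithMachineDeployments(
			test.NewFakeMachineDeployment("md1").
				WithStaticBootstrapConfig().
				WithMachineSets(
					test.NewFakeMachineSet("ms1").
						WithMachines(test.NewFakeMachine("m1")),
				),
		).Objs()

This mirrors the objectgraph and discovery test cases above, which assert that consumers of these objects tolerate a missing BootstrapConfigRef.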
From 051b00606dfacd61fbff687155e485040c1b0461 Mon Sep 17 00:00:00 2001
From: Stefan Bueringer
Date: Thu, 25 May 2023 08:17:28 +0200
Subject: [PATCH 12/94] Use ClusterCacheTracker consistently (instead of
 NewClusterClient)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stefan Büringer buringerst@vmware.com
---
 bootstrap/kubeadm/controllers/alias.go        |  4 +
 .../controllers/kubeadmconfig_controller.go   | 37 +++++----
 .../kubeadmconfig_controller_test.go          | 75 ++++++++++---------
 bootstrap/kubeadm/main.go                     | 36 ++++++++-
 controllers/remote/cluster_cache_tracker.go   | 17 ++++-
 controlplane/kubeadm/main.go                  | 12 +--
 .../controllers/machinepool_controller.go     | 20 ++++-
 .../machinepool_controller_noderef.go         |  3 +-
 .../machinepool_controller_phases_test.go     | 31 ++++++--
 .../machineset/machineset_controller.go       |  1 -
 main.go                                       | 14 ++--
 test/infrastructure/docker/main.go            | 12 +--
 12 files changed, 179 insertions(+), 83 deletions(-)

diff --git a/bootstrap/kubeadm/controllers/alias.go b/bootstrap/kubeadm/controllers/alias.go
index 7f8b99b136ca..199c1a655957 100644
--- a/bootstrap/kubeadm/controllers/alias.go
+++ b/bootstrap/kubeadm/controllers/alias.go
@@ -25,6 +25,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/controller"
 
 	kubeadmbootstrapcontrollers "sigs.k8s.io/cluster-api/bootstrap/kubeadm/internal/controllers"
+	"sigs.k8s.io/cluster-api/controllers/remote"
 )
 
 // Following types provides access to reconcilers implemented in internal/controllers, thus
@@ -39,6 +40,8 @@ const (
 type KubeadmConfigReconciler struct {
 	Client client.Client
 
+	Tracker *remote.ClusterCacheTracker
+
 	// WatchFilterValue is the label value used to filter events prior to reconciliation.
 	WatchFilterValue string
 
@@ -50,6 +53,7 @@ func (r *KubeadmConfigReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
 	return (&kubeadmbootstrapcontrollers.KubeadmConfigReconciler{
 		Client:           r.Client,
+		Tracker:          r.Tracker,
 		WatchFilterValue: r.WatchFilterValue,
 		TokenTTL:         r.TokenTTL,
 	}).SetupWithManager(ctx, mgr, options)
diff --git a/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go b/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go
index 45fe463f5d5b..be97c4d71910 100644
--- a/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go
+++ b/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go
@@ -59,11 +59,6 @@ import (
 	"sigs.k8s.io/cluster-api/util/secret"
 )
 
-const (
-	// KubeadmConfigControllerName defines the controller used when creating clients.
-	KubeadmConfigControllerName = "kubeadmconfig-controller"
-)
-
 const (
 	// DefaultTokenTTL is the default TTL used for tokens.
 	DefaultTokenTTL = 15 * time.Minute
@@ -82,6 +77,7 @@ type InitLocker interface {
 // KubeadmConfigReconciler reconciles a KubeadmConfig object.
 type KubeadmConfigReconciler struct {
 	Client          client.Client
+	Tracker         *remote.ClusterCacheTracker
 	KubeadmInitLock InitLocker
 
 	// WatchFilterValue is the label value used to filter events prior to reconciliation.
@@ -89,8 +85,6 @@ type KubeadmConfigReconciler struct {
 
 	// TokenTTL is the amount of time a bootstrap token (and therefore a KubeadmConfig) will be valid.
 	TokenTTL time.Duration
-
-	remoteClientGetter remote.ClusterClientGetter
 }
 
 // Scope is a scoped struct used during reconciliation.
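The hunks below split Reconcile into a thin exported wrapper plus an unexported reconcile, so that remote.ErrClusterLocked (returned while another worker holds the ClusterCacheTracker lock for the same cluster) becomes a requeue instead of a surfaced error. A minimal sketch of the consumer side, assuming only the Tracker field and APIs shown in this patch; the helper name listWorkloadNodes is hypothetical:

	// listWorkloadNodes illustrates the tracker-based replacement for
	// remote.NewClusterClient: remote clients are cached per cluster and
	// shared across controllers instead of being rebuilt on every reconcile.
	func (r *KubeadmConfigReconciler) listWorkloadNodes(ctx context.Context, cluster *clusterv1.Cluster) (*corev1.NodeList, error) {
		remoteClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster))
		if err != nil {
			// The error may be remote.ErrClusterLocked; the Reconcile wrapper
			// below translates that into ctrl.Result{Requeue: true}.
			return nil, err
		}
		nodes := &corev1.NodeList{}
		if err := remoteClient.List(ctx, nodes); err != nil {
			return nil, err
		}
		return nodes, nil
	}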
@@ -106,9 +100,6 @@ func (r *KubeadmConfigReconciler) SetupWithManager(ctx context.Context, mgr ctrl if r.KubeadmInitLock == nil { r.KubeadmInitLock = locking.NewControlPlaneInitMutex(mgr.GetClient()) } - if r.remoteClientGetter == nil { - r.remoteClientGetter = remote.NewClusterClient - } if r.TokenTTL == 0 { r.TokenTTL = DefaultTokenTTL } @@ -239,6 +230,25 @@ func (r *KubeadmConfigReconciler) Reconcile(ctx context.Context, req ctrl.Reques } } }() + + // Ignore deleted KubeadmConfigs. + if !config.DeletionTimestamp.IsZero() { + return ctrl.Result{}, nil + } + + res, err := r.reconcile(ctx, scope, cluster, config, configOwner) + if err != nil && errors.Is(err, remote.ErrClusterLocked) { + // Requeue if the reconcile failed because the ClusterCacheTracker was locked for + // the current cluster because of concurrent access. + log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker") + return ctrl.Result{Requeue: true}, nil + } + return res, err +} + +func (r *KubeadmConfigReconciler) reconcile(ctx context.Context, scope *Scope, cluster *clusterv1.Cluster, config *bootstrapv1.KubeadmConfig, configOwner *bsutil.ConfigOwner) (ctrl.Result, error) { + log := ctrl.LoggerFrom(ctx) + // Ensure the bootstrap secret associated with this KubeadmConfig has the correct ownerReference. if err := r.ensureBootstrapSecretOwnersRef(ctx, scope); err != nil { return ctrl.Result{}, err @@ -305,9 +315,8 @@ func (r *KubeadmConfigReconciler) refreshBootstrapToken(ctx context.Context, con log := ctrl.LoggerFrom(ctx) token := config.Spec.JoinConfiguration.Discovery.BootstrapToken.Token - remoteClient, err := r.remoteClientGetter(ctx, KubeadmConfigControllerName, r.Client, util.ObjectKey(cluster)) + remoteClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster)) if err != nil { - log.Error(err, "Error creating remote cluster client") return ctrl.Result{}, err } @@ -323,7 +332,7 @@ func (r *KubeadmConfigReconciler) refreshBootstrapToken(ctx context.Context, con func (r *KubeadmConfigReconciler) rotateMachinePoolBootstrapToken(ctx context.Context, config *bootstrapv1.KubeadmConfig, cluster *clusterv1.Cluster, scope *Scope) (ctrl.Result, error) { log := ctrl.LoggerFrom(ctx) log.V(2).Info("Config is owned by a MachinePool, checking if token should be rotated") - remoteClient, err := r.remoteClientGetter(ctx, KubeadmConfigControllerName, r.Client, util.ObjectKey(cluster)) + remoteClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster)) if err != nil { return ctrl.Result{}, err } @@ -928,7 +937,7 @@ func (r *KubeadmConfigReconciler) reconcileDiscovery(ctx context.Context, cluste // if BootstrapToken already contains a token, respect it; otherwise create a new bootstrap token for the node to join if config.Spec.JoinConfiguration.Discovery.BootstrapToken.Token == "" { - remoteClient, err := r.remoteClientGetter(ctx, KubeadmConfigControllerName, r.Client, util.ObjectKey(cluster)) + remoteClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster)) if err != nil { return ctrl.Result{}, err } diff --git a/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller_test.go b/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller_test.go index 2aabce32c477..77deb624f067 100644 --- a/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller_test.go +++ b/bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller_test.go @@ -25,6 +25,7 @@ import ( "time" ignition "github.com/flatcar/ignition/config/v2_3" + "github.com/go-logr/logr" . 
"github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -34,13 +35,14 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/yaml" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" bootstrapbuilder "sigs.k8s.io/cluster-api/bootstrap/kubeadm/internal/builder" - fakeremote "sigs.k8s.io/cluster-api/controllers/remote/fake" + "sigs.k8s.io/cluster-api/controllers/remote" expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/feature" "sigs.k8s.io/cluster-api/internal/test/builder" @@ -495,9 +497,9 @@ func TestKubeadmConfigReconciler_Reconcile_GenerateCloudConfigData(t *testing.T) myclient := fake.NewClientBuilder().WithObjects(objects...).WithStatusSubresource(&bootstrapv1.KubeadmConfig{}).Build() k := &KubeadmConfigReconciler{ - Client: myclient, - KubeadmInitLock: &myInitLocker{}, - remoteClientGetter: fakeremote.NewClusterClient, + Client: myclient, + Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}), + KubeadmInitLock: &myInitLocker{}, } request := ctrl.Request{ @@ -556,9 +558,9 @@ func TestKubeadmConfigReconciler_Reconcile_ErrorIfJoiningControlPlaneHasInvalidC myclient := fake.NewClientBuilder().WithObjects(objects...).WithStatusSubresource(&bootstrapv1.KubeadmConfig{}).Build() k := &KubeadmConfigReconciler{ - Client: myclient, - KubeadmInitLock: &myInitLocker{}, - remoteClientGetter: fakeremote.NewClusterClient, + Client: myclient, + Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}), + KubeadmInitLock: &myInitLocker{}, } request := ctrl.Request{ @@ -677,9 +679,9 @@ func TestReconcileIfJoinCertificatesAvailableConditioninNodesAndControlPlaneIsRe objects = append(objects, createSecrets(t, cluster, config)...) myclient := fake.NewClientBuilder().WithObjects(objects...).WithStatusSubresource(&bootstrapv1.KubeadmConfig{}).Build() k := &KubeadmConfigReconciler{ - Client: myclient, - KubeadmInitLock: &myInitLocker{}, - remoteClientGetter: fakeremote.NewClusterClient, + Client: myclient, + Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}), + KubeadmInitLock: &myInitLocker{}, } request := ctrl.Request{ @@ -754,9 +756,9 @@ func TestReconcileIfJoinNodePoolsAndControlPlaneIsReady(t *testing.T) { objects = append(objects, createSecrets(t, cluster, config)...) 
myclient := fake.NewClientBuilder().WithObjects(objects...).WithStatusSubresource(&bootstrapv1.KubeadmConfig{}).Build() k := &KubeadmConfigReconciler{ - Client: myclient, - KubeadmInitLock: &myInitLocker{}, - remoteClientGetter: fakeremote.NewClusterClient, + Client: myclient, + Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}), + KubeadmInitLock: &myInitLocker{}, } request := ctrl.Request{ @@ -854,9 +856,9 @@ func TestBootstrapDataFormat(t *testing.T) { myclient := fake.NewClientBuilder().WithObjects(objects...).WithStatusSubresource(&bootstrapv1.KubeadmConfig{}).Build() k := &KubeadmConfigReconciler{ - Client: myclient, - KubeadmInitLock: &myInitLocker{}, - remoteClientGetter: fakeremote.NewClusterClient, + Client: myclient, + Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}), + KubeadmInitLock: &myInitLocker{}, } request := ctrl.Request{ NamespacedName: client.ObjectKey{ @@ -934,9 +936,9 @@ func TestKubeadmConfigSecretCreatedStatusNotPatched(t *testing.T) { objects = append(objects, createSecrets(t, cluster, initConfig)...) myclient := fake.NewClientBuilder().WithObjects(objects...).WithStatusSubresource(&bootstrapv1.KubeadmConfig{}).Build() k := &KubeadmConfigReconciler{ - Client: myclient, - KubeadmInitLock: &myInitLocker{}, - remoteClientGetter: fakeremote.NewClusterClient, + Client: myclient, + Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}), + KubeadmInitLock: &myInitLocker{}, } request := ctrl.Request{ NamespacedName: client.ObjectKey{ @@ -1011,10 +1013,10 @@ func TestBootstrapTokenTTLExtension(t *testing.T) { objects = append(objects, createSecrets(t, cluster, initConfig)...) myclient := fake.NewClientBuilder().WithObjects(objects...).WithStatusSubresource(&bootstrapv1.KubeadmConfig{}, &clusterv1.Machine{}).Build() k := &KubeadmConfigReconciler{ - Client: myclient, - KubeadmInitLock: &myInitLocker{}, - TokenTTL: DefaultTokenTTL, - remoteClientGetter: fakeremote.NewClusterClient, + Client: myclient, + Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}), + KubeadmInitLock: &myInitLocker{}, + TokenTTL: DefaultTokenTTL, } request := ctrl.Request{ NamespacedName: client.ObjectKey{ @@ -1212,10 +1214,10 @@ func TestBootstrapTokenRotationMachinePool(t *testing.T) { objects = append(objects, createSecrets(t, cluster, initConfig)...) myclient := fake.NewClientBuilder().WithObjects(objects...).WithStatusSubresource(&bootstrapv1.KubeadmConfig{}, &expv1.MachinePool{}).Build() k := &KubeadmConfigReconciler{ - Client: myclient, - KubeadmInitLock: &myInitLocker{}, - TokenTTL: DefaultTokenTTL, - remoteClientGetter: fakeremote.NewClusterClient, + Client: myclient, + Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}), + KubeadmInitLock: &myInitLocker{}, + TokenTTL: DefaultTokenTTL, } request := ctrl.Request{ NamespacedName: client.ObjectKey{ @@ -1368,12 +1370,6 @@ func TestBootstrapTokenRotationMachinePool(t *testing.T) { // Ensure the discovery portion of the JoinConfiguration gets generated correctly. 
func TestKubeadmConfigReconciler_Reconcile_DiscoveryReconcileBehaviors(t *testing.T) { - k := &KubeadmConfigReconciler{ - Client: fake.NewClientBuilder().Build(), - KubeadmInitLock: &myInitLocker{}, - remoteClientGetter: fakeremote.NewClusterClient, - } - caHash := []string{"...."} bootstrapToken := bootstrapv1.Discovery{ BootstrapToken: &bootstrapv1.BootstrapTokenDiscovery{ @@ -1499,6 +1495,13 @@ func TestKubeadmConfigReconciler_Reconcile_DiscoveryReconcileBehaviors(t *testin t.Run(tc.name, func(t *testing.T) { g := NewWithT(t) + fakeClient := fake.NewClientBuilder().Build() + k := &KubeadmConfigReconciler{ + Client: fakeClient, + Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), fakeClient, fakeClient.Scheme(), client.ObjectKey{Name: tc.cluster.Name, Namespace: tc.cluster.Namespace}), + KubeadmInitLock: &myInitLocker{}, + } + res, err := k.reconcileDiscovery(ctx, tc.cluster, tc.config, secret.Certificates{}) g.Expect(res.IsZero()).To(BeTrue()) g.Expect(err).NotTo(HaveOccurred()) @@ -1710,9 +1713,9 @@ func TestKubeadmConfigReconciler_Reconcile_AlwaysCheckCAVerificationUnlessReques myclient := fake.NewClientBuilder().WithObjects(objects...).Build() reconciler := KubeadmConfigReconciler{ - Client: myclient, - KubeadmInitLock: &myInitLocker{}, - remoteClientGetter: fakeremote.NewClusterClient, + Client: myclient, + Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), myclient, myclient.Scheme(), client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}), + KubeadmInitLock: &myInitLocker{}, } wc := newWorkerJoinKubeadmConfig(metav1.NamespaceDefault, "worker-join-cfg") diff --git a/bootstrap/kubeadm/main.go b/bootstrap/kubeadm/main.go index 699f166b53e6..d7703cc58ccd 100644 --- a/bootstrap/kubeadm/main.go +++ b/bootstrap/kubeadm/main.go @@ -55,8 +55,9 @@ import ( ) var ( - scheme = runtime.NewScheme() - setupLog = ctrl.Log.WithName("setup") + scheme = runtime.NewScheme() + setupLog = ctrl.Log.WithName("setup") + controllerName = "cluster-api-kubeadm-bootstrap-manager" ) func init() { @@ -80,6 +81,7 @@ var ( watchFilterValue string watchNamespace string profilerAddress string + clusterConcurrency int kubeadmConfigConcurrency int syncPeriod time.Duration restConfigQPS float32 @@ -117,6 +119,9 @@ func InitFlags(fs *pflag.FlagSet) { fs.StringVar(&profilerAddress, "profiler-address", "", "Bind address to expose the pprof profiler (e.g. 
localhost:6060)") + fs.IntVar(&clusterConcurrency, "cluster-concurrency", 10, + "Number of clusters to process simultaneously") + fs.IntVar(&kubeadmConfigConcurrency, "kubeadmconfig-concurrency", 10, "Number of kubeadm configs to process simultaneously") @@ -166,7 +171,7 @@ func main() { restConfig := ctrl.GetConfigOrDie() restConfig.QPS = restConfigQPS restConfig.Burst = restConfigBurst - restConfig.UserAgent = remote.DefaultClusterAPIUserAgent("cluster-api-kubeadm-bootstrap-manager") + restConfig.UserAgent = remote.DefaultClusterAPIUserAgent(controllerName) tlsOptionOverrides, err := flags.GetTLSOptionOverrideFuncs(tlsOptions) if err != nil { @@ -245,8 +250,33 @@ func setupChecks(mgr ctrl.Manager) { } func setupReconcilers(ctx context.Context, mgr ctrl.Manager) { + // Set up a ClusterCacheTracker and ClusterCacheReconciler to provide to controllers + // requiring a connection to a remote cluster + log := ctrl.Log.WithName("remote").WithName("ClusterCacheTracker") + tracker, err := remote.NewClusterCacheTracker( + mgr, + remote.ClusterCacheTrackerOptions{ + ControllerName: controllerName, + Log: &log, + Indexes: remote.DefaultIndexes, + }, + ) + if err != nil { + setupLog.Error(err, "unable to create cluster cache tracker") + os.Exit(1) + } + if err := (&remote.ClusterCacheReconciler{ + Client: mgr.GetClient(), + Tracker: tracker, + WatchFilterValue: watchFilterValue, + }).SetupWithManager(ctx, mgr, concurrency(clusterConcurrency)); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "ClusterCacheReconciler") + os.Exit(1) + } + if err := (&kubeadmbootstrapcontrollers.KubeadmConfigReconciler{ Client: mgr.GetClient(), + Tracker: tracker, WatchFilterValue: watchFilterValue, TokenTTL: tokenTTL, }).SetupWithManager(ctx, mgr, concurrency(kubeadmConfigConcurrency)); err != nil { diff --git a/controllers/remote/cluster_cache_tracker.go b/controllers/remote/cluster_cache_tracker.go index f506eed2603e..685a89eb9123 100644 --- a/controllers/remote/cluster_cache_tracker.go +++ b/controllers/remote/cluster_cache_tracker.go @@ -81,6 +81,10 @@ type ClusterCacheTracker struct { indexes []Index + // controllerName is the name of the controller. + // This is used to calculate the user agent string. + controllerName string + // controllerPodMetadata is the Pod metadata of the controller using this ClusterCacheTracker. // This is only set when the POD_NAMESPACE, POD_NAME and POD_UID environment variables are set. // This information will be used to detected if the controller is running on a workload cluster, so @@ -100,6 +104,11 @@ type ClusterCacheTrackerOptions struct { // Defaults to never caching ConfigMap and Secret if not set. ClientUncachedObjects []client.Object Indexes []Index + + // ControllerName is the name of the controller. + // This is used to calculate the user agent string. + // If not set, it defaults to "cluster-cache-tracker". 
+ ControllerName string } func setDefaultOptions(opts *ClusterCacheTrackerOptions) { @@ -120,6 +129,11 @@ func setDefaultOptions(opts *ClusterCacheTrackerOptions) { func NewClusterCacheTracker(manager ctrl.Manager, options ClusterCacheTrackerOptions) (*ClusterCacheTracker, error) { setDefaultOptions(&options) + controllerName := options.ControllerName + if controllerName == "" { + controllerName = clusterCacheControllerName + } + var controllerPodMetadata *metav1.ObjectMeta podNamespace := os.Getenv("POD_NAMESPACE") podName := os.Getenv("POD_NAME") @@ -136,6 +150,7 @@ func NewClusterCacheTracker(manager ctrl.Manager, options ClusterCacheTrackerOpt } return &ClusterCacheTracker{ + controllerName: controllerName, controllerPodMetadata: controllerPodMetadata, log: *options.Log, clientUncachedObjects: options.ClientUncachedObjects, @@ -257,7 +272,7 @@ func (t *ClusterCacheTracker) newClusterAccessor(ctx context.Context, cluster cl log := ctrl.LoggerFrom(ctx) // Get a rest config for the remote cluster - config, err := RESTConfig(ctx, clusterCacheControllerName, t.client, cluster) + config, err := RESTConfig(ctx, t.controllerName, t.client, cluster) if err != nil { return nil, errors.Wrapf(err, "error fetching REST client config for remote cluster %q", cluster.String()) } diff --git a/controlplane/kubeadm/main.go b/controlplane/kubeadm/main.go index a0807f625724..af3b01944c1c 100644 --- a/controlplane/kubeadm/main.go +++ b/controlplane/kubeadm/main.go @@ -59,8 +59,9 @@ import ( ) var ( - scheme = runtime.NewScheme() - setupLog = ctrl.Log.WithName("setup") + scheme = runtime.NewScheme() + setupLog = ctrl.Log.WithName("setup") + controllerName = "cluster-api-kubeadm-control-plane-manager" ) func init() { @@ -174,7 +175,7 @@ func main() { restConfig := ctrl.GetConfigOrDie() restConfig.QPS = restConfigQPS restConfig.Burst = restConfigBurst - restConfig.UserAgent = remote.DefaultClusterAPIUserAgent("cluster-api-kubeadm-control-plane-manager") + restConfig.UserAgent = remote.DefaultClusterAPIUserAgent(controllerName) tlsOptionOverrides, err := flags.GetTLSOptionOverrideFuncs(tlsOptions) if err != nil { @@ -257,8 +258,9 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager) { // requiring a connection to a remote cluster log := ctrl.Log.WithName("remote").WithName("ClusterCacheTracker") tracker, err := remote.NewClusterCacheTracker(mgr, remote.ClusterCacheTrackerOptions{ - Log: &log, - Indexes: remote.DefaultIndexes, + ControllerName: controllerName, + Log: &log, + Indexes: remote.DefaultIndexes, ClientUncachedObjects: []client.Object{ &corev1.ConfigMap{}, &corev1.Secret{}, diff --git a/exp/internal/controllers/machinepool_controller.go b/exp/internal/controllers/machinepool_controller.go index d431206cc7a5..3b31542da6eb 100644 --- a/exp/internal/controllers/machinepool_controller.go +++ b/exp/internal/controllers/machinepool_controller.go @@ -189,11 +189,25 @@ func (r *MachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Handle deletion reconciliation loop. if !mp.ObjectMeta.DeletionTimestamp.IsZero() { - return r.reconcileDelete(ctx, cluster, mp) + res, err := r.reconcileDelete(ctx, cluster, mp) + // Requeue if the reconcile failed because the ClusterCacheTracker was locked for + // the current cluster because of concurrent access. + if errors.Is(err, remote.ErrClusterLocked) { + log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker") + return ctrl.Result{Requeue: true}, nil + } + return res, err } // Handle normal reconciliation loop. 
- return r.reconcile(ctx, cluster, mp) + res, err := r.reconcile(ctx, cluster, mp) + // Requeue if the reconcile failed because the ClusterCacheTracker was locked for + // the current cluster because of concurrent access. + if errors.Is(err, remote.ErrClusterLocked) { + log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker") + return ctrl.Result{Requeue: true}, nil + } + return res, err } func (r *MachinePoolReconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, mp *expv1.MachinePool) (ctrl.Result, error) { @@ -249,7 +263,7 @@ func (r *MachinePoolReconciler) reconcileDeleteNodes(ctx context.Context, cluste return nil } - clusterClient, err := remote.NewClusterClient(ctx, MachinePoolControllerName, r.Client, util.ObjectKey(cluster)) + clusterClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster)) if err != nil { return err } diff --git a/exp/internal/controllers/machinepool_controller_noderef.go b/exp/internal/controllers/machinepool_controller_noderef.go index 14c40ce64add..f46d1b77e718 100644 --- a/exp/internal/controllers/machinepool_controller_noderef.go +++ b/exp/internal/controllers/machinepool_controller_noderef.go @@ -27,7 +27,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" - "sigs.k8s.io/cluster-api/controllers/remote" expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/internal/util/taints" "sigs.k8s.io/cluster-api/util" @@ -72,7 +71,7 @@ func (r *MachinePoolReconciler) reconcileNodeRefs(ctx context.Context, cluster * return ctrl.Result{}, nil } - clusterClient, err := remote.NewClusterClient(ctx, MachinePoolControllerName, r.Client, util.ObjectKey(cluster)) + clusterClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster)) if err != nil { return ctrl.Result{}, err } diff --git a/exp/internal/controllers/machinepool_controller_phases_test.go b/exp/internal/controllers/machinepool_controller_phases_test.go index a7236d99d5a8..9ab1290ce417 100644 --- a/exp/internal/controllers/machinepool_controller_phases_test.go +++ b/exp/internal/controllers/machinepool_controller_phases_test.go @@ -20,6 +20,7 @@ import ( "testing" "time" + "github.com/go-logr/logr" . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -31,8 +32,10 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/log" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/controllers/remote" expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/internal/test/builder" "sigs.k8s.io/cluster-api/util/kubeconfig" @@ -222,8 +225,10 @@ func TestReconcileMachinePoolPhases(t *testing.T) { // Set NodeRef. 
machinepool.Status.NodeRefs = []corev1.ObjectReference{{Kind: "Node", Name: "machinepool-test-node"}} + fakeClient := fake.NewClientBuilder().WithObjects(defaultCluster, defaultKubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build() r := &MachinePoolReconciler{ - Client: fake.NewClientBuilder().WithObjects(defaultCluster, defaultKubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build(), + Client: fakeClient, + Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), fakeClient, fakeClient.Scheme(), client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}), } res, err := r.reconcile(ctx, defaultCluster, machinepool) @@ -277,8 +282,10 @@ func TestReconcileMachinePoolPhases(t *testing.T) { // Set NodeRef. machinepool.Status.NodeRefs = []corev1.ObjectReference{{Kind: "Node", Name: "machinepool-test-node"}} + fakeClient := fake.NewClientBuilder().WithObjects(defaultCluster, defaultKubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build() r := &MachinePoolReconciler{ - Client: fake.NewClientBuilder().WithObjects(defaultCluster, defaultKubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build(), + Client: fakeClient, + Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), fakeClient, fakeClient.Scheme(), client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}), } res, err := r.reconcile(ctx, defaultCluster, machinepool) @@ -350,8 +357,10 @@ func TestReconcileMachinePoolPhases(t *testing.T) { // Set NodeRef. 
machinepool.Status.NodeRefs = []corev1.ObjectReference{{Kind: "Node", Name: "machinepool-test-node"}} + fakeClient := fake.NewClientBuilder().WithObjects(defaultCluster, defaultKubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build() r := &MachinePoolReconciler{ - Client: fake.NewClientBuilder().WithObjects(defaultCluster, defaultKubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build(), + Client: fakeClient, + Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), fakeClient, fakeClient.Scheme(), client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}), } res, err := r.reconcile(ctx, defaultCluster, machinepool) @@ -403,8 +412,10 @@ func TestReconcileMachinePoolPhases(t *testing.T) { {Kind: "Node", Name: "machinepool-test-node-3"}, } + fakeClient := fake.NewClientBuilder().WithObjects(defaultCluster, defaultKubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build() r := &MachinePoolReconciler{ - Client: fake.NewClientBuilder().WithObjects(defaultCluster, defaultKubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build(), + Client: fakeClient, + Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), fakeClient, fakeClient.Scheme(), client.ObjectKey{Name: defaultCluster.Name, Namespace: defaultCluster.Namespace}), } res, err := r.reconcile(ctx, defaultCluster, machinepool) @@ -1173,8 +1184,10 @@ func TestReconcileMachinePoolScaleToFromZero(t *testing.T) { err = unstructured.SetNestedField(infraConfig.Object, int64(1), "status", "replicas") g.Expect(err).NotTo(HaveOccurred()) + fakeClient := fake.NewClientBuilder().WithObjects(testCluster, kubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build() r := &MachinePoolReconciler{ - Client: fake.NewClientBuilder().WithObjects(testCluster, kubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build(), + Client: fakeClient, + Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), env.GetClient(), env.GetClient().Scheme(), client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), recorder: record.NewFakeRecorder(32), } @@ -1228,8 +1241,10 @@ func TestReconcileMachinePoolScaleToFromZero(t *testing.T) { err = unstructured.SetNestedField(infraConfig.Object, int64(0), "status", "replicas") g.Expect(err).NotTo(HaveOccurred()) + fakeClient := fake.NewClientBuilder().WithObjects(testCluster, kubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build() r := &MachinePoolReconciler{ - Client: fake.NewClientBuilder().WithObjects(testCluster, kubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build(), + Client: fakeClient, + Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), env.GetClient(), env.GetClient().Scheme(), client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), recorder: record.NewFakeRecorder(32), } @@ -1357,8 +1372,10 @@ func 
TestReconcileMachinePoolScaleToFromZero(t *testing.T) { err = unstructured.SetNestedField(infraConfig.Object, int64(1), "status", "replicas") g.Expect(err).NotTo(HaveOccurred()) + fakeClient := fake.NewClientBuilder().WithObjects(testCluster, kubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build() r := &MachinePoolReconciler{ - Client: fake.NewClientBuilder().WithObjects(testCluster, kubeconfigSecret, machinepool, bootstrapConfig, infraConfig, builder.TestBootstrapConfigCRD, builder.TestInfrastructureMachineTemplateCRD).Build(), + Client: fakeClient, + Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), env.GetClient(), env.GetClient().Scheme(), client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), recorder: record.NewFakeRecorder(32), } diff --git a/internal/controllers/machineset/machineset_controller.go b/internal/controllers/machineset/machineset_controller.go index 4a3e76ae7693..efa6558ac020 100644 --- a/internal/controllers/machineset/machineset_controller.go +++ b/internal/controllers/machineset/machineset_controller.go @@ -185,7 +185,6 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker") return ctrl.Result{Requeue: true}, nil } - log.Error(err, "Failed to reconcile MachineSet") r.recorder.Eventf(machineSet, corev1.EventTypeWarning, "ReconcileError", "%v", err) } return result, err diff --git a/main.go b/main.go index 7f1e4f363329..bb1d6aa595c4 100644 --- a/main.go +++ b/main.go @@ -72,9 +72,10 @@ import ( ) var ( - catalog = runtimecatalog.New() - scheme = runtime.NewScheme() - setupLog = ctrl.Log.WithName("setup") + catalog = runtimecatalog.New() + scheme = runtime.NewScheme() + setupLog = ctrl.Log.WithName("setup") + controllerName = "cluster-api-controller-manager" // flags. metricsBindAddr string @@ -228,7 +229,7 @@ func main() { restConfig := ctrl.GetConfigOrDie() restConfig.QPS = restConfigQPS restConfig.Burst = restConfigBurst - restConfig.UserAgent = remote.DefaultClusterAPIUserAgent("cluster-api-controller-manager") + restConfig.UserAgent = remote.DefaultClusterAPIUserAgent(controllerName) minVer := version.MinimumKubernetesVersion if feature.Gates.Enabled(feature.ClusterTopology) { @@ -331,8 +332,9 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager) { tracker, err := remote.NewClusterCacheTracker( mgr, remote.ClusterCacheTrackerOptions{ - Log: &log, - Indexes: remote.DefaultIndexes, + ControllerName: controllerName, + Log: &log, + Indexes: remote.DefaultIndexes, }, ) if err != nil { diff --git a/test/infrastructure/docker/main.go b/test/infrastructure/docker/main.go index a3c6891072dc..8ee234cc8854 100644 --- a/test/infrastructure/docker/main.go +++ b/test/infrastructure/docker/main.go @@ -60,8 +60,9 @@ import ( ) var ( - scheme = runtime.NewScheme() - setupLog = ctrl.Log.WithName("setup") + scheme = runtime.NewScheme() + setupLog = ctrl.Log.WithName("setup") + controllerName = "cluster-api-docker-controller-manager" // flags. 
metricsBindAddr string @@ -172,7 +173,7 @@ func main() { restConfig := ctrl.GetConfigOrDie() restConfig.QPS = restConfigQPS restConfig.Burst = restConfigBurst - restConfig.UserAgent = remote.DefaultClusterAPIUserAgent("cluster-api-docker-controller-manager") + restConfig.UserAgent = remote.DefaultClusterAPIUserAgent(controllerName) tlsOptionOverrides, err := flags.GetTLSOptionOverrideFuncs(tlsOptions) if err != nil { @@ -262,8 +263,9 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager) { tracker, err := remote.NewClusterCacheTracker( mgr, remote.ClusterCacheTrackerOptions{ - Log: &log, - Indexes: remote.DefaultIndexes, + ControllerName: controllerName, + Log: &log, + Indexes: remote.DefaultIndexes, }, ) if err != nil { From 39b2431b11863456818837fc5416cba75d99ecd7 Mon Sep 17 00:00:00 2001 From: Stefan Bueringer Date: Wed, 7 Jun 2023 16:29:34 +0200 Subject: [PATCH 13/94] hack/observability: Add Grafana state dashboard, improve metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stefan Büringer buringerst@vmware.com --- Makefile | 2 +- .../grafana/dashboards/cluster-api.json | 3860 +++++++++++++++++ .../{scale.json => controller-runtime.json} | 475 +- hack/observability/grafana/kustomization.yaml | 3 +- .../kube-state-metrics/chart/values.yaml | 11 + .../kube-state-metrics/crd-config.yaml | 411 +- .../metrics/clusterclass.yaml | 26 + .../metrics/common_metrics.yaml | 15 + .../metrics/kubeadmconfig.yaml | 30 + .../kube-state-metrics/metrics/machine.yaml | 22 + .../metrics/machinedeployment.yaml | 37 + .../metrics/machinehealthcheck.yaml | 10 + .../metrics/machineset.yaml | 43 +- hack/observability/promtail/values.yaml | 16 +- 14 files changed, 4868 insertions(+), 93 deletions(-) create mode 100644 hack/observability/grafana/dashboards/cluster-api.json rename hack/observability/grafana/dashboards/{scale.json => controller-runtime.json} (92%) create mode 100644 hack/observability/kube-state-metrics/metrics/clusterclass.yaml create mode 100644 hack/observability/kube-state-metrics/metrics/kubeadmconfig.yaml diff --git a/Makefile b/Makefile index baa5d139f29a..35f70d5a3f78 100644 --- a/Makefile +++ b/Makefile @@ -552,7 +552,7 @@ generate-metrics-config: $(ENVSUBST_BIN) ## Generate ./hack/observability/kube-s METRICS_DIR="${OBSERVABILITY_DIR}/kube-state-metrics/metrics"; \ echo "# This file was auto-generated via: make generate-metrics-config" > "$${OUTPUT_FILE}"; \ cat "$${METRICS_DIR}/header.yaml" >> "$${OUTPUT_FILE}"; \ - for resource in cluster kubeadmcontrolplane machine machinedeployment machinehealthcheck machineset machinepool; do \ + for resource in clusterclass cluster kubeadmcontrolplane kubeadmconfig machine machinedeployment machinehealthcheck machineset machinepool; do \ cat "$${METRICS_DIR}/$${resource}.yaml"; \ RESOURCE="$${resource}" ${ENVSUBST_BIN} < "$${METRICS_DIR}/common_metrics.yaml"; \ if [[ "$${resource}" != "cluster" ]]; then \ diff --git a/hack/observability/grafana/dashboards/cluster-api.json b/hack/observability/grafana/dashboards/cluster-api.json new file mode 100644 index 000000000000..6918a3085047 --- /dev/null +++ b/hack/observability/grafana/dashboards/cluster-api.json @@ -0,0 +1,3860 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 
0, + "graphTooltip": 0, + "id": 2, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 26, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_clusterclass_info)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "# ClusterClass", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 3, + "y": 1 + }, + "id": 9, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_cluster_info)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "# Cluster", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 6, + "y": 1 + }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_kubeadmcontrolplane_info)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "# KubeadmControlPlane", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 9, + "y": 1 + }, + "id": 4, + "options": { + 
"colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machinedeployment_info)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "# MachineDeployment", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 12, + "y": 1 + }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machineset_info)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "# MachineSet", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 15, + "y": 1 + }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machinepool_info)", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "# MachinePool", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 18, + "y": 1 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machine_info)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "# Machine", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 21, + "y": 1 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machinehealthcheck_info)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "# MachineHealthCheck", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 42, + "panels": [], + "title": "Performance", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 43, + "options": { + "legend": { + "calcs": [ + "mean", + "min", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "asc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "avg((capi_cluster_status_condition_last_transition_time{status=\"True\"}>= time() - 60*5) - on(name) group_left () capi_cluster_created) by (type)", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Cluster time until condition true since creationTimestamp", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 45, + "options": { + "legend": { + "calcs": [ + "mean", + "min", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "asc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "avg((capi_kubeadmcontrolplane_status_condition_last_transition_time{status=\"True\"}>= time() - 60*10) - on(name) group_left () capi_kubeadmcontrolplane_created) by (type)", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "KubeadmControlPlane time until condition true since creationTimestamp", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 44, + "options": { + "legend": { + "calcs": [ + "mean", + "min", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "asc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "avg((capi_machine_status_condition_last_transition_time{status=\"True\"}>= time() - 60*10) - on(name) group_left () capi_machine_created) by (type)", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Machine time until condition true since creationTimestamp", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 46, + "options": { + "legend": { + "calcs": [ + "mean", + "min", + "max", + "lastNotNull" + ], + 
"displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "asc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "(capi_kubeadmconfig_status_condition_last_transition_time{status=\"True\"}>= time() - 60*10) - on(name) group_left () (capi_kubeadmconfig_created * on (name) group_right label_replace(capi_machine_info{control_plane_name!=\"\"}, \"name\", \"$0\", \"bootstrap_reference_name\", \".*\"))", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Control Plane KubeadmConfig time until condition true since creationTimestamp", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 47, + "options": { + "legend": { + "calcs": [ + "mean", + "min", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "asc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "(capi_kubeadmconfig_status_condition_last_transition_time{status=\"True\"}>= time() - 60*10) - on(name) group_left () (capi_kubeadmconfig_created * on (name) group_right label_replace(capi_machine_info{control_plane_name=\"\"}, \"name\", \"$0\", \"bootstrap_reference_name\", \".*\"))", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Worker KubeadmConfig time until condition true since creationTimestamp", + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 46 + }, + "id": 22, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + 
"h": 8, + "w": 6, + "x": 0, + "y": 7 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_cluster_info)", + "legendFormat": "All", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_cluster_status_condition{status=\"True\"} == 1) by (type)", + "hide": false, + "legendFormat": "{{type}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_cluster_status_condition{status=\"False\"} == 1) by (type)", + "hide": false, + "legendFormat": "{{type}} (False)", + "range": true, + "refId": "C" + } + ], + "title": "Cluster by conditions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 7 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_cluster_info) by (topology_version)", + "legendFormat": "{{topology_version}}", + "range": true, + "refId": "A" + } + ], + "title": "Cluster by version", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 7 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + 
"placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_cluster_status_phase == 1) by (phase)", + "legendFormat": "{{phase}}", + "range": true, + "refId": "A" + } + ], + "title": "Cluster by status phase", + "type": "timeseries" + } + ], + "title": "Cluster", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 47 + }, + "id": 21, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 8 + }, + "id": 13, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_kubeadmcontrolplane_info)", + "legendFormat": "All", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_kubeadmcontrolplane_status_condition{status=\"True\"} == 1) by (type)", + "hide": false, + "legendFormat": "{{type}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_kubeadmcontrolplane_status_condition{status=\"False\"} == 1) by (type)", + "hide": false, + "legendFormat": "{{type}} (False)", + "range": true, + "refId": "C" + } + ], + "title": "KubeadmControlPlane by conditions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 8 + }, + "id": 1, + 
"options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_kubeadmcontrolplane_info) by (version)", + "legendFormat": "{{version}}", + "range": true, + "refId": "A" + } + ], + "title": "KubeadmControlPlane by version", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 8 + }, + "id": 14, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_kubeadmcontrolplane_spec_replicas)", + "legendFormat": "spec replicas", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_kubeadmcontrolplane_status_replicas)", + "hide": false, + "legendFormat": "status replicas", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_kubeadmcontrolplane_status_replicas_ready)", + "hide": false, + "legendFormat": "status ready replicas", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_kubeadmcontrolplane_status_replicas_unavailable)", + "hide": false, + "legendFormat": "status unavailable replicas", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_kubeadmcontrolplane_status_replicas_updated)", + "hide": false, + "legendFormat": "status updated replicas", + "range": true, + "refId": "E" + } + ], + "title": "KubeadmControlPlane replicas", + "type": "timeseries" + } + ], + "title": "KubeadmControlPlane", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 48 + }, + "id": 20, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + 
"gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 8 + }, + "id": 15, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machinedeployment_info)", + "legendFormat": "All", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machinedeployment_status_condition{status=\"True\"} == 1) by (type)", + "hide": false, + "legendFormat": "{{type}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machinedeployment_status_condition{status=\"False\"} == 1) by (type)", + "hide": false, + "legendFormat": "{{type}} (False)", + "range": true, + "refId": "C" + } + ], + "title": "MachineDeployment by conditions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 8 + }, + "id": 16, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machinedeployment_info) by (version)", + "legendFormat": "{{version}}", + "range": true, + "refId": "A" + } + ], + "title": "MachineDeployment by version", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": 
false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 8 + }, + "id": 17, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machinedeployment_spec_replicas)", + "legendFormat": "spec replicas", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machinedeployment_status_replicas)", + "hide": false, + "legendFormat": "status replicas", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machinedeployment_status_replicas_ready)", + "hide": false, + "legendFormat": "status ready replicas", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machinedeployment_status_replicas_unavailable)", + "hide": false, + "legendFormat": "status unavailable replicas", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machinedeployment_status_replicas_updated)", + "hide": false, + "legendFormat": "status updated replicas", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machinedeployment_status_replicas_available)", + "hide": false, + "legendFormat": "status available replicas", + "range": true, + "refId": "F" + } + ], + "title": "MachineDeployment replicas", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 8 + }, + "id": 18, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": 
"prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machinedeployment_status_phase == 1) by (phase)", + "legendFormat": "{{phase}}", + "range": true, + "refId": "A" + } + ], + "title": "MachineDeployment by status phase", + "type": "timeseries" + } + ], + "title": "MachineDeployment", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 49 + }, + "id": 19, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 17 + }, + "id": 23, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machineset_info)", + "legendFormat": "All", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machineset_status_condition{status=\"True\"} == 1) by (type)", + "hide": false, + "legendFormat": "{{type}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machineset_status_condition{status=\"False\"} == 1) by (type)", + "hide": false, + "legendFormat": "{{type}} (False)", + "range": true, + "refId": "C" + } + ], + "title": "MachineSet by conditions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 17 + }, + "id": 24, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + 
"sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machineset_info) by (version)", + "legendFormat": "{{version}}", + "range": true, + "refId": "A" + } + ], + "title": "MachineSet by version", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 17 + }, + "id": 25, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machineset_spec_replicas)", + "legendFormat": "spec replicas", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machineset_status_replicas)", + "hide": false, + "legendFormat": "status replicas", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machineset_status_replicas_ready)", + "hide": false, + "legendFormat": "status ready replicas", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machineset_status_replicas_available)", + "hide": false, + "legendFormat": "status available replicas", + "range": true, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machineset_status_replicas_fully_labeled)", + "hide": false, + "legendFormat": "status fully labeled replicas", + "range": true, + "refId": "D" + } + ], + "title": "MachineSet replicas", + "type": "timeseries" + } + ], + "title": "MachineSet", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 50 + }, + "id": 35, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + 
"showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 18 + }, + "id": 36, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machinepool_info)", + "legendFormat": "All", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machinepool_status_condition{status=\"True\"} == 1) by (type)", + "hide": false, + "legendFormat": "{{type}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machinepool_status_condition{status=\"False\"} == 1) by (type)", + "hide": false, + "legendFormat": "{{type}} (False)", + "range": true, + "refId": "C" + } + ], + "title": "MachinePool by conditions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 18 + }, + "id": 37, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machinepool_info) by (version)", + "legendFormat": "{{version}}", + "range": true, + "refId": "A" + } + ], + "title": "MachinePool by version", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + 
"mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 18 + }, + "id": 38, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machinepool_spec_replicas)", + "legendFormat": "spec replicas", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machinepool_status_replicas)", + "hide": false, + "legendFormat": "status replicas", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machinepool_status_replicas_ready)", + "hide": false, + "legendFormat": "status ready replicas", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machinepool_status_replicas_unavailable)", + "hide": false, + "legendFormat": "status unavailable replicas", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machinepool_status_replicas_available)", + "hide": false, + "legendFormat": "status available replicas", + "range": true, + "refId": "F" + } + ], + "title": "MachinePool replicas", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 18 + }, + "id": 39, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machinepool_status_phase == 1) by (phase)", + "legendFormat": "{{phase}}", + "range": true, + "refId": "A" + } + ], + "title": "MachinePool by status phase", + "type": "timeseries" + } + ], + "title": "MachinePool", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 51 + }, + "id": 27, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { 
+ "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 12 + }, + "id": 28, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machine_info{control_plane_name!=\"\"})", + "legendFormat": "All", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machine_info{control_plane_name!=\"\"} * on(name) group_right () (capi_machine_status_condition{status=\"True\"} == 1)) by (type)", + "hide": false, + "legendFormat": "{{type}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machine_info{control_plane_name!=\"\"} * on(name) group_right () (capi_machine_status_condition{status=\"False\"} == 1)) by (type)", + "hide": false, + "legendFormat": "{{type}} (False)", + "range": true, + "refId": "C" + } + ], + "title": "Control Plane Machine by conditions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 12 + }, + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machine_info{control_plane_name!=\"\"}) by (version)", + "legendFormat": "{{version}}", + "range": true, + "refId": "A" + } + ], + "title": "Control Plane Machine by version", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + 
"uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 12 + }, + "id": 30, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machine_info{control_plane_name!=\"\"} * on (name) group_right () (capi_machine_status_phase == 1)) by (phase)", + "legendFormat": "{{phase}}", + "range": true, + "refId": "A" + } + ], + "title": "Control Plane Machine by status phase", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 20 + }, + "id": 31, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machine_info{control_plane_name=\"\"})", + "legendFormat": "All", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machine_info{control_plane_name=\"\"} * on(name) group_right () (capi_machine_status_condition{status=\"True\"} == 1)) by (type)", + "hide": false, + "legendFormat": "{{type}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machine_info{control_plane_name=\"\"} * on(name) group_right () (capi_machine_status_condition{status=\"False\"} == 1)) by (type)", + "hide": false, + "legendFormat": "{{type}} (False)", + "range": true, + "refId": "C" + } 
+ ], + "title": "Worker Machine by conditions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 20 + }, + "id": 32, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machine_info{control_plane_name=\"\"}) by (version)", + "legendFormat": "{{version}}", + "range": true, + "refId": "A" + } + ], + "title": "Worker Machine by version", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 20 + }, + "id": 33, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machine_info{control_plane_name=\"\"} * on (name) group_right () (capi_machine_status_phase == 1)) by (phase)", + "legendFormat": "{{phase}}", + "range": true, + "refId": "A" + } + ], + "title": "Worker Machine by status phase", + "type": "timeseries" + } + ], + "title": "Machine", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 52 + }, + "id": 34, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": 
"line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 20 + }, + "id": 40, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machinehealthcheck_info)", + "legendFormat": "All", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machinehealthcheck_status_condition{status=\"True\"} == 1) by (type)", + "hide": false, + "legendFormat": "{{type}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(capi_machinehealthcheck_status_condition{status=\"False\"} == 1) by (type)", + "hide": false, + "legendFormat": "{{type}} (False)", + "range": true, + "refId": "C" + } + ], + "title": "MachineHealthCheck by conditions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 20 + }, + "id": 41, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machinehealthcheck_status_expected_machines)", + "legendFormat": "status expected", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum (capi_machinehealthcheck_status_current_healthy)", + "hide": false, + "legendFormat": "status current healthy", + "range": true, + "refId": "B" + } + ], + "title": "MachineHealthCheck replicas", + "type": "timeseries" + } + ], + "title": "MachineHealthCheck", + "type": "row" + } + ], + "refresh": "", + "schemaVersion": 38, + "style": 
"dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Cluster API", + "uid": "db3b854f-73eb-4000-b4a7-5ee887f5ae77", + "version": 1, + "weekStart": "" +} diff --git a/hack/observability/grafana/dashboards/scale.json b/hack/observability/grafana/dashboards/controller-runtime.json similarity index 92% rename from hack/observability/grafana/dashboards/scale.json rename to hack/observability/grafana/dashboards/controller-runtime.json index 06a3f5eea490..d33f689413ff 100644 --- a/hack/observability/grafana/dashboards/scale.json +++ b/hack/observability/grafana/dashboards/controller-runtime.json @@ -19,6 +19,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, + "id": 1, "links": [], "liveNow": false, "panels": [ @@ -337,8 +338,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -445,8 +445,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -539,8 +538,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -647,8 +645,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -741,8 +738,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -835,8 +831,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -929,8 +924,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1037,8 +1031,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1129,8 +1122,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1236,7 +1228,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1252,7 +1245,7 @@ "h": 8, "w": 12, "x": 0, - "y": 42 + "y": 10 }, "id": 19, "options": { @@ -1328,7 +1321,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1344,7 +1338,7 @@ "h": 8, "w": 12, "x": 12, - "y": 42 + "y": 10 }, "id": 20, "options": { @@ -1420,7 +1414,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1436,7 +1431,7 @@ "h": 8, "w": 12, "x": 0, - "y": 50 + "y": 18 }, "id": 21, "options": { @@ -1512,7 +1507,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1528,7 +1524,7 @@ "h": 8, "w": 12, "x": 12, - "y": 50 + "y": 18 }, "id": 22, "options": { @@ -1604,7 +1600,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1620,7 +1617,7 @@ "h": 8, "w": 12, "x": 0, - "y": 58 + "y": 26 }, "id": 23, "options": { @@ -1696,7 +1693,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1712,7 +1710,7 @@ "h": 8, "w": 12, "x": 12, - "y": 58 + "y": 26 }, "id": 24, "options": { @@ -1802,7 +1800,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1818,7 +1817,7 @@ "h": 8, "w": 12, "x": 0, - "y": 51 + "y": 11 }, 
"id": 27, "options": { @@ -1894,7 +1893,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1910,7 +1910,7 @@ "h": 8, "w": 12, "x": 12, - "y": 51 + "y": 11 }, "id": 28, "options": { @@ -1941,6 +1941,99 @@ "title": "Reconcile Duration by Controller", "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 62, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(rate(controller_runtime_reconcile_time_seconds_sum{pod=~\"$Pod\",controller=~\"$Controller\"}[5m])) by (pod,controller) / sum(rate(controller_runtime_reconcile_time_seconds_count{pod=~\"$Pod\",controller=~\"$Controller\"}[5m])) by (pod,controller)", + "legendFormat": "{{pod}}: {{controller}}", + "range": true, + "refId": "A" + } + ], + "title": "Reconcile Duration by Controller", + "type": "timeseries" + }, { "datasource": { "type": "prometheus", @@ -1965,7 +2058,7 @@ "h": 13, "w": 24, "x": 0, - "y": 59 + "y": 27 }, "id": 26, "options": { @@ -2097,7 +2190,7 @@ "h": 8, "w": 12, "x": 0, - "y": 28 + "y": 12 }, "id": 31, "options": { @@ -2190,7 +2283,7 @@ "h": 8, "w": 12, "x": 12, - "y": 28 + "y": 12 }, "id": 32, "options": { @@ -2284,7 +2377,7 @@ "h": 8, "w": 12, "x": 0, - "y": 36 + "y": 20 }, "id": 33, "options": { @@ -2378,7 +2471,7 @@ "h": 8, "w": 12, "x": 12, - "y": 36 + "y": 20 }, "id": 34, "options": { @@ -2472,7 +2565,7 @@ "h": 8, "w": 12, "x": 0, - "y": 44 + "y": 28 }, "id": 35, "options": { @@ -2503,6 +2596,125 @@ "title": "Workqueue Unfinished Work", "type": "timeseries" }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "How long in seconds an item stays in workqueue before being requested", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": 
   [],
+        "thresholds": {
+          "mode": "absolute",
+          "steps": [
+            {
+              "color": "green",
+              "value": null
+            },
+            {
+              "color": "red",
+              "value": 80
+            }
+          ]
+        },
+        "unit": "s"
+      },
+      "overrides": []
+    },
+    "gridPos": {
+      "h": 9,
+      "w": 24,
+      "x": 0,
+      "y": 36
+    },
+    "id": 63,
+    "options": {
+      "legend": {
+        "calcs": [],
+        "displayMode": "list",
+        "placement": "bottom",
+        "showLegend": true
+      },
+      "tooltip": {
+        "mode": "multi",
+        "sort": "desc"
+      }
+    },
+    "targets": [
+      {
+        "datasource": {
+          "type": "prometheus",
+          "uid": "PBFA97CFB590B2093"
+        },
+        "editorMode": "code",
+        "expr": "histogram_quantile(0.50, sum(rate(workqueue_queue_duration_seconds_bucket{pod=~\"$Pod\",name=~\"$Controller\"}[5m])) by (pod, name, le))",
+        "hide": false,
+        "legendFormat": "P50 {{pod}} {{name}} ",
+        "range": true,
+        "refId": "B"
+      },
+      {
+        "datasource": {
+          "type": "prometheus",
+          "uid": "PBFA97CFB590B2093"
+        },
+        "editorMode": "code",
+        "expr": "histogram_quantile(0.90, sum(rate(workqueue_queue_duration_seconds_bucket{pod=~\"$Pod\",name=~\"$Controller\"}[5m])) by (pod, name, le))",
+        "hide": false,
+        "legendFormat": "P90 {{pod}} {{name}} ",
+        "range": true,
+        "refId": "A"
+      },
+      {
+        "datasource": {
+          "type": "prometheus",
+          "uid": "PBFA97CFB590B2093"
+        },
+        "editorMode": "code",
+        "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{pod=~\"$Pod\",name=~\"$Controller\"}[5m])) by (pod, name, le))",
+        "hide": false,
+        "legendFormat": "P99 {{pod}} {{name}} ",
+        "range": true,
+        "refId": "C"
+      }
+    ],
+    "title": "Workqueue Queue Duration Quantiles",
+    "type": "timeseries"
+  },
   {
     "datasource": {
       "type": "prometheus",
       "uid": "PBFA97CFB590B2093"
     },
@@ -2528,7 +2740,7 @@
     "h": 13,
     "w": 24,
     "x": 0,
-    "y": 52
+    "y": 45
   },
   "id": 36,
   "options": {
@@ -2584,6 +2796,125 @@
   "title": "Workqueue Queue Duration",
   "type": "heatmap"
 },
+  {
+    "datasource": {
+      "type": "prometheus",
+      "uid": "PBFA97CFB590B2093"
+    },
+    "description": "How long in seconds processing an item from workqueue takes",
+    "fieldConfig": {
+      "defaults": {
+        "color": {
+          "mode": "palette-classic"
+        },
+        "custom": {
+          "axisCenteredZero": false,
+          "axisColorMode": "text",
+          "axisLabel": "",
+          "axisPlacement": "auto",
+          "barAlignment": 0,
+          "drawStyle": "line",
+          "fillOpacity": 0,
+          "gradientMode": "none",
+          "hideFrom": {
+            "legend": false,
+            "tooltip": false,
+            "viz": false
+          },
+          "lineInterpolation": "linear",
+          "lineWidth": 1,
+          "pointSize": 5,
+          "scaleDistribution": {
+            "type": "linear"
+          },
+          "showPoints": "auto",
+          "spanNulls": false,
+          "stacking": {
+            "group": "A",
+            "mode": "none"
+          },
+          "thresholdsStyle": {
+            "mode": "off"
+          }
+        },
+        "mappings": [],
+        "thresholds": {
+          "mode": "absolute",
+          "steps": [
+            {
+              "color": "green",
+              "value": null
+            },
+            {
+              "color": "red",
+              "value": 80
+            }
+          ]
+        },
+        "unit": "s"
+      },
+      "overrides": []
+    },
+    "gridPos": {
+      "h": 9,
+      "w": 24,
+      "x": 0,
+      "y": 58
+    },
+    "id": 64,
+    "options": {
+      "legend": {
+        "calcs": [],
+        "displayMode": "list",
+        "placement": "bottom",
+        "showLegend": true
+      },
+      "tooltip": {
+        "mode": "multi",
+        "sort": "desc"
+      }
+    },
+    "targets": [
+      {
+        "datasource": {
+          "type": "prometheus",
+          "uid": "PBFA97CFB590B2093"
+        },
+        "editorMode": "code",
+        "expr": "histogram_quantile(0.50, sum(rate(workqueue_work_duration_seconds_bucket{pod=~\"$Pod\",name=~\"$Controller\"}[5m])) by (pod, name, le))",
+        "hide": false,
+        "legendFormat": "P50 {{pod}} {{name}} ",
+        "range": true,
+        "refId": "B"
+      },
+      {
+        "datasource": {
+          "type": "prometheus",
+          "uid": "PBFA97CFB590B2093"
+        },
+        "editorMode": "code",
"histogram_quantile(0.90, sum(rate(workqueue_work_duration_seconds_bucket{pod=~\"$Pod\",name=~\"$Controller\"}[5m])) by (pod, name, le))", + "hide": false, + "legendFormat": "P90 {{pod}} {{name}} ", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(workqueue_work_duration_seconds_bucket{pod=~\"$Pod\",name=~\"$Controller\"}[5m])) by (pod, name, le))", + "hide": false, + "legendFormat": "P99 {{pod}} {{name}} ", + "range": true, + "refId": "C" + } + ], + "title": "Workqueue Work Duration Quantils", + "type": "timeseries" + }, { "datasource": { "type": "prometheus", @@ -2609,7 +2940,7 @@ "h": 13, "w": 24, "x": 0, - "y": 65 + "y": 67 }, "id": 37, "options": { @@ -2724,8 +3055,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2841,8 +3171,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3238,8 +3567,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3331,8 +3659,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3439,8 +3766,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3547,8 +3873,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3641,8 +3966,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3747,8 +4071,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -3889,8 +4212,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4067,8 +4389,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4161,8 +4482,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4255,8 +4575,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4349,8 +4668,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4443,8 +4761,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -4496,7 +4813,7 @@ "type": "row" } ], - "refresh": false, + "refresh": "", "schemaVersion": 38, "style": "dark", "tags": [], @@ -4507,10 +4824,10 @@ "current": { "selected": true, "text": [ - "capi-test-control-plane" + "All" ], "value": [ - "capi-test-control-plane" + "$__all" ] }, "datasource": { @@ -4537,13 +4854,9 @@ { "allValue": ".*", "current": { - "selected": true, - "text": [ - "capi-controller-manager-fbd4fc888-jvfl9" - ], - "value": [ - "capi-controller-manager-fbd4fc888-jvfl9" - ] + "selected": false, + "text": "kindnet-whq29", + "value": "kindnet-whq29" }, "datasource": { "type": "prometheus", @@ -4645,7 +4958,7 @@ }, "timepicker": {}, "timezone": "", - "title": "Cluster API - Scale", + "title": "Controller Runtime", "uid": "abe29aa7-e44a-4eef-9474-970f95f08ee6", "version": 1, "weekStart": "" diff --git a/hack/observability/grafana/kustomization.yaml 
index 4d2ee7d5c123..bca1904fb39b 100644
--- a/hack/observability/grafana/kustomization.yaml
+++ b/hack/observability/grafana/kustomization.yaml
@@ -12,4 +12,5 @@ namespace: observability
 configMapGenerator:
 - name: grafana-dashboards
   files:
-  - dashboards/scale.json
+  - dashboards/cluster-api.json
+  - dashboards/controller-runtime.json
diff --git a/hack/observability/kube-state-metrics/chart/values.yaml b/hack/observability/kube-state-metrics/chart/values.yaml
index 273d7e88055e..93d0a8551a13 100644
--- a/hack/observability/kube-state-metrics/chart/values.yaml
+++ b/hack/observability/kube-state-metrics/chart/values.yaml
@@ -16,6 +16,7 @@ rbac:
   - apiGroups:
       - cluster.x-k8s.io
     resources:
+      - clusterclasses
       - clusters
       - machinedeployments
       - machinepools
@@ -34,15 +35,25 @@
       - get
       - list
       - watch
+  - apiGroups:
+      - bootstrap.cluster.x-k8s.io
+    resources:
+      - kubeadmconfigs
+    verbs:
+      - get
+      - list
+      - watch
 
 collectors:
   # CAPI CRs
+  - clusterclasses
   - clusters
   - machinedeployments
   - machinesets
   - machines
   - machinehealthchecks
   - kubeadmcontrolplanes
+  - kubeadmconfigs
   # We need to define all default collectors too, otherwise the helm chart does not include this resources in rbac
   - certificatesigningrequests
   - configmaps
diff --git a/hack/observability/kube-state-metrics/crd-config.yaml b/hack/observability/kube-state-metrics/crd-config.yaml
index 230fba619e71..4fd23967964c 100644
--- a/hack/observability/kube-state-metrics/crd-config.yaml
+++ b/hack/observability/kube-state-metrics/crd-config.yaml
@@ -2,6 +2,101 @@ kind: CustomResourceStateMetrics
 spec:
   resources:
+  - groupVersionKind:
+      group: cluster.x-k8s.io
+      kind: ClusterClass
+      version: v1beta1
+    labelsFromPath:
+      name:
+      - metadata
+      - name
+      namespace:
+      - metadata
+      - namespace
+      uid:
+      - metadata
+      - uid
+    metricNamePrefix: capi_clusterclass
+    metrics:
+    - name: info
+      help: Information about a clusterclass.
+      each:
+        info:
+          # TODO: added metadata.name even though it's already defined above, as the metric doesn't work with empty labelsFromPath.
+          labelsFromPath:
+            name:
+            - metadata
+            - name
+        type: Info
+    - name: created
+      help: Unix creation timestamp.
+      each:
+        gauge:
+          path:
+          - metadata
+          - creationTimestamp
+        type: Gauge
+    - name: annotation_paused
+      help: Whether the clusterclass is paused and any of its resources will not be processed by the controllers.
+      each:
+        info:
+          path:
+          - metadata
+          - annotations
+          - cluster.x-k8s.io/paused
+          labelsFromPath:
+            paused_value: []
+        type: Info
+    - name: status_condition
+      help: The condition of a clusterclass.
+      each:
+        stateSet:
+          labelName: status
+          labelsFromPath:
+            type:
+            - type
+          list:
+          - 'True'
+          - 'False'
+          - Unknown
+          path:
+          - status
+          - conditions
+          valueFrom:
+          - status
+        type: StateSet
+    - name: status_condition_last_transition_time
+      help: The condition last transition time of a clusterclass.
+      each:
+        gauge:
+          labelsFromPath:
+            type:
+            - type
+            status:
+            - status
+          path:
+          - status
+          - conditions
+          valueFrom:
+          - lastTransitionTime
+        type: Gauge
+    - name: owner
+      help: Owner references.
+      each:
+        info:
+          labelsFromPath:
+            owner_is_controller:
+            - controller
+            owner_kind:
+            - kind
+            owner_name:
+            - name
+            owner_uid:
+            - uid
+          path:
+          - metadata
+          - ownerReferences
+        type: Info
   - groupVersionKind:
       group: cluster.x-k8s.io
       kind: Cluster
@@ -118,6 +213,21 @@
           valueFrom:
          - status
        type: StateSet
+    - name: status_condition_last_transition_time
+      help: The condition last transition time of a cluster.
+      each:
+        gauge:
+          labelsFromPath:
+            type:
+            - type
+            status:
+            - status
+          path:
+          - status
+          - conditions
+          valueFrom:
+          - lastTransitionTime
+        type: Gauge
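+      # Usage note (illustrative, not part of the generated config): paired with the
+      # status_condition StateSet above, this gauge lets PromQL flag conditions that have
+      # been stuck for a while, e.g.:
+      #   (time() - capi_cluster_status_condition_last_transition_time{type="Ready", status="False"}) > 3600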
   - groupVersionKind:
       group: controlplane.cluster.x-k8s.io
       kind: KubeadmControlPlane
       version: v1beta1
@@ -239,6 +349,120 @@
           valueFrom:
           - status
         type: StateSet
+    - name: status_condition_last_transition_time
+      help: The condition last transition time of a kubeadmcontrolplane.
+      each:
+        gauge:
+          labelsFromPath:
+            type:
+            - type
+            status:
+            - status
+          path:
+          - status
+          - conditions
+          valueFrom:
+          - lastTransitionTime
+        type: Gauge
+    - name: owner
+      help: Owner references.
+      each:
+        info:
+          labelsFromPath:
+            owner_is_controller:
+            - controller
+            owner_kind:
+            - kind
+            owner_name:
+            - name
+            owner_uid:
+            - uid
+          path:
+          - metadata
+          - ownerReferences
+        type: Info
+  - groupVersionKind:
+      group: bootstrap.cluster.x-k8s.io
+      kind: KubeadmConfig
+      version: v1beta1
+    labelsFromPath:
+      cluster_name:
+      - metadata
+      - labels
+      - cluster.x-k8s.io/cluster-name
+      name:
+      - metadata
+      - name
+      namespace:
+      - metadata
+      - namespace
+      uid:
+      - metadata
+      - uid
+    metricNamePrefix: capi_kubeadmconfig
+    metrics:
+    - name: info
+      help: Information about a kubeadmconfig.
+      each:
+        info:
+          # TODO: added metadata.name even though it's already defined above, as the metric doesn't work with empty labelsFromPath.
+          labelsFromPath:
+            name:
+            - metadata
+            - name
+        type: Info
+    - name: created
+      help: Unix creation timestamp.
+      each:
+        gauge:
+          path:
+          - metadata
+          - creationTimestamp
+        type: Gauge
+    - name: annotation_paused
+      help: Whether the kubeadmconfig is paused and any of its resources will not be processed by the controllers.
+      each:
+        info:
+          path:
+          - metadata
+          - annotations
+          - cluster.x-k8s.io/paused
+          labelsFromPath:
+            paused_value: []
+        type: Info
+    - name: status_condition
+      help: The condition of a kubeadmconfig.
+      each:
+        stateSet:
+          labelName: status
+          labelsFromPath:
+            type:
+            - type
+          list:
+          - 'True'
+          - 'False'
+          - Unknown
+          path:
+          - status
+          - conditions
+          valueFrom:
+          - status
+        type: StateSet
+    - name: status_condition_last_transition_time
+      help: The condition last transition time of a kubeadmconfig.
+      each:
+        gauge:
+          labelsFromPath:
+            type:
+            - type
+            status:
+            - status
+          path:
+          - status
+          - conditions
+          valueFrom:
+          - lastTransitionTime
+        type: Gauge
     - name: owner
       help: Owner references.
       each:
@@ -309,6 +533,28 @@
           version:
           - spec
          - version
+          bootstrap_reference_kind:
+          - spec
+          - bootstrap
+          - configRef
+          - kind
+          bootstrap_reference_name:
+          - spec
+          - bootstrap
+          - configRef
+          - name
+          infrastructure_reference_kind:
+          - spec
+          - infrastructureRef
+          - kind
+          infrastructure_reference_name:
+          - spec
+          - infrastructureRef
+          - name
+          control_plane_name:
+          - metadata
+          - labels
+          - cluster.x-k8s.io/control-plane-name
         type: Info
     - name: addresses
       help: Address information about a machine.
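+    # Usage note (illustrative): info metrics always have the value 1; their labels exist
+    # to be grouped or joined with other series, e.g. counting machines by Kubernetes
+    # version with: count(capi_machine_info) by (version)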
@@ -392,6 +638,21 @@
           valueFrom:
           - status
         type: StateSet
+    - name: status_condition_last_transition_time
+      help: The condition last transition time of a machine.
+      each:
+        gauge:
+          labelsFromPath:
+            type:
+            - type
+            status:
+            - status
+          path:
+          - status
+          - conditions
+          valueFrom:
+          - lastTransitionTime
+        type: Gauge
     - name: owner
       help: Owner references.
       each:
@@ -428,6 +689,43 @@
       - uid
     metricNamePrefix: capi_machinedeployment
     metrics:
+    - name: info
+      help: Information about a machinedeployment.
+      each:
+        info:
+          labelsFromPath:
+            version:
+            - spec
+            - template
+            - spec
+            - version
+            bootstrap_reference_kind:
+            - spec
+            - template
+            - spec
+            - bootstrap
+            - configRef
+            - kind
+            bootstrap_reference_name:
+            - spec
+            - template
+            - spec
+            - bootstrap
+            - configRef
+            - name
+            infrastructure_reference_kind:
+            - spec
+            - template
+            - spec
+            - infrastructureRef
+            - kind
+            infrastructure_reference_name:
+            - spec
+            - template
+            - spec
+            - infrastructureRef
+            - name
+        type: Info
     - name: spec_paused
       help: Whether the machinedeployment is paused and any of its resources will not be processed by the controllers.
       each:
@@ -562,6 +860,21 @@
           valueFrom:
           - status
         type: StateSet
+    - name: status_condition_last_transition_time
+      help: The condition last transition time of a machinedeployment.
+      each:
+        gauge:
+          labelsFromPath:
+            type:
+            - type
+            status:
+            - status
+          path:
+          - status
+          - conditions
+          valueFrom:
+          - lastTransitionTime
+        type: Gauge
     - name: owner
       help: Owner references.
       each:
@@ -598,6 +911,16 @@
       - uid
     metricNamePrefix: capi_machinehealthcheck
     metrics:
+    - name: info
+      help: Information about a machinehealthcheck.
+      each:
+        info:
+          # TODO: added metadata.name even though it's already defined above, as the metric doesn't work with empty labelsFromPath.
+          labelsFromPath:
+            name:
+            - metadata
+            - name
+        type: Info
     - name: status_current_healthy
       help: Current number of healthy machines.
       each:
@@ -659,6 +982,21 @@
           valueFrom:
           - status
         type: StateSet
+    - name: status_condition_last_transition_time
+      help: The condition last transition time of a machinehealthcheck.
+      each:
+        gauge:
+          labelsFromPath:
+            type:
+            - type
+            status:
+            - status
+          path:
+          - status
+          - conditions
+          valueFrom:
+          - lastTransitionTime
+        type: Gauge
     - name: owner
       help: Owner references.
       each:
@@ -695,6 +1033,43 @@
       - uid
     metricNamePrefix: capi_machineset
     metrics:
+    - name: info
+      help: Information about a machineset.
+      each:
+        info:
+          labelsFromPath:
+            version:
+            - spec
+            - template
+            - spec
+            - version
+            bootstrap_reference_kind:
+            - spec
+            - template
+            - spec
+            - bootstrap
+            - configRef
+            - kind
+            bootstrap_reference_name:
+            - spec
+            - template
+            - spec
+            - bootstrap
+            - configRef
+            - name
+            infrastructure_reference_kind:
+            - spec
+            - template
+            - spec
+            - infrastructureRef
+            - kind
+            infrastructure_reference_name:
+            - spec
+            - template
+            - spec
+            - infrastructureRef
+            - name
+        type: Info
     - name: spec_replicas
       help: The number of desired machines for a machineset.
       each:
@@ -704,7 +1079,7 @@
           - replicas
           nilIsZero: true
         type: Gauge
-    - name: status_available_replicas
+    - name: status_replicas_available
       help: The number of available replicas per machineset.
       each:
         gauge:
@@ -713,7 +1088,7 @@
           - status
           - availableReplicas
           nilIsZero: true
         type: Gauge
-    - name: status_fully_labeled_replicas
+    - name: status_replicas_fully_labeled
       help: The number of fully labeled replicas per machineset.
       each:
         gauge:
@@ -721,7 +1096,7 @@
           - status
           - fullyLabeledReplicas
         type: Gauge
-    - name: status_ready_replicas
+    - name: status_replicas_ready
       help: The number of ready replicas per machineset.
       each:
@@ -776,6 +1151,21 @@
           valueFrom:
           - status
         type: StateSet
+    - name: status_condition_last_transition_time
+      help: The condition last transition time of a machineset.
+      each:
+        gauge:
+          labelsFromPath:
+            type:
+            - type
+            status:
+            - status
+          path:
+          - status
+          - conditions
+          valueFrom:
+          - lastTransitionTime
+        type: Gauge
     - name: owner
       help: Owner references.
       each:
@@ -949,6 +1339,21 @@
           valueFrom:
           - status
         type: StateSet
+    - name: status_condition_last_transition_time
+      help: The condition last transition time of a machinepool.
+      each:
+        gauge:
+          labelsFromPath:
+            type:
+            - type
+            status:
+            - status
+          path:
+          - status
+          - conditions
+          valueFrom:
+          - lastTransitionTime
+        type: Gauge
     - name: owner
       help: Owner references.
       each:
diff --git a/hack/observability/kube-state-metrics/metrics/clusterclass.yaml b/hack/observability/kube-state-metrics/metrics/clusterclass.yaml
new file mode 100644
index 000000000000..21b464d45e06
--- /dev/null
+++ b/hack/observability/kube-state-metrics/metrics/clusterclass.yaml
@@ -0,0 +1,26 @@
+  - groupVersionKind:
+      group: cluster.x-k8s.io
+      kind: ClusterClass
+      version: v1beta1
+    labelsFromPath:
+      name:
+      - metadata
+      - name
+      namespace:
+      - metadata
+      - namespace
+      uid:
+      - metadata
+      - uid
+    metricNamePrefix: capi_clusterclass
+    metrics:
+    - name: info
+      help: Information about a clusterclass.
+      each:
+        info:
+          # TODO: added metadata.name even though it's already defined above, as the metric doesn't work with empty labelsFromPath.
+          labelsFromPath:
+            name:
+            - metadata
+            - name
+        type: Info
diff --git a/hack/observability/kube-state-metrics/metrics/common_metrics.yaml b/hack/observability/kube-state-metrics/metrics/common_metrics.yaml
index 49e55e1d31a2..073631b9c88b 100644
--- a/hack/observability/kube-state-metrics/metrics/common_metrics.yaml
+++ b/hack/observability/kube-state-metrics/metrics/common_metrics.yaml
@@ -35,3 +35,18 @@
           valueFrom:
           - status
         type: StateSet
+    - name: status_condition_last_transition_time
+      help: The condition last transition time of a ${RESOURCE}.
+      each:
+        gauge:
+          labelsFromPath:
+            type:
+            - type
+            status:
+            - status
+          path:
+          - status
+          - conditions
+          valueFrom:
+          - lastTransitionTime
+        type: Gauge
diff --git a/hack/observability/kube-state-metrics/metrics/kubeadmconfig.yaml b/hack/observability/kube-state-metrics/metrics/kubeadmconfig.yaml
new file mode 100644
index 000000000000..ffb240172a60
--- /dev/null
+++ b/hack/observability/kube-state-metrics/metrics/kubeadmconfig.yaml
@@ -0,0 +1,30 @@
+  - groupVersionKind:
+      group: bootstrap.cluster.x-k8s.io
+      kind: KubeadmConfig
+      version: v1beta1
+    labelsFromPath:
+      cluster_name:
+      - metadata
+      - labels
+      - cluster.x-k8s.io/cluster-name
+      name:
+      - metadata
+      - name
+      namespace:
+      - metadata
+      - namespace
+      uid:
+      - metadata
+      - uid
+    metricNamePrefix: capi_kubeadmconfig
+    metrics:
+    - name: info
+      help: Information about a kubeadmconfig.
+      each:
+        info:
+          # TODO: added metadata.name even though it's already defined above, as the metric doesn't work with empty labelsFromPath.
+          labelsFromPath:
+            name:
+            - metadata
+            - name
+        type: Info
diff --git a/hack/observability/kube-state-metrics/metrics/machine.yaml b/hack/observability/kube-state-metrics/metrics/machine.yaml
index addf4595c8b0..f9334450a5ea 100644
--- a/hack/observability/kube-state-metrics/metrics/machine.yaml
+++ b/hack/observability/kube-state-metrics/metrics/machine.yaml
@@ -51,6 +51,28 @@
           version:
           - spec
           - version
+          bootstrap_reference_kind:
+          - spec
+          - bootstrap
+          - configRef
+          - kind
+          bootstrap_reference_name:
+          - spec
+          - bootstrap
+          - configRef
+          - name
+          infrastructure_reference_kind:
+          - spec
+          - infrastructureRef
+          - kind
+          infrastructure_reference_name:
+          - spec
+          - infrastructureRef
+          - name
+          control_plane_name:
+          - metadata
+          - labels
+          - cluster.x-k8s.io/control-plane-name
         type: Info
     - name: addresses
       help: Address information about a machine.
diff --git a/hack/observability/kube-state-metrics/metrics/machinedeployment.yaml b/hack/observability/kube-state-metrics/metrics/machinedeployment.yaml
index 9f94745c8c0a..1ece3552149a 100644
--- a/hack/observability/kube-state-metrics/metrics/machinedeployment.yaml
+++ b/hack/observability/kube-state-metrics/metrics/machinedeployment.yaml
@@ -17,6 +17,43 @@
       - uid
     metricNamePrefix: capi_machinedeployment
     metrics:
+    - name: info
+      help: Information about a machinedeployment.
+      each:
+        info:
+          labelsFromPath:
+            version:
+            - spec
+            - template
+            - spec
+            - version
+            bootstrap_reference_kind:
+            - spec
+            - template
+            - spec
+            - bootstrap
+            - configRef
+            - kind
+            bootstrap_reference_name:
+            - spec
+            - template
+            - spec
+            - bootstrap
+            - configRef
+            - name
+            infrastructure_reference_kind:
+            - spec
+            - template
+            - spec
+            - infrastructureRef
+            - kind
+            infrastructure_reference_name:
+            - spec
+            - template
+            - spec
+            - infrastructureRef
+            - name
+        type: Info
     - name: spec_paused
       help: Whether the machinedeployment is paused and any of its resources will not be processed by the controllers.
       each:
diff --git a/hack/observability/kube-state-metrics/metrics/machinehealthcheck.yaml b/hack/observability/kube-state-metrics/metrics/machinehealthcheck.yaml
index 9df009812fb2..a14a164be72b 100644
--- a/hack/observability/kube-state-metrics/metrics/machinehealthcheck.yaml
+++ b/hack/observability/kube-state-metrics/metrics/machinehealthcheck.yaml
@@ -17,6 +17,16 @@
       - uid
     metricNamePrefix: capi_machinehealthcheck
     metrics:
+    - name: info
+      help: Information about a machinehealthcheck.
+      each:
+        info:
+          # TODO: added metadata.name even though it's already defined above, as the metric doesn't work with empty labelsFromPath.
+          labelsFromPath:
+            name:
+            - metadata
+            - name
+        type: Info
     - name: status_current_healthy
       help: Current number of healthy machines.
       each:
diff --git a/hack/observability/kube-state-metrics/metrics/machineset.yaml b/hack/observability/kube-state-metrics/metrics/machineset.yaml
index 42ef967aac53..e2bde1ecde6e 100644
--- a/hack/observability/kube-state-metrics/metrics/machineset.yaml
+++ b/hack/observability/kube-state-metrics/metrics/machineset.yaml
@@ -17,6 +17,43 @@
       - uid
     metricNamePrefix: capi_machineset
     metrics:
+    - name: info
+      help: Information about a machineset.
+      each:
+        info:
+          labelsFromPath:
+            version:
+            - spec
+            - template
+            - spec
+            - version
+            bootstrap_reference_kind:
+            - spec
+            - template
+            - spec
+            - bootstrap
+            - configRef
+            - kind
+            bootstrap_reference_name:
+            - spec
+            - template
+            - spec
+            - bootstrap
+            - configRef
+            - name
+            infrastructure_reference_kind:
+            - spec
+            - template
+            - spec
+            - infrastructureRef
+            - kind
+            infrastructure_reference_name:
+            - spec
+            - template
+            - spec
+            - infrastructureRef
+            - name
+        type: Info
     - name: spec_replicas
       help: The number of desired machines for a machineset.
       each:
@@ -26,7 +63,7 @@
           - replicas
           nilIsZero: true
         type: Gauge
-    - name: status_available_replicas
+    - name: status_replicas_available
       help: The number of available replicas per machineset.
       each:
         gauge:
@@ -35,7 +72,7 @@
           - status
           - availableReplicas
           nilIsZero: true
         type: Gauge
-    - name: status_fully_labeled_replicas
+    - name: status_replicas_fully_labeled
       help: The number of fully labeled replicas per machineset.
       each:
         gauge:
@@ -43,7 +80,7 @@
           - status
           - fullyLabeledReplicas
         type: Gauge
-    - name: status_ready_replicas
+    - name: status_replicas_ready
       help: The number of ready replicas per machineset.
       each:
         gauge:
diff --git a/hack/observability/promtail/values.yaml b/hack/observability/promtail/values.yaml
index a32675ed6237..ed419ef9c045 100644
--- a/hack/observability/promtail/values.yaml
+++ b/hack/observability/promtail/values.yaml
@@ -15,9 +15,17 @@ config:
       - json:
           expressions:
             controller:
-            cluster: join('/',[Cluster.namespace,Cluster.name])
-            machine: join('/',[Machine.namespace,Machine.name])
+            Cluster: join('/',[Cluster.namespace,Cluster.name])
+            Machine: join('/',[Machine.namespace,Machine.name])
+            KubeadmControlPlane: join('/',[KubeadmControlPlane.namespace,KubeadmControlPlane.name])
+            MachineDeployment: join('/',[MachineDeployment.namespace,MachineDeployment.name])
+            MachineSet: join('/',[MachineSet.namespace,MachineSet.name])
+            MachinePool: join('/',[MachinePool.namespace,MachinePool.name])
       - labels:
           controller:
-          cluster:
-          machine:
+          Cluster:
+          Machine:
+          KubeadmControlPlane:
+          MachineDeployment:
+          MachineSet:
+          MachinePool:
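+          # Illustrative note: each JMESPath join above collapses the structured key/value
+          # pairs logged by the controllers (e.g. Cluster.namespace=foo, Cluster.name=bar)
+          # into a single Loki label such as Cluster="foo/bar", so log streams can be
+          # filtered per CAPI object.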
From cbf3a9a448d0cba09647929888ce4e0754dacd1c Mon Sep 17 00:00:00 2001
From: fabriziopandini
Date: Mon, 12 Jun 2023 16:11:03 +0200
Subject: [PATCH 14/94] add startup timeout to the in memory provider

---
 .../api/v1alpha1/inmemorymachine_types.go     | 102 +++
 .../api/v1alpha1/zz_generated.deepcopy.go     | 120 +++
 ...ure.cluster.x-k8s.io_inmemorymachines.yaml | 118 +++
 ...ter.x-k8s.io_inmemorymachinetemplates.yaml | 125 ++++
 .../controllers/inmemorymachine_controller.go | 616 ++++++++++++----
 .../inmemorymachine_controller_test.go        | 681 ++++++++++++++++++
 6 files changed, 1637 insertions(+), 125 deletions(-)
 create mode 100644 test/infrastructure/inmemory/internal/controllers/inmemorymachine_controller_test.go

diff --git a/test/infrastructure/inmemory/api/v1alpha1/inmemorymachine_types.go b/test/infrastructure/inmemory/api/v1alpha1/inmemorymachine_types.go
index 7bb3d32dec39..bf6b0d1b908d 100644
--- a/test/infrastructure/inmemory/api/v1alpha1/inmemorymachine_types.go
+++ b/test/infrastructure/inmemory/api/v1alpha1/inmemorymachine_types.go
@@ -28,11 +28,113 @@ const (
 	MachineFinalizer = "inmemorymachine.infrastructure.cluster.x-k8s.io"
 )
 
+const (
+	// VMProvisionedCondition documents the status of the provisioning of the VM implementing the InMemoryMachine.
+	VMProvisionedCondition clusterv1.ConditionType = "VMProvisioned"
+
+	// WaitingForClusterInfrastructureReason (Severity=Info) documents an InMemoryMachine VM waiting for the cluster
+	// infrastructure to be ready.
+	WaitingForClusterInfrastructureReason = "WaitingForClusterInfrastructure"
+
+	// WaitingControlPlaneInitializedReason (Severity=Info) documents an InMemoryMachine VM waiting
+	// for the control plane to be initialized.
+	WaitingControlPlaneInitializedReason = "WaitingControlPlaneInitialized"
+
+	// WaitingForBootstrapDataReason (Severity=Info) documents an InMemoryMachine VM waiting for the bootstrap
+	// data to be ready before starting to create the CloudMachine/VM.
+	WaitingForBootstrapDataReason = "WaitingForBootstrapData"
+
+	// VMWaitingForStartupTimeoutReason (Severity=Info) documents an InMemoryMachine VM provisioning.
+	VMWaitingForStartupTimeoutReason = "WaitingForStartupTimeout"
+)
+
+const (
+	// NodeProvisionedCondition documents the status of the provisioning of the node hosted on the InMemoryMachine.
+	NodeProvisionedCondition clusterv1.ConditionType = "NodeProvisioned"
+
+	// NodeWaitingForStartupTimeoutReason (Severity=Info) documents an InMemoryMachine Node provisioning.
+	NodeWaitingForStartupTimeoutReason = "WaitingForStartupTimeout"
+)
+
+const (
+	// EtcdProvisionedCondition documents the status of the provisioning of the etcd member hosted on the InMemoryMachine.
+	EtcdProvisionedCondition clusterv1.ConditionType = "EtcdProvisioned"
+
+	// EtcdWaitingForStartupTimeoutReason (Severity=Info) documents an InMemoryMachine etcd pod provisioning.
+	EtcdWaitingForStartupTimeoutReason = "WaitingForStartupTimeout"
+)
+
+const (
+	// APIServerProvisionedCondition documents the status of the provisioning of the APIServer instance hosted on the InMemoryMachine.
+	APIServerProvisionedCondition clusterv1.ConditionType = "APIServerProvisioned"
+
+	// APIServerWaitingForStartupTimeoutReason (Severity=Info) documents an InMemoryMachine API server pod provisioning.
+	APIServerWaitingForStartupTimeoutReason = "WaitingForStartupTimeout"
+)
+
 // InMemoryMachineSpec defines the desired state of InMemoryMachine.
 type InMemoryMachineSpec struct {
 	// ProviderID will be the container name in ProviderID format (in-memory:////)
 	// +optional
 	ProviderID *string `json:"providerID,omitempty"`
+
+	// Behaviour of the InMemoryMachine; this allows making a simulation closer to real use cases,
+	// e.g. by defining the duration of the provisioning phase to mimic the performance of the target infrastructure.
+	Behaviour *InMemoryMachineBehaviour `json:"behaviour,omitempty"`
+}
+
+// InMemoryMachineBehaviour defines the behaviour of the InMemoryMachine.
+type InMemoryMachineBehaviour struct {
+	// VM defines the behaviour of the VM implementing the InMemoryMachine.
+	VM *InMemoryVMBehaviour `json:"vm,omitempty"`
+
+	// Node defines the behaviour of the Node (the kubelet) hosted on the InMemoryMachine.
+	Node *InMemoryNodeBehaviour `json:"node,omitempty"`
+
+	// APIServer defines the behaviour of the APIServer hosted on the InMemoryMachine.
+	APIServer *InMemoryAPIServerBehaviour `json:"apiServer,omitempty"`
+
+	// Etcd defines the behaviour of the etcd member hosted on the InMemoryMachine.
+	Etcd *InMemoryEtcdBehaviour `json:"etcd,omitempty"`
+}
+
+// InMemoryVMBehaviour defines the behaviour of the VM implementing the InMemoryMachine.
+type InMemoryVMBehaviour struct {
+	// Provisioning defines variables influencing how the VM implementing the InMemoryMachine is going to be provisioned.
+	// NOTE: VM provisioning includes all the steps from creation to power-on.
+	Provisioning CommonProvisioningSettings `json:"provisioning,omitempty"`
+}
+
+// InMemoryNodeBehaviour defines the behaviour of the Node (the kubelet) hosted on the InMemoryMachine.
+type InMemoryNodeBehaviour struct {
+	// Provisioning defines variables influencing how the Node (the kubelet) hosted on the InMemoryMachine is going to be provisioned.
+	// NOTE: Node provisioning includes all the steps from starting the kubelet to the node becoming ready, getting a provider ID, and being registered in K8s.
+	Provisioning CommonProvisioningSettings `json:"provisioning,omitempty"`
+}
+
+// InMemoryAPIServerBehaviour defines the behaviour of the APIServer hosted on the InMemoryMachine.
+type InMemoryAPIServerBehaviour struct {
+	// Provisioning defines variables influencing how the APIServer hosted on the InMemoryMachine is going to be provisioned.
+	// NOTE: APIServer provisioning includes all the steps from starting the static Pod to the Pod becoming ready and being registered in K8s.
+	Provisioning CommonProvisioningSettings `json:"provisioning,omitempty"`
+}
+
+// InMemoryEtcdBehaviour defines the behaviour of the etcd member hosted on the InMemoryMachine.
+type InMemoryEtcdBehaviour struct {
+	// Provisioning defines variables influencing how the etcd member hosted on the InMemoryMachine is going to be provisioned.
+	// NOTE: Etcd provisioning includes all the steps from starting the static Pod to the Pod becoming ready and being registered in K8s.
+	Provisioning CommonProvisioningSettings `json:"provisioning,omitempty"`
+}
+
+// CommonProvisioningSettings holds parameters that apply to the provisioning of most of the objects.
+type CommonProvisioningSettings struct {
+	// StartupDuration defines the duration of the object provisioning phase.
+	StartupDuration metav1.Duration `json:"startupDuration"`
+
+	// StartupJitter adds some randomness on StartupDuration; the actual duration will be StartupDuration plus an additional
+	// amount chosen uniformly at random from the interval between zero and `StartupJitter*StartupDuration`.
+	// NOTE: this is modeled as string because the usage of float is highly discouraged, as support for them varies across languages.
+	StartupJitter string `json:"startupJitter,omitempty"`
+}
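+
+// Example (illustrative only): a VM that takes ten seconds to power on, with up to 20%
+// extra random delay, could be declared as:
+//
+//	behaviour:
+//	  vm:
+//	    provisioning:
+//	      startupDuration: "10s"
+//	      startupJitter: "0.2"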
 
 // InMemoryMachineStatus defines the observed state of InMemoryMachine.
diff --git a/test/infrastructure/inmemory/api/v1alpha1/zz_generated.deepcopy.go b/test/infrastructure/inmemory/api/v1alpha1/zz_generated.deepcopy.go
index bf6252d8f39c..d0dd93bde1ec 100644
--- a/test/infrastructure/inmemory/api/v1alpha1/zz_generated.deepcopy.go
+++ b/test/infrastructure/inmemory/api/v1alpha1/zz_generated.deepcopy.go
@@ -41,6 +41,38 @@ func (in *APIEndpoint) DeepCopy() *APIEndpoint {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *CommonProvisioningSettings) DeepCopyInto(out *CommonProvisioningSettings) {
+	*out = *in
+	out.StartupDuration = in.StartupDuration
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CommonProvisioningSettings.
+func (in *CommonProvisioningSettings) DeepCopy() *CommonProvisioningSettings {
+	if in == nil {
+		return nil
+	}
+	out := new(CommonProvisioningSettings)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *InMemoryAPIServerBehaviour) DeepCopyInto(out *InMemoryAPIServerBehaviour) {
+	*out = *in
+	out.Provisioning = in.Provisioning
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InMemoryAPIServerBehaviour.
+func (in *InMemoryAPIServerBehaviour) DeepCopy() *InMemoryAPIServerBehaviour {
+	if in == nil {
+		return nil
+	}
+	out := new(InMemoryAPIServerBehaviour)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *InMemoryCluster) DeepCopyInto(out *InMemoryCluster) {
 	*out = *in
@@ -138,6 +170,22 @@ func (in *InMemoryClusterStatus) DeepCopy() *InMemoryClusterStatus {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *InMemoryEtcdBehaviour) DeepCopyInto(out *InMemoryEtcdBehaviour) {
+	*out = *in
+	out.Provisioning = in.Provisioning
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InMemoryEtcdBehaviour.
+func (in *InMemoryEtcdBehaviour) DeepCopy() *InMemoryEtcdBehaviour { + if in == nil { + return nil + } + out := new(InMemoryEtcdBehaviour) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *InMemoryMachine) DeepCopyInto(out *InMemoryMachine) { *out = *in @@ -165,6 +213,41 @@ func (in *InMemoryMachine) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InMemoryMachineBehaviour) DeepCopyInto(out *InMemoryMachineBehaviour) { + *out = *in + if in.VM != nil { + in, out := &in.VM, &out.VM + *out = new(InMemoryVMBehaviour) + **out = **in + } + if in.Node != nil { + in, out := &in.Node, &out.Node + *out = new(InMemoryNodeBehaviour) + **out = **in + } + if in.APIServer != nil { + in, out := &in.APIServer, &out.APIServer + *out = new(InMemoryAPIServerBehaviour) + **out = **in + } + if in.Etcd != nil { + in, out := &in.Etcd, &out.Etcd + *out = new(InMemoryEtcdBehaviour) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InMemoryMachineBehaviour. +func (in *InMemoryMachineBehaviour) DeepCopy() *InMemoryMachineBehaviour { + if in == nil { + return nil + } + out := new(InMemoryMachineBehaviour) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *InMemoryMachineList) DeepCopyInto(out *InMemoryMachineList) { *out = *in @@ -205,6 +288,11 @@ func (in *InMemoryMachineSpec) DeepCopyInto(out *InMemoryMachineSpec) { *out = new(string) **out = **in } + if in.Behaviour != nil { + in, out := &in.Behaviour, &out.Behaviour + *out = new(InMemoryMachineBehaviour) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InMemoryMachineSpec. @@ -329,3 +417,35 @@ func (in *InMemoryMachineTemplateSpec) DeepCopy() *InMemoryMachineTemplateSpec { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InMemoryNodeBehaviour) DeepCopyInto(out *InMemoryNodeBehaviour) { + *out = *in + out.Provisioning = in.Provisioning +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InMemoryNodeBehaviour. +func (in *InMemoryNodeBehaviour) DeepCopy() *InMemoryNodeBehaviour { + if in == nil { + return nil + } + out := new(InMemoryNodeBehaviour) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InMemoryVMBehaviour) DeepCopyInto(out *InMemoryVMBehaviour) { + *out = *in + out.Provisioning = in.Provisioning +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InMemoryVMBehaviour. 
+func (in *InMemoryVMBehaviour) DeepCopy() *InMemoryVMBehaviour {
+	if in == nil {
+		return nil
+	}
+	out := new(InMemoryVMBehaviour)
+	in.DeepCopyInto(out)
+	return out
+}
diff --git a/test/infrastructure/inmemory/config/crd/bases/infrastructure.cluster.x-k8s.io_inmemorymachines.yaml b/test/infrastructure/inmemory/config/crd/bases/infrastructure.cluster.x-k8s.io_inmemorymachines.yaml
index 4c58896319fb..2bad196d1996 100644
--- a/test/infrastructure/inmemory/config/crd/bases/infrastructure.cluster.x-k8s.io_inmemorymachines.yaml
+++ b/test/infrastructure/inmemory/config/crd/bases/infrastructure.cluster.x-k8s.io_inmemorymachines.yaml
@@ -57,6 +57,124 @@ spec:
           spec:
             description: InMemoryMachineSpec defines the desired state of InMemoryMachine.
             properties:
+              behaviour:
+                description: Behaviour of the InMemoryMachine; this allows making a
+                  simulation closer to real use cases, e.g. by defining the duration
+                  of the provisioning phase to mimic the performance of the target
+                  infrastructure.
+                properties:
+                  apiServer:
+                    description: APIServer defines the behaviour of the APIServer
+                      hosted on the InMemoryMachine.
+                    properties:
+                      provisioning:
+                        description: 'Provisioning defines variables influencing how
+                          the APIServer hosted on the InMemoryMachine is going to
+                          be provisioned. NOTE: APIServer provisioning includes all
+                          the steps from starting the static Pod to the Pod becoming
+                          ready and being registered in K8s.'
+                        properties:
+                          startupDuration:
+                            description: StartupDuration defines the duration of the
+                              object provisioning phase.
+                            type: string
+                          startupJitter:
+                            description: 'StartupJitter adds some randomness on StartupDuration;
+                              the actual duration will be StartupDuration plus an
+                              additional amount chosen uniformly at random from the
+                              interval between zero and `StartupJitter*StartupDuration`.
+                              NOTE: this is modeled as string because the usage of
+                              float is highly discouraged, as support for them varies
+                              across languages.'
+                            type: string
+                        required:
+                        - startupDuration
+                        type: object
+                    type: object
+                  etcd:
+                    description: Etcd defines the behaviour of the etcd member hosted
+                      on the InMemoryMachine.
+                    properties:
+                      provisioning:
+                        description: 'Provisioning defines variables influencing how
+                          the etcd member hosted on the InMemoryMachine is going to
+                          be provisioned. NOTE: Etcd provisioning includes all the
+                          steps from starting the static Pod to the Pod becoming ready
+                          and being registered in K8s.'
+                        properties:
+                          startupDuration:
+                            description: StartupDuration defines the duration of the
+                              object provisioning phase.
+                            type: string
+                          startupJitter:
+                            description: 'StartupJitter adds some randomness on StartupDuration;
+                              the actual duration will be StartupDuration plus an
+                              additional amount chosen uniformly at random from the
+                              interval between zero and `StartupJitter*StartupDuration`.
+                              NOTE: this is modeled as string because the usage of
+                              float is highly discouraged, as support for them varies
+                              across languages.'
+                            type: string
+                        required:
+                        - startupDuration
+                        type: object
+                    type: object
+                  node:
+                    description: Node defines the behaviour of the Node (the kubelet)
+                      hosted on the InMemoryMachine.
+                    properties:
+                      provisioning:
+                        description: 'Provisioning defines variables influencing how
+                          the Node (the kubelet) hosted on the InMemoryMachine is
+                          going to be provisioned. NOTE: Node provisioning includes
+                          all the steps from starting the kubelet to the node becoming
+                          ready, getting a provider ID, and being registered in K8s.'
+                        properties:
+                          startupDuration:
+                            description: StartupDuration defines the duration of the
+                              object provisioning phase.
+                            type: string
+                          startupJitter:
+                            description: 'StartupJitter adds some randomness on StartupDuration;
+                              the actual duration will be StartupDuration plus an
+                              additional amount chosen uniformly at random from the
+                              interval between zero and `StartupJitter*StartupDuration`.
+                              NOTE: this is modeled as string because the usage of
+                              float is highly discouraged, as support for them varies
+                              across languages.'
+                            type: string
+                        required:
+                        - startupDuration
+                        type: object
+                    type: object
+                  vm:
+                    description: VM defines the behaviour of the VM implementing the
+                      InMemoryMachine.
+                    properties:
+                      provisioning:
+                        description: 'Provisioning defines variables influencing how
+                          the VM implementing the InMemoryMachine is going to be provisioned.
+                          NOTE: VM provisioning includes all the steps from creation
+                          to power-on.'
+                        properties:
+                          startupDuration:
+                            description: StartupDuration defines the duration of the
+                              object provisioning phase.
+                            type: string
+                          startupJitter:
+                            description: 'StartupJitter adds some randomness on StartupDuration;
+                              the actual duration will be StartupDuration plus an
+                              additional amount chosen uniformly at random from the
+                              interval between zero and `StartupJitter*StartupDuration`.
+                              NOTE: this is modeled as string because the usage of
+                              float is highly discouraged, as support for them varies
+                              across languages.'
+                            type: string
+                        required:
+                        - startupDuration
+                        type: object
+                    type: object
+                type: object
               providerID:
                 description: ProviderID will be the container name in ProviderID
                   format (in-memory:////)
diff --git a/test/infrastructure/inmemory/config/crd/bases/infrastructure.cluster.x-k8s.io_inmemorymachinetemplates.yaml b/test/infrastructure/inmemory/config/crd/bases/infrastructure.cluster.x-k8s.io_inmemorymachinetemplates.yaml
index 8b3ff12c17cf..fb425e38c9be 100644
--- a/test/infrastructure/inmemory/config/crd/bases/infrastructure.cluster.x-k8s.io_inmemorymachinetemplates.yaml
+++ b/test/infrastructure/inmemory/config/crd/bases/infrastructure.cluster.x-k8s.io_inmemorymachinetemplates.yaml
@@ -72,6 +72,131 @@ spec:
               description: Spec is the specification of the desired behavior of
                 the machine.
               properties:
+                behaviour:
+                  description: Behaviour of the InMemoryMachine; this allows making
+                    a simulation closer to real use cases, e.g. by defining the
+                    duration of the provisioning phase to mimic the performance
+                    of the target infrastructure.
+                  properties:
+                    apiServer:
+                      description: APIServer defines the behaviour of the APIServer
+                        hosted on the InMemoryMachine.
+                      properties:
+                        provisioning:
+                          description: 'Provisioning defines variables influencing
+                            how the APIServer hosted on the InMemoryMachine
+                            is going to be provisioned. NOTE: APIServer provisioning
+                            includes all the steps from starting the static
+                            Pod to the Pod becoming ready and being registered
+                            in K8s.'
+                          properties:
+                            startupDuration:
+                              description: StartupDuration defines the duration
+                                of the object provisioning phase.
+                              type: string
+                            startupJitter:
+                              description: 'StartupJitter adds some randomness
+                                on StartupDuration; the actual duration will
+                                be StartupDuration plus an additional amount
+                                chosen uniformly at random from the interval
+                                between zero and `StartupJitter*StartupDuration`.
+                                NOTE: this is modeled as string because the
+                                usage of float is highly discouraged, as support
+                                for them varies across languages.'
+                              type: string
+                          required:
+                          - startupDuration
+                          type: object
+                      type: object
+                    etcd:
+                      description: Etcd defines the behaviour of the etcd member
+                        hosted on the InMemoryMachine.
+                      properties:
+                        provisioning:
+                          description: 'Provisioning defines variables influencing
+                            how the etcd member hosted on the InMemoryMachine
+                            is going to be provisioned. NOTE: Etcd provisioning
+                            includes all the steps from starting the static
+                            Pod to the Pod becoming ready and being registered
+                            in K8s.'
+                          properties:
+                            startupDuration:
+                              description: StartupDuration defines the duration
+                                of the object provisioning phase.
+                              type: string
+                            startupJitter:
+                              description: 'StartupJitter adds some randomness
+                                on StartupDuration; the actual duration will
+                                be StartupDuration plus an additional amount
+                                chosen uniformly at random from the interval
+                                between zero and `StartupJitter*StartupDuration`.
+                                NOTE: this is modeled as string because the
+                                usage of float is highly discouraged, as support
+                                for them varies across languages.'
+                              type: string
+                          required:
+                          - startupDuration
+                          type: object
+                      type: object
+                    node:
+                      description: Node defines the behaviour of the Node (the
+                        kubelet) hosted on the InMemoryMachine.
+                      properties:
+                        provisioning:
+                          description: 'Provisioning defines variables influencing
+                            how the Node (the kubelet) hosted on the InMemoryMachine
+                            is going to be provisioned. NOTE: Node provisioning
+                            includes all the steps from starting the kubelet to
+                            the node becoming ready, getting a provider ID, and
+                            being registered in K8s.'
+                          properties:
+                            startupDuration:
+                              description: StartupDuration defines the duration
+                                of the object provisioning phase.
+                              type: string
+                            startupJitter:
+                              description: 'StartupJitter adds some randomness
+                                on StartupDuration; the actual duration will
+                                be StartupDuration plus an additional amount
+                                chosen uniformly at random from the interval
+                                between zero and `StartupJitter*StartupDuration`.
+                                NOTE: this is modeled as string because the
+                                usage of float is highly discouraged, as support
+                                for them varies across languages.'
+                              type: string
+                          required:
+                          - startupDuration
+                          type: object
+                      type: object
+                    vm:
+                      description: VM defines the behaviour of the VM implementing
+                        the InMemoryMachine.
+                      properties:
+                        provisioning:
+                          description: 'Provisioning defines variables influencing
+                            how the VM implementing the InMemoryMachine is going
+                            to be provisioned. NOTE: VM provisioning includes
+                            all the steps from creation to power-on.'
+                          properties:
+                            startupDuration:
+                              description: StartupDuration defines the duration
+                                of the object provisioning phase.
+                              type: string
+                            startupJitter:
+                              description: 'StartupJitter adds some randomness
+                                on StartupDuration; the actual duration will
+                                be StartupDuration plus an additional amount
+                                chosen uniformly at random from the interval
+                                between zero and `StartupJitter*StartupDuration`.
+                                NOTE: this is modeled as string because the
+                                usage of float is highly discouraged, as support
+                                for them varies across languages.'
+                              type: string
+                          required:
+                          - startupDuration
+                          type: object
+                      type: object
+                  type: object
                 providerID:
                   description: ProviderID will be the container name in ProviderID
                     format (in-memory:////)
diff --git a/test/infrastructure/inmemory/internal/controllers/inmemorymachine_controller.go b/test/infrastructure/inmemory/internal/controllers/inmemorymachine_controller.go
index 80ee58010a38..4dd00edb9e5c 100644
--- a/test/infrastructure/inmemory/internal/controllers/inmemorymachine_controller.go
+++ b/test/infrastructure/inmemory/internal/controllers/inmemorymachine_controller.go
@@ -22,6 +22,7 @@ import (
 	"crypto/rsa"
 	"fmt"
 	"math/rand"
+	"strconv"
 	"time"
 
 	"github.com/pkg/errors"
@@ -29,6 +30,7 @@ import (
 	rbacv1 "k8s.io/api/rbac/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	kerrors "k8s.io/apimachinery/pkg/util/errors"
 	"k8s.io/klog/v2"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/builder"
@@ -139,7 +141,23 @@
 	// Always attempt to Patch the InMemoryMachine object and status after each reconciliation.
 	defer func() {
-		if err := patchHelper.Patch(ctx, inMemoryMachine); err != nil {
+		inMemoryMachineConditions := []clusterv1.ConditionType{
+			infrav1.VMProvisionedCondition,
+			infrav1.NodeProvisionedCondition,
+		}
+		if util.IsControlPlaneMachine(machine) {
+			inMemoryMachineConditions = append(inMemoryMachineConditions,
+				infrav1.EtcdProvisionedCondition,
+				infrav1.APIServerProvisionedCondition,
+			)
+		}
+		// Always update the readyCondition by summarizing the state of other conditions.
+		// A step counter is added to represent progress during the provisioning process
+		// (while the step counter is hidden during the deletion process).
+		conditions.SetSummary(inMemoryMachine,
+			conditions.WithConditions(inMemoryMachineConditions...),
+			conditions.WithStepCounterIf(inMemoryMachine.ObjectMeta.DeletionTimestamp.IsZero() && inMemoryMachine.Spec.ProviderID == nil),
+		)
+		if err := patchHelper.Patch(ctx, inMemoryMachine, patch.WithOwnedConditions{Conditions: inMemoryMachineConditions}); err != nil {
 			log.Error(err, "failed to patch InMemoryMachine")
 			if rerr == nil {
 				rerr = err
@@ -165,13 +183,9 @@
 func (r *InMemoryMachineReconciler) reconcileNormal(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) {
 	log := ctrl.LoggerFrom(ctx)
 
-	// Compute the resource group unique name.
-	// NOTE: We are using reconcilerGroup also as a name for the listener for sake of simplicity.
-	resourceGroup := klog.KObj(cluster).String()
-	cloudClient := r.CloudManager.GetResourceGroup(resourceGroup).GetClient()
-
 	// Check if the infrastructure is ready, otherwise return and wait for the cluster object to be updated
 	if !cluster.Status.InfrastructureReady {
+		conditions.MarkFalse(inMemoryMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForClusterInfrastructureReason, clusterv1.ConditionSeverityInfo, "")
 		log.Info("Waiting for InMemoryCluster Controller to create cluster infrastructure")
 		return ctrl.Result{}, nil
 	}
@@ -181,28 +195,131 @@
 	// provisioning workflow.
 	if machine.Spec.Bootstrap.DataSecretName == nil {
 		if !util.IsControlPlaneMachine(machine) && !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) {
+			conditions.MarkFalse(inMemoryMachine, infrav1.VMProvisionedCondition, infrav1.WaitingControlPlaneInitializedReason, clusterv1.ConditionSeverityInfo, "")
 			log.Info("Waiting for the control plane to be initialized")
 			return ctrl.Result{}, nil
 		}
 
+		conditions.MarkFalse(inMemoryMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForBootstrapDataReason, clusterv1.ConditionSeverityInfo, "")
 		log.Info("Waiting for the Bootstrap provider controller to set bootstrap data")
 		return ctrl.Result{}, nil
 	}
 
-	// Create VM
+	// Call the inner reconciliation methods.
+	phases := []func(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error){
+		r.reconcileNormalCloudMachine,
+		r.reconcileNormalNode,
+		r.reconcileNormalETCD,
+		r.reconcileNormalAPIServer,
+		r.reconcileNormalScheduler,
+		r.reconcileNormalControllerManager,
+		r.reconcileNormalKubeadmObjects,
+	}
+
+	res := ctrl.Result{}
+	errs := []error{}
+	for _, phase := range phases {
+		phaseResult, err := phase(ctx, cluster, machine, inMemoryMachine)
+		if err != nil {
+			errs = append(errs, err)
+		}
+		if len(errs) > 0 {
+			continue
+		}
+		// TODO: consider whether to use max(RequeueAfter) instead of min(RequeueAfter) to reduce the pressure on
+		// the reconcile queue for InMemoryMachines, given that we are requeuing just to wait for some period to expire;
+		// the downside of it is that the InMemoryMachine status would change in "big steps" instead of incrementally.
+		res = util.LowestNonZeroResult(res, phaseResult)
+	}
+	return res, kerrors.NewAggregate(errs)
+}
+
+func (r *InMemoryMachineReconciler) reconcileNormalCloudMachine(ctx context.Context, cluster *clusterv1.Cluster, _ *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) {
+	// Compute the resource group unique name.
+	// NOTE: We are using reconcilerGroup also as a name for the listener for sake of simplicity.
+	resourceGroup := klog.KObj(cluster).String()
+	cloudClient := r.CloudManager.GetResourceGroup(resourceGroup).GetClient()
+
+	// Create VM; a Cloud VM can be created as soon as the Infra Machine is created
 	// NOTE: for sake of simplicity we keep cloud resources as global resources (namespace empty).
-	// TODO: we should convert this into a full reconcile (if it exist, update)
 	cloudMachine := &cloudv1.CloudMachine{
 		ObjectMeta: metav1.ObjectMeta{
 			Name: inMemoryMachine.Name,
 		},
 	}
-	if err := cloudClient.Create(ctx, cloudMachine); err != nil && !apierrors.IsAlreadyExists(err) {
-		return ctrl.Result{}, errors.Wrapf(err, "failed to create CloudMachine")
+	if err := cloudClient.Get(ctx, client.ObjectKeyFromObject(cloudMachine), cloudMachine); err != nil {
+		if !apierrors.IsNotFound(err) {
+			return ctrl.Result{}, err
+		}
+
+		if err := cloudClient.Create(ctx, cloudMachine); err != nil && !apierrors.IsAlreadyExists(err) {
+			return ctrl.Result{}, errors.Wrapf(err, "failed to create CloudMachine")
+		}
+	}
+
+	// Wait for the VM to be provisioned; provisioning completes a configurable time after the cloud machine is created.
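+	// For example: with startupDuration "10s" and startupJitter "0.2", the effective wait is
+	// 10s plus up to 2s of random jitter, i.e. the VM is marked as provisioned between 10s
+	// and 12s after the CloudMachine creation timestamp.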
+	provisioningDuration := time.Duration(0)
+	if inMemoryMachine.Spec.Behaviour != nil && inMemoryMachine.Spec.Behaviour.VM != nil {
+		x := inMemoryMachine.Spec.Behaviour.VM.Provisioning
+
+		provisioningDuration = x.StartupDuration.Duration
+		jitter, err := strconv.ParseFloat(x.StartupJitter, 64)
+		if err != nil {
+			return ctrl.Result{}, errors.Wrapf(err, "failed to parse VM's StartupJitter")
+		}
+		if jitter > 0.0 {
+			provisioningDuration += time.Duration(rand.Float64() * jitter * float64(provisioningDuration)) //nolint:gosec // Intentionally using a weak random number generator here.
+		}
+	}
+
+	start := cloudMachine.CreationTimestamp
+	now := time.Now()
+	if now.Before(start.Add(provisioningDuration)) {
+		conditions.MarkFalse(inMemoryMachine, infrav1.VMProvisionedCondition, infrav1.VMWaitingForStartupTimeoutReason, clusterv1.ConditionSeverityInfo, "")
+		return ctrl.Result{RequeueAfter: start.Add(provisioningDuration).Sub(now)}, nil
+	}
+
+	// TODO: consider whether to surface VM provisioned also on the cloud machine (currently it surfaces only on the inMemoryMachine)
+
+	conditions.MarkTrue(inMemoryMachine, infrav1.VMProvisionedCondition)
+	return ctrl.Result{}, nil
+}
+
+func (r *InMemoryMachineReconciler) reconcileNormalNode(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) {
+	// No-op if the VM is not provisioned yet
+	if !conditions.IsTrue(inMemoryMachine, infrav1.VMProvisionedCondition) {
+		return ctrl.Result{}, nil
+	}
+
+	// Wait for the node/kubelet to start up; node/kubelet startup happens a configurable time after the VM is provisioned.
+	provisioningDuration := time.Duration(0)
+	if inMemoryMachine.Spec.Behaviour != nil && inMemoryMachine.Spec.Behaviour.Node != nil {
+		x := inMemoryMachine.Spec.Behaviour.Node.Provisioning
+
+		provisioningDuration = x.StartupDuration.Duration
+		jitter, err := strconv.ParseFloat(x.StartupJitter, 64)
+		if err != nil {
+			return ctrl.Result{}, errors.Wrapf(err, "failed to parse node's StartupJitter")
+		}
+		if jitter > 0.0 {
+			provisioningDuration += time.Duration(rand.Float64() * jitter * float64(provisioningDuration)) //nolint:gosec // Intentionally using a weak random number generator here.
+		}
+	}
+
+	start := conditions.Get(inMemoryMachine, infrav1.VMProvisionedCondition).LastTransitionTime
+	now := time.Now()
+	if now.Before(start.Add(provisioningDuration)) {
+		conditions.MarkFalse(inMemoryMachine, infrav1.NodeProvisionedCondition, infrav1.NodeWaitingForStartupTimeoutReason, clusterv1.ConditionSeverityInfo, "")
+		return ctrl.Result{RequeueAfter: start.Add(provisioningDuration).Sub(now)}, nil
 	}

+	// Compute the resource group unique name.
+	// NOTE: We are using reconcilerGroup also as a name for the listener for sake of simplicity.
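+	// klog.KObj(cluster).String() renders as "<namespace>/<name>", which is unique per cluster
+	// and is therefore safe to reuse as both the resource group and the listener name.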
+	resourceGroup := klog.KObj(cluster).String()
+	cloudClient := r.CloudManager.GetResourceGroup(resourceGroup).GetClient()
+
 	// Create Node
-	// TODO: we should convert this into a full reconcile (if it exist, update)
+	// TODO: consider whether to handle an additional setting adding a delay in between create node and node ready/provider ID being set
 	node := &corev1.Node{
 		ObjectMeta: metav1.ObjectMeta{
 			Name: inMemoryMachine.Name,
@@ -225,20 +342,106 @@ func (r *InMemoryMachineReconciler) reconcileNormal(ctx context.Context, cluster
 		}
 		node.Labels["node-role.kubernetes.io/control-plane"] = ""
 	}
-	if err := cloudClient.Create(ctx, node); err != nil && !apierrors.IsAlreadyExists(err) {
-		return ctrl.Result{}, errors.Wrapf(err, "failed to create Node")
+
+	if err := cloudClient.Get(ctx, client.ObjectKeyFromObject(node), node); err != nil {
+		if !apierrors.IsNotFound(err) {
+			return ctrl.Result{}, errors.Wrapf(err, "failed to get node")
+		}
+
+		// NOTE: for the first control plane machine we might create the node before etcd and API server pod are running
+		// but this is not an issue, because it won't be visible to CAPI until the API server starts serving requests.
+		if err := cloudClient.Create(ctx, node); err != nil && !apierrors.IsAlreadyExists(err) {
+			return ctrl.Result{}, errors.Wrapf(err, "failed to create Node")
+		}
 	}

-	// NOTE: this can probably go up, after the VM is created
 	inMemoryMachine.Spec.ProviderID = &node.Spec.ProviderID
 	inMemoryMachine.Status.Ready = true
+	conditions.MarkTrue(inMemoryMachine, infrav1.NodeProvisionedCondition)
+	return ctrl.Result{}, nil
+}
+
+func (r *InMemoryMachineReconciler) reconcileNormalETCD(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) {
+	// No-op if the machine is not a control plane machine.
 	if !util.IsControlPlaneMachine(machine) {
 		return ctrl.Result{}, nil
 	}

-	// If there is not yet an etcd member listener for this machine.
+	// No-op if the Node is not provisioned yet
+	if !conditions.IsTrue(inMemoryMachine, infrav1.NodeProvisionedCondition) {
+		return ctrl.Result{}, nil
+	}
+
+	// Wait for the etcd pod to start up; etcd pod startup happens a configurable time after the Node is provisioned.
+	provisioningDuration := time.Duration(0)
+	if inMemoryMachine.Spec.Behaviour != nil && inMemoryMachine.Spec.Behaviour.Etcd != nil {
+		x := inMemoryMachine.Spec.Behaviour.Etcd.Provisioning
+
+		provisioningDuration = x.StartupDuration.Duration
+		jitter, err := strconv.ParseFloat(x.StartupJitter, 64)
+		if err != nil {
+			return ctrl.Result{}, errors.Wrapf(err, "failed to parse etcd's StartupJitter")
+		}
+		if jitter > 0.0 {
+			provisioningDuration += time.Duration(rand.Float64() * jitter * float64(provisioningDuration)) //nolint:gosec // Intentionally using a weak random number generator here.
+		}
+	}
+
+	start := conditions.Get(inMemoryMachine, infrav1.NodeProvisionedCondition).LastTransitionTime
+	now := time.Now()
+	if now.Before(start.Add(provisioningDuration)) {
+		conditions.MarkFalse(inMemoryMachine, infrav1.EtcdProvisionedCondition, infrav1.EtcdWaitingForStartupTimeoutReason, clusterv1.ConditionSeverityInfo, "")
+		return ctrl.Result{RequeueAfter: start.Add(provisioningDuration).Sub(now)}, nil
+	}
+
+	// Compute the resource group unique name.
+	// NOTE: We are using reconcilerGroup also as a name for the listener for sake of simplicity.
+	resourceGroup := klog.KObj(cluster).String()
+	cloudClient := r.CloudManager.GetResourceGroup(resourceGroup).GetClient()
+
+	// Create the etcd pod
+	// TODO: consider whether to handle an additional setting adding a delay in between create pod and pod ready
 	etcdMember := fmt.Sprintf("etcd-%s", inMemoryMachine.Name)
+	etcdPod := &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: metav1.NamespaceSystem,
+			Name:      etcdMember,
+			Labels: map[string]string{
+				"component": "etcd",
+				"tier":      "control-plane",
+			},
+			Annotations: map[string]string{
+				// TODO: read this from existing etcd pods, if any, otherwise all the members will get a different ClusterID.
+				"etcd.inmemory.infrastructure.cluster.x-k8s.io/cluster-id": fmt.Sprintf("%d", rand.Uint32()), //nolint:gosec // weak random number generator is good enough here
+				"etcd.inmemory.infrastructure.cluster.x-k8s.io/member-id":  fmt.Sprintf("%d", rand.Uint32()), //nolint:gosec // weak random number generator is good enough here
+				// TODO: set this only if there are no other leaders.
+				"etcd.inmemory.infrastructure.cluster.x-k8s.io/leader-from": time.Now().Format(time.RFC3339),
+			},
+		},
+		Status: corev1.PodStatus{
+			Phase: corev1.PodRunning,
+			Conditions: []corev1.PodCondition{
+				{
+					Type:   corev1.PodReady,
+					Status: corev1.ConditionTrue,
+				},
+			},
+		},
+	}
+	if err := cloudClient.Get(ctx, client.ObjectKeyFromObject(etcdPod), etcdPod); err != nil {
+		if !apierrors.IsNotFound(err) {
+			return ctrl.Result{}, errors.Wrapf(err, "failed to get etcd Pod")
+		}

+		// NOTE: for the first control plane machine we might create the etcd pod before the API server pod is running
+		// but this is not an issue, because it won't be visible to CAPI until the API server starts serving requests.
+		if err := cloudClient.Create(ctx, etcdPod); err != nil && !apierrors.IsAlreadyExists(err) {
+			return ctrl.Result{}, errors.Wrapf(err, "failed to create etcd Pod")
+		}
+	}
+
+	// If there is not yet an etcd member listener for this machine, add it to the server.
 	if !r.APIServerMux.HasEtcdMember(resourceGroup, etcdMember) {
 		// Getting the etcd CA
 		s, err := secret.Get(ctx, r.Client, client.ObjectKeyFromObject(cluster), secret.EtcdCA)
@@ -270,8 +473,82 @@ func (r *InMemoryMachineReconciler) reconcileNormal(ctx context.Context, cluster
 		}
 	}

-	// If there is not yet an API server listener for this machine.
+	conditions.MarkTrue(inMemoryMachine, infrav1.EtcdProvisionedCondition)
+	return ctrl.Result{}, nil
+}
+
+func (r *InMemoryMachineReconciler) reconcileNormalAPIServer(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) {
+	// No-op if the machine is not a control plane machine.
+	if !util.IsControlPlaneMachine(machine) {
+		return ctrl.Result{}, nil
+	}
+
+	// No-op if the Node is not provisioned yet
+	if !conditions.IsTrue(inMemoryMachine, infrav1.NodeProvisionedCondition) {
+		return ctrl.Result{}, nil
+	}
+
+	// Wait for the API server pod to start up; API server pod startup happens a configurable time after the Node is provisioned.
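+	// Note: both the etcd and the API server startup delays are measured from the LastTransitionTime
+	// of NodeProvisionedCondition, so the two simulated startups elapse concurrently rather than sequentially.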
+	provisioningDuration := time.Duration(0)
+	if inMemoryMachine.Spec.Behaviour != nil && inMemoryMachine.Spec.Behaviour.APIServer != nil {
+		x := inMemoryMachine.Spec.Behaviour.APIServer.Provisioning
+
+		provisioningDuration = x.StartupDuration.Duration
+		jitter, err := strconv.ParseFloat(x.StartupJitter, 64)
+		if err != nil {
+			return ctrl.Result{}, errors.Wrapf(err, "failed to parse API server's StartupJitter")
+		}
+		if jitter > 0.0 {
+			provisioningDuration += time.Duration(rand.Float64() * jitter * float64(provisioningDuration)) //nolint:gosec // Intentionally using a weak random number generator here.
+		}
+	}
+
+	start := conditions.Get(inMemoryMachine, infrav1.NodeProvisionedCondition).LastTransitionTime
+	now := time.Now()
+	if now.Before(start.Add(provisioningDuration)) {
+		conditions.MarkFalse(inMemoryMachine, infrav1.APIServerProvisionedCondition, infrav1.APIServerWaitingForStartupTimeoutReason, clusterv1.ConditionSeverityInfo, "")
+		return ctrl.Result{RequeueAfter: start.Add(provisioningDuration).Sub(now)}, nil
+	}
+
+	// Compute the resource group unique name.
+	// NOTE: We are using reconcilerGroup also as a name for the listener for sake of simplicity.
+	resourceGroup := klog.KObj(cluster).String()
+	cloudClient := r.CloudManager.GetResourceGroup(resourceGroup).GetClient()
+
+	// Create the apiserver pod
+	// TODO: consider whether to handle an additional setting adding a delay in between create pod and pod ready
 	apiServer := fmt.Sprintf("kube-apiserver-%s", inMemoryMachine.Name)
+
+	apiServerPod := &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: metav1.NamespaceSystem,
+			Name:      apiServer,
+			Labels: map[string]string{
+				"component": "kube-apiserver",
+				"tier":      "control-plane",
+			},
+		},
+		Status: corev1.PodStatus{
+			Phase: corev1.PodRunning,
+			Conditions: []corev1.PodCondition{
+				{
+					Type:   corev1.PodReady,
+					Status: corev1.ConditionTrue,
+				},
+			},
+		},
+	}
+	if err := cloudClient.Get(ctx, client.ObjectKeyFromObject(apiServerPod), apiServerPod); err != nil {
+		if !apierrors.IsNotFound(err) {
+			return ctrl.Result{}, errors.Wrapf(err, "failed to get apiServer Pod")
+		}

+		if err := cloudClient.Create(ctx, apiServerPod); err != nil && !apierrors.IsAlreadyExists(err) {
+			return ctrl.Result{}, errors.Wrapf(err, "failed to create apiServer Pod")
+		}
+	}
+
+	// If there is not yet an API server listener for this machine.
 	if !r.APIServerMux.HasAPIServer(resourceGroup, apiServer) {
 		// Getting the Kubernetes CA
 		s, err := secret.Get(ctx, r.Client, client.ObjectKeyFromObject(cluster), secret.ClusterCA)
@@ -305,67 +582,28 @@ func (r *InMemoryMachineReconciler) reconcileNormal(ctx context.Context, cluster
 		}
 	}

-	// TBD is this is the right point in the sequence, the pod shows up after API server is running.
+	conditions.MarkTrue(inMemoryMachine, infrav1.APIServerProvisionedCondition)
+	return ctrl.Result{}, nil
+}

-	etcdPod := &corev1.Pod{
-		ObjectMeta: metav1.ObjectMeta{
-			Namespace: metav1.NamespaceSystem,
-			Name:      etcdMember,
-		},
-		Status: corev1.PodStatus{
-			Phase: corev1.PodRunning,
-			Conditions: []corev1.PodCondition{
-				{
-					Type:   corev1.PodReady,
-					Status: corev1.ConditionTrue,
-				},
-			},
-		},
+func (r *InMemoryMachineReconciler) reconcileNormalScheduler(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) {
+	// No-op if the machine is not a control plane machine.
+	if !util.IsControlPlaneMachine(machine) {
+		return ctrl.Result{}, nil
 	}
-	if err := cloudClient.Get(ctx, client.ObjectKeyFromObject(etcdPod), etcdPod); err != nil {
-		if !apierrors.IsNotFound(err) {
-			return ctrl.Result{}, errors.Wrapf(err, "failed to get etcd Pod")
-		}
-		etcdPod.Labels = map[string]string{
-			"component": "etcd",
-			"tier":      "control-plane",
-		}
-		etcdPod.Annotations = map[string]string{
-			// TODO: read this from existing etcd pods, if any.
-			"etcd.inmemory.infrastructure.cluster.x-k8s.io/cluster-id": fmt.Sprintf("%d", rand.Uint32()), //nolint:gosec // weak random number generator is good enough here
-			"etcd.inmemory.infrastructure.cluster.x-k8s.io/member-id":  fmt.Sprintf("%d", rand.Uint32()), //nolint:gosec // weak random number generator is good enough here
-			// TODO: set this only if there are no other leaders.
-			"etcd.inmemory.infrastructure.cluster.x-k8s.io/leader-from": time.Now().Format(time.RFC3339),
-		}
-
-		if err := cloudClient.Create(ctx, etcdPod); err != nil && !apierrors.IsAlreadyExists(err) {
-			return ctrl.Result{}, errors.Wrapf(err, "failed to create etcdPod Pod")
-		}
+	// NOTE: we are creating the scheduler pod to make KCP happy, but we are not implementing any
+	// specific behaviour for this component because it is not relevant for stress tests.
+	// As a current approximation, we create the scheduler as soon as the API server is provisioned;
+	// also, the scheduler is immediately marked as ready.
+	if !conditions.IsTrue(inMemoryMachine, infrav1.APIServerProvisionedCondition) {
+		return ctrl.Result{}, nil
 	}

-	apiServerPod := &corev1.Pod{
-		ObjectMeta: metav1.ObjectMeta{
-			Namespace: metav1.NamespaceSystem,
-			Name:      apiServer,
-			Labels: map[string]string{
-				"component": "kube-apiserver",
-				"tier":      "control-plane",
-			},
-		},
-		Status: corev1.PodStatus{
-			Phase: corev1.PodRunning,
-			Conditions: []corev1.PodCondition{
-				{
-					Type:   corev1.PodReady,
-					Status: corev1.ConditionTrue,
-				},
-			},
-		},
-	}
-	if err := cloudClient.Create(ctx, apiServerPod); err != nil && !apierrors.IsAlreadyExists(err) {
-		return ctrl.Result{}, errors.Wrapf(err, "failed to create apiServer Pod")
-	}
+	// Compute the resource group unique name.
+	// NOTE: We are using reconcilerGroup also as a name for the listener for sake of simplicity.
+	resourceGroup := klog.KObj(cluster).String()
+	cloudClient := r.CloudManager.GetResourceGroup(resourceGroup).GetClient()

 	schedulerPod := &corev1.Pod{
 		ObjectMeta: metav1.ObjectMeta{
@@ -390,6 +628,28 @@ func (r *InMemoryMachineReconciler) reconcileNormal(ctx context.Context, cluster
 		return ctrl.Result{}, errors.Wrapf(err, "failed to create scheduler Pod")
 	}

+	return ctrl.Result{}, nil
+}
+
+func (r *InMemoryMachineReconciler) reconcileNormalControllerManager(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) {
+	// No-op if the machine is not a control plane machine.
+	if !util.IsControlPlaneMachine(machine) {
+		return ctrl.Result{}, nil
+	}
+
+	// NOTE: we are creating the controller manager pod to make KCP happy, but we are not implementing any
+	// specific behaviour for this component because it is not relevant for stress tests.
+	// As a current approximation, we create the controller manager as soon as the API server is provisioned;
+	// also, the controller manager is immediately marked as ready.
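+	// Note: unlike the VM, node, etcd, and API server phases, no startupDuration/startupJitter
+	// behaviour is applied here; the pod is created as soon as this phase runs.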
+	if !conditions.IsTrue(inMemoryMachine, infrav1.APIServerProvisionedCondition) {
+		return ctrl.Result{}, nil
+	}
+
+	// Compute the resource group unique name.
+	// NOTE: We are using reconcilerGroup also as a name for the listener for sake of simplicity.
+	resourceGroup := klog.KObj(cluster).String()
+	cloudClient := r.CloudManager.GetResourceGroup(resourceGroup).GetClient()
+
 	controllerManagerPod := &corev1.Pod{
 		ObjectMeta: metav1.ObjectMeta{
 			Namespace: metav1.NamespaceSystem,
@@ -413,13 +673,27 @@ func (r *InMemoryMachineReconciler) reconcileNormal(ctx context.Context, cluster
 		return ctrl.Result{}, errors.Wrapf(err, "failed to create controller manager Pod")
 	}

+	return ctrl.Result{}, nil
+}
+
+func (r *InMemoryMachineReconciler) reconcileNormalKubeadmObjects(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine, _ *infrav1.InMemoryMachine) (ctrl.Result, error) {
+	// No-op if the machine is not a control plane machine.
+	if !util.IsControlPlaneMachine(machine) {
+		return ctrl.Result{}, nil
+	}
+
+	// Compute the resource group unique name.
+	// NOTE: We are using reconcilerGroup also as a name for the listener for sake of simplicity.
+	resourceGroup := klog.KObj(cluster).String()
+	cloudClient := r.CloudManager.GetResourceGroup(resourceGroup).GetClient()
+
 	// create kubeadm ClusterRole and ClusterRoleBinding enforced by KCP
-	// TODO: drop this as soon as we implement CREATE
+	// NOTE: we create those objects because this is what kubeadm does, but KCP creates
+	// ClusterRole and ClusterRoleBinding if not found.
 	role := &rbacv1.ClusterRole{
 		ObjectMeta: metav1.ObjectMeta{
 			Name: "kubeadm:get-nodes",
-			// Namespace: metav1.NamespaceSystem, // TODO: drop in kubeadm? Yup!
 		},
 		Rules: []rbacv1.PolicyRule{
 			{
@@ -436,7 +710,6 @@ func (r *InMemoryMachineReconciler) reconcileNormal(ctx context.Context, cluster
 	roleBinding := &rbacv1.ClusterRoleBinding{
 		ObjectMeta: metav1.ObjectMeta{
 			Name: "kubeadm:get-nodes",
-			// Namespace: metav1.NamespaceSystem, // TODO: drop in kubeadm? Yup!
 		},
 		RoleRef: rbacv1.RoleRef{
 			APIGroup: rbacv1.GroupName,
@@ -469,6 +742,37 @@ func (r *InMemoryMachineReconciler) reconcileNormal(ctx context.Context, cluster
 }

 func (r *InMemoryMachineReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) {
+	// Call the inner reconciliation methods.
+	phases := []func(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error){
+		// TODO: revisit order when we implement behaviour for the deletion workflow
+		r.reconcileDeleteNode,
+		r.reconcileDeleteETCD,
+		r.reconcileDeleteAPIServer,
+		r.reconcileDeleteScheduler,
+		r.reconcileDeleteControllerManager,
+		r.reconcileDeleteCloudMachine,
+		// Note: We are not deleting kubeadm objects because they exist in K8s and are not related to a specific machine.
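+		// Note: deletion phases currently run without simulated delays, so objects are removed immediately.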
+ } + + res := ctrl.Result{} + errs := []error{} + for _, phase := range phases { + phaseResult, err := phase(ctx, cluster, machine, inMemoryMachine) + if err != nil { + errs = append(errs, err) + } + if len(errs) > 0 { + continue + } + res = util.LowestNonZeroResult(res, phaseResult) + } + if res.IsZero() && len(errs) == 0 { + controllerutil.RemoveFinalizer(inMemoryMachine, infrav1.MachineFinalizer) + } + return res, kerrors.NewAggregate(errs) +} + +func (r *InMemoryMachineReconciler) reconcileDeleteCloudMachine(ctx context.Context, cluster *clusterv1.Cluster, _ *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) { // Compute the resource group unique name. // NOTE: We are using reconcilerGroup also as a name for the listener for sake of simplicity. resourceGroup := klog.KObj(cluster).String() @@ -484,6 +788,15 @@ func (r *InMemoryMachineReconciler) reconcileDelete(ctx context.Context, cluster return ctrl.Result{}, errors.Wrapf(err, "failed to delete CloudMachine") } + return ctrl.Result{}, nil +} + +func (r *InMemoryMachineReconciler) reconcileDeleteNode(ctx context.Context, cluster *clusterv1.Cluster, _ *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) { + // Compute the resource group unique name. + // NOTE: We are using reconcilerGroup also as a name for the listener for sake of simplicity. + resourceGroup := klog.KObj(cluster).String() + cloudClient := r.CloudManager.GetResourceGroup(resourceGroup).GetClient() + // Delete Node node := &corev1.Node{ ObjectMeta: metav1.ObjectMeta{ @@ -494,62 +807,115 @@ func (r *InMemoryMachineReconciler) reconcileDelete(ctx context.Context, cluster return ctrl.Result{}, errors.Wrapf(err, "failed to delete Node") } - if util.IsControlPlaneMachine(machine) { - controllerManagerPod := &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: metav1.NamespaceSystem, - Name: fmt.Sprintf("kube-controller-manager-%s", inMemoryMachine.Name), - }, - } - if err := cloudClient.Delete(ctx, controllerManagerPod); err != nil && !apierrors.IsNotFound(err) { - return ctrl.Result{}, errors.Wrapf(err, "failed to controller manager Pod") - } + return ctrl.Result{}, nil +} - schedulerPod := &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: metav1.NamespaceSystem, - Name: fmt.Sprintf("kube-scheduler-%s", inMemoryMachine.Name), - }, - } - if err := cloudClient.Delete(ctx, schedulerPod); err != nil && !apierrors.IsNotFound(err) { - return ctrl.Result{}, errors.Wrapf(err, "failed to scheduler Pod") - } +func (r *InMemoryMachineReconciler) reconcileDeleteETCD(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) { + // No-op if the machine is not a control plane machine. + if !util.IsControlPlaneMachine(machine) { + return ctrl.Result{}, nil + } - apiServer := fmt.Sprintf("kube-apiserver-%s", inMemoryMachine.Name) - apiServerPod := &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: metav1.NamespaceSystem, - Name: apiServer, - }, - } - if err := cloudClient.Delete(ctx, apiServerPod); err != nil && !apierrors.IsNotFound(err) { - return ctrl.Result{}, errors.Wrapf(err, "failed to apiServer Pod") - } - if err := r.APIServerMux.DeleteAPIServer(resourceGroup, apiServer); err != nil { - return ctrl.Result{}, err - } + // Compute the resource group unique name. + // NOTE: We are using reconcilerGroup also as a name for the listener for sake of simplicity. 
+ resourceGroup := klog.KObj(cluster).String() + cloudClient := r.CloudManager.GetResourceGroup(resourceGroup).GetClient() - // TODO: if all the API server are gone, cleanup all the k8s objects from the resource group. - // note: it is not possible to delete the resource group, because cloud resources should be preserved. - // given that, in order to implement this it is required to find a way to identify all the k8s resources (might be via gvk); - // also, deletion must happen suddently, without respecting finalizers or owner references links. + etcdMember := fmt.Sprintf("etcd-%s", inMemoryMachine.Name) + etcdPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: metav1.NamespaceSystem, + Name: etcdMember, + }, + } + if err := cloudClient.Delete(ctx, etcdPod); err != nil && !apierrors.IsNotFound(err) { + return ctrl.Result{}, errors.Wrapf(err, "failed to delete etcd Pod") + } + if err := r.APIServerMux.DeleteEtcdMember(resourceGroup, etcdMember); err != nil { + return ctrl.Result{}, err + } - etcdMember := fmt.Sprintf("etcd-%s", inMemoryMachine.Name) - etcdPod := &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: metav1.NamespaceSystem, - Name: etcdMember, - }, - } - if err := cloudClient.Delete(ctx, etcdPod); err != nil && !apierrors.IsNotFound(err) { - return ctrl.Result{}, errors.Wrapf(err, "failed to etcd Pod") - } - if err := r.APIServerMux.DeleteEtcdMember(resourceGroup, etcdMember); err != nil { - return ctrl.Result{}, err - } + // TODO: if all the etcd members are gone, cleanup all the k8s objects from the resource group. + // note: it is not possible to delete the resource group, because cloud resources should be preserved. + // given that, in order to implement this it is required to find a way to identify all the k8s resources (might be via gvk); + // also, deletion must happen suddenly, without respecting finalizers or owner references links. + + return ctrl.Result{}, nil +} + +func (r *InMemoryMachineReconciler) reconcileDeleteAPIServer(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) { + // No-op if the machine is not a control plane machine. + if !util.IsControlPlaneMachine(machine) { + return ctrl.Result{}, nil + } + + // Compute the resource group unique name. + // NOTE: We are using reconcilerGroup also as a name for the listener for sake of simplicity. + resourceGroup := klog.KObj(cluster).String() + cloudClient := r.CloudManager.GetResourceGroup(resourceGroup).GetClient() + + apiServer := fmt.Sprintf("kube-apiserver-%s", inMemoryMachine.Name) + apiServerPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: metav1.NamespaceSystem, + Name: apiServer, + }, + } + if err := cloudClient.Delete(ctx, apiServerPod); err != nil && !apierrors.IsNotFound(err) { + return ctrl.Result{}, errors.Wrapf(err, "failed to delete apiServer Pod") + } + if err := r.APIServerMux.DeleteAPIServer(resourceGroup, apiServer); err != nil { + return ctrl.Result{}, err + } + + return ctrl.Result{}, nil +} + +func (r *InMemoryMachineReconciler) reconcileDeleteScheduler(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) { + // No-op if the machine is not a control plane machine. + if !util.IsControlPlaneMachine(machine) { + return ctrl.Result{}, nil + } + + // Compute the resource group unique name. + // NOTE: We are using reconcilerGroup also as a name for the listener for sake of simplicity. 
+	resourceGroup := klog.KObj(cluster).String()
+	cloudClient := r.CloudManager.GetResourceGroup(resourceGroup).GetClient()
+
+	schedulerPod := &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: metav1.NamespaceSystem,
+			Name:      fmt.Sprintf("kube-scheduler-%s", inMemoryMachine.Name),
+		},
+	}
+	if err := cloudClient.Delete(ctx, schedulerPod); err != nil && !apierrors.IsNotFound(err) {
+		return ctrl.Result{}, errors.Wrapf(err, "failed to delete scheduler Pod")
+	}
+
+	return ctrl.Result{}, nil
+}
+
+func (r *InMemoryMachineReconciler) reconcileDeleteControllerManager(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) {
+	// No-op if the machine is not a control plane machine.
+	if !util.IsControlPlaneMachine(machine) {
+		return ctrl.Result{}, nil
+	}
+
+	// Compute the resource group unique name.
+	// NOTE: We are using reconcilerGroup also as a name for the listener for sake of simplicity.
+	resourceGroup := klog.KObj(cluster).String()
+	cloudClient := r.CloudManager.GetResourceGroup(resourceGroup).GetClient()
+
+	controllerManagerPod := &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: metav1.NamespaceSystem,
+			Name:      fmt.Sprintf("kube-controller-manager-%s", inMemoryMachine.Name),
+		},
+	}
+	if err := cloudClient.Delete(ctx, controllerManagerPod); err != nil && !apierrors.IsNotFound(err) {
+		return ctrl.Result{}, errors.Wrapf(err, "failed to delete controller manager Pod")
 	}

-	controllerutil.RemoveFinalizer(inMemoryMachine, infrav1.MachineFinalizer)
 	return ctrl.Result{}, nil
 }
diff --git a/test/infrastructure/inmemory/internal/controllers/inmemorymachine_controller_test.go b/test/infrastructure/inmemory/internal/controllers/inmemorymachine_controller_test.go
new file mode 100644
index 000000000000..0b534454546d
--- /dev/null
+++ b/test/infrastructure/inmemory/internal/controllers/inmemorymachine_controller_test.go
@@ -0,0 +1,681 @@
+/*
+Copyright 2023 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package controllers
+
+import (
+	"context"
+	cryptorand "crypto/rand"
+	"crypto/rsa"
+	"crypto/x509"
+	"crypto/x509/pkix"
+	"fmt"
+	"math/big"
+	"testing"
+	"time"
+
+	. 
"github.com/onsi/gomega" + "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/klog/v2" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/inmemory/api/v1alpha1" + cloudv1 "sigs.k8s.io/cluster-api/test/infrastructure/inmemory/internal/cloud/api/v1alpha1" + cmanager "sigs.k8s.io/cluster-api/test/infrastructure/inmemory/internal/cloud/runtime/manager" + "sigs.k8s.io/cluster-api/test/infrastructure/inmemory/internal/server" + "sigs.k8s.io/cluster-api/util/certs" + "sigs.k8s.io/cluster-api/util/conditions" + secretutil "sigs.k8s.io/cluster-api/util/secret" +) + +var ( + ctx = context.Background() + scheme = runtime.NewScheme() + + cluster = &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foo", + }, + } + + cpMachine = &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Labels: map[string]string{ + clusterv1.MachineControlPlaneLabel: "", + }, + }, + } + + workerMachine = &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "baz", + }, + } +) + +func init() { + _ = metav1.AddMetaToScheme(scheme) + _ = corev1.AddToScheme(scheme) + _ = cloudv1.AddToScheme(scheme) + + ctrl.SetLogger(klog.Background()) +} + +func TestReconcileNormalCloudMachine(t *testing.T) { + inMemoryMachine := &infrav1.InMemoryMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + }, + Spec: infrav1.InMemoryMachineSpec{ + Behaviour: &infrav1.InMemoryMachineBehaviour{ + VM: &infrav1.InMemoryVMBehaviour{ + Provisioning: infrav1.CommonProvisioningSettings{ + StartupDuration: metav1.Duration{Duration: 2 * time.Second}, + }, + }, + }, + }, + } + + t.Run("create CloudMachine", func(t *testing.T) { + g := NewWithT(t) + + r := InMemoryMachineReconciler{ + CloudManager: cmanager.New(scheme), + } + r.CloudManager.AddResourceGroup(klog.KObj(cluster).String()) + c := r.CloudManager.GetResourceGroup(klog.KObj(cluster).String()).GetClient() + + res, err := r.reconcileNormalCloudMachine(ctx, cluster, cpMachine, inMemoryMachine) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(res.IsZero()).To(BeFalse()) + g.Expect(conditions.IsFalse(inMemoryMachine, infrav1.VMProvisionedCondition)).To(BeTrue()) + + got := &cloudv1.CloudMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: inMemoryMachine.Name, + }, + } + err = c.Get(ctx, client.ObjectKeyFromObject(got), got) + g.Expect(err).ToNot(HaveOccurred()) + + t.Run("gets provisioned after the provisioning time is expired", func(t *testing.T) { + g := NewWithT(t) + + g.Eventually(func() bool { + res, err := r.reconcileNormalCloudMachine(ctx, cluster, cpMachine, inMemoryMachine) + g.Expect(err).ToNot(HaveOccurred()) + if !res.IsZero() { + time.Sleep(res.RequeueAfter / 100 * 90) + } + return res.IsZero() + }, inMemoryMachine.Spec.Behaviour.VM.Provisioning.StartupDuration.Duration*2).Should(BeTrue()) + + g.Expect(conditions.IsTrue(inMemoryMachine, infrav1.VMProvisionedCondition)).To(BeTrue()) + g.Expect(conditions.Get(inMemoryMachine, infrav1.VMProvisionedCondition).LastTransitionTime.Time).To(BeTemporally(">", inMemoryMachine.CreationTimestamp.Time, inMemoryMachine.Spec.Behaviour.VM.Provisioning.StartupDuration.Duration)) + }) + + t.Run("no-op after it is provisioned", func(t *testing.T) { + g := NewWithT(t) + + res, err := 
r.reconcileNormalCloudMachine(ctx, cluster, cpMachine, inMemoryMachine) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(res.IsZero()).To(BeTrue()) + }) + }) +} + +func TestReconcileNormalNode(t *testing.T) { + inMemoryMachineWithVMNotYetProvisioned := &infrav1.InMemoryMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + }, + } + + inMemoryMachineWithVMProvisioned := &infrav1.InMemoryMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + }, + Spec: infrav1.InMemoryMachineSpec{ + Behaviour: &infrav1.InMemoryMachineBehaviour{ + Node: &infrav1.InMemoryNodeBehaviour{ + Provisioning: infrav1.CommonProvisioningSettings{ + StartupDuration: metav1.Duration{Duration: 2 * time.Second}, + }, + }, + }, + }, + Status: infrav1.InMemoryMachineStatus{ + Conditions: []clusterv1.Condition{ + { + Type: infrav1.VMProvisionedCondition, + Status: corev1.ConditionTrue, + LastTransitionTime: metav1.Now(), + }, + }, + }, + } + + t.Run("no-op if VM is not yet ready", func(t *testing.T) { + g := NewWithT(t) + + r := InMemoryMachineReconciler{ + CloudManager: cmanager.New(scheme), + } + r.CloudManager.AddResourceGroup(klog.KObj(cluster).String()) + c := r.CloudManager.GetResourceGroup(klog.KObj(cluster).String()).GetClient() + + res, err := r.reconcileNormalNode(ctx, cluster, cpMachine, inMemoryMachineWithVMNotYetProvisioned) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(res.IsZero()).To(BeTrue()) + + got := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: inMemoryMachineWithVMNotYetProvisioned.Name, + }, + } + err = c.Get(ctx, client.ObjectKeyFromObject(got), got) + g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) + }) + + t.Run("create node if VM is ready", func(t *testing.T) { + g := NewWithT(t) + + r := InMemoryMachineReconciler{ + CloudManager: cmanager.New(scheme), + } + r.CloudManager.AddResourceGroup(klog.KObj(cluster).String()) + c := r.CloudManager.GetResourceGroup(klog.KObj(cluster).String()).GetClient() + + res, err := r.reconcileNormalNode(ctx, cluster, cpMachine, inMemoryMachineWithVMProvisioned) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(res.IsZero()).To(BeFalse()) + g.Expect(conditions.IsFalse(inMemoryMachineWithVMProvisioned, infrav1.NodeProvisionedCondition)).To(BeTrue()) + + got := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: inMemoryMachineWithVMProvisioned.Name, + }, + } + err = c.Get(ctx, client.ObjectKeyFromObject(got), got) + g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) + + t.Run("gets provisioned after the provisioning time is expired", func(t *testing.T) { + g := NewWithT(t) + + g.Eventually(func() bool { + res, err := r.reconcileNormalNode(ctx, cluster, cpMachine, inMemoryMachineWithVMProvisioned) + g.Expect(err).ToNot(HaveOccurred()) + if !res.IsZero() { + time.Sleep(res.RequeueAfter / 100 * 90) + } + return res.IsZero() + }, inMemoryMachineWithVMProvisioned.Spec.Behaviour.Node.Provisioning.StartupDuration.Duration*2).Should(BeTrue()) + + err = c.Get(ctx, client.ObjectKeyFromObject(got), got) + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(conditions.IsTrue(inMemoryMachineWithVMProvisioned, infrav1.NodeProvisionedCondition)).To(BeTrue()) + g.Expect(conditions.Get(inMemoryMachineWithVMProvisioned, infrav1.NodeProvisionedCondition).LastTransitionTime.Time).To(BeTemporally(">", conditions.Get(inMemoryMachineWithVMProvisioned, infrav1.VMProvisionedCondition).LastTransitionTime.Time, inMemoryMachineWithVMProvisioned.Spec.Behaviour.Node.Provisioning.StartupDuration.Duration)) + }) + + t.Run("no-op after it is provisioned", func(t *testing.T) { + g := 
NewWithT(t) + + res, err := r.reconcileNormalNode(ctx, cluster, cpMachine, inMemoryMachineWithVMProvisioned) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(res.IsZero()).To(BeTrue()) + }) + }) +} + +func TestReconcileNormalEtcd(t *testing.T) { + inMemoryMachineWithNodeNotYetProvisioned := &infrav1.InMemoryMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + }, + } + + inMemoryMachineWithNodeProvisioned := &infrav1.InMemoryMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + }, + Spec: infrav1.InMemoryMachineSpec{ + Behaviour: &infrav1.InMemoryMachineBehaviour{ + Etcd: &infrav1.InMemoryEtcdBehaviour{ + Provisioning: infrav1.CommonProvisioningSettings{ + StartupDuration: metav1.Duration{Duration: 2 * time.Second}, + }, + }, + }, + }, + Status: infrav1.InMemoryMachineStatus{ + Conditions: []clusterv1.Condition{ + { + Type: infrav1.NodeProvisionedCondition, + Status: corev1.ConditionTrue, + LastTransitionTime: metav1.Now(), + }, + }, + }, + } + + t.Run("no-op for worker machines", func(t *testing.T) { + // TODO: implement test + }) + + t.Run("no-op if Node is not yet ready", func(t *testing.T) { + g := NewWithT(t) + + r := InMemoryMachineReconciler{ + CloudManager: cmanager.New(scheme), + } + r.CloudManager.AddResourceGroup(klog.KObj(cluster).String()) + c := r.CloudManager.GetResourceGroup(klog.KObj(cluster).String()).GetClient() + + res, err := r.reconcileNormalETCD(ctx, cluster, cpMachine, inMemoryMachineWithNodeNotYetProvisioned) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(res.IsZero()).To(BeTrue()) + + got := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: metav1.NamespaceSystem, + Name: fmt.Sprintf("etcd-%s", inMemoryMachineWithNodeNotYetProvisioned.Name), + }, + } + err = c.Get(ctx, client.ObjectKeyFromObject(got), got) + g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) + }) + + t.Run("create pod if Node is ready", func(t *testing.T) { + g := NewWithT(t) + + manager := cmanager.New(scheme) + + host := "127.0.0.1" + wcmux := server.NewWorkloadClustersMux(manager, host) + _, err := wcmux.InitWorkloadClusterListener(klog.KObj(cluster).String()) + g.Expect(err).ToNot(HaveOccurred()) + + r := InMemoryMachineReconciler{ + Client: fake.NewClientBuilder().WithScheme(scheme).WithObjects(createCASecret(t, cluster, secretutil.EtcdCA)).Build(), + CloudManager: manager, + APIServerMux: wcmux, + } + r.CloudManager.AddResourceGroup(klog.KObj(cluster).String()) + c := r.CloudManager.GetResourceGroup(klog.KObj(cluster).String()).GetClient() + + res, err := r.reconcileNormalETCD(ctx, cluster, cpMachine, inMemoryMachineWithNodeProvisioned) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(res.IsZero()).To(BeFalse()) + g.Expect(conditions.IsFalse(inMemoryMachineWithNodeProvisioned, infrav1.EtcdProvisionedCondition)).To(BeTrue()) + + got := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: metav1.NamespaceSystem, + Name: fmt.Sprintf("etcd-%s", inMemoryMachineWithNodeNotYetProvisioned.Name), + }, + } + err = c.Get(ctx, client.ObjectKeyFromObject(got), got) + g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) + + t.Run("gets provisioned after the provisioning time is expired", func(t *testing.T) { + g := NewWithT(t) + + g.Eventually(func() bool { + res, err := r.reconcileNormalETCD(ctx, cluster, cpMachine, inMemoryMachineWithNodeProvisioned) + g.Expect(err).ToNot(HaveOccurred()) + if !res.IsZero() { + time.Sleep(res.RequeueAfter / 100 * 90) + } + return res.IsZero() + }, 
inMemoryMachineWithNodeProvisioned.Spec.Behaviour.Etcd.Provisioning.StartupDuration.Duration*2).Should(BeTrue()) + + err = c.Get(ctx, client.ObjectKeyFromObject(got), got) + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(conditions.IsTrue(inMemoryMachineWithNodeProvisioned, infrav1.EtcdProvisionedCondition)).To(BeTrue()) + g.Expect(conditions.Get(inMemoryMachineWithNodeProvisioned, infrav1.EtcdProvisionedCondition).LastTransitionTime.Time).To(BeTemporally(">", conditions.Get(inMemoryMachineWithNodeProvisioned, infrav1.NodeProvisionedCondition).LastTransitionTime.Time, inMemoryMachineWithNodeProvisioned.Spec.Behaviour.Etcd.Provisioning.StartupDuration.Duration)) + }) + + t.Run("no-op after it is provisioned", func(t *testing.T) { + g := NewWithT(t) + + res, err := r.reconcileNormalETCD(ctx, cluster, cpMachine, inMemoryMachineWithNodeProvisioned) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(res.IsZero()).To(BeTrue()) + }) + + err = wcmux.Shutdown(ctx) + g.Expect(err).ToNot(HaveOccurred()) + }) +} + +func TestReconcileNormalApiServer(t *testing.T) { + inMemoryMachineWithNodeNotYetProvisioned := &infrav1.InMemoryMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + }, + } + + inMemoryMachineWithNodeProvisioned := &infrav1.InMemoryMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + }, + Spec: infrav1.InMemoryMachineSpec{ + Behaviour: &infrav1.InMemoryMachineBehaviour{ + APIServer: &infrav1.InMemoryAPIServerBehaviour{ + Provisioning: infrav1.CommonProvisioningSettings{ + StartupDuration: metav1.Duration{Duration: 2 * time.Second}, + }, + }, + }, + }, + Status: infrav1.InMemoryMachineStatus{ + Conditions: []clusterv1.Condition{ + { + Type: infrav1.NodeProvisionedCondition, + Status: corev1.ConditionTrue, + LastTransitionTime: metav1.Now(), + }, + }, + }, + } + + t.Run("no-op for worker machines", func(t *testing.T) { + // TODO: implement test + }) + + t.Run("no-op if Node is not yet ready", func(t *testing.T) { + g := NewWithT(t) + + r := InMemoryMachineReconciler{ + CloudManager: cmanager.New(scheme), + } + r.CloudManager.AddResourceGroup(klog.KObj(cluster).String()) + c := r.CloudManager.GetResourceGroup(klog.KObj(cluster).String()).GetClient() + + res, err := r.reconcileNormalAPIServer(ctx, cluster, cpMachine, inMemoryMachineWithNodeNotYetProvisioned) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(res.IsZero()).To(BeTrue()) + + got := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: metav1.NamespaceSystem, + Name: fmt.Sprintf("kube-apiserver-%s", inMemoryMachineWithNodeNotYetProvisioned.Name), + }, + } + err = c.Get(ctx, client.ObjectKeyFromObject(got), got) + g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) + }) + + t.Run("create pod if Node is ready", func(t *testing.T) { + g := NewWithT(t) + + manager := cmanager.New(scheme) + + host := "127.0.0.1" + wcmux := server.NewWorkloadClustersMux(manager, host) + _, err := wcmux.InitWorkloadClusterListener(klog.KObj(cluster).String()) + g.Expect(err).ToNot(HaveOccurred()) + + r := InMemoryMachineReconciler{ + Client: fake.NewClientBuilder().WithScheme(scheme).WithObjects(createCASecret(t, cluster, secretutil.ClusterCA)).Build(), + CloudManager: manager, + APIServerMux: wcmux, + } + r.CloudManager.AddResourceGroup(klog.KObj(cluster).String()) + c := r.CloudManager.GetResourceGroup(klog.KObj(cluster).String()).GetClient() + + res, err := r.reconcileNormalAPIServer(ctx, cluster, cpMachine, inMemoryMachineWithNodeProvisioned) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(res.IsZero()).To(BeFalse()) + 
g.Expect(conditions.IsFalse(inMemoryMachineWithNodeProvisioned, infrav1.APIServerProvisionedCondition)).To(BeTrue()) + + got := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: metav1.NamespaceSystem, + Name: fmt.Sprintf("kube-apiserver-%s", inMemoryMachineWithNodeNotYetProvisioned.Name), + }, + } + err = c.Get(ctx, client.ObjectKeyFromObject(got), got) + g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) + + t.Run("gets provisioned after the provisioning time is expired", func(t *testing.T) { + g := NewWithT(t) + + g.Eventually(func() bool { + res, err := r.reconcileNormalAPIServer(ctx, cluster, cpMachine, inMemoryMachineWithNodeProvisioned) + g.Expect(err).ToNot(HaveOccurred()) + if !res.IsZero() { + time.Sleep(res.RequeueAfter / 100 * 90) + } + return res.IsZero() + }, inMemoryMachineWithNodeProvisioned.Spec.Behaviour.APIServer.Provisioning.StartupDuration.Duration*2).Should(BeTrue()) + + err = c.Get(ctx, client.ObjectKeyFromObject(got), got) + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(conditions.IsTrue(inMemoryMachineWithNodeProvisioned, infrav1.APIServerProvisionedCondition)).To(BeTrue()) + g.Expect(conditions.Get(inMemoryMachineWithNodeProvisioned, infrav1.APIServerProvisionedCondition).LastTransitionTime.Time).To(BeTemporally(">", conditions.Get(inMemoryMachineWithNodeProvisioned, infrav1.NodeProvisionedCondition).LastTransitionTime.Time, inMemoryMachineWithNodeProvisioned.Spec.Behaviour.APIServer.Provisioning.StartupDuration.Duration)) + }) + + t.Run("no-op after it is provisioned", func(t *testing.T) { + g := NewWithT(t) + + res, err := r.reconcileNormalAPIServer(ctx, cluster, cpMachine, inMemoryMachineWithNodeProvisioned) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(res.IsZero()).To(BeTrue()) + }) + + err = wcmux.Shutdown(ctx) + g.Expect(err).ToNot(HaveOccurred()) + }) +} + +func TestReconcileNormalScheduler(t *testing.T) { + testReconcileNormalComponent(t, "kube-scheduler", func(r InMemoryMachineReconciler) func(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) { + return r.reconcileNormalScheduler + }) +} + +func TestReconcileNormalControllerManager(t *testing.T) { + testReconcileNormalComponent(t, "kube-controller-manager", func(r InMemoryMachineReconciler) func(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) { + return r.reconcileNormalControllerManager + }) +} + +func testReconcileNormalComponent(t *testing.T, component string, reconcileFunc func(InMemoryMachineReconciler) func(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error)) { + t.Helper() + + inMemoryMachineWithAPIServerNotYetProvisioned := &infrav1.InMemoryMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + }, + } + + inMemoryMachineWithAPIServerProvisioned := &infrav1.InMemoryMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + }, + Status: infrav1.InMemoryMachineStatus{ + Conditions: []clusterv1.Condition{ + { + Type: infrav1.APIServerProvisionedCondition, + Status: corev1.ConditionTrue, + }, + }, + }, + } + + t.Run("no-op for worker machines", func(t *testing.T) { + g := NewWithT(t) + + r := InMemoryMachineReconciler{ + CloudManager: cmanager.New(scheme), + } + r.CloudManager.AddResourceGroup(klog.KObj(cluster).String()) + c := r.CloudManager.GetResourceGroup(klog.KObj(cluster).String()).GetClient() + + res, err := 
reconcileFunc(r)(ctx, cluster, workerMachine, inMemoryMachineWithAPIServerProvisioned)
+		g.Expect(err).ToNot(HaveOccurred())
+		g.Expect(res.IsZero()).To(BeTrue())
+
+		got := &corev1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Namespace: metav1.NamespaceSystem,
+				Name:      fmt.Sprintf("%s-%s", component, inMemoryMachineWithAPIServerProvisioned.Name),
+			},
+		}
+		err = c.Get(ctx, client.ObjectKeyFromObject(got), got)
+		g.Expect(apierrors.IsNotFound(err)).To(BeTrue())
+	})
+
+	t.Run("no-op if API server is not yet ready", func(t *testing.T) {
+		g := NewWithT(t)
+
+		r := InMemoryMachineReconciler{
+			CloudManager: cmanager.New(scheme),
+		}
+		r.CloudManager.AddResourceGroup(klog.KObj(cluster).String())
+		c := r.CloudManager.GetResourceGroup(klog.KObj(cluster).String()).GetClient()
+
+		res, err := reconcileFunc(r)(ctx, cluster, cpMachine, inMemoryMachineWithAPIServerNotYetProvisioned)
+		g.Expect(err).ToNot(HaveOccurred())
+		g.Expect(res.IsZero()).To(BeTrue())
+
+		got := &corev1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Namespace: metav1.NamespaceSystem,
+				Name:      fmt.Sprintf("%s-%s", component, inMemoryMachineWithAPIServerProvisioned.Name),
+			},
+		}
+		err = c.Get(ctx, client.ObjectKeyFromObject(got), got)
+		g.Expect(apierrors.IsNotFound(err)).To(BeTrue())
+	})
+
+	t.Run(fmt.Sprintf("create %s pod if API server is ready", component), func(t *testing.T) {
+		g := NewWithT(t)
+
+		r := InMemoryMachineReconciler{
+			CloudManager: cmanager.New(scheme),
+		}
+		r.CloudManager.AddResourceGroup(klog.KObj(cluster).String())
+		c := r.CloudManager.GetResourceGroup(klog.KObj(cluster).String()).GetClient()
+
+		res, err := reconcileFunc(r)(ctx, cluster, cpMachine, inMemoryMachineWithAPIServerProvisioned)
+		g.Expect(err).ToNot(HaveOccurred())
+		g.Expect(res.IsZero()).To(BeTrue())
+
+		got := &corev1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Namespace: metav1.NamespaceSystem,
+				Name:      fmt.Sprintf("%s-%s", component, inMemoryMachineWithAPIServerProvisioned.Name),
+			},
+		}
+		err = c.Get(ctx, client.ObjectKeyFromObject(got), got)
+		g.Expect(err).ToNot(HaveOccurred())
+
+		t.Run(fmt.Sprintf("no-op if %s pod already exists", component), func(t *testing.T) {
+			g := NewWithT(t)
+
+			res, err := reconcileFunc(r)(ctx, cluster, cpMachine, inMemoryMachineWithAPIServerProvisioned)
+			g.Expect(err).ToNot(HaveOccurred())
+			g.Expect(res.IsZero()).To(BeTrue())
+		})
+	})
+}
+
+func createCASecret(t *testing.T, cluster *clusterv1.Cluster, purpose secretutil.Purpose) *corev1.Secret {
+	t.Helper()
+
+	g := NewWithT(t)
+
+	cert, key, err := newCertificateAuthority()
+	g.Expect(err).NotTo(HaveOccurred())
+
+	return &corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: cluster.Namespace,
+			Name:      secretutil.Name(cluster.Name, purpose),
+			Labels: map[string]string{
+				clusterv1.ClusterNameLabel: cluster.Name,
+			},
+		},
+		Data: map[string][]byte{
+			secretutil.TLSKeyDataName: certs.EncodePrivateKeyPEM(key),
+			secretutil.TLSCrtDataName: certs.EncodeCertPEM(cert),
+		},
+		Type: clusterv1.ClusterSecretType,
+	}
+}
+
+// TODO: make these functions public in server/certs.go or in a new util package.
+
+// newCertificateAuthority creates a new certificate and private key for the certificate authority.
+func newCertificateAuthority() (*x509.Certificate, *rsa.PrivateKey, error) {
+	key, err := certs.NewPrivateKey()
+	if err != nil {
+		return nil, nil, err
+	}
+
+	c, err := newSelfSignedCACert(key)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	return c, key, nil
+}
+
+// newSelfSignedCACert creates a CA certificate.
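+// The certificate is self-signed (the template below acts as its own parent) and valid for
+// ten years; it is intended only for test fixtures.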
+func newSelfSignedCACert(key *rsa.PrivateKey) (*x509.Certificate, error) { + cfg := certs.Config{ + CommonName: "kubernetes", + } + + now := time.Now().UTC() + + tmpl := x509.Certificate{ + SerialNumber: new(big.Int).SetInt64(0), + Subject: pkix.Name{ + CommonName: cfg.CommonName, + Organization: cfg.Organization, + }, + NotBefore: now.Add(time.Minute * -5), + NotAfter: now.Add(time.Hour * 24 * 365 * 10), // 10 years + KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign, + MaxPathLenZero: true, + BasicConstraintsValid: true, + MaxPathLen: 0, + IsCA: true, + } + + b, err := x509.CreateCertificate(cryptorand.Reader, &tmpl, &tmpl, key.Public(), key) + if err != nil { + return nil, errors.Wrapf(err, "failed to create self signed CA certificate: %+v", tmpl) + } + + c, err := x509.ParseCertificate(b) + return c, errors.WithStack(err) +} From 74547501ff97343d4b7ec65ace4e7f0778d03026 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Jun 2023 21:58:06 +0000 Subject: [PATCH 15/94] :seedling: Bump actions/checkout from 3.5.2 to 3.5.3 Bumps [actions/checkout](https://github.com/actions/checkout) from 3.5.2 to 3.5.3. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/8e5e7e5ab8b370d6c329ec480221332ada57f0ab...c85c95e3d7251135ab7dc9ce3241c5835cc595a9) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/dependabot.yml | 2 +- .github/workflows/golangci-lint.yml | 2 +- .github/workflows/lint-docs-pr.yaml | 2 +- .github/workflows/lint-docs-weekly.yml | 2 +- .github/workflows/release.yml | 2 +- .github/workflows/scan.yml | 2 +- .github/workflows/test-release-weekly.yml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/dependabot.yml b/.github/workflows/dependabot.yml index d59121e9bc73..8b55c95d95e6 100644 --- a/.github/workflows/dependabot.yml +++ b/.github/workflows/dependabot.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out code into the Go module directory - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3.5.2 + uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3.5.3 - name: Calculate go version id: vars run: echo "go_version=$(make go-version)" >> $GITHUB_OUTPUT diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml index 957c84827608..b2280b9c828d 100644 --- a/.github/workflows/golangci-lint.yml +++ b/.github/workflows/golangci-lint.yml @@ -19,7 +19,7 @@ jobs: - test - hack/tools steps: - - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3.5.2 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3.5.3 - name: Calculate go version id: vars run: echo "go_version=$(make go-version)" >> $GITHUB_OUTPUT diff --git a/.github/workflows/lint-docs-pr.yaml b/.github/workflows/lint-docs-pr.yaml index 5b3cb273e22d..93a5b16ef0a1 100644 --- a/.github/workflows/lint-docs-pr.yaml +++ b/.github/workflows/lint-docs-pr.yaml @@ -14,7 +14,7 @@ jobs: name: Broken Links runs-on: ubuntu-latest steps: - - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3.5.2 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # 
tag=v3.5.3 - uses: gaurav-nelson/github-action-markdown-link-check@5c5dfc0ac2e225883c0e5f03a85311ec2830d368 # tag=v1 with: use-quiet-mode: 'yes' diff --git a/.github/workflows/lint-docs-weekly.yml b/.github/workflows/lint-docs-weekly.yml index 01cf7054ad6f..7323b58a385b 100644 --- a/.github/workflows/lint-docs-weekly.yml +++ b/.github/workflows/lint-docs-weekly.yml @@ -17,7 +17,7 @@ jobs: branch: [ main, release-1.4, release-1.3 ] runs-on: ubuntu-latest steps: - - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3.5.2 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3.5.3 with: ref: ${{ matrix.branch }} - uses: gaurav-nelson/github-action-markdown-link-check@5c5dfc0ac2e225883c0e5f03a85311ec2830d368 # tag=v1 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 436a3d9b9a6c..d76225d8b809 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -17,7 +17,7 @@ jobs: - name: Set env run: echo "RELEASE_TAG=${GITHUB_REF:10}" >> $GITHUB_ENV - name: checkout code - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3.5.2 + uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3.5.3 with: fetch-depth: 0 - name: Calculate go version diff --git a/.github/workflows/scan.yml b/.github/workflows/scan.yml index 15a05ddb680d..5d727684c9f8 100644 --- a/.github/workflows/scan.yml +++ b/.github/workflows/scan.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out code - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3.5.2 + uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3.5.3 with: ref: ${{ matrix.branch }} - name: Calculate go version diff --git a/.github/workflows/test-release-weekly.yml b/.github/workflows/test-release-weekly.yml index 293c5f579a25..8b6ec50d819f 100644 --- a/.github/workflows/test-release-weekly.yml +++ b/.github/workflows/test-release-weekly.yml @@ -20,7 +20,7 @@ jobs: branch: [ main, release-1.4, release-1.3 ] runs-on: ubuntu-latest steps: - - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # tag=v3.5.2 + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # tag=v3.5.3 with: ref: ${{ matrix.branch }} fetch-depth: 0 From 68ff7a22e392860a15163a6f742d9bcf199f5467 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Jun 2023 21:58:14 +0000 Subject: [PATCH 16/94] :seedling: Bump golangci/golangci-lint-action from 3.5.0 to 3.6.0 Bumps [golangci/golangci-lint-action](https://github.com/golangci/golangci-lint-action) from 3.5.0 to 3.6.0. - [Release notes](https://github.com/golangci/golangci-lint-action/releases) - [Commits](https://github.com/golangci/golangci-lint-action/compare/5f1fec7010f6ae3b84ea4f7b2129beb8639b564f...639cd343e1d3b897ff35927a75193d57cfcba299) --- updated-dependencies: - dependency-name: golangci/golangci-lint-action dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- .github/workflows/golangci-lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml index 957c84827608..71b5646c9276 100644 --- a/.github/workflows/golangci-lint.yml +++ b/.github/workflows/golangci-lint.yml @@ -28,7 +28,7 @@ jobs: with: go-version: ${{ steps.vars.outputs.go_version }} - name: golangci-lint - uses: golangci/golangci-lint-action@5f1fec7010f6ae3b84ea4f7b2129beb8639b564f # tag=v3.5.0 + uses: golangci/golangci-lint-action@639cd343e1d3b897ff35927a75193d57cfcba299 # tag=v3.6.0 with: version: v1.52.1 working-directory: ${{matrix.working-directory}} From bb71d3ef6c0fa619bb2f268dda8aa580faa24b27 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Jun 2023 21:58:37 +0000 Subject: [PATCH 17/94] :seedling: Bump golang.org/x/text from 0.9.0 to 0.10.0 Bumps [golang.org/x/text](https://github.com/golang/text) from 0.9.0 to 0.10.0. - [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.9.0...v0.10.0) --- updated-dependencies: - dependency-name: golang.org/x/text dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index dbdc40f8d363..6df41ca0337a 100644 --- a/go.mod +++ b/go.mod @@ -124,7 +124,7 @@ require ( golang.org/x/crypto v0.9.0 // indirect golang.org/x/sys v0.8.0 // indirect golang.org/x/term v0.8.0 // indirect - golang.org/x/text v0.9.0 + golang.org/x/text v0.10.0 golang.org/x/time v0.3.0 // indirect gomodules.xyz/jsonpatch/v2 v2.3.0 google.golang.org/appengine v1.6.7 // indirect diff --git a/go.sum b/go.sum index 192185123ab3..28de7b4a3ec4 100644 --- a/go.sum +++ b/go.sum @@ -731,8 +731,8 @@ golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= -golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.10.0 h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58= +golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= From d51b0273f9171a4585534f1d0929b4e136b8f919 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Jun 2023 21:58:51 +0000 Subject: [PATCH 18/94] :seedling: Bump github.com/onsi/gomega from 1.27.7 to 1.27.8 Bumps [github.com/onsi/gomega](https://github.com/onsi/gomega) from 1.27.7 to 1.27.8. 
- [Release notes](https://github.com/onsi/gomega/releases) - [Changelog](https://github.com/onsi/gomega/blob/master/CHANGELOG.md) - [Commits](https://github.com/onsi/gomega/compare/v1.27.7...v1.27.8) --- updated-dependencies: - dependency-name: github.com/onsi/gomega dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index dbdc40f8d363..e4fd780b0db1 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( github.com/google/gofuzz v1.2.0 github.com/mattn/go-runewidth v0.0.14 // indirect github.com/onsi/ginkgo/v2 v2.9.7 - github.com/onsi/gomega v1.27.7 + github.com/onsi/gomega v1.27.8 github.com/pkg/errors v0.9.1 github.com/spf13/cobra v1.7.0 github.com/spf13/pflag v1.0.5 diff --git a/go.sum b/go.sum index 192185123ab3..520d1e93946a 100644 --- a/go.sum +++ b/go.sum @@ -394,8 +394,8 @@ github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/onsi/ginkgo/v2 v2.9.7 h1:06xGQy5www2oN160RtEZoTvnP2sPhEfePYmCDc2szss= github.com/onsi/ginkgo/v2 v2.9.7/go.mod h1:cxrmXWykAwTwhQsJOPfdIDiJ+l2RYq7U8hFU+M/1uw0= -github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU= -github.com/onsi/gomega v1.27.7/go.mod h1:1p8OOlwo2iUUDsHnOrjE5UKYJ+e3W8eQ3qSlRahPmr4= +github.com/onsi/gomega v1.27.8 h1:gegWiwZjBsf2DgiSbf5hpokZ98JVDMcWkUiigk6/KXc= +github.com/onsi/gomega v1.27.8/go.mod h1:2J8vzI/s+2shY9XHRApDkdgPo1TKT7P2u6fXeJKFnNQ= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= From 78ba119213dc774385e62e1f7ec05f24091cb11d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Jun 2023 22:06:56 +0000 Subject: [PATCH 19/94] Update generated code --- hack/tools/go.mod | 2 +- hack/tools/go.sum | 4 ++-- test/go.mod | 2 +- test/go.sum | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hack/tools/go.mod b/hack/tools/go.mod index 5520fba0e4bf..eb29e38b5281 100644 --- a/hack/tools/go.mod +++ b/hack/tools/go.mod @@ -137,7 +137,7 @@ require ( golang.org/x/sync v0.2.0 // indirect golang.org/x/sys v0.8.0 // indirect golang.org/x/term v0.8.0 // indirect - golang.org/x/text v0.9.0 // indirect + golang.org/x/text v0.10.0 // indirect golang.org/x/time v0.3.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect gomodules.xyz/jsonpatch/v2 v2.3.0 // indirect diff --git a/hack/tools/go.sum b/hack/tools/go.sum index d31cfe103894..0f639ea26eb2 100644 --- a/hack/tools/go.sum +++ b/hack/tools/go.sum @@ -747,8 +747,8 @@ golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= -golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.10.0 h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58= 
+golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= diff --git a/test/go.mod b/test/go.mod index 6fb7863ad121..1f6308fbc519 100644 --- a/test/go.mod +++ b/test/go.mod @@ -120,7 +120,7 @@ require ( golang.org/x/oauth2 v0.8.0 // indirect golang.org/x/sys v0.8.0 // indirect golang.org/x/term v0.8.0 // indirect - golang.org/x/text v0.9.0 // indirect + golang.org/x/text v0.10.0 // indirect golang.org/x/time v0.3.0 // indirect golang.org/x/tools v0.9.1 // indirect gomodules.xyz/jsonpatch/v2 v2.3.0 // indirect diff --git a/test/go.sum b/test/go.sum index b8d7a127b16e..8428fd54b669 100644 --- a/test/go.sum +++ b/test/go.sum @@ -708,8 +708,8 @@ golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= -golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.10.0 h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58= +golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= From 433ac735ebb9906b95e678d7e701be4ea42319f2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Jun 2023 22:07:16 +0000 Subject: [PATCH 20/94] Update generated code --- hack/tools/go.mod | 2 +- hack/tools/go.sum | 4 ++-- test/go.mod | 2 +- test/go.sum | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hack/tools/go.mod b/hack/tools/go.mod index 5520fba0e4bf..5741721aa28b 100644 --- a/hack/tools/go.mod +++ b/hack/tools/go.mod @@ -109,7 +109,7 @@ require ( github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect github.com/morikuni/aec v1.0.0 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect - github.com/onsi/gomega v1.27.7 // indirect + github.com/onsi/gomega v1.27.8 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.0-rc2 // indirect github.com/pelletier/go-toml/v2 v2.0.8 // indirect diff --git a/hack/tools/go.sum b/hack/tools/go.sum index d31cfe103894..3fad7ff66040 100644 --- a/hack/tools/go.sum +++ b/hack/tools/go.sum @@ -418,8 +418,8 @@ github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= github.com/onsi/ginkgo/v2 v2.9.7 h1:06xGQy5www2oN160RtEZoTvnP2sPhEfePYmCDc2szss= -github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU= -github.com/onsi/gomega 
v1.27.7/go.mod h1:1p8OOlwo2iUUDsHnOrjE5UKYJ+e3W8eQ3qSlRahPmr4= +github.com/onsi/gomega v1.27.8 h1:gegWiwZjBsf2DgiSbf5hpokZ98JVDMcWkUiigk6/KXc= +github.com/onsi/gomega v1.27.8/go.mod h1:2J8vzI/s+2shY9XHRApDkdgPo1TKT7P2u6fXeJKFnNQ= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.0-rc2 h1:2zx/Stx4Wc5pIPDvIxHXvXtQFW/7XWJGmnM7r3wg034= diff --git a/test/go.mod b/test/go.mod index 6fb7863ad121..129903549d06 100644 --- a/test/go.mod +++ b/test/go.mod @@ -13,7 +13,7 @@ require ( github.com/flatcar/ignition v0.36.2 github.com/go-logr/logr v1.2.4 github.com/onsi/ginkgo/v2 v2.9.7 - github.com/onsi/gomega v1.27.7 + github.com/onsi/gomega v1.27.8 github.com/pkg/errors v0.9.1 github.com/spf13/pflag v1.0.5 github.com/vincent-petithory/dataurl v1.0.0 diff --git a/test/go.sum b/test/go.sum index b8d7a127b16e..4a7cbef18b67 100644 --- a/test/go.sum +++ b/test/go.sum @@ -375,8 +375,8 @@ github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLA github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/onsi/ginkgo/v2 v2.9.7 h1:06xGQy5www2oN160RtEZoTvnP2sPhEfePYmCDc2szss= github.com/onsi/ginkgo/v2 v2.9.7/go.mod h1:cxrmXWykAwTwhQsJOPfdIDiJ+l2RYq7U8hFU+M/1uw0= -github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU= -github.com/onsi/gomega v1.27.7/go.mod h1:1p8OOlwo2iUUDsHnOrjE5UKYJ+e3W8eQ3qSlRahPmr4= +github.com/onsi/gomega v1.27.8 h1:gegWiwZjBsf2DgiSbf5hpokZ98JVDMcWkUiigk6/KXc= +github.com/onsi/gomega v1.27.8/go.mod h1:2J8vzI/s+2shY9XHRApDkdgPo1TKT7P2u6fXeJKFnNQ= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrBg0D7ufOcFM= From 9c751d59c05e2c31ff378c0bd54646dbb5df3ef7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 13 Jun 2023 07:06:43 +0000 Subject: [PATCH 21/94] :seedling: Bump github.com/onsi/ginkgo/v2 from 2.9.7 to 2.10.0 Bumps [github.com/onsi/ginkgo/v2](https://github.com/onsi/ginkgo) from 2.9.7 to 2.10.0. - [Release notes](https://github.com/onsi/ginkgo/releases) - [Changelog](https://github.com/onsi/ginkgo/blob/master/CHANGELOG.md) - [Commits](https://github.com/onsi/ginkgo/compare/v2.9.7...v2.10.0) --- updated-dependencies: - dependency-name: github.com/onsi/ginkgo/v2 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 71fd483613fd..dfb959b10830 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,7 @@ require ( github.com/google/go-github/v48 v48.2.0 github.com/google/gofuzz v1.2.0 github.com/mattn/go-runewidth v0.0.14 // indirect - github.com/onsi/ginkgo/v2 v2.9.7 + github.com/onsi/ginkgo/v2 v2.10.0 github.com/onsi/gomega v1.27.8 github.com/pkg/errors v0.9.1 github.com/spf13/cobra v1.7.0 @@ -151,5 +151,5 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pelletier/go-toml/v2 v2.0.8 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect - golang.org/x/tools v0.9.1 // indirect + golang.org/x/tools v0.9.3 // indirect ) diff --git a/go.sum b/go.sum index f4140d983c5c..a1850163abdb 100644 --- a/go.sum +++ b/go.sum @@ -392,8 +392,8 @@ github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRW github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= -github.com/onsi/ginkgo/v2 v2.9.7 h1:06xGQy5www2oN160RtEZoTvnP2sPhEfePYmCDc2szss= -github.com/onsi/ginkgo/v2 v2.9.7/go.mod h1:cxrmXWykAwTwhQsJOPfdIDiJ+l2RYq7U8hFU+M/1uw0= +github.com/onsi/ginkgo/v2 v2.10.0 h1:sfUl4qgLdvkChZrWCYndY2EAu9BRIw1YphNAzy1VNWs= +github.com/onsi/ginkgo/v2 v2.10.0/go.mod h1:UDQOh5wbQUlMnkLfVaIUMtQ1Vus92oM+P2JX1aulgcE= github.com/onsi/gomega v1.27.8 h1:gegWiwZjBsf2DgiSbf5hpokZ98JVDMcWkUiigk6/KXc= github.com/onsi/gomega v1.27.8/go.mod h1:2J8vzI/s+2shY9XHRApDkdgPo1TKT7P2u6fXeJKFnNQ= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= @@ -792,8 +792,8 @@ golang.org/x/tools v0.0.0-20210108195828-e2f9c7f1fc8e/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo= -golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= +golang.org/x/tools v0.9.3 h1:Gn1I8+64MsuTb/HpH+LmQtNas23LhUVr3rYZ0eKuaMM= +golang.org/x/tools v0.9.3/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= From 46d75aaff9fb2f6230c56b94ff2850c571dc0199 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 13 Jun 2023 07:14:39 +0000 Subject: [PATCH 22/94] Update generated code --- hack/tools/go.mod | 2 +- hack/tools/go.sum | 6 +++--- test/go.mod | 4 ++-- test/go.sum | 8 ++++---- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/hack/tools/go.mod b/hack/tools/go.mod index e8184a13ec33..41efd1c5a7b5 100644 --- a/hack/tools/go.mod +++ b/hack/tools/go.mod @@ -13,7 +13,7 @@ require ( github.com/pkg/errors v0.9.1 github.com/spf13/pflag v1.0.5 github.com/valyala/fastjson v1.6.4 - golang.org/x/tools v0.9.1 + 
golang.org/x/tools v0.9.3 google.golang.org/api v0.122.0 helm.sh/helm/v3 v3.11.1 k8s.io/api v0.27.2 diff --git a/hack/tools/go.sum b/hack/tools/go.sum index 800dc1261774..eba8b996cba3 100644 --- a/hack/tools/go.sum +++ b/hack/tools/go.sum @@ -417,7 +417,7 @@ github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRW github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= -github.com/onsi/ginkgo/v2 v2.9.7 h1:06xGQy5www2oN160RtEZoTvnP2sPhEfePYmCDc2szss= +github.com/onsi/ginkgo/v2 v2.10.0 h1:sfUl4qgLdvkChZrWCYndY2EAu9BRIw1YphNAzy1VNWs= github.com/onsi/gomega v1.27.8 h1:gegWiwZjBsf2DgiSbf5hpokZ98JVDMcWkUiigk6/KXc= github.com/onsi/gomega v1.27.8/go.mod h1:2J8vzI/s+2shY9XHRApDkdgPo1TKT7P2u6fXeJKFnNQ= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= @@ -807,8 +807,8 @@ golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.0.0-20210108195828-e2f9c7f1fc8e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo= -golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= +golang.org/x/tools v0.9.3 h1:Gn1I8+64MsuTb/HpH+LmQtNas23LhUVr3rYZ0eKuaMM= +golang.org/x/tools v0.9.3/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/test/go.mod b/test/go.mod index 40349e2cfd5e..19f23eb771af 100644 --- a/test/go.mod +++ b/test/go.mod @@ -12,7 +12,7 @@ require ( github.com/evanphx/json-patch/v5 v5.6.0 github.com/flatcar/ignition v0.36.2 github.com/go-logr/logr v1.2.4 - github.com/onsi/ginkgo/v2 v2.9.7 + github.com/onsi/ginkgo/v2 v2.10.0 github.com/onsi/gomega v1.27.8 github.com/pkg/errors v0.9.1 github.com/spf13/pflag v1.0.5 @@ -122,7 +122,7 @@ require ( golang.org/x/term v0.8.0 // indirect golang.org/x/text v0.10.0 // indirect golang.org/x/time v0.3.0 // indirect - golang.org/x/tools v0.9.1 // indirect + golang.org/x/tools v0.9.3 // indirect gomodules.xyz/jsonpatch/v2 v2.3.0 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 // indirect diff --git a/test/go.sum b/test/go.sum index 9cc8a5bd2b3e..71567235b277 100644 --- a/test/go.sum +++ b/test/go.sum @@ -373,8 +373,8 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8m github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= -github.com/onsi/ginkgo/v2 v2.9.7 h1:06xGQy5www2oN160RtEZoTvnP2sPhEfePYmCDc2szss= -github.com/onsi/ginkgo/v2 v2.9.7/go.mod h1:cxrmXWykAwTwhQsJOPfdIDiJ+l2RYq7U8hFU+M/1uw0= 
+github.com/onsi/ginkgo/v2 v2.10.0 h1:sfUl4qgLdvkChZrWCYndY2EAu9BRIw1YphNAzy1VNWs= +github.com/onsi/ginkgo/v2 v2.10.0/go.mod h1:UDQOh5wbQUlMnkLfVaIUMtQ1Vus92oM+P2JX1aulgcE= github.com/onsi/gomega v1.27.8 h1:gegWiwZjBsf2DgiSbf5hpokZ98JVDMcWkUiigk6/KXc= github.com/onsi/gomega v1.27.8/go.mod h1:2J8vzI/s+2shY9XHRApDkdgPo1TKT7P2u6fXeJKFnNQ= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= @@ -769,8 +769,8 @@ golang.org/x/tools v0.0.0-20210108195828-e2f9c7f1fc8e/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo= -golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= +golang.org/x/tools v0.9.3 h1:Gn1I8+64MsuTb/HpH+LmQtNas23LhUVr3rYZ0eKuaMM= +golang.org/x/tools v0.9.3/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= From a0d0c1d17124d5b446b4b11032d2f14df15abdd5 Mon Sep 17 00:00:00 2001 From: Chris Werner Rau Date: Wed, 13 Jul 2022 15:29:58 +0200 Subject: [PATCH 23/94] feature(clusterctl): Follow XDG Directory standard for config/data/... files feat(clusterctl): use XDG_CONFIG_HOME for script as well --- cmd/clusterctl/client/config.go | 2 +- cmd/clusterctl/client/config/client.go | 7 +- cmd/clusterctl/client/config/reader_viper.go | 36 +++-- .../client/config/reader_viper_test.go | 7 +- cmd/clusterctl/client/repository/overrides.go | 8 +- .../client/repository/overrides_test.go | 15 ++- cmd/clusterctl/cmd/config_repositories.go | 2 +- .../cmd/config_repositories_test.go | 6 +- cmd/clusterctl/cmd/generate_cluster.go | 4 +- cmd/clusterctl/cmd/init.go | 2 +- cmd/clusterctl/cmd/root.go | 27 ++-- cmd/clusterctl/cmd/version_checker.go | 15 ++- cmd/clusterctl/cmd/version_checker_test.go | 29 +++-- .../hack/create-local-repository.py | 123 +++++++++++------- .../commands/additional-commands.md | 2 +- docs/book/src/clusterctl/commands/init.md | 4 +- docs/book/src/clusterctl/configuration.md | 10 +- docs/book/src/clusterctl/developers.md | 4 +- .../experimental-features.md | 2 +- docs/book/src/user/troubleshooting.md | 32 ++--- go.mod | 1 + go.sum | 3 + hack/tools/go.mod | 1 + hack/tools/go.sum | 3 + test/go.mod | 1 + test/go.sum | 3 + 26 files changed, 211 insertions(+), 138 deletions(-) diff --git a/cmd/clusterctl/client/config.go b/cmd/clusterctl/client/config.go index 4140c2cf1495..8f2d631d2a1f 100644 --- a/cmd/clusterctl/client/config.go +++ b/cmd/clusterctl/client/config.go @@ -134,7 +134,7 @@ type GetClusterTemplateOptions struct { ClusterName string // KubernetesVersion to use for the workload cluster. If unspecified, the value from os env variables - // or the .cluster-api/clusterctl.yaml config file will be used. + // or the $XDG_CONFIG_HOME/cluster-api/clusterctl.yaml or .cluster-api/clusterctl.yaml config file will be used. KubernetesVersion string // ControlPlaneMachineCount defines the number of control plane machines to be added to the workload cluster. 
diff --git a/cmd/clusterctl/client/config/client.go b/cmd/clusterctl/client/config/client.go index a25faf9944b1..f689b402cdd1 100644 --- a/cmd/clusterctl/client/config/client.go +++ b/cmd/clusterctl/client/config/client.go @@ -86,9 +86,12 @@ func newConfigClient(path string, options ...Option) (*configClient, error) { } // if there is an injected reader, use it, otherwise use a default one + var err error if client.reader == nil { - client.reader = newViperReader() - if err := client.reader.Init(path); err != nil { + if client.reader, err = newViperReader(); err != nil { + return nil, errors.Wrap(err, "failed to create the configuration reader") + } + if err = client.reader.Init(path); err != nil { return nil, errors.Wrap(err, "failed to initialize the configuration reader") } } diff --git a/cmd/clusterctl/client/config/reader_viper.go b/cmd/clusterctl/client/config/reader_viper.go index 113423100efe..85485b038f67 100644 --- a/cmd/clusterctl/client/config/reader_viper.go +++ b/cmd/clusterctl/client/config/reader_viper.go @@ -27,17 +27,19 @@ import ( "strings" "time" + "github.com/adrg/xdg" "github.com/pkg/errors" "github.com/spf13/viper" - "k8s.io/client-go/util/homedir" logf "sigs.k8s.io/cluster-api/cmd/clusterctl/log" ) const ( - // ConfigFolder defines the name of the config folder under $home. + // ConfigFolder defines the old name of the config folder under $HOME. ConfigFolder = ".cluster-api" - // ConfigName defines the name of the config file under ConfigFolder. + // ConfigFolderXDG defines the name of the config folder under $XDG_CONFIG_HOME. + ConfigFolderXDG = "cluster-api" + // ConfigName defines the name of the config file under ConfigFolderXDG. ConfigName = "clusterctl" // DownloadConfigFile is the config file when fetching the config from a remote location. DownloadConfigFile = "clusterctl-download.yaml" @@ -58,14 +60,18 @@ func injectConfigPaths(configPaths []string) viperReaderOption { } // newViperReader returns a viperReader. -func newViperReader(opts ...viperReaderOption) Reader { +func newViperReader(opts ...viperReaderOption) (Reader, error) { + configDirectory, err := xdg.ConfigFile(ConfigFolderXDG) + if err != nil { + return nil, err + } vr := &viperReader{ - configPaths: []string{filepath.Join(homedir.HomeDir(), ConfigFolder)}, + configPaths: []string{configDirectory, filepath.Join(xdg.Home, ConfigFolder)}, } for _, o := range opts { o(vr) } - return vr + return vr, nil } // Init initialize the viperReader. 
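The hunk above rewires the reader's default search path from a single `$HOME/.cluster-api` folder to an XDG-first list. As a quick illustration, here is a minimal, self-contained Go sketch of the resulting lookup order, built from the same `github.com/adrg/xdg` calls the patch introduces (`xdg.ConfigFile` and `xdg.Home`); the directory names mirror the `ConfigFolderXDG` and `ConfigFolder` constants, and the snippet only prints where the reader would look — it is not the actual clusterctl wiring.

```go
package main

import (
	"fmt"
	"path/filepath"

	"github.com/adrg/xdg"
)

func main() {
	// Same call the patched newViperReader makes: resolve
	// $XDG_CONFIG_HOME/cluster-api (typically ~/.config/cluster-api).
	xdgDir, err := xdg.ConfigFile("cluster-api")
	if err != nil {
		panic(err)
	}
	// The legacy $HOME/.cluster-api folder stays on the search path as a
	// fallback, so configs written by older clusterctl versions keep working.
	legacyDir := filepath.Join(xdg.Home, ".cluster-api")

	fmt.Println("clusterctl config search order:")
	for i, dir := range []string{xdgDir, legacyDir} {
		fmt.Printf("  %d. %s\n", i+1, dir)
	}
}
```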
@@ -89,15 +95,17 @@ func (v *viperReader) Init(path string) error { switch { case url.Scheme == "https" || url.Scheme == "http": - configPath := filepath.Join(homedir.HomeDir(), ConfigFolder) + var configDirectory string if len(v.configPaths) > 0 { - configPath = v.configPaths[0] - } - if err := os.MkdirAll(configPath, os.ModePerm); err != nil { - return err + configDirectory = v.configPaths[0] + } else { + configDirectory, err = xdg.ConfigFile(ConfigFolderXDG) + if err != nil { + return err + } } - downloadConfigFile := filepath.Join(configPath, DownloadConfigFile) + downloadConfigFile := filepath.Join(configDirectory, DownloadConfigFile) err = downloadFile(url.String(), downloadConfigFile) if err != nil { return err @@ -112,14 +120,14 @@ func (v *viperReader) Init(path string) error { viper.SetConfigFile(path) } } else { - // Checks if there is a default .cluster-api/clusterctl{.extension} file in home directory + // Checks if there is a default $XDG_CONFIG_HOME/cluster-api/clusterctl{.extension} or $HOME/.cluster-api/clusterctl{.extension} file if !v.checkDefaultConfig() { // since there is no default config to read from, just skip // reading in config log.V(5).Info("No default config file available") return nil } - // Configure viper for reading .cluster-api/clusterctl{.extension} in home directory + // Configure viper for reading $XDG_CONFIG_HOME/cluster-api/clusterctl{.extension} or $HOME/.cluster-api/clusterctl{.extension} file viper.SetConfigName(ConfigName) for _, p := range v.configPaths { viper.AddConfigPath(p) diff --git a/cmd/clusterctl/client/config/reader_viper_test.go b/cmd/clusterctl/client/config/reader_viper_test.go index 8e0b5b209821..53e37e89c5e5 100644 --- a/cmd/clusterctl/client/config/reader_viper_test.go +++ b/cmd/clusterctl/client/config/reader_viper_test.go @@ -108,7 +108,7 @@ func Test_viperReader_Init(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { gg := NewWithT(t) - v := newViperReader(injectConfigPaths(tt.configDirs)) + v, _ := newViperReader(injectConfigPaths(tt.configDirs)) if tt.expectErr { gg.Expect(v.Init(tt.configPath)).ToNot(Succeed()) return @@ -168,7 +168,7 @@ func Test_viperReader_Get(t *testing.T) { t.Run(tt.name, func(t *testing.T) { gs := NewWithT(t) - v := newViperReader(injectConfigPaths([]string{dir})) + v, _ := newViperReader(injectConfigPaths([]string{dir})) gs.Expect(v.Init(configFile)).To(Succeed()) @@ -192,7 +192,8 @@ func Test_viperReader_GetWithoutDefaultConfig(t *testing.T) { _ = os.Setenv("FOO_FOO", "bar") - v := newViperReader(injectConfigPaths([]string{dir})) + v, err := newViperReader(injectConfigPaths([]string{dir})) + g.Expect(err).NotTo(HaveOccurred()) g.Expect(v.Init("")).To(Succeed()) got, err := v.Get("FOO_FOO") diff --git a/cmd/clusterctl/client/repository/overrides.go b/cmd/clusterctl/client/repository/overrides.go index 32c4a742ff5e..1a6c858347bf 100644 --- a/cmd/clusterctl/client/repository/overrides.go +++ b/cmd/clusterctl/client/repository/overrides.go @@ -23,9 +23,9 @@ import ( "runtime" "strings" + "github.com/adrg/xdg" "github.com/drone/envsubst/v2" "github.com/pkg/errors" - "k8s.io/client-go/util/homedir" "sigs.k8s.io/cluster-api/cmd/clusterctl/client/config" ) @@ -68,7 +68,11 @@ func newOverride(o *newOverrideInput) Overrider { // Path returns the fully formed path to the file within the specified // overrides config. 
func (o *overrides) Path() (string, error) { - basepath := filepath.Join(homedir.HomeDir(), config.ConfigFolder, overrideFolder) + configDirectory, err := xdg.ConfigFile(config.ConfigFolderXDG) + if err != nil { + return "", err + } + basepath := filepath.Join(configDirectory, overrideFolder) f, err := o.configVariablesClient.Get(overrideFolderKey) if err == nil && strings.TrimSpace(f) != "" { basepath = f diff --git a/cmd/clusterctl/client/repository/overrides_test.go b/cmd/clusterctl/client/repository/overrides_test.go index b68e2c72bf6d..2565c5dfcc1a 100644 --- a/cmd/clusterctl/client/repository/overrides_test.go +++ b/cmd/clusterctl/client/repository/overrides_test.go @@ -21,8 +21,8 @@ import ( "path/filepath" "testing" + "github.com/adrg/xdg" . "github.com/onsi/gomega" - "k8s.io/client-go/util/homedir" clusterctlv1 "sigs.k8s.io/cluster-api/cmd/clusterctl/api/v1alpha3" "sigs.k8s.io/cluster-api/cmd/clusterctl/client/config" @@ -30,6 +30,9 @@ import ( ) func TestOverrides(t *testing.T) { + configDirectory, err := xdg.ConfigFile(config.ConfigFolderXDG) + NewWithT(t).Expect(err).To(BeNil()) + tests := []struct { name string configVarClient config.VariablesClient @@ -39,17 +42,17 @@ func TestOverrides(t *testing.T) { { name: "returns default overrides path if no config provided", configVarClient: test.NewFakeVariableClient(), - expectedPath: filepath.Join(homedir.HomeDir(), config.ConfigFolder, overrideFolder, "infrastructure-myinfra", "v1.0.1", "infra-comp.yaml"), + expectedPath: filepath.Join(configDirectory, overrideFolder, "infrastructure-myinfra", "v1.0.1", "infra-comp.yaml"), }, { name: "returns default overrides path if config variable is empty", configVarClient: test.NewFakeVariableClient().WithVar(overrideFolderKey, ""), - expectedPath: filepath.Join(homedir.HomeDir(), config.ConfigFolder, overrideFolder, "infrastructure-myinfra", "v1.0.1", "infra-comp.yaml"), + expectedPath: filepath.Join(configDirectory, overrideFolder, "infrastructure-myinfra", "v1.0.1", "infra-comp.yaml"), }, { name: "returns default overrides path if config variable is whitespace", configVarClient: test.NewFakeVariableClient().WithVar(overrideFolderKey, " "), - expectedPath: filepath.Join(homedir.HomeDir(), config.ConfigFolder, overrideFolder, "infrastructure-myinfra", "v1.0.1", "infra-comp.yaml"), + expectedPath: filepath.Join(configDirectory, overrideFolder, "infrastructure-myinfra", "v1.0.1", "infra-comp.yaml"), }, { name: "uses overrides folder from the config variables", @@ -86,7 +89,9 @@ func TestOverrides(t *testing.T) { filePath: "infra-comp.yaml", }) - g.Expect(override.Path()).To(Equal(tt.expectedPath)) + overridePath, err := override.Path() + g.Expect(err).To(BeNil()) + g.Expect(overridePath).To(Equal(tt.expectedPath)) }) } } diff --git a/cmd/clusterctl/cmd/config_repositories.go b/cmd/clusterctl/cmd/config_repositories.go index 48e189d18771..0531e50130c7 100644 --- a/cmd/clusterctl/cmd/config_repositories.go +++ b/cmd/clusterctl/cmd/config_repositories.go @@ -56,7 +56,7 @@ var configRepositoryCmd = &cobra.Command{ Display the list of providers and their repository configurations. clusterctl ships with a list of known providers; if necessary, edit - $HOME/.cluster-api/clusterctl.yaml file to add a new provider or to customize existing ones.`), + $XDG_CONFIG_HOME/cluster-api/clusterctl.yaml file to add a new provider or to customize existing ones.`), Example: Examples(` # Displays the list of available providers. 
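Taken together, the overrides changes above mean the override root is resolved in two steps: an explicit folder from the clusterctl config variables still wins, and only the default moves from `$HOME/.cluster-api/overrides` to `$XDG_CONFIG_HOME/cluster-api/overrides`. The sketch below restates that resolution as a standalone function; `overridesRoot` is a hypothetical name used for illustration, and the trailing path layout (`<provider-label>/<version>/<file>`) is the one the updated tests assert.

```go
package main

import (
	"fmt"
	"path/filepath"
	"strings"

	"github.com/adrg/xdg"
)

// overridesRoot is a hypothetical helper mirroring the patched
// overrides.Path() logic: a non-blank folder from the config variables
// takes precedence; otherwise the default lives under the XDG config dir.
func overridesRoot(fromConfigVar string) (string, error) {
	if strings.TrimSpace(fromConfigVar) != "" {
		return fromConfigVar, nil
	}
	configDirectory, err := xdg.ConfigFile("cluster-api")
	if err != nil {
		return "", err
	}
	return filepath.Join(configDirectory, "overrides"), nil
}

func main() {
	root, err := overridesRoot("") // no overrides folder variable set
	if err != nil {
		panic(err)
	}
	// e.g. ~/.config/cluster-api/overrides/infrastructure-myinfra/v1.0.1/infra-comp.yaml
	fmt.Println(filepath.Join(root, "infrastructure-myinfra", "v1.0.1", "infra-comp.yaml"))
}
```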
diff --git a/cmd/clusterctl/cmd/config_repositories_test.go b/cmd/clusterctl/cmd/config_repositories_test.go index dd816cf99b04..cde75d160556 100644 --- a/cmd/clusterctl/cmd/config_repositories_test.go +++ b/cmd/clusterctl/cmd/config_repositories_test.go @@ -86,7 +86,7 @@ var template = `--- providers: # add a custom provider - name: "my-infra-provider" - url: "/home/.cluster-api/overrides/infrastructure-docker/latest/infrastructure-components.yaml" + url: "/home/.config/cluster-api/overrides/infrastructure-docker/latest/infrastructure-components.yaml" type: "InfrastructureProvider" # add a custom provider - name: "another-provider" @@ -129,7 +129,7 @@ kubekey InfrastructureProvider https://github.com/kubesphere/kubek kubevirt InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-kubevirt/releases/latest/ infrastructure-components.yaml maas InfrastructureProvider https://github.com/spectrocloud/cluster-api-provider-maas/releases/latest/ infrastructure-components.yaml metal3 InfrastructureProvider https://github.com/metal3-io/cluster-api-provider-metal3/releases/latest/ infrastructure-components.yaml -my-infra-provider InfrastructureProvider /home/.cluster-api/overrides/infrastructure-docker/latest/ infrastructure-components.yaml +my-infra-provider InfrastructureProvider /home/.config/cluster-api/overrides/infrastructure-docker/latest/ infrastructure-components.yaml nested InfrastructureProvider https://github.com/kubernetes-sigs/cluster-api-provider-nested/releases/latest/ infrastructure-components.yaml nutanix InfrastructureProvider https://github.com/nutanix-cloud-native/cluster-api-provider-nutanix/releases/latest/ infrastructure-components.yaml oci InfrastructureProvider https://github.com/oracle/cluster-api-provider-oci/releases/latest/ infrastructure-components.yaml @@ -251,7 +251,7 @@ var expectedOutputYaml = `- File: core_components.yaml - File: infrastructure-components.yaml Name: my-infra-provider ProviderType: InfrastructureProvider - URL: /home/.cluster-api/overrides/infrastructure-docker/latest/ + URL: /home/.config/cluster-api/overrides/infrastructure-docker/latest/ - File: infrastructure-components.yaml Name: nested ProviderType: InfrastructureProvider diff --git a/cmd/clusterctl/cmd/generate_cluster.go b/cmd/clusterctl/cmd/generate_cluster.go index 066a24a7e0cc..976780273dbf 100644 --- a/cmd/clusterctl/cmd/generate_cluster.go +++ b/cmd/clusterctl/cmd/generate_cluster.go @@ -55,7 +55,7 @@ var generateClusterClusterCmd = &cobra.Command{ Generate templates for creating workload clusters. clusterctl ships with a list of known providers; if necessary, edit - $HOME/.cluster-api/clusterctl.yaml to add new provider or to customize existing ones. + $XDG_CONFIG_HOME/cluster-api/clusterctl.yaml to add new provider or to customize existing ones. Each provider configuration links to a repository; clusterctl uses this information to fetch templates when creating a new cluster.`), @@ -112,7 +112,7 @@ func init() { generateClusterClusterCmd.Flags().StringVarP(&gc.targetNamespace, "target-namespace", "n", "", "The namespace to use for the workload cluster. If unspecified, the current namespace will be used.") generateClusterClusterCmd.Flags().StringVar(&gc.kubernetesVersion, "kubernetes-version", "", - "The Kubernetes version to use for the workload cluster. If unspecified, the value from OS environment variables or the .cluster-api/clusterctl.yaml config file will be used.") + "The Kubernetes version to use for the workload cluster. 
If unspecified, the value from OS environment variables or the $XDG_CONFIG_HOME/cluster-api/clusterctl.yaml config file will be used.") generateClusterClusterCmd.Flags().Int64Var(&gc.controlPlaneMachineCount, "control-plane-machine-count", 1, "The number of control plane machines for the workload cluster.") generateClusterClusterCmd.Flags().Int64Var(&gc.workerMachineCount, "worker-machine-count", 0, diff --git a/cmd/clusterctl/cmd/init.go b/cmd/clusterctl/cmd/init.go index f53342cd81ea..aac13344bead 100644 --- a/cmd/clusterctl/cmd/init.go +++ b/cmd/clusterctl/cmd/init.go @@ -56,7 +56,7 @@ var initCmd = &cobra.Command{ to have enough privileges to install the desired components. Use 'clusterctl config repositories' to get a list of available providers; if necessary, edit - $HOME/.cluster-api/clusterctl.yaml file to add new provider or to customize existing ones. + $XDG_CONFIG_HOME/cluster-api/clusterctl.yaml file to add new provider or to customize existing ones. Some providers require environment variables to be set before running clusterctl init. Refer to the provider documentation, or use 'clusterctl config provider [name]' to get a list of required variables. diff --git a/cmd/clusterctl/cmd/root.go b/cmd/clusterctl/cmd/root.go index 517a42024e71..f24f5aac6912 100644 --- a/cmd/clusterctl/cmd/root.go +++ b/cmd/clusterctl/cmd/root.go @@ -25,9 +25,9 @@ import ( "strings" "github.com/MakeNowJust/heredoc" + "github.com/adrg/xdg" "github.com/pkg/errors" "github.com/spf13/cobra" - "k8s.io/client-go/util/homedir" "sigs.k8s.io/cluster-api/cmd/clusterctl/client/config" logf "sigs.k8s.io/cluster-api/cmd/clusterctl/log" @@ -56,16 +56,6 @@ var RootCmd = &cobra.Command{ Long: LongDesc(` Get started with Cluster API using clusterctl to create a management cluster, install providers, and create templates for your workload cluster.`), - PersistentPreRunE: func(cmd *cobra.Command, args []string) error { - // Check if Config folder (~/.cluster-api) exist and if not create it - configFolderPath := filepath.Join(homedir.HomeDir(), config.ConfigFolder) - if _, err := os.Stat(configFolderPath); os.IsNotExist(err) { - if err := os.MkdirAll(filepath.Dir(configFolderPath), os.ModePerm); err != nil { - return errors.Wrapf(err, "failed to create the clusterctl config directory: %s", configFolderPath) - } - } - return nil - }, PersistentPostRunE: func(cmd *cobra.Command, args []string) error { // Check if clusterctl needs an upgrade "AFTER" running each command // and sub-command. @@ -78,7 +68,11 @@ var RootCmd = &cobra.Command{ // version check is disabled. Return early. 
return nil } - output, err := newVersionChecker(configClient.Variables()).Check() + checker, err := newVersionChecker(configClient.Variables()) + if err != nil { + return err + } + output, err := checker.Check() if err != nil { return errors.Wrap(err, "unable to verify clusterctl version") } @@ -87,8 +81,13 @@ var RootCmd = &cobra.Command{ fmt.Fprintf(os.Stderr, "\033[33m%s\033[0m", output) } + configDirectory, err := xdg.ConfigFile(config.ConfigFolderXDG) + if err != nil { + return err + } + // clean the downloaded config if was fetched from remote - downloadConfigFile := filepath.Join(homedir.HomeDir(), config.ConfigFolder, config.DownloadConfigFile) + downloadConfigFile := filepath.Join(configDirectory, config.DownloadConfigFile) if _, err := os.Stat(downloadConfigFile); err == nil { if verbosity != nil && *verbosity >= 5 { fmt.Fprintf(os.Stdout, "Removing downloaded clusterctl config file: %s\n", config.DownloadConfigFile) @@ -122,7 +121,7 @@ func init() { RootCmd.PersistentFlags().AddGoFlagSet(flag.CommandLine) RootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", - "Path to clusterctl configuration (default is `$HOME/.cluster-api/clusterctl.yaml`) or to a remote location (i.e. https://example.com/clusterctl.yaml)") + "Path to clusterctl configuration (default is `$XDG_CONFIG_HOME/cluster-api/clusterctl.yaml`) or to a remote location (i.e. https://example.com/clusterctl.yaml)") RootCmd.AddGroup( &cobra.Group{ diff --git a/cmd/clusterctl/cmd/version_checker.go b/cmd/clusterctl/cmd/version_checker.go index 8aee0ef0607c..65308b319d3d 100644 --- a/cmd/clusterctl/cmd/version_checker.go +++ b/cmd/clusterctl/cmd/version_checker.go @@ -25,11 +25,11 @@ import ( "strings" "time" + "github.com/adrg/xdg" "github.com/blang/semver" "github.com/google/go-github/v48/github" "github.com/pkg/errors" "golang.org/x/oauth2" - "k8s.io/client-go/util/homedir" "sigs.k8s.io/yaml" "sigs.k8s.io/cluster-api/cmd/clusterctl/client/config" @@ -51,7 +51,7 @@ type versionChecker struct { // newVersionChecker returns a versionChecker. Its behavior has been inspired // by https://github.com/cli/cli. -func newVersionChecker(vc config.VariablesClient) *versionChecker { +func newVersionChecker(vc config.VariablesClient) (*versionChecker, error) { var client *github.Client token, err := vc.Get("GITHUB_TOKEN") if err == nil { @@ -64,11 +64,16 @@ func newVersionChecker(vc config.VariablesClient) *versionChecker { client = github.NewClient(nil) } + configDirectory, err := xdg.ConfigFile(config.ConfigFolderXDG) + if err != nil { + return nil, err + } + return &versionChecker{ - versionFilePath: filepath.Join(homedir.HomeDir(), config.ConfigFolder, "version.yaml"), + versionFilePath: filepath.Join(configDirectory, "version.yaml"), cliVersion: version.Get, githubClient: client, - } + }, nil } // ReleaseInfo stores information about the release. @@ -87,7 +92,7 @@ type VersionState struct { // latest available release for CAPI // (https://github.com/kubernetes-sigs/cluster-api). It gets the latest // release from github at most once during a 24 hour period and caches the -// state by default in $HOME/.cluster-api/state.yaml. If the clusterctl +// state by default in $XDG_CONFIG_HOME/cluster-api/state.yaml. If the clusterctl // version is the same or greater it returns nothing. 
func (v *versionChecker) Check() (string, error) { log := logf.Log diff --git a/cmd/clusterctl/cmd/version_checker_test.go b/cmd/clusterctl/cmd/version_checker_test.go index 0ca77cfad89a..b48dcebf7616 100644 --- a/cmd/clusterctl/cmd/version_checker_test.go +++ b/cmd/clusterctl/cmd/version_checker_test.go @@ -24,8 +24,8 @@ import ( "testing" "time" + "github.com/adrg/xdg" . "github.com/onsi/gomega" - "k8s.io/client-go/util/homedir" "sigs.k8s.io/yaml" "sigs.k8s.io/cluster-api/cmd/clusterctl/internal/test" @@ -35,9 +35,15 @@ import ( func TestVersionChecker_newVersionChecker(t *testing.T) { g := NewWithT(t) - versionChecker := newVersionChecker(test.NewFakeVariableClient()) + versionChecker, err := newVersionChecker(test.NewFakeVariableClient()) - expectedStateFilePath := filepath.Join(homedir.HomeDir(), ".cluster-api", "version.yaml") + g.Expect(err).To(BeNil()) + + configHome, err := xdg.ConfigFile("cluster-api") + + g.Expect(err).To(BeNil()) + + expectedStateFilePath := filepath.Join(configHome, "version.yaml") g.Expect(versionChecker.versionFilePath).To(Equal(expectedStateFilePath)) g.Expect(versionChecker.cliVersion).ToNot(BeNil()) g.Expect(versionChecker.githubClient).ToNot(BeNil()) @@ -240,7 +246,9 @@ https://github.com/foo/bar/releases/v0.3.8-alpha.1 }, ) defer cleanup() - versionChecker := newVersionChecker(test.NewFakeVariableClient()) + versionChecker, err := newVersionChecker(test.NewFakeVariableClient()) + g.Expect(err).To(BeNil()) + versionChecker.cliVersion = tt.cliVersion versionChecker.githubClient = fakeGithubClient versionChecker.versionFilePath = tmpVersionFile @@ -272,7 +280,8 @@ func TestVersionChecker_WriteStateFile(t *testing.T) { tmpVersionFile, cleanDir := generateTempVersionFilePath(g) defer cleanDir() - versionChecker := newVersionChecker(test.NewFakeVariableClient()) + versionChecker, err := newVersionChecker(test.NewFakeVariableClient()) + g.Expect(err).To(BeNil()) versionChecker.versionFilePath = tmpVersionFile versionChecker.githubClient = fakeGithubClient @@ -303,13 +312,14 @@ func TestVersionChecker_ReadFromStateFile(t *testing.T) { }, ) defer cleanup1() - versionChecker := newVersionChecker(test.NewFakeVariableClient()) + versionChecker, err := newVersionChecker(test.NewFakeVariableClient()) + g.Expect(err).To(BeNil()) versionChecker.versionFilePath = tmpVersionFile versionChecker.githubClient = fakeGithubClient1 // this call to getLatestRelease will pull from our fakeGithubClient1 and // store the information including timestamp into the state file. 
- _, err := versionChecker.getLatestRelease() + _, err = versionChecker.getLatestRelease() g.Expect(err).ToNot(HaveOccurred()) // override the github client with response to a new version v0.3.99 @@ -359,11 +369,12 @@ func TestVersionChecker_ReadFromStateFileWithin24Hrs(t *testing.T) { }, ) defer cleanup1() - versionChecker := newVersionChecker(test.NewFakeVariableClient()) + versionChecker, err := newVersionChecker(test.NewFakeVariableClient()) + g.Expect(err).To(BeNil()) versionChecker.versionFilePath = tmpVersionFile versionChecker.githubClient = fakeGithubClient1 - _, err := versionChecker.getLatestRelease() + _, err = versionChecker.getLatestRelease() g.Expect(err).ToNot(HaveOccurred()) // Since the state file is more that 24 hours old we want to retrieve the diff --git a/cmd/clusterctl/hack/create-local-repository.py b/cmd/clusterctl/hack/create-local-repository.py index c8d5f750a0fa..bfb9880b1e8c 100755 --- a/cmd/clusterctl/hack/create-local-repository.py +++ b/cmd/clusterctl/hack/create-local-repository.py @@ -40,54 +40,54 @@ from __future__ import unicode_literals +import errno import json -import subprocess import os +import subprocess from distutils.dir_util import copy_tree from distutils.file_util import copy_file -import errno -import sys settings = {} providers = { - 'cluster-api': { - 'componentsFile': 'core-components.yaml', - 'nextVersion': 'v1.5.99', - 'type': 'CoreProvider', - }, - 'bootstrap-kubeadm': { - 'componentsFile': 'bootstrap-components.yaml', - 'nextVersion': 'v1.5.99', - 'type': 'BootstrapProvider', - 'configFolder': 'bootstrap/kubeadm/config/default', - }, - 'control-plane-kubeadm': { - 'componentsFile': 'control-plane-components.yaml', - 'nextVersion': 'v1.5.99', - 'type': 'ControlPlaneProvider', - 'configFolder': 'controlplane/kubeadm/config/default', - }, - 'infrastructure-docker': { - 'componentsFile': 'infrastructure-components.yaml', - 'nextVersion': 'v1.5.99', - 'type': 'InfrastructureProvider', - 'configFolder': 'test/infrastructure/docker/config/default', - }, - 'infrastructure-in-memory': { + 'cluster-api': { + 'componentsFile': 'core-components.yaml', + 'nextVersion': 'v1.5.99', + 'type': 'CoreProvider', + }, + 'bootstrap-kubeadm': { + 'componentsFile': 'bootstrap-components.yaml', + 'nextVersion': 'v1.5.99', + 'type': 'BootstrapProvider', + 'configFolder': 'bootstrap/kubeadm/config/default', + }, + 'control-plane-kubeadm': { + 'componentsFile': 'control-plane-components.yaml', + 'nextVersion': 'v1.5.99', + 'type': 'ControlPlaneProvider', + 'configFolder': 'controlplane/kubeadm/config/default', + }, + 'infrastructure-docker': { + 'componentsFile': 'infrastructure-components.yaml', + 'nextVersion': 'v1.5.99', + 'type': 'InfrastructureProvider', + 'configFolder': 'test/infrastructure/docker/config/default', + }, + 'infrastructure-in-memory': { 'componentsFile': 'infrastructure-components.yaml', 'nextVersion': 'v1.5.99', 'type': 'InfrastructureProvider', 'configFolder': 'test/infrastructure/inmemory/config/default', }, 'runtime-extension-test': { - 'componentsFile': 'runtime-extension-components.yaml', - 'nextVersion': 'v1.5.99', - 'type': 'RuntimeExtensionProvider', - 'configFolder': 'test/extension/config/default', - }, + 'componentsFile': 'runtime-extension-components.yaml', + 'nextVersion': 'v1.5.99', + 'type': 'RuntimeExtensionProvider', + 'configFolder': 'test/extension/config/default', + }, } + def load_settings(): global settings try: @@ -95,6 +95,7 @@ def load_settings(): except Exception as e: raise Exception('failed to load 
clusterctl-settings.json: {}'.format(e)) + def load_providers(): provider_repos = settings.get('provider_repos', []) for repo in provider_repos: @@ -108,26 +109,31 @@ def load_providers(): except Exception as e: raise Exception('failed to load clusterctl-settings.json from repo {}: {}'.format(repo, e)) + def execCmd(args): try: out = subprocess.Popen(args, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) stdout, stderr = out.communicate() if stderr is not None: raise Exception('stderr contains: \n{}'.format(stderr)) return stdout - except Exception as e: + except Exception as e: raise Exception('failed to run {}: {}'.format(args, e)) -def get_home(): - return os.path.expanduser('~') def get_repository_folder(): - home = get_home() - return os.path.join(home, '.cluster-api', 'dev-repository') + config_dir = os.getenv("XDG_CONFIG_HOME", "") + if config_dir == "": + home_dir = os.getenv("HOME", "") + if home_dir == "": + raise Exception('HOME variable is not set') + config_dir = os.path.join(home_dir, ".config") + return os.path.join(config_dir, 'cluster-api', 'dev-repository') + def write_local_repository(provider, version, components_file, components_yaml, metadata_file): try: @@ -155,6 +161,7 @@ def write_local_repository(provider, version, components_file, components_yaml, except Exception as e: raise Exception('failed to write {} to {}: {}'.format(components_file, provider_folder, e)) + def create_local_repositories(): providerList = settings.get('providers', []) assert providerList is not None, 'invalid configuration: please define the list of providers to override' @@ -162,31 +169,38 @@ def create_local_repositories(): for provider in providerList: p = providers.get(provider) - assert p is not None, 'invalid configuration: please specify the configuration for the {} provider'.format(provider) + assert p is not None, 'invalid configuration: please specify the configuration for the {} provider'.format( + provider) repo = p.get('repo', '.') config_folder = p.get('configFolder', 'config/default') - metadata_file = repo+'/metadata.yaml' + metadata_file = repo + '/metadata.yaml' next_version = p.get('nextVersion') - assert next_version is not None, 'invalid configuration for provider {}: please provide nextVersion value'.format(provider) + assert next_version is not None, 'invalid configuration for provider {}: please provide nextVersion value'.format( + provider) name, type = splitNameAndType(provider) - assert name is not None, 'invalid configuration for provider {}: please use a valid provider label'.format(provider) + assert name is not None, 'invalid configuration for provider {}: please use a valid provider label'.format( + provider) components_file = p.get('componentsFile') - assert components_file is not None, 'invalid configuration for provider {}: please provide componentsFile value'.format(provider) + assert components_file is not None, 'invalid configuration for provider {}: please provide componentsFile value'.format( + provider) execCmd(['make', 'kustomize']) components_yaml = execCmd(['./hack/tools/bin/kustomize', 'build', os.path.join(repo, config_folder)]) - components_path = write_local_repository(provider, next_version, components_file, components_yaml, metadata_file) + components_path = write_local_repository(provider, next_version, components_file, components_yaml, + metadata_file) yield name, type, next_version, components_path + def injectLatest(path): head, tail = os.path.split(path) return 
'{}/latest/{}'.format(head, tail) + def create_dev_config(repos): yaml = "providers:\n" for name, type, next_version, components_path in repos: @@ -205,6 +219,7 @@ def create_dev_config(repos): except Exception as e: raise Exception('failed to write {}: {}'.format(config_path, e)) + def splitNameAndType(provider): if provider == 'cluster-api': return 'cluster-api', 'CoreProvider' @@ -222,27 +237,35 @@ def splitNameAndType(provider): return provider[len('addon-'):], 'AddonProvider' return None, None + def CoreProviderFlag(): return '--core' + def BootstrapProviderFlag(): return '--bootstrap' + def ControlPlaneProviderFlag(): return '--control-plane' + def InfrastructureProviderFlag(): return '--infrastructure' + def IPAMProviderFlag(): return '--ipam' + def RuntimeExtensionProviderFlag(): return '--runtime-extension' + def AddonProviderFlag(): return '--addon' + def type_to_flag(type): switcher = { 'CoreProvider': CoreProviderFlag, @@ -256,19 +279,21 @@ def type_to_flag(type): func = switcher.get(type, lambda: 'Invalid type') return func() + def print_instructions(repos): providerList = settings.get('providers', []) - print ('clusterctl local overrides generated from local repositories for the {} providers.'.format(', '.join(providerList))) - print ('in order to use them, please run:') + print('clusterctl local overrides generated from local repositories for the {} providers.'.format( + ', '.join(providerList))) + print('in order to use them, please run:') print cmd = "clusterctl init \\\n" for name, type, next_version, components_path in repos: cmd += " {} {}:{} \\\n".format(type_to_flag(type), name, next_version) cmd += " --config ~/.cluster-api/dev-repository/config.yaml" - print (cmd) + print(cmd) print if 'infrastructure-docker' in providerList: - print ('please check the documentation for additional steps required for using the docker provider') + print('please check the documentation for additional steps required for using the docker provider') print if 'infrastructure-in-memory' in providerList: print ('please check the documentation for additional steps required for using the in-memory provider') diff --git a/docs/book/src/clusterctl/commands/additional-commands.md b/docs/book/src/clusterctl/commands/additional-commands.md index 278d8d9a1977..f7b251964b79 100644 --- a/docs/book/src/clusterctl/commands/additional-commands.md +++ b/docs/book/src/clusterctl/commands/additional-commands.md @@ -3,7 +3,7 @@ Display the list of providers and their repository configurations. clusterctl ships with a list of known providers; if necessary, edit -$HOME/.cluster-api/clusterctl.yaml file to add a new provider or to customize existing ones. +$XDG_CONFIG_HOME/cluster-api/clusterctl.yaml file to add a new provider or to customize existing ones. # clusterctl help diff --git a/docs/book/src/clusterctl/commands/init.md b/docs/book/src/clusterctl/commands/init.md index 366b3a11bac5..032a8771bae3 100644 --- a/docs/book/src/clusterctl/commands/init.md +++ b/docs/book/src/clusterctl/commands/init.md @@ -127,10 +127,10 @@ See [clusterctl configuration](../configuration.md) for more info about provider

Is it possible to override files read from a provider repository?

If, for any reasons, the user wants to replace the assets available on a provider repository with a locally available asset, -the user is required to save the file under `$HOME/.cluster-api/overrides///`. +the user is required to save the file under `$XDG_CONFIG_HOME/cluster-api/overrides///`. ```bash -$HOME/.cluster-api/overrides/infrastructure-aws/v0.5.2/infrastructure-components.yaml +$XDG_CONFIG_HOME/cluster-api/overrides/infrastructure-aws/v0.5.2/infrastructure-components.yaml ``` diff --git a/docs/book/src/clusterctl/configuration.md b/docs/book/src/clusterctl/configuration.md index f1975a12258d..3ac2017d9601 100644 --- a/docs/book/src/clusterctl/configuration.md +++ b/docs/book/src/clusterctl/configuration.md @@ -1,6 +1,6 @@ # clusterctl Configuration File -The `clusterctl` config file is located at `$HOME/.cluster-api/clusterctl.yaml`. +The `clusterctl` config file is located at `$XDG_CONFIG_HOME/cluster-api/clusterctl.yaml`. It can be used to: - Customize the list of providers and provider repositories. @@ -72,7 +72,7 @@ wants to use a different repository, it is possible to use the following configu ```yaml cert-manager: - url: "/Users/foo/.cluster-api/dev-repository/cert-manager/latest/cert-manager.yaml" + url: "/Users/foo/.config/cluster-api/dev-repository/cert-manager/latest/cert-manager.yaml" ``` **Note**: It is possible to use the `${HOME}` and `${CLUSTERCTL_REPOSITORY_PATH}` environment variables in `url`. @@ -134,7 +134,7 @@ Overrides only provide file replacements; instead, provider version resolution i `clusterctl` uses an overrides layer to read in injected provider components, cluster templates and metadata. By default, it reads the files from -`$HOME/.cluster-api/overrides`. +`$XDG_CONFIG_HOME/cluster-api/overrides`. The directory structure under the `overrides` directory should follow the template: @@ -262,9 +262,9 @@ images: To have more verbose logs you can use the `-v` flag when running the `clusterctl` and set the level of the logging verbose with a positive integer number, ie. `-v 3`. -If you do not want to use the flag every time you issue a command you can set the environment variable `CLUSTERCTL_LOG_LEVEL` or set the variable in the `clusterctl` config file located by default at `$HOME/.cluster-api/clusterctl.yaml`. +If you do not want to use the flag every time you issue a command you can set the environment variable `CLUSTERCTL_LOG_LEVEL` or set the variable in the `clusterctl` config file located by default at `$XDG_CONFIG_HOME/cluster-api/clusterctl.yaml`. ## Skip checking for updates -`clusterctl` automatically checks for new versions every time it is used. If you do not want `clusterctl` to check for new updates you can set the environment variable `CLUSTERCTL_DISABLE_VERSIONCHECK` to `"true"` or set the variable in the `clusterctl` config file located by default at `$HOME/.cluster-api/clusterctl.yaml`. +`clusterctl` automatically checks for new versions every time it is used. If you do not want `clusterctl` to check for new updates you can set the environment variable `CLUSTERCTL_DISABLE_VERSIONCHECK` to `"true"` or set the variable in the `clusterctl` config file located by default at `$XDG_CONFIG_HOME/cluster-api/clusterctl.yaml`. 
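The two environment variables called out above (`CLUSTERCTL_LOG_LEVEL` and `CLUSTERCTL_DISABLE_VERSIONCHECK`) can come either from the process environment or from the config file. As a rough illustration of the precedence a user can expect, here is a hedged Go sketch — it assumes the usual viper-style ordering (explicit flag, then environment, then config file), which the docs imply but do not spell out, and `logLevel` with its two parameters is purely hypothetical:

```go
package main

import (
	"fmt"
	"os"
	"strconv"
)

// logLevel sketches the assumed precedence: an explicit -v flag wins,
// then the CLUSTERCTL_LOG_LEVEL environment variable, then whatever the
// clusterctl config file provides. flagValue and configFileValue stand in
// for the real flag parsing and viper-backed variables client.
func logLevel(flagValue *int, configFileValue string) int {
	if flagValue != nil {
		return *flagValue
	}
	if env := os.Getenv("CLUSTERCTL_LOG_LEVEL"); env != "" {
		if v, err := strconv.Atoi(env); err == nil {
			return v
		}
	}
	if v, err := strconv.Atoi(configFileValue); err == nil {
		return v
	}
	return 0 // default: non-verbose
}

func main() {
	fmt.Println(logLevel(nil, "3")) // env unset, config file sets verbosity 3
}
```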
diff --git a/docs/book/src/clusterctl/developers.md b/docs/book/src/clusterctl/developers.md index 1170b016bdf4..6f79beb62162 100644 --- a/docs/book/src/clusterctl/developers.md +++ b/docs/book/src/clusterctl/developers.md @@ -66,7 +66,7 @@ cmd/clusterctl/hack/create-local-repository.py ``` The script reads from the source folders for the providers you want to install, builds the providers' assets, -and places them in a local repository folder located under `$HOME/.cluster-api/dev-repository/`. +and places them in a local repository folder located under `$XDG_CONFIG_HOME/cluster-api/dev-repository/`. Additionally, the command output provides you the `clusterctl init` command with all the necessary flags. The output should be similar to: @@ -83,7 +83,7 @@ clusterctl init \ --config ~/.cluster-api/dev-repository/config.yaml ``` -As you might notice, the command is using the `$HOME/.cluster-api/dev-repository/config.yaml` config file, +As you might notice, the command is using the `$XDG_CONFIG_HOME/cluster-api/dev-repository/config.yaml` config file, containing all the required setting to make clusterctl use the local repository.
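The developers guide now points the local dev repository at `$XDG_CONFIG_HOME/cluster-api/dev-repository/`, matching the `get_repository_folder()` helper added to `create-local-repository.py` earlier in this patch. For readers following along in Go rather than Python, this is the same fallback logic re-expressed as a small self-contained sketch; `devRepositoryFolder` is an illustrative name, not a function in the tree:

```go
package main

import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
)

// devRepositoryFolder mirrors create-local-repository.py's
// get_repository_folder(): honor $XDG_CONFIG_HOME when set, otherwise
// fall back to $HOME/.config, and fail when neither variable is usable.
func devRepositoryFolder() (string, error) {
	configDir := os.Getenv("XDG_CONFIG_HOME")
	if configDir == "" {
		home := os.Getenv("HOME")
		if home == "" {
			return "", errors.New("HOME variable is not set")
		}
		configDir = filepath.Join(home, ".config")
	}
	return filepath.Join(configDir, "cluster-api", "dev-repository"), nil
}

func main() {
	folder, err := devRepositoryFolder()
	if err != nil {
		panic(err)
	}
	fmt.Println("local dev repository:", folder)
}
```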