From 89b92e8c59d08759b02521756583ba7609011a0b Mon Sep 17 00:00:00 2001 From: Ray Wainman Date: Mon, 4 Nov 2024 16:28:30 -0500 Subject: [PATCH 1/5] Update FAQ.md with leader-election flag warnings --- vertical-pod-autoscaler/FAQ.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vertical-pod-autoscaler/FAQ.md b/vertical-pod-autoscaler/FAQ.md index cc2e64387e22..86fe1c6c94a2 100644 --- a/vertical-pod-autoscaler/FAQ.md +++ b/vertical-pod-autoscaler/FAQ.md @@ -205,11 +205,11 @@ Name | Type | Description | Default `memory-histogram-decay-half-life` | Duration | The amount of time it takes a historical memory usage sample to lose half of its weight. In other words, a fresh usage sample is twice as 'important' as one with age equal to the half life period. | model.DefaultMemoryHistogramDecayHalfLife `cpu-histogram-decay-half-life` | Duration | The amount of time it takes a historical CPU usage sample to lose half of its weight. | model.DefaultCPUHistogramDecayHalfLife `cpu-integer-post-processor-enabled` | Bool | Enable the CPU integer recommendation post processor | false -`leader-elect` | Bool | Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability. | false +`leader-elect` | Bool | Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability. **If enabling this in GKE, you MUST also manually set the `--leader-elect-resource-name` flag.** | false `leader-elect-lease-duration` | Duration | The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled. | 15s `leader-elect-renew-deadline` | Duration | The interval between attempts by the acting master to renew a leadership slot before it stops leading. This must be less than the lease duration. This is only applicable if leader election is enabled. | 10s `leader-elect-resource-lock` | String | The type of resource object that is used for locking during leader election. Supported options are 'leases', 'endpointsleases' and 'configmapsleases'. | "leases" -`leader-elect-resource-name` | String | The name of resource object that is used for locking during leader election. | "vpa-recommender" +`leader-elect-resource-name` | String | The name of resource object that is used for locking during leader election. **If using GKE, you must set this value to something OTHER than "vpa-recommender", for example "vpa-recommender-lease".** | "vpa-recommender" `leader-elect-resource-namespace` | String | The namespace of resource object that is used for locking during leader election. | "kube-system" `leader-elect-retry-period` | Duration | The duration the clients should wait between attempting acquisition and renewal of a leadership. This is only applicable if leader election is enabled. | 2s From c705f0121a4de55ce82cf0c8b37be82226561aec Mon Sep 17 00:00:00 2001 From: Ray Wainman Date: Mon, 4 Nov 2024 21:33:18 +0000 Subject: [PATCH 2/5] Add warning in main.go for lease name. --- vertical-pod-autoscaler/pkg/recommender/main.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vertical-pod-autoscaler/pkg/recommender/main.go b/vertical-pod-autoscaler/pkg/recommender/main.go index 84223798aa76..785cb604c854 100644 --- a/vertical-pod-autoscaler/pkg/recommender/main.go +++ b/vertical-pod-autoscaler/pkg/recommender/main.go @@ -190,6 +190,8 @@ func defaultLeaderElectionConfiguration() componentbaseconfig.LeaderElectionConf RenewDeadline: metav1.Duration{Duration: defaultRenewDeadline}, RetryPeriod: metav1.Duration{Duration: defaultRetryPeriod}, ResourceLock: resourcelock.LeasesResourceLock, + // Note that the following default conflicts with the GKE default system component that also uses a lease with this name. + // When deploying in GKE, be sure to use a different lease name! ResourceName: "vpa-recommender", ResourceNamespace: metav1.NamespaceSystem, } From 3f3610092f14412ca47726de5569b3e76ccabaf3 Mon Sep 17 00:00:00 2001 From: Ray Wainman Date: Tue, 5 Nov 2024 14:45:29 +0000 Subject: [PATCH 3/5] fix formatting issue in main.go --- vertical-pod-autoscaler/pkg/recommender/main.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/recommender/main.go b/vertical-pod-autoscaler/pkg/recommender/main.go index 785cb604c854..4909b22422b1 100644 --- a/vertical-pod-autoscaler/pkg/recommender/main.go +++ b/vertical-pod-autoscaler/pkg/recommender/main.go @@ -185,11 +185,11 @@ const ( func defaultLeaderElectionConfiguration() componentbaseconfig.LeaderElectionConfiguration { return componentbaseconfig.LeaderElectionConfiguration{ - LeaderElect: false, - LeaseDuration: metav1.Duration{Duration: defaultLeaseDuration}, - RenewDeadline: metav1.Duration{Duration: defaultRenewDeadline}, - RetryPeriod: metav1.Duration{Duration: defaultRetryPeriod}, - ResourceLock: resourcelock.LeasesResourceLock, + LeaderElect: false, + LeaseDuration: metav1.Duration{Duration: defaultLeaseDuration}, + RenewDeadline: metav1.Duration{Duration: defaultRenewDeadline}, + RetryPeriod: metav1.Duration{Duration: defaultRetryPeriod}, + ResourceLock: resourcelock.LeasesResourceLock, // Note that the following default conflicts with the GKE default system component that also uses a lease with this name. // When deploying in GKE, be sure to use a different lease name! ResourceName: "vpa-recommender", From bec215e1263dbbbd419875d0e4965526ba1e6dab Mon Sep 17 00:00:00 2001 From: Ray Wainman Date: Tue, 5 Nov 2024 14:50:06 +0000 Subject: [PATCH 4/5] Add entry to known limitations --- vertical-pod-autoscaler/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vertical-pod-autoscaler/README.md b/vertical-pod-autoscaler/README.md index 656189b51668..3e9c1015628c 100644 --- a/vertical-pod-autoscaler/README.md +++ b/vertical-pod-autoscaler/README.md @@ -420,6 +420,10 @@ Using it in conjunction with `--ignored-vpa-object-namespaces=kube-system` or `- size, available quota) and cause **pods to go pending**. This can be partly addressed by using VPA together with [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#basics). * Multiple VPA resources matching the same pod have undefined behavior. +* Running the vpa-recommender with leader election enabled (`--leader-elect=true`) in a GKE cluster + causes contention with a lease called `vpa-recommender` held by the GKE system component of the + same name. To run your own VPA in GKE, make sure to specify a different lease name using + `--leader-elect-resource-name=vpa-recommender-lease` (or specify your own lease name). # Related links From bb0d3298dc5a162e29c63dbb895de1d0851808a3 Mon Sep 17 00:00:00 2001 From: Ray Wainman Date: Tue, 5 Nov 2024 19:52:51 +0000 Subject: [PATCH 5/5] remove main.go changes, we should refrain from putting any vendor specific content there --- vertical-pod-autoscaler/pkg/recommender/main.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/vertical-pod-autoscaler/pkg/recommender/main.go b/vertical-pod-autoscaler/pkg/recommender/main.go index 4909b22422b1..84223798aa76 100644 --- a/vertical-pod-autoscaler/pkg/recommender/main.go +++ b/vertical-pod-autoscaler/pkg/recommender/main.go @@ -185,13 +185,11 @@ const ( func defaultLeaderElectionConfiguration() componentbaseconfig.LeaderElectionConfiguration { return componentbaseconfig.LeaderElectionConfiguration{ - LeaderElect: false, - LeaseDuration: metav1.Duration{Duration: defaultLeaseDuration}, - RenewDeadline: metav1.Duration{Duration: defaultRenewDeadline}, - RetryPeriod: metav1.Duration{Duration: defaultRetryPeriod}, - ResourceLock: resourcelock.LeasesResourceLock, - // Note that the following default conflicts with the GKE default system component that also uses a lease with this name. - // When deploying in GKE, be sure to use a different lease name! + LeaderElect: false, + LeaseDuration: metav1.Duration{Duration: defaultLeaseDuration}, + RenewDeadline: metav1.Duration{Duration: defaultRenewDeadline}, + RetryPeriod: metav1.Duration{Duration: defaultRetryPeriod}, + ResourceLock: resourcelock.LeasesResourceLock, ResourceName: "vpa-recommender", ResourceNamespace: metav1.NamespaceSystem, }