From 75e0bfba95b8b39998178c90cf4a78eb27043868 Mon Sep 17 00:00:00 2001 From: Suraj Deshmukh Date: Wed, 23 Jun 2021 14:33:00 +0530 Subject: [PATCH] cert-rotator: Add retry to cluster upgrade Signed-off-by: Suraj Deshmukh --- cli/cmd/cluster/certificate-rotator.go | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/cli/cmd/cluster/certificate-rotator.go b/cli/cmd/cluster/certificate-rotator.go index af3f35c28..328150e6a 100644 --- a/cli/cmd/cluster/certificate-rotator.go +++ b/cli/cmd/cluster/certificate-rotator.go @@ -8,12 +8,18 @@ import ( log "github.com/sirupsen/logrus" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" "github.com/kinvolk/lokomotive/pkg/k8sutil" "github.com/kinvolk/lokomotive/pkg/platform" ) +const ( + retryInterval = 10 * time.Second + retryTimeout = 30 * time.Minute +) + type certificateRotator struct { clientSet *kubernetes.Clientset newCACert string @@ -99,8 +105,22 @@ func rotateControlPlaneCerts(contextLogger *log.Entry, cc clusterConfig) error { contextLogger.Log(log.InfoLevel, "Applying a controlplane update with the new CA") - if err := c.upgradeControlPlane(contextLogger, kubeconfig); err != nil { - return fmt.Errorf("running controlplane upgrade: %v", err) + var upgradeErr error + + err = wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { + if upgradeErr = c.upgradeControlPlane(contextLogger, kubeconfig); upgradeErr != nil { + return false, nil + } + + return true, nil + }) + + if upgradeErr != nil { + return fmt.Errorf("running controlplane upgrade: %w", upgradeErr) + } + + if err != nil { + return fmt.Errorf("control plane did not upgrade after multiple retries: %w", err) } cs, err := k8sutil.NewClientset(kubeconfig)