From 2eca1a6fefc0cd06ee990d5bd909c7ab517f75e2 Mon Sep 17 00:00:00 2001 From: Theron Voran Date: Tue, 10 Aug 2021 14:40:43 -0700 Subject: [PATCH 1/6] Continuously retry updating the cert secret Uses retry with exponential backoff to retry updating the cert secret if there's an error. Sync's with caller via a stop channel, caller waits on stop channel before starting a new update thread. --- go.mod | 1 + go.sum | 2 ++ helper/cert/source_gen.go | 48 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 1d234aae..84ee896b 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.16 require ( github.com/armon/go-radix v1.0.0 // indirect + github.com/cenkalti/backoff/v4 v4.1.1 github.com/hashicorp/go-hclog v0.9.2 github.com/hashicorp/golang-lru v0.5.3 // indirect github.com/hashicorp/vault/sdk v0.1.14-0.20191205220236-47cffd09f972 diff --git a/go.sum b/go.sum index 8f40a3c2..13072ba1 100644 --- a/go.sum +++ b/go.sum @@ -49,6 +49,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0 h1:ByYyxL9InA1OWqxJqqp2A5pYHUrCiAL6K3J+LKSsQkY= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= +github.com/cenkalti/backoff/v4 v4.1.1 h1:G2HAfAmvm/GcKan2oOQpBXOd2tT2G57ZnZGWa1PxPBQ= +github.com/cenkalti/backoff/v4 v4.1.1/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= diff --git a/helper/cert/source_gen.go b/helper/cert/source_gen.go index 563c8245..6a30bb7a 100644 --- a/helper/cert/source_gen.go +++ b/helper/cert/source_gen.go @@ -18,6 +18,7 @@ import ( "sync" "time" + "github.com/cenkalti/backoff/v4" "github.com/hashicorp/go-hclog" "github.com/hashicorp/vault-k8s/leader" v1 "k8s.io/api/core/v1" @@ -61,6 +62,12 @@ type GenSource struct { SecretsCache informerv1.SecretInformer LeaderElector *leader.LeaderElector + // UpdateCancel and StopUpdate are used to control and sync the goroutine + // that continuously attempts to update the K8s Secret with the new + // certificate + UpdateCancel context.CancelFunc + StopUpdate chan struct{} + Log hclog.Logger } @@ -143,9 +150,19 @@ func (s *GenSource) Certificate(ctx context.Context, last *Bundle) (Bundle, erro result.Key = []byte(key) if s.LeaderElector != nil { - if err := s.updateSecret(ctx, result); err != nil { - return result, fmt.Errorf("failed to update Secret: %s", err) + // Cancel and wait for the previous retryUpdateSecret to quit + if s.UpdateCancel != nil && s.StopUpdate != nil { + s.Log.Trace("cancelling cert secret update context") + s.UpdateCancel() + <-s.StopUpdate + } + // Start a new update goroutine + var updateCtx context.Context + updateCtx, s.UpdateCancel = context.WithCancel(ctx) + if s.StopUpdate == nil { + s.StopUpdate = make(chan struct{}) } + go s.retryUpdateSecret(updateCtx, result) } } @@ -182,6 +199,33 @@ func (s *GenSource) checkLeader(ctx context.Context, changed chan<- bool) { } } +func (s *GenSource) retryUpdateSecret(ctx context.Context, bundle Bundle) { + defer func() { + s.StopUpdate <- struct{}{} + }() + + // New exponential backoff with unlimited retries + bo := backoff.NewExponentialBackOff() + bo.MaxInterval = time.Second * 30 + bo.MaxElapsedTime = 0 + ticker := backoff.NewTicker(bo) + + for { + select { + case <-ticker.C: + if err := s.updateSecret(ctx, bundle); err != nil { + s.Log.Error(fmt.Sprintf("attempt to update Secret %q failed: %s", certSecretName, err)) + } else { + s.Log.Trace(fmt.Sprintf("updated secret %q, quitting update thread", certSecretName)) + return + } + case <-ctx.Done(): + s.Log.Trace("quitting update thread") + return + } + } +} + func (s *GenSource) updateSecret(ctx context.Context, bundle Bundle) error { secret := &v1.Secret{ ObjectMeta: metav1.ObjectMeta{ From 034ff0c0dc0225bf8dc74e15ec287f5a05f4e34b Mon Sep 17 00:00:00 2001 From: Theron Voran Date: Mon, 16 Aug 2021 15:40:26 -0700 Subject: [PATCH 2/6] Update helper/cert/source_gen.go Co-authored-by: Tom Proctor --- helper/cert/source_gen.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helper/cert/source_gen.go b/helper/cert/source_gen.go index 6a30bb7a..4207cb0c 100644 --- a/helper/cert/source_gen.go +++ b/helper/cert/source_gen.go @@ -214,7 +214,7 @@ func (s *GenSource) retryUpdateSecret(ctx context.Context, bundle Bundle) { select { case <-ticker.C: if err := s.updateSecret(ctx, bundle); err != nil { - s.Log.Error(fmt.Sprintf("attempt to update Secret %q failed: %s", certSecretName, err)) + s.Log.Error("attempt to update cert secret failed", "certSecretName", certSecretName, "err", err) } else { s.Log.Trace(fmt.Sprintf("updated secret %q, quitting update thread", certSecretName)) return From 51f676243f146b93c2836e9905f09ff69e15a00b Mon Sep 17 00:00:00 2001 From: Theron Voran Date: Mon, 16 Aug 2021 17:32:01 -0700 Subject: [PATCH 3/6] update cert synchronously in Certificate() --- helper/cert/source_gen.go | 52 +++++++++++++-------------------------- 1 file changed, 17 insertions(+), 35 deletions(-) diff --git a/helper/cert/source_gen.go b/helper/cert/source_gen.go index 4207cb0c..230e53d8 100644 --- a/helper/cert/source_gen.go +++ b/helper/cert/source_gen.go @@ -62,12 +62,6 @@ type GenSource struct { SecretsCache informerv1.SecretInformer LeaderElector *leader.LeaderElector - // UpdateCancel and StopUpdate are used to control and sync the goroutine - // that continuously attempts to update the K8s Secret with the new - // certificate - UpdateCancel context.CancelFunc - StopUpdate chan struct{} - Log hclog.Logger } @@ -145,25 +139,14 @@ func (s *GenSource) Certificate(ctx context.Context, last *Bundle) (Bundle, erro // Generate cert, set it on the result, and return cert, key, err := s.generateCert() - if err == nil { - result.Cert = []byte(cert) - result.Key = []byte(key) - - if s.LeaderElector != nil { - // Cancel and wait for the previous retryUpdateSecret to quit - if s.UpdateCancel != nil && s.StopUpdate != nil { - s.Log.Trace("cancelling cert secret update context") - s.UpdateCancel() - <-s.StopUpdate - } - // Start a new update goroutine - var updateCtx context.Context - updateCtx, s.UpdateCancel = context.WithCancel(ctx) - if s.StopUpdate == nil { - s.StopUpdate = make(chan struct{}) - } - go s.retryUpdateSecret(updateCtx, result) - } + if err != nil { + return result, err + } + result.Cert = []byte(cert) + result.Key = []byte(key) + + if s.LeaderElector != nil { + s.retryUpdateSecret(ctx, leaderCh, result) } return result, err @@ -199,15 +182,11 @@ func (s *GenSource) checkLeader(ctx context.Context, changed chan<- bool) { } } -func (s *GenSource) retryUpdateSecret(ctx context.Context, bundle Bundle) { - defer func() { - s.StopUpdate <- struct{}{} - }() - - // New exponential backoff with unlimited retries +func (s *GenSource) retryUpdateSecret(ctx context.Context, leaderCh chan bool, bundle Bundle) { + // New exponential backoff bo := backoff.NewExponentialBackOff() - bo.MaxInterval = time.Second * 30 - bo.MaxElapsedTime = 0 + bo.MaxInterval = 30 * time.Second + bo.MaxElapsedTime = 30 * time.Minute ticker := backoff.NewTicker(bo) for { @@ -216,11 +195,14 @@ func (s *GenSource) retryUpdateSecret(ctx context.Context, bundle Bundle) { if err := s.updateSecret(ctx, bundle); err != nil { s.Log.Error("attempt to update cert secret failed", "certSecretName", certSecretName, "err", err) } else { - s.Log.Trace(fmt.Sprintf("updated secret %q, quitting update thread", certSecretName)) + s.Log.Trace("updated cert secret, quitting update thread", "certSecretName", certSecretName) return } case <-ctx.Done(): - s.Log.Trace("quitting update thread") + s.Log.Trace("quitting update cert retries due to done context") + return + case <-leaderCh: + s.Log.Trace("quitting update cert retries due to leadership change") return } } From 94b13be0f3d4e47f1e9c6553ca3d30b63bfd99ee Mon Sep 17 00:00:00 2001 From: Theron Voran Date: Tue, 17 Aug 2021 11:31:06 -0700 Subject: [PATCH 4/6] stop the backoff ticker Co-authored-by: Calvin Leung Huang <1883212+calvn@users.noreply.github.com> --- helper/cert/source_gen.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helper/cert/source_gen.go b/helper/cert/source_gen.go index 230e53d8..5e47e6c4 100644 --- a/helper/cert/source_gen.go +++ b/helper/cert/source_gen.go @@ -188,7 +188,7 @@ func (s *GenSource) retryUpdateSecret(ctx context.Context, leaderCh chan bool, b bo.MaxInterval = 30 * time.Second bo.MaxElapsedTime = 30 * time.Minute ticker := backoff.NewTicker(bo) - + defer ticker.Stop() for { select { case <-ticker.C: From c37fea7f6d9c50ef4fb3643125d203b3b086d9f9 Mon Sep 17 00:00:00 2001 From: Theron Voran Date: Tue, 17 Aug 2021 15:26:03 -0700 Subject: [PATCH 5/6] better error handling in retryUpdateSecret() Retries until the certificate would be renewed. Actually return an error if all attempts time out. --- helper/cert/source_gen.go | 40 ++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/helper/cert/source_gen.go b/helper/cert/source_gen.go index 5e47e6c4..bf8788e0 100644 --- a/helper/cert/source_gen.go +++ b/helper/cert/source_gen.go @@ -146,10 +146,12 @@ func (s *GenSource) Certificate(ctx context.Context, last *Bundle) (Bundle, erro result.Key = []byte(key) if s.LeaderElector != nil { - s.retryUpdateSecret(ctx, leaderCh, result) + if err := s.retryUpdateSecret(ctx, leaderCh, result); err != nil { + return result, err + } } - return result, err + return result, nil } func (s *GenSource) checkLeader(ctx context.Context, changed chan<- bool) { @@ -182,30 +184,38 @@ func (s *GenSource) checkLeader(ctx context.Context, changed chan<- bool) { } } -func (s *GenSource) retryUpdateSecret(ctx context.Context, leaderCh chan bool, bundle Bundle) { - // New exponential backoff +func (s *GenSource) retryUpdateSecret(ctx context.Context, leaderCh chan bool, bundle Bundle) error { + // New exponential backoff, with a max elapsed time of 90% of the newly + // created cert expiration, since that's the point where it would be renewed + // by the calling function bo := backoff.NewExponentialBackOff() bo.MaxInterval = 30 * time.Second - bo.MaxElapsedTime = 30 * time.Minute + bo.MaxElapsedTime = time.Duration(float64(s.expiry()) * 0.90) ticker := backoff.NewTicker(bo) defer ticker.Stop() - for { + + for range ticker.C { + if err := s.updateSecret(ctx, bundle); err != nil { + s.Log.Error("attempt to update cert secret failed", "certSecretName", certSecretName, "err", err) + } else { + s.Log.Trace("updated cert secret, quitting update thread", "certSecretName", certSecretName) + return nil + } + + // Also check for external exit conditions before continuing select { - case <-ticker.C: - if err := s.updateSecret(ctx, bundle); err != nil { - s.Log.Error("attempt to update cert secret failed", "certSecretName", certSecretName, "err", err) - } else { - s.Log.Trace("updated cert secret, quitting update thread", "certSecretName", certSecretName) - return - } case <-ctx.Done(): s.Log.Trace("quitting update cert retries due to done context") - return + return nil case <-leaderCh: s.Log.Trace("quitting update cert retries due to leadership change") - return + return nil + default: + // try again } } + + return fmt.Errorf("timed out attempting to update cert secret") } func (s *GenSource) updateSecret(ctx context.Context, bundle Bundle) error { From 8cded14358952b37d65e76a9e568bd6f39ee0434 Mon Sep 17 00:00:00 2001 From: Theron Voran Date: Thu, 19 Aug 2021 14:47:49 -0700 Subject: [PATCH 6/6] use select instead of range --- helper/cert/source_gen.go | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/helper/cert/source_gen.go b/helper/cert/source_gen.go index bf8788e0..509f141a 100644 --- a/helper/cert/source_gen.go +++ b/helper/cert/source_gen.go @@ -194,28 +194,27 @@ func (s *GenSource) retryUpdateSecret(ctx context.Context, leaderCh chan bool, b ticker := backoff.NewTicker(bo) defer ticker.Stop() - for range ticker.C { - if err := s.updateSecret(ctx, bundle); err != nil { - s.Log.Error("attempt to update cert secret failed", "certSecretName", certSecretName, "err", err) - } else { - s.Log.Trace("updated cert secret, quitting update thread", "certSecretName", certSecretName) - return nil - } - - // Also check for external exit conditions before continuing + var err error + for { select { + case _, ok := <-ticker.C: + if !ok { + return fmt.Errorf("timed out attempting to update cert secret: %w", err) + } + if err = s.updateSecret(ctx, bundle); err != nil { + s.Log.Error("attempt to update cert secret failed", "certSecretName", certSecretName, "err", err) + } else { + s.Log.Trace("updated cert secret, quitting update thread", "certSecretName", certSecretName) + return nil + } case <-ctx.Done(): s.Log.Trace("quitting update cert retries due to done context") return nil case <-leaderCh: s.Log.Trace("quitting update cert retries due to leadership change") return nil - default: - // try again } } - - return fmt.Errorf("timed out attempting to update cert secret") } func (s *GenSource) updateSecret(ctx context.Context, bundle Bundle) error {