From 7bb0bc2545efcc971ad4376285f74b6fc401f142 Mon Sep 17 00:00:00 2001 From: Luke Kysow <1034429+lkysow@users.noreply.github.com> Date: Sun, 17 Oct 2021 22:07:57 -0700 Subject: [PATCH] Change CRD retry-backoff Previously, if a custom resource failed to sync with Consul, we used the default retry backoff. This was an exponential backoff starting at 5ms and maxing out at 1000s (16m). This backoff was a poor UX for our common error case where one config entry cannot be applied due to a prerequisite, for example an ingress gateway entry cannot be applied until the protocol is set to http. The usual workflow to resolve this would be to look up the error, figure out the correct ServiceDefaults/ProxyDefaults, and then apply that resource. Once applied, the user needs to wait fo the ingress gateway (in this example) resource to be retried. With the default backoff config, because the user will have taken on the order of minutes to figure out the correct config, the exponential backoff will now be upwards of five minutes. The user will have to wait for a long time for the ingress gateway resource to be retried. This PR changes the backoff to start out at 200ms and max out at 5s. This fits our use-case better because the user will have to wait at max 5ms, usually if there's an error, retrying within 5ms does nothing, so waiting 200ms to start is fine, and finally, Consul servers can accept tens of thousands of writes per second so even if there are a ton of resources retrying every 5s, it won't be an issue. --- .../controller/configentry_controller.go | 35 +++++++++++++++++++ .../controller/ingressgateway_controller.go | 4 +-- control-plane/controller/mesh_controller.go | 4 +-- .../controller/proxydefaults_controller.go | 4 +-- .../controller/servicedefaults_controller.go | 4 +-- .../serviceintentions_controller.go | 4 +-- .../controller/serviceresolver_controller.go | 4 +-- .../controller/servicerouter_controller.go | 4 +-- .../controller/servicesplitter_controller.go | 4 +-- .../terminatinggateway_controller.go | 4 +-- control-plane/go.mod | 1 + 11 files changed, 45 insertions(+), 27 deletions(-) diff --git a/control-plane/controller/configentry_controller.go b/control-plane/controller/configentry_controller.go index 995c8138ed..d45340b151 100644 --- a/control-plane/controller/configentry_controller.go +++ b/control-plane/controller/configentry_controller.go @@ -11,12 +11,16 @@ import ( "github.com/hashicorp/consul-k8s/control-plane/api/common" "github.com/hashicorp/consul-k8s/control-plane/namespaces" capi "github.com/hashicorp/consul/api" + "golang.org/x/time/rate" corev1 "k8s.io/api/core/v1" k8serr "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/workqueue" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/reconcile" ) const ( @@ -246,6 +250,37 @@ func (r *ConfigEntryController) ReconcileEntry(ctx context.Context, crdCtrl Cont return ctrl.Result{}, nil } +// setupWithManager sets up the controller manager for the given resource +// with our default options. +func setupWithManager(mgr ctrl.Manager, resource client.Object, reconciler reconcile.Reconciler) error { + options := controller.Options{ + // Taken from https://github.com/kubernetes/client-go/blob/master/util/workqueue/default_rate_limiters.go#L39 + // and modified from a starting backoff of 5ms and max of 1000s to a + // starting backoff of 200ms and a max of 5s to better fit our most + // common error cases and performance characteristics. + // + // One common error case is that a config entry is applied that requires + // a protocol like http or grpc. Often the user will apply a new config + // entry to set the protocol in a minute or two. During this time, the + // default backoff could then be set up to 5m or more which means the + // original config entry takes a long time to re-sync. + // + // In terms of performance, Consul servers can handle tens of thousands + // of writes per second, so retrying at max every 5s isn't an issue and + // provides a better UX. + RateLimiter: workqueue.NewMaxOfRateLimiter( + workqueue.NewItemExponentialFailureRateLimiter(200*time.Millisecond, 5*time.Second), + // 10 qps, 100 bucket size. This is only for retry speed and its only the overall factor (not per item) + &workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(10), 100)}, + ), + } + + return ctrl.NewControllerManagedBy(mgr). + For(resource). + WithOptions(options). + Complete(reconciler) +} + func (r *ConfigEntryController) consulNamespace(configEntry capi.ConfigEntry, namespace string, globalResource bool) string { // ServiceIntentions have the appropriate Consul Namespace set on them as the value // is defaulted by the webhook. These are then set on the ServiceIntentions config entry diff --git a/control-plane/controller/ingressgateway_controller.go b/control-plane/controller/ingressgateway_controller.go index aedb2ff697..7e656b3d29 100644 --- a/control-plane/controller/ingressgateway_controller.go +++ b/control-plane/controller/ingressgateway_controller.go @@ -36,7 +36,5 @@ func (r *IngressGatewayController) UpdateStatus(ctx context.Context, obj client. } func (r *IngressGatewayController) SetupWithManager(mgr ctrl.Manager) error { - return ctrl.NewControllerManagedBy(mgr). - For(&consulv1alpha1.IngressGateway{}). - Complete(r) + return setupWithManager(mgr, &consulv1alpha1.IngressGateway{}, r) } diff --git a/control-plane/controller/mesh_controller.go b/control-plane/controller/mesh_controller.go index 4b4e1257bc..61652d23fe 100644 --- a/control-plane/controller/mesh_controller.go +++ b/control-plane/controller/mesh_controller.go @@ -36,7 +36,5 @@ func (r *MeshController) UpdateStatus(ctx context.Context, obj client.Object, op } func (r *MeshController) SetupWithManager(mgr ctrl.Manager) error { - return ctrl.NewControllerManagedBy(mgr). - For(&consulv1alpha1.Mesh{}). - Complete(r) + return setupWithManager(mgr, &consulv1alpha1.Mesh{}, r) } diff --git a/control-plane/controller/proxydefaults_controller.go b/control-plane/controller/proxydefaults_controller.go index 0b4a11c001..658806d340 100644 --- a/control-plane/controller/proxydefaults_controller.go +++ b/control-plane/controller/proxydefaults_controller.go @@ -36,7 +36,5 @@ func (r *ProxyDefaultsController) UpdateStatus(ctx context.Context, obj client.O } func (r *ProxyDefaultsController) SetupWithManager(mgr ctrl.Manager) error { - return ctrl.NewControllerManagedBy(mgr). - For(&consulv1alpha1.ProxyDefaults{}). - Complete(r) + return setupWithManager(mgr, &consulv1alpha1.ProxyDefaults{}, r) } diff --git a/control-plane/controller/servicedefaults_controller.go b/control-plane/controller/servicedefaults_controller.go index f71afed09c..7dd73914e4 100644 --- a/control-plane/controller/servicedefaults_controller.go +++ b/control-plane/controller/servicedefaults_controller.go @@ -36,7 +36,5 @@ func (r *ServiceDefaultsController) UpdateStatus(ctx context.Context, obj client } func (r *ServiceDefaultsController) SetupWithManager(mgr ctrl.Manager) error { - return ctrl.NewControllerManagedBy(mgr). - For(&consulv1alpha1.ServiceDefaults{}). - Complete(r) + return setupWithManager(mgr, &consulv1alpha1.ServiceDefaults{}, r) } diff --git a/control-plane/controller/serviceintentions_controller.go b/control-plane/controller/serviceintentions_controller.go index b6db47dfd0..1ee1db037c 100644 --- a/control-plane/controller/serviceintentions_controller.go +++ b/control-plane/controller/serviceintentions_controller.go @@ -36,7 +36,5 @@ func (r *ServiceIntentionsController) UpdateStatus(ctx context.Context, obj clie } func (r *ServiceIntentionsController) SetupWithManager(mgr ctrl.Manager) error { - return ctrl.NewControllerManagedBy(mgr). - For(&consulv1alpha1.ServiceIntentions{}). - Complete(r) + return setupWithManager(mgr, &consulv1alpha1.ServiceIntentions{}, r) } diff --git a/control-plane/controller/serviceresolver_controller.go b/control-plane/controller/serviceresolver_controller.go index 96f068441c..3e01e680ea 100644 --- a/control-plane/controller/serviceresolver_controller.go +++ b/control-plane/controller/serviceresolver_controller.go @@ -36,7 +36,5 @@ func (r *ServiceResolverController) UpdateStatus(ctx context.Context, obj client } func (r *ServiceResolverController) SetupWithManager(mgr ctrl.Manager) error { - return ctrl.NewControllerManagedBy(mgr). - For(&consulv1alpha1.ServiceResolver{}). - Complete(r) + return setupWithManager(mgr, &consulv1alpha1.ServiceResolver{}, r) } diff --git a/control-plane/controller/servicerouter_controller.go b/control-plane/controller/servicerouter_controller.go index e32c3d175a..7db983dec2 100644 --- a/control-plane/controller/servicerouter_controller.go +++ b/control-plane/controller/servicerouter_controller.go @@ -36,7 +36,5 @@ func (r *ServiceRouterController) UpdateStatus(ctx context.Context, obj client.O } func (r *ServiceRouterController) SetupWithManager(mgr ctrl.Manager) error { - return ctrl.NewControllerManagedBy(mgr). - For(&consulv1alpha1.ServiceRouter{}). - Complete(r) + return setupWithManager(mgr, &consulv1alpha1.ServiceRouter{}, r) } diff --git a/control-plane/controller/servicesplitter_controller.go b/control-plane/controller/servicesplitter_controller.go index 537fbef969..a8b6267c70 100644 --- a/control-plane/controller/servicesplitter_controller.go +++ b/control-plane/controller/servicesplitter_controller.go @@ -36,7 +36,5 @@ func (r *ServiceSplitterController) UpdateStatus(ctx context.Context, obj client } func (r *ServiceSplitterController) SetupWithManager(mgr ctrl.Manager) error { - return ctrl.NewControllerManagedBy(mgr). - For(&consulv1alpha1.ServiceSplitter{}). - Complete(r) + return setupWithManager(mgr, &consulv1alpha1.ServiceSplitter{}, r) } diff --git a/control-plane/controller/terminatinggateway_controller.go b/control-plane/controller/terminatinggateway_controller.go index 43b4b3a0df..a8db2d851e 100644 --- a/control-plane/controller/terminatinggateway_controller.go +++ b/control-plane/controller/terminatinggateway_controller.go @@ -36,7 +36,5 @@ func (r *TerminatingGatewayController) UpdateStatus(ctx context.Context, obj cli } func (r *TerminatingGatewayController) SetupWithManager(mgr ctrl.Manager) error { - return ctrl.NewControllerManagedBy(mgr). - For(&consulv1alpha1.TerminatingGateway{}). - Complete(r) + return setupWithManager(mgr, &consulv1alpha1.TerminatingGateway{}, r) } diff --git a/control-plane/go.mod b/control-plane/go.mod index cf90cff87d..35fce971ae 100644 --- a/control-plane/go.mod +++ b/control-plane/go.mod @@ -31,6 +31,7 @@ require ( github.com/stretchr/testify v1.7.0 go.uber.org/zap v1.17.0 golang.org/x/sys v0.0.0-20210611083646-a4fc73990273 // indirect + golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba golang.org/x/tools v0.1.2 // indirect gomodules.xyz/jsonpatch/v2 v2.2.0 k8s.io/api v0.21.1