Skip to content

Commit

Permalink
Change CRD retry-backoff
Browse files Browse the repository at this point in the history
Previously, if a custom resource failed to sync with Consul, we used the
default retry backoff. This was an exponential backoff starting at 5ms
and maxing out at 1000s (16m).

This backoff was a poor UX for our common error case where one config
entry cannot be applied due to a prerequisite, for example an ingress
gateway entry cannot be applied until the protocol is set to http. The
usual workflow to resolve this would be to look up the error, figure out
the correct ServiceDefaults/ProxyDefaults, and then apply that resource.
Once applied, the user needs to wait fo the ingress gateway (in this
example) resource to be retried. With the default backoff config,
because the user will have taken on the order of minutes to figure out
the correct config, the exponential backoff will now be upwards of
five minutes. The user will have to wait for a long time for the ingress
gateway resource to be retried.

This PR changes the backoff to start out at 200ms and max out at 5s.
This fits our use-case better because the user will have to wait at max
5ms, usually if there's an error, retrying within 5ms does nothing, so
waiting 200ms to start is fine, and finally, Consul servers can accept
tens of thousands of writes per second so even if there are a ton of
resources retrying every 5s, it won't be an issue.
  • Loading branch information
lkysow committed Oct 18, 2021
1 parent 6fccb5c commit 7bb0bc2
Show file tree
Hide file tree
Showing 11 changed files with 45 additions and 27 deletions.
35 changes: 35 additions & 0 deletions control-plane/controller/configentry_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,16 @@ import (
"github.com/hashicorp/consul-k8s/control-plane/api/common"
"github.com/hashicorp/consul-k8s/control-plane/namespaces"
capi "github.com/hashicorp/consul/api"
"golang.org/x/time/rate"
corev1 "k8s.io/api/core/v1"
k8serr "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/util/workqueue"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

const (
Expand Down Expand Up @@ -246,6 +250,37 @@ func (r *ConfigEntryController) ReconcileEntry(ctx context.Context, crdCtrl Cont
return ctrl.Result{}, nil
}

// setupWithManager sets up the controller manager for the given resource
// with our default options.
func setupWithManager(mgr ctrl.Manager, resource client.Object, reconciler reconcile.Reconciler) error {
options := controller.Options{
// Taken from https://github.com/kubernetes/client-go/blob/master/util/workqueue/default_rate_limiters.go#L39
// and modified from a starting backoff of 5ms and max of 1000s to a
// starting backoff of 200ms and a max of 5s to better fit our most
// common error cases and performance characteristics.
//
// One common error case is that a config entry is applied that requires
// a protocol like http or grpc. Often the user will apply a new config
// entry to set the protocol in a minute or two. During this time, the
// default backoff could then be set up to 5m or more which means the
// original config entry takes a long time to re-sync.
//
// In terms of performance, Consul servers can handle tens of thousands
// of writes per second, so retrying at max every 5s isn't an issue and
// provides a better UX.
RateLimiter: workqueue.NewMaxOfRateLimiter(
workqueue.NewItemExponentialFailureRateLimiter(200*time.Millisecond, 5*time.Second),
// 10 qps, 100 bucket size. This is only for retry speed and its only the overall factor (not per item)
&workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(10), 100)},
),
}

return ctrl.NewControllerManagedBy(mgr).
For(resource).
WithOptions(options).
Complete(reconciler)
}

func (r *ConfigEntryController) consulNamespace(configEntry capi.ConfigEntry, namespace string, globalResource bool) string {
// ServiceIntentions have the appropriate Consul Namespace set on them as the value
// is defaulted by the webhook. These are then set on the ServiceIntentions config entry
Expand Down
4 changes: 1 addition & 3 deletions control-plane/controller/ingressgateway_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,5 @@ func (r *IngressGatewayController) UpdateStatus(ctx context.Context, obj client.
}

func (r *IngressGatewayController) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&consulv1alpha1.IngressGateway{}).
Complete(r)
return setupWithManager(mgr, &consulv1alpha1.IngressGateway{}, r)
}
4 changes: 1 addition & 3 deletions control-plane/controller/mesh_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,5 @@ func (r *MeshController) UpdateStatus(ctx context.Context, obj client.Object, op
}

func (r *MeshController) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&consulv1alpha1.Mesh{}).
Complete(r)
return setupWithManager(mgr, &consulv1alpha1.Mesh{}, r)
}
4 changes: 1 addition & 3 deletions control-plane/controller/proxydefaults_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,5 @@ func (r *ProxyDefaultsController) UpdateStatus(ctx context.Context, obj client.O
}

func (r *ProxyDefaultsController) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&consulv1alpha1.ProxyDefaults{}).
Complete(r)
return setupWithManager(mgr, &consulv1alpha1.ProxyDefaults{}, r)
}
4 changes: 1 addition & 3 deletions control-plane/controller/servicedefaults_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,5 @@ func (r *ServiceDefaultsController) UpdateStatus(ctx context.Context, obj client
}

func (r *ServiceDefaultsController) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&consulv1alpha1.ServiceDefaults{}).
Complete(r)
return setupWithManager(mgr, &consulv1alpha1.ServiceDefaults{}, r)
}
4 changes: 1 addition & 3 deletions control-plane/controller/serviceintentions_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,5 @@ func (r *ServiceIntentionsController) UpdateStatus(ctx context.Context, obj clie
}

func (r *ServiceIntentionsController) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&consulv1alpha1.ServiceIntentions{}).
Complete(r)
return setupWithManager(mgr, &consulv1alpha1.ServiceIntentions{}, r)
}
4 changes: 1 addition & 3 deletions control-plane/controller/serviceresolver_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,5 @@ func (r *ServiceResolverController) UpdateStatus(ctx context.Context, obj client
}

func (r *ServiceResolverController) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&consulv1alpha1.ServiceResolver{}).
Complete(r)
return setupWithManager(mgr, &consulv1alpha1.ServiceResolver{}, r)
}
4 changes: 1 addition & 3 deletions control-plane/controller/servicerouter_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,5 @@ func (r *ServiceRouterController) UpdateStatus(ctx context.Context, obj client.O
}

func (r *ServiceRouterController) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&consulv1alpha1.ServiceRouter{}).
Complete(r)
return setupWithManager(mgr, &consulv1alpha1.ServiceRouter{}, r)
}
4 changes: 1 addition & 3 deletions control-plane/controller/servicesplitter_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,5 @@ func (r *ServiceSplitterController) UpdateStatus(ctx context.Context, obj client
}

func (r *ServiceSplitterController) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&consulv1alpha1.ServiceSplitter{}).
Complete(r)
return setupWithManager(mgr, &consulv1alpha1.ServiceSplitter{}, r)
}
4 changes: 1 addition & 3 deletions control-plane/controller/terminatinggateway_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,5 @@ func (r *TerminatingGatewayController) UpdateStatus(ctx context.Context, obj cli
}

func (r *TerminatingGatewayController) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&consulv1alpha1.TerminatingGateway{}).
Complete(r)
return setupWithManager(mgr, &consulv1alpha1.TerminatingGateway{}, r)
}
1 change: 1 addition & 0 deletions control-plane/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ require (
github.com/stretchr/testify v1.7.0
go.uber.org/zap v1.17.0
golang.org/x/sys v0.0.0-20210611083646-a4fc73990273 // indirect
golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba
golang.org/x/tools v0.1.2 // indirect
gomodules.xyz/jsonpatch/v2 v2.2.0
k8s.io/api v0.21.1
Expand Down

0 comments on commit 7bb0bc2

Please sign in to comment.