From a394613dd44eecfd429280be000a89c649b5462e Mon Sep 17 00:00:00 2001 From: Kyle Schochenmaier Date: Tue, 13 Jul 2021 10:19:16 -0700 Subject: [PATCH] Fail scheduling all pods that are not part of consul when the webhook is offline (#1024) * Fail scheduling all pods that are not part of consul when the webhook is unhealthy. Co-authored-by: Iryna Shustava --- CHANGELOG.md | 1 + templates/connect-inject-mutatingwebhook.yaml | 8 +++++++- test/acceptance/framework/config/config.go | 5 +++++ values.yaml | 8 ++++++++ 4 files changed, 21 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e39be70b44..542a3a94ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ## Unreleased IMPROVEMENTS: +* Set failurePolicy to Fail for connectInject mutating webhook so that pods fail to schedule when the webhook is offline. This can be controlled via `connectInject.failurePolicy`. [[GH-1024](https://github.com/hashicorp/consul-helm/pull/1024)] * Allow setting global.logLevel and global.logJSON and propogate this to all consul-k8s commands. [[GH-980](https://github.com/hashicorp/consul-helm/pull/980)] ## 0.32.1 (June 29, 2021) diff --git a/templates/connect-inject-mutatingwebhook.yaml b/templates/connect-inject-mutatingwebhook.yaml index 8c7af50c2f..0ce6f80dd9 100644 --- a/templates/connect-inject-mutatingwebhook.yaml +++ b/templates/connect-inject-mutatingwebhook.yaml @@ -12,7 +12,13 @@ metadata: release: {{ .Release.Name }} webhooks: - name: {{ template "consul.fullname" . }}-connect-injector.consul.hashicorp.com - failurePolicy: Ignore + # The webhook will fail scheduling all pods that are not part of consul if all replicas of the webhook are unhealthy. + objectSelector: + matchExpressions: + - key: app + operator: NotIn + values: [ {{ template "consul.name" . 
}} ] + failurePolicy: {{ .Values.connectInject.failurePolicy }} sideEffects: None admissionReviewVersions: - "v1beta1" diff --git a/test/acceptance/framework/config/config.go b/test/acceptance/framework/config/config.go index e60d603c35..4aab38f7d7 100644 --- a/test/acceptance/framework/config/config.go +++ b/test/acceptance/framework/config/config.go @@ -55,6 +55,11 @@ type TestConfig struct { func (t *TestConfig) HelmValuesFromConfig() (map[string]string, error) { helmValues := map[string]string{} + // If Kind is being used, it uses a pod to provision the underlying PV, which will hang if we + // use "Fail" for the webhook failurePolicy. + if t.UseKind { + setIfNotEmpty(helmValues, "connectInject.failurePolicy", "Ignore") + } // Set the enterprise image first if enterprise tests are enabled. // It can be overwritten by the -consul-image flag later. if t.EnableEnterprise { diff --git a/values.yaml b/values.yaml index d520f8f208..1cd5148c2a 100644 --- a/values.yaml +++ b/values.yaml @@ -1485,6 +1485,14 @@ connectInject: memory: "50Mi" cpu: "50m" + # Sets the failurePolicy for the mutating webhook. By default this will cause pods not part of the consul installation to fail scheduling while the webhook + # is offline. This prevents a pod from skipping mutation if the webhook were to be momentarily offline. + # Once the webhook is back online the pod will be scheduled. + # In some environments such as Kind this may have an undesirable effect as it may prevent volume provisioner pods from running + # which can lead to hangs. In these environments it is recommended to use "Ignore" instead. + # This setting can be safely disabled by setting it to "Ignore". + failurePolicy: "Fail" + # Selector for restricting the webhook to only # specific namespaces. This should be set to a multiline string. # See https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-namespaceselector