Set default resources

This enables our chart to be deployed into clusters that require resources to be set for each container. This also prevents our pods from being evicted first when Kubernetes runs out of resources because Kubernetes will prefer to evict pods that have no resource settings. To that end we have set defaults for clients and servers because even in first-run tests we observed those pods being evicted, causing confusing issues. We are setting our requests and limits to the same value so we are considered to have a 'Guaranteed' quality of service class. These default settings were chosen based on observation in a running cluster using `kubectl top` and also looking at GKE metrics. We ran a Consul connect workload at 1 req/s. For the helper components, the load we saw is representative of a production cluster because the workload of these components doesn't change, e.g. the lifecycle sidecar. For the core components like clients, servers and mesh gateways, their resource requirements will increase depending on the workload. We have set their resources to more than double what was observed in a base-case installation so that first-run doesn't require an unnecessary amount of resources but also won't run up against any limits. It's expected that for production, users will tune their settings based on their specific use-case and observed load. In our observations, most of the "helper" containers used a maximum of 10 millicores and 12 Mi of memory. We have set these smaller containers and init containers to 50 millicores and 25 Mi of memory to be safe. For the lifecycle sidecar we observed 2m/12Mi. We set this to 10m/25Mi to keep this footprint small. This setting will also be used for connect-injected apps so we can't increase it too much or it will cause apps to have large resource footprints unnecessarily. For components like sync catalog, connect inject and snapshot agent we saw max usage of 44m/14Mi. Since these are long-running components, we set their resources to 50m/50Mi. For Consul clients we saw maximum usage of 10m/15Mi, we have set these to 100m/100Mi. For Consul servers we same maximum usage of 40m/13Mi. We have set these to 100m/100Mi. For mesh gateways with multi-dc federation we observed maximum usage of 15m/13Mi. We have set these to 100m/100Mi.
hashicorp · May 21, 2020 · bae76d1 · bae76d1
1 parent ebf6184
commit bae76d1
Show file tree

Hide file tree

Showing 16 changed files with 169 additions and 28 deletions.
diff --git a/templates/_helpers.tpl b/templates/_helpers.tpl
@@ -97,4 +97,11 @@ This template is for an init container.
     {{- end }}
     - name: consul-auto-encrypt-ca-cert
       mountPath: /consul/tls/client/ca
-{{- end -}}
+  resources:
+    requests:
+      memory: "25Mi"
+      cpu: "50m"
+    limits:
+      memory: "25Mi"
+      cpu: "50m"
+{{- end -}}
diff --git a/templates/client-daemonset.yaml b/templates/client-daemonset.yaml
@@ -317,6 +317,13 @@ spec:
         volumeMounts:
           - name: aclconfig
             mountPath: /consul/aclconfig
+        resources:
+          requests:
+            memory: "25Mi"
+            cpu: "50m"
+          limits:
+            memory: "25Mi"
+            cpu: "50m"
       {{- end }}
       {{- if and .Values.global.tls.enabled (not .Values.global.tls.enableAutoEncrypt) }}
       - name: client-tls-init
@@ -348,6 +355,13 @@ spec:
           - name: consul-ca-key
             mountPath: /consul/tls/ca/key
             readOnly: true
+        resources:
+          requests:
+            memory: "25Mi"
+            cpu: "50m"
+          limits:
+            memory: "25Mi"
+            cpu: "50m"
       {{- end }}
       {{- end }}
       {{- if .Values.client.nodeSelector }}

diff --git a/templates/client-snapshot-agent-deployment.yaml b/templates/client-snapshot-agent-deployment.yaml
@@ -125,8 +125,15 @@ spec:
             {{- end }}
               mountPath: /consul/tls/ca
               readOnly: true
+            {{- end }}
           {{- end }}
-          {{- end }}
+          resources:
+            requests:
+              memory: "50Mi"
+              cpu: "50m"
+            limits:
+              memory: "50Mi"
+              cpu: "50m"
       {{- if (or (or .Values.global.acls.manageSystemACLs .Values.global.bootstrapACLs) (and .Values.global.tls.enabled .Values.global.tls.enableAutoEncrypt)) }}
       initContainers:
       {{- if (or .Values.global.acls.manageSystemACLs .Values.global.bootstrapACLs) }}
@@ -142,6 +149,13 @@ spec:
         volumeMounts:
           - name: aclconfig
             mountPath: /consul/aclconfig
+        resources:
+          requests:
+            memory: "25Mi"
+            cpu: "50m"
+          limits:
+            memory: "25Mi"
+            cpu: "50m"
       {{- end }}
       {{- if (and .Values.global.tls.enabled .Values.global.tls.enableAutoEncrypt) }}
       {{- include "consul.getAutoEncryptClientCA" . | nindent 6 }}

diff --git a/templates/connect-inject-deployment.yaml b/templates/connect-inject-deployment.yaml
@@ -161,6 +161,13 @@ spec:
               readOnly: true
             {{- end }}
           {{- end }}
+          resources:
+            requests:
+              memory: "50Mi"
+              cpu: "50m"
+            limits:
+              memory: "50Mi"
+              cpu: "50m"
       {{- if (or .Values.connectInject.certs.secretName .Values.global.tls.enabled) }}
       volumes:
         {{- if .Values.connectInject.certs.secretName }}
@@ -200,6 +207,13 @@ spec:
             consul-k8s acl-init \
               -secret-name="{{ template "consul.fullname" . }}-connect-inject-acl-token" \
               -k8s-namespace={{ .Release.Namespace }}
+        resources:
+          requests:
+            memory: "25Mi"
+            cpu: "50m"
+          limits:
+            memory: "25Mi"
+            cpu: "50m"
       {{- end }}
       {{- if (and .Values.global.tls.enabled .Values.global.tls.enableAutoEncrypt) }}
       {{- include "consul.getAutoEncryptClientCA" . | nindent 6 }}

diff --git a/templates/create-federation-secret-job.yaml b/templates/create-federation-secret-job.yaml
@@ -127,4 +127,11 @@ spec:
                   -resource-prefix="{{ template "consul.fullname" . }}" \
                   -server-ca-cert-file=/consul/tls/ca/tls.crt \
                   -server-ca-key-file=/consul/tls/server/ca/tls.key
+          resources:
+            requests:
+              memory: "25Mi"
+              cpu: "50m"
+            limits:
+              memory: "25Mi"
+              cpu: "50m"
 {{- end }}
diff --git a/templates/enterprise-license-job.yaml b/templates/enterprise-license-job.yaml
@@ -99,6 +99,13 @@ spec:
               mountPath: /consul/tls/ca
               readOnly: true
           {{- end }}
+          resources:
+            requests:
+              memory: "50Mi"
+              cpu: "50m"
+            limits:
+              memory: "50Mi"
+              cpu: "50m"
       {{- if (or .Values.global.acls.manageSystemACLs .Values.global.bootstrapACLs) }}
       initContainers:
       - name: ent-license-acl-init
@@ -110,6 +117,13 @@ spec:
             consul-k8s acl-init \
               -secret-name="{{ template "consul.fullname" . }}-enterprise-license-acl-token" \
               -k8s-namespace={{ .Release.Namespace }}
+        resources:
+          requests:
+            memory: "25Mi"
+            cpu: "50m"
+          limits:
+            memory: "25Mi"
+            cpu: "50m"
       {{- end }}
 {{- end }}
 {{- end }}
diff --git a/templates/mesh-gateway-deployment.yaml b/templates/mesh-gateway-deployment.yaml
@@ -90,6 +90,13 @@ spec:
           volumeMounts:
           - name: consul-bin
             mountPath: /consul-bin
+          resources:
+            requests:
+              memory: "25Mi"
+              cpu: "50m"
+            limits:
+              memory: "25Mi"
+              cpu: "50m"
         {{- if (and .Values.global.tls.enabled .Values.global.tls.enableAutoEncrypt) }}
         {{- include "consul.getAutoEncryptClientCA" . | nindent 8 }}
         {{- end }}
@@ -207,6 +214,13 @@ spec:
               mountPath: /consul/tls/ca
               readOnly: true
             {{- end }}
+          resources:
+            requests:
+              memory: "25Mi"
+              cpu: "50m"
+            limits:
+              memory: "25Mi"
+              cpu: "50m"
       containers:
         - name: mesh-gateway
           image: {{ .Values.meshGateway.imageEnvoy | quote }}
@@ -344,6 +358,13 @@ spec:
             {{- if (or .Values.global.acls.manageSystemACLs .Values.global.bootstrapACLs) }}
             - -token-file=/consul/service/acl-token
             {{- end }}
+          resources:
+            requests:
+              memory: "25Mi"
+              cpu: "10m"
+            limits:
+              memory: "25Mi"
+              cpu: "10m"
       {{- if .Values.meshGateway.priorityClassName }}
       priorityClassName: {{ .Values.meshGateway.priorityClassName | quote }}
       {{- end }}

diff --git a/templates/server-acl-init-cleanup-job.yaml b/templates/server-acl-init-cleanup-job.yaml
@@ -52,6 +52,13 @@ spec:
             - delete-completed-job
             - -k8s-namespace={{ .Release.Namespace }}
             - {{ template "consul.fullname" . }}-server-acl-init
+          resources:
+            requests:
+              memory: "25Mi"
+              cpu: "50m"
+            limits:
+              memory: "25Mi"
+              cpu: "50m"
   {{- end }}
   {{- end }}
   {{- end }}
diff --git a/templates/server-acl-init-job.yaml b/templates/server-acl-init-job.yaml
@@ -182,6 +182,13 @@ spec:
                 {{- end }}
                 {{- end }}
                 {{- end }}
+          resources:
+            requests:
+              memory: "25Mi"
+              cpu: "50m"
+            limits:
+              memory: "25Mi"
+              cpu: "50m"
 {{- end }}
 {{- end }}
 {{- end }}
diff --git a/templates/sync-catalog-deployment.yaml b/templates/sync-catalog-deployment.yaml
@@ -176,6 +176,13 @@ spec:
             periodSeconds: 5
             successThreshold: 1
             timeoutSeconds: 5
+          resources:
+            requests:
+              memory: "50Mi"
+              cpu: "50m"
+            limits:
+              memory: "50Mi"
+              cpu: "50m"
       {{- if or (or .Values.global.acls.manageSystemACLs .Values.global.bootstrapACLs) (and .Values.global.tls.enabled .Values.global.tls.enableAutoEncrypt) }}
       initContainers:
       {{- if (or .Values.global.acls.manageSystemACLs .Values.global.bootstrapACLs) }}
@@ -188,6 +195,13 @@ spec:
             consul-k8s acl-init \
               -secret-name="{{ template "consul.fullname" . }}-catalog-sync-acl-token" \
               -k8s-namespace={{ .Release.Namespace }}
+        resources:
+          requests:
+            memory: "25Mi"
+            cpu: "50m"
+          limits:
+            memory: "25Mi"
+            cpu: "50m"
       {{- end }}
       {{- if (and .Values.global.tls.enabled .Values.global.tls.enableAutoEncrypt) }}
       {{- include "consul.getAutoEncryptClientCA" . | nindent 6 }}

diff --git a/templates/tls-init-cleanup-job.yaml b/templates/tls-init-cleanup-job.yaml
@@ -52,5 +52,12 @@ spec:
               curl -s -X DELETE --cacert /var/run/secrets/kubernetes.io/serviceaccount/ca.crt \
                 https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/api/v1/namespaces/${NAMESPACE}/secrets/{{ template "consul.fullname" . }}-server-cert \
                 -H "Authorization: Bearer $( cat /var/run/secrets/kubernetes.io/serviceaccount/token )"
+          resources:
+            requests:
+              memory: "25Mi"
+              cpu: "50m"
+            limits:
+              memory: "25Mi"
+              cpu: "50m"
 {{- end }}
 {{- end }}
diff --git a/templates/tls-init-job.yaml b/templates/tls-init-job.yaml
@@ -110,5 +110,12 @@ spec:
               mountPath: /consul/tls/ca/key
               readOnly: true
           {{- end }}
+          resources:
+            requests:
+              memory: "25Mi"
+              cpu: "50m"
+            limits:
+              memory: "25Mi"
+              cpu: "50m"
 {{- end }}
 {{- end }}
diff --git a/test/unit/client-daemonset.bats b/test/unit/client-daemonset.bats
@@ -150,16 +150,16 @@ load _helpers
 #--------------------------------------------------------------------
 # resources
 
-@test "client/DaemonSet: no resources defined by default" {
+@test "client/DaemonSet: resources defined by default" {
   cd `chart_dir`
   local actual=$(helm template \
       -x templates/client-daemonset.yaml  \
       . | tee /dev/stderr |
-      yq -r '.spec.template.spec.containers[0].resources' | tee /dev/stderr)
-  [ "${actual}" = "null" ]
+      yq -rc '.spec.template.spec.containers[0].resources' | tee /dev/stderr)
+  [ "${actual}" = '{"limits":{"cpu":"100m","memory":"100Mi"},"requests":{"cpu":"100m","memory":"100Mi"}}' ]
 }
 
-@test "client/DaemonSet: resources can be set" {
+@test "client/DaemonSet: resources can be overridden" {
   cd `chart_dir`
   local actual=$(helm template \
       -x templates/client-daemonset.yaml  \
@@ -170,7 +170,7 @@ load _helpers
 }
 
 # Test support for the deprecated method of setting a YAML string.
-@test "client/DaemonSet: resources can be set with string" {
+@test "client/DaemonSet: resources can be overridden with string" {
   cd `chart_dir`
   local actual=$(helm template \
       -x templates/client-daemonset.yaml  \

diff --git a/test/unit/mesh-gateway-deployment.bats b/test/unit/mesh-gateway-deployment.bats
@@ -284,10 +284,10 @@ key2: value2' \
       . | tee /dev/stderr |
       yq -r '.spec.template.spec.containers[0].resources' | tee /dev/stderr)
 
-  [ $(echo "${actual}" | yq -r '.requests.memory') = "128Mi" ]
-  [ $(echo "${actual}" | yq -r '.requests.cpu') = "250m" ]
-  [ $(echo "${actual}" | yq -r '.limits.memory') = "256Mi" ]
-  [ $(echo "${actual}" | yq -r '.limits.cpu') = "500m" ]
+  [ $(echo "${actual}" | yq -r '.requests.memory') = "100Mi" ]
+  [ $(echo "${actual}" | yq -r '.requests.cpu') = "100m" ]
+  [ $(echo "${actual}" | yq -r '.limits.memory') = "100Mi" ]
+  [ $(echo "${actual}" | yq -r '.limits.cpu') = "100m" ]
 }
 
 @test "meshGateway/Deployment: resources can be overridden" {

diff --git a/test/unit/server-statefulset.bats b/test/unit/server-statefulset.bats
@@ -83,16 +83,16 @@ load _helpers
 #--------------------------------------------------------------------
 # resources
 
-@test "server/StatefulSet: no resources defined by default" {
+@test "server/StatefulSet: resources defined by default" {
   cd `chart_dir`
   local actual=$(helm template \
       -x templates/server-statefulset.yaml  \
       . | tee /dev/stderr |
-      yq -r '.spec.template.spec.containers[0].resources' | tee /dev/stderr)
-  [ "${actual}" = "null" ]
+      yq -rc '.spec.template.spec.containers[0].resources' | tee /dev/stderr)
+  [ "${actual}" = '{"limits":{"cpu":"100m","memory":"100Mi"},"requests":{"cpu":"100m","memory":"100Mi"}}' ]
 }
 
-@test "server/StatefulSet: resources can be set" {
+@test "server/StatefulSet: resources can be overridden" {
   cd `chart_dir`
   local actual=$(helm template \
       -x templates/server-statefulset.yaml  \
@@ -103,7 +103,7 @@ load _helpers
 }
 
 # Test support for the deprecated method of setting a YAML string.
-@test "server/StatefulSet: resources can be set with string" {
+@test "server/StatefulSet: resources can be overridden with string" {
   cd `chart_dir`
   local actual=$(helm template \
       -x templates/server-statefulset.yaml  \

diff --git a/values.yaml b/values.yaml
@@ -251,10 +251,14 @@ server:
   # via the extraConfig setting.
   connect: true
 
-  # Resource requests, limits, etc. for the server cluster placement. This
-  # should map directly to the value of the resources field for a PodSpec.
-  # By default no direct resource request is made.
-  resources: null
+  # Resource settings for Server agents.
+  resources:
+    requests:
+      memory: "100Mi"
+      cpu: "100m"
+    limits:
+      memory: "100Mi"
+      cpu: "100m"
 
   # updatePartition is used to control a careful rolling update of Consul
   # servers. This should be done particularly when changing the version
@@ -409,10 +413,14 @@ client:
   # also changes the clients' advertised IP to the hostIP rather than podIP.
   exposeGossipPorts: false
 
-  # Resource requests, limits, etc. for the client cluster placement. This
-  # should map directly to the value of the resources field for a PodSpec.
-  # By default no direct resource request is made.
-  resources: null
+  # Resource settings for Client agents.
+  resources:
+    requests:
+      memory: "100Mi"
+      cpu: "100m"
+    limits:
+      memory: "100Mi"
+      cpu: "100m"
 
   # extraConfig is a raw string of extra configuration to set with the
   # client. This should be JSON.
@@ -968,11 +976,11 @@ meshGateway:
 
   resources:
     requests:
-      memory: "128Mi"
-      cpu: "250m"
+      memory: "100Mi"
+      cpu: "100m"
     limits:
-      memory: "256Mi"
-      cpu: "500m"
+      memory: "100Mi"
+      cpu: "100m"
 
   # By default, we set an anti affinity so that two gateway pods won't be
   # on the same node. NOTE: Gateways require that Consul client agents are