From 8b9945772b07201afd69e0a428038b2d1cc5c7db Mon Sep 17 00:00:00 2001 From: natemollica-dev <57850649+natemollica-nm@users.noreply.github.com> Date: Mon, 12 Feb 2024 11:36:50 -0800 Subject: [PATCH] fixed cherry-pick change for consul.server.pdb.maxUnavailable template function --- .changelog/3407.txt | 13 + charts/consul/templates/_helpers.tpl | 243 +++++++++- .../consul/templates/datadog-agent-role.yaml | 38 ++ .../templates/datadog-agent-rolebinding.yaml | 26 ++ .../consul/templates/server-acl-init-job.yaml | 4 + .../templates/server-config-configmap.yaml | 9 +- .../templates/server-disruptionbudget.yaml | 2 +- .../consul/templates/server-statefulset.yaml | 81 ++++ .../telemetry-collector-deployment.yaml | 13 + .../consul/test/unit/server-acl-init-job.bats | 89 +++- .../test/unit/server-config-configmap.bats | 280 ++++++++++- .../consul/test/unit/server-statefulset.bats | 436 +++++++++++++++++- .../unit/telemetry-collector-deployment.bats | 79 ++++ charts/consul/values.yaml | 158 +++++++ control-plane/subcommand/common/common.go | 2 + .../subcommand/server-acl-init/command.go | 23 +- .../server-acl-init/command_test.go | 22 + .../subcommand/server-acl-init/rules.go | 26 ++ 18 files changed, 1512 insertions(+), 32 deletions(-) create mode 100644 .changelog/3407.txt create mode 100644 charts/consul/templates/datadog-agent-role.yaml create mode 100644 charts/consul/templates/datadog-agent-rolebinding.yaml diff --git a/.changelog/3407.txt b/.changelog/3407.txt new file mode 100644 index 0000000000..14dc27d0ff --- /dev/null +++ b/.changelog/3407.txt @@ -0,0 +1,13 @@ +```release-note:feature +helm: introduces `global.metrics.datadog` overrides to streamline consul-k8s datadog integration. +helm: introduces `server.enableAgentDebug` to expose agent [`enable_debug`](https://developer.hashicorp.com/consul/docs/agent/config/config-files#enable_debug) configuration. 
+helm: introduces `global.metrics.disableAgentHostName` to expose agent [`telemetry.disable_hostname`](https://developer.hashicorp.com/consul/docs/agent/config/config-files#telemetry-disable_hostname) configuration. +helm: introduces `global.metrics.enableHostMetrics` to expose agent [`telemetry.enable_host_metrics`](https://developer.hashicorp.com/consul/docs/agent/config/config-files#telemetry-enable_host_metrics) configuration. +helm: introduces `global.metrics.prefixFilter` to expose agent [`telemetry.prefix_filter`](https://developer.hashicorp.com/consul/docs/agent/config/config-files#telemetry-prefix_filter) configuration. +helm: introduces `global.metrics.datadog.dogstatsd.dogstatsdAddr` to expose agent [`telemetry.dogstatsd_addr`](https://developer.hashicorp.com/consul/docs/agent/config/config-files#telemetry-dogstatsd_addr) configuration. +helm: introduces `global.metrics.datadog.dogstatsd.dogstatsdTags` to expose agent [`telemetry.dogstatsd_tags`](https://developer.hashicorp.com/consul/docs/agent/config/config-files#telemetry-dogstatsd_tags) configuration. +helm: introduces required `ad.datadoghq.com/` annotations and `tags.datadoghq.com/` labels for integration with [Datadog Autodiscovery](https://docs.datadoghq.com/integrations/consul/?tab=containerized) and [Datadog Unified Service Tagging](https://docs.datadoghq.com/getting_started/tagging/unified_service_tagging/?tab=kubernetes#serverless-environment) for Consul. +helm: introduces automated unix domain socket hostPath mounting for containerized integration with datadog within consul-server statefulset. +helm: introduces `global.metrics.datadog.otlp` override options to allow OTLP metrics forwarding to Datadog Agent. +control-plane: adds `server-acl-init` datadog agent token creation for datadog integration. 
+``` \ No newline at end of file diff --git a/charts/consul/templates/_helpers.tpl b/charts/consul/templates/_helpers.tpl index 8507b2103a..ca87485a78 100644 --- a/charts/consul/templates/_helpers.tpl +++ b/charts/consul/templates/_helpers.tpl @@ -151,6 +151,29 @@ is passed to consul as a -config-file param on command line. [ -n "${HOSTNAME}" ] && sed -Ei "s|HOSTNAME|${HOSTNAME?}|g" /consul/extra-config/extra-from-values.json {{- end -}} +{{/* +Cleanup server.extraConfig entries to avoid conflicting entries: + - server.enableAgentDebug: + - `enable_debug` should not exist in extraConfig + - metrics.disableAgentHostName: + - if global.metrics.enabled and global.metrics.enableAgentMetrics are enabled, `disable_hostname` should not exist in extraConfig + - metrics.enableHostMetrics: + - if global.metrics.enabled and global.metrics.enableAgentMetrics are enabled, `enable_host_metrics` should not exist in extraConfig + - metrics.prefixFilter + - if global.metrics.enabled and global.metrics.enableAgentMetrics are enabled, `prefix_filter` should not exist in extraConfig + - metrics.datadog.enabled: + - if global.metrics.datadog.enabled and global.metrics.datadog.dogstatsd.enabled, `dogstatsd_tags` and `dogstatsd_addr` should not exist in extraConfig + +Usage: {{ template "consul.validateExtraConfig" . }} +*/}} +{{- define "consul.validateExtraConfig" -}} +{{- if (contains "enable_debug" .Values.server.extraConfig) }}{{ fail "The enable_debug key is present in extra-from-values.json. Use server.enableAgentDebug to set this value." }}{{- end }} +{{- if (contains "disable_hostname" .Values.server.extraConfig) }}{{ fail "The disable_hostname key is present in extra-from-values.json. Use global.metrics.disableAgentHostName to set this value." }}{{- end }} +{{- if (contains "enable_host_metrics" .Values.server.extraConfig) }}{{ fail "The enable_host_metrics key is present in extra-from-values.json. Use global.metrics.enableHostMetrics to set this value." 
}}{{- end }} +{{- if (contains "prefix_filter" .Values.server.extraConfig) }}{{ fail "The prefix_filter key is present in extra-from-values.json. Use global.metrics.prefix_filter to set this value." }}{{- end }} +{{- if (and .Values.global.metrics.enabled .Values.global.metrics.enableAgentMetrics) }}{{- if (and .Values.global.metrics.datadog.dogstatsd.enabled) }}{{- if (contains "dogstatsd_tags" .Values.server.extraConfig) }}{{ fail "The dogstatsd_tags key is present in extra-from-values.json. Use global.metrics.datadog.dogstatsd.dogstatsdTags to set this value." }}{{- end }}{{- end }}{{- if (and .Values.global.metrics.datadog.dogstatsd.enabled) }}{{- if (contains "dogstatsd_addr" .Values.server.extraConfig) }}{{ fail "The dogstatsd_addr key is present in extra-from-values.json. Use global.metrics.datadog.dogstatsd.dogstatsd_addr to set this value." }}{{- end }}{{- end }}{{- end }} +{{- end -}} + {{/* Create chart name and version as used by the chart label. */}} @@ -166,24 +189,27 @@ Expand the name of the chart. {{- end -}} {{/* -Compute the maximum number of unavailable replicas for the PodDisruptionBudget. -This defaults to (n/2)-1 where n is the number of members of the server cluster. -Special case of replica equaling 3 and allowing a minor disruption of 1 otherwise -use the integer value -Add a special case for replicas=1, where it should default to 0 as well. +Calculate max number of server pods that are allowed to be voluntarily disrupted. +When there's 1 server, this is set to 0 because this pod should not be disrupted. This is an edge +case and I'm not sure it makes a difference when there's only one server but that's what the previous config was and +I don't want to change it for this edge case. +Otherwise we've changed this to always be 1 as part of the move to set leave_on_terminate +to true. With leave_on_terminate set to true, whenever a server pod is stopped, the number of peers in raft +is reduced. 
If the number of servers is odd and the count is reduced by 1, the quorum size doesn't change, +but if it's reduced by more than 1, the quorum size can change so that's why this is now always hardcoded to 1. */}} -{{- define "consul.pdb.maxUnavailable" -}} +{{- define "consul.server.pdb.maxUnavailable" -}} {{- if eq (int .Values.server.replicas) 1 -}} {{ 0 }} {{- else if .Values.server.disruptionBudget.maxUnavailable -}} {{ .Values.server.disruptionBudget.maxUnavailable -}} {{- else -}} -{{- if eq (int .Values.server.replicas) 3 -}} -{{- 1 -}} -{{- else -}} -{{- sub (div (int .Values.server.replicas) 2) 1 -}} +{{ 1 }} {{- end -}} {{- end -}} + +{{- define "consul.server.autopilotMinQuorum" -}} +{{- add (div (int .Values.server.replicas) 2) 1 -}} {{- end -}} {{- define "consul.pdb.connectInject.maxUnavailable" -}} @@ -425,10 +451,10 @@ Usage: {{ template "consul.validateTelemetryCollectorCloud" . }} */}} {{- define "consul.validateTelemetryCollectorCloud" -}} {{- if (and .Values.telemetryCollector.cloud.clientId.secretName (and (not .Values.global.cloud.clientSecret.secretName) (not .Values.telemetryCollector.cloud.clientSecret.secretName))) }} -{{fail "When telemetryCollector.cloud.clientId.secretName is set, telemetryCollector.cloud.clientSecret.secretName must also be set."}} +{{fail "When telemetryCollector.cloud.clientId.secretName is set, telemetryCollector.cloud.clientSecret.secretName must also be set." }} {{- end }} {{- if (and .Values.telemetryCollector.cloud.clientSecret.secretName (and (not .Values.global.cloud.clientId.secretName) (not .Values.telemetryCollector.cloud.clientId.secretName))) }} -{{fail "When telemetryCollector.cloud.clientSecret.secretName is set, telemetryCollector.cloud.clientId.secretName must also be set."}} +{{fail "When telemetryCollector.cloud.clientSecret.secretName is set, telemetryCollector.cloud.clientId.secretName must also be set." 
}} {{- end }} {{- end }} @@ -467,3 +493,196 @@ Usage: {{ template "consul.validateTelemetryCollectorResourceId" . }} {{- end }} {{/**/}} + +{{/* +Fails if global.experiments.resourceAPIs is set along with any of these unsupported features. +- global.peering.enabled +- global.federation.enabled +- global.cloud.enabled +- client.enabled +- ui.enabled +- syncCatalog.enabled +- meshGateway.enabled +- ingressGateways.enabled +- terminatingGateways.enabled +- apiGateway.enabled + +Usage: {{ template "consul.validateResourceAPIs" . }} + +*/}} +{{- define "consul.validateResourceAPIs" -}} +{{- if (and (mustHas "resource-apis" .Values.global.experiments) .Values.global.peering.enabled ) }} +{{fail "When the value global.experiments.resourceAPIs is set, global.peering.enabled is currently unsupported."}} +{{- end }} +{{- if (and (mustHas "resource-apis" .Values.global.experiments) (not (mustHas "v2tenancy" .Values.global.experiments)) .Values.global.adminPartitions.enabled ) }} +{{fail "When the value global.experiments.resourceAPIs is set, global.experiments.v2tenancy must also be set to support global.adminPartitions.enabled."}} +{{- end }} +{{- if (and (mustHas "resource-apis" .Values.global.experiments) .Values.global.federation.enabled ) }} +{{fail "When the value global.experiments.resourceAPIs is set, global.federation.enabled is currently unsupported."}} +{{- end }} +{{- if (and (mustHas "resource-apis" .Values.global.experiments) .Values.global.cloud.enabled ) }} +{{fail "When the value global.experiments.resourceAPIs is set, global.cloud.enabled is currently unsupported."}} +{{- end }} +{{- if (and (mustHas "resource-apis" .Values.global.experiments) .Values.client.enabled ) }} +{{fail "When the value global.experiments.resourceAPIs is set, client.enabled is currently unsupported."}} +{{- end }} +{{- if (and (mustHas "resource-apis" .Values.global.experiments) .Values.ui.enabled ) }} +{{fail "When the value global.experiments.resourceAPIs is set, ui.enabled is 
currently unsupported."}} +{{- end }} +{{- if (and (mustHas "resource-apis" .Values.global.experiments) .Values.syncCatalog.enabled ) }} +{{fail "When the value global.experiments.resourceAPIs is set, syncCatalog.enabled is currently unsupported."}} +{{- end }} +{{- if (and (mustHas "resource-apis" .Values.global.experiments) .Values.ingressGateways.enabled ) }} +{{fail "When the value global.experiments.resourceAPIs is set, ingressGateways.enabled is currently unsupported."}} +{{- end }} +{{- if (and (mustHas "resource-apis" .Values.global.experiments) .Values.terminatingGateways.enabled ) }} +{{fail "When the value global.experiments.resourceAPIs is set, terminatingGateways.enabled is currently unsupported."}} +{{- end }} +{{- if (and (mustHas "resource-apis" .Values.global.experiments) .Values.apiGateway.enabled ) }} +{{fail "When the value global.experiments.resourceAPIs is set, apiGateway.enabled is currently unsupported."}} +{{- end }} +{{- end }} + +{{/* +Validation for Consul Metrics configuration: + +Fail if metrics.enabled=true and metrics.disableAgentHostName=true, but metrics.enableAgentMetrics=false + - metrics.enabled = true + - metrics.enableAgentMetrics = false + - metrics.disableAgentHostName = true + +Fail if metrics.enableAgentMetrics=true and metrics.disableAgentHostName=true, but metrics.enabled=false + - metrics.enabled = false + - metrics.enableAgentMetrics = true + - metrics.disableAgentHostName = true + +Fail if metrics.enabled=true and metrics.enableHostMetrics=true, but metrics.enableAgentMetrics=false + - metrics.enabled = true + - metrics.enableAgentMetrics = false + - metrics.enableHostMetrics = true + +Fail if metrics.enableAgentMetrics=true and metrics.enableHostMetrics=true, but metrics.enabled=false + - metrics.enabled = false + - metrics.enableAgentMetrics = true + - metrics.enableHostMetrics = true + +Usage: {{ template "consul.validateMetricsConfig" . 
}} + +*/}} + +{{- define "consul.validateMetricsConfig" -}} +{{- if and (not .Values.global.metrics.enableAgentMetrics) (and .Values.global.metrics.disableAgentHostName .Values.global.metrics.enabled )}} +{{fail "When enabling metrics (global.metrics.enabled) and disabling hostname emission from metrics (global.metrics.disableAgentHostName), global.metrics.enableAgentMetrics must be set to true"}} +{{- end }} +{{- if and (not .Values.global.metrics.enabled) (and .Values.global.metrics.disableAgentHostName .Values.global.metrics.enableAgentMetrics)}} +{{fail "When enabling Consul agent metrics (global.metrics.enableAgentMetrics) and disabling hostname metrics emission (global.metrics.disableAgentHostName), global metrics must be enabled (global.metrics.enabled)."}} +{{- end }} +{{- if and (not .Values.global.metrics.enableAgentMetrics) (and .Values.global.metrics.enableHostMetrics .Values.global.metrics.enabled )}} +{{fail "When enabling host metrics (global.metrics.enableHostMetrics) and enabling global metrics (global.metrics.enabled), Consul agent metrics must be enabled (global.metrics.enableAgentMetrics=true)"}} +{{- end }} +{{- if and (not .Values.global.metrics.enabled) (and .Values.global.metrics.enableHostMetrics .Values.global.metrics.enableAgentMetrics)}} +{{fail "When enabling Consul agent metrics (global.metrics.enableAgentMetrics) and host metrics (global.metrics.enableHostMetrics), global metrics must be enabled (global.metrics.enabled)."}} +{{- end }} +{{- end -}} + +{{/* +Validation for Consul Datadog Integration deployment: + +Fail if Datadog integration enabled and Consul server agent telemetry is not enabled. + - global.metrics.datadog.enabled=true + - global.metrics.enableAgentMetrics=false || global.metrics.enabled=false + +Fail if Consul OpenMetrics (Prometheus) and DogStatsD metrics are both enabled and configured. 
+ - global.metrics.datadog.dogstatsd.enabled (scrapes `/v1/agent/metrics?format=prometheus` via the `use_prometheus_endpoint` option) + - global.metrics.datadog.openMetricsPrometheus.enabled (scrapes `/v1/agent/metrics?format=prometheus`) + - see https://docs.datadoghq.com/integrations/consul/?tab=host#host for recommendation to not have both + +Fail if Datadog OTLP forwarding is enabled and Consul Telemetry Collection is not enabled. + - global.metrics.datadog.otlp.enabled=true + - telemetryCollector.enabled=false + +Fail if Consul Open Telemetry collector forwarding protocol is not one of either "http" or "grpc" + - global.metrics.datadog.otlp.protocol!="http" || global.metrics.datadog.otlp.protocol!="grpc" + +Usage: {{ template "consul.validateDatadogConfiguration" . }} + +*/}} + +{{- define "consul.validateDatadogConfiguration" -}} +{{- if and .Values.global.metrics.datadog.enabled (or (not .Values.global.metrics.enableAgentMetrics) (not .Values.global.metrics.enabled) )}} +{{fail "When enabling datadog metrics collection, the /v1/agent/metrics is required to be accessible, therefore global.metrics.enableAgentMetrics and global.metrics.enabled must be also be enabled."}} +{{- end }} +{{- if and .Values.global.metrics.datadog.dogstatsd.enabled .Values.global.metrics.datadog.openMetricsPrometheus.enabled }} +{{fail "You must have one of DogStatsD (global.metrics.datadog.dogstatsd.enabled) or OpenMetrics (global.metrics.datadog.openMetricsPrometheus.enabled) enabled, not both as this is an unsupported configuration." }} +{{- end }} +{{- if and .Values.global.metrics.datadog.otlp.enabled (not .Values.telemetryCollector.enabled) }} +{{fail "Cannot enable Datadog OTLP metrics collection (global.metrics.datadog.otlp.enabled) without consul-telemetry-collector. Ensure Consul OTLP collection is enabled (telemetryCollector.enabled) and configured." 
}} +{{- end }} +{{- if and (ne ( lower .Values.global.metrics.datadog.otlp.protocol) "http") (ne ( lower .Values.global.metrics.datadog.otlp.protocol) "grpc") }} +{{fail "Valid values for global.metrics.datadog.otlp.protocol must be one of either \"http\" or \"grpc\"." }} +{{- end }} +{{- end -}} + +{{/* +Sets the dogstatsd_addr field of the agent configuration dependent on the +socket transport type being used: + - "UDS" (Unix Domain Socket): prefixes "unix://" to the configured socket path (e.g., unix:///var/run/datadog/dsd.socket) + - any other value (e.g., "UDP"): adds no prefix and appends ":<dogstatsdPort>" to the hostname/IP when the port is non-zero (e.g., 172.20.180.10:8125) +- reads the global.metrics.datadog.dogstatsd configuration + +Usage: {{ template "consul.dogstatsdAaddressInfo" . }} +*/}} + +{{- define "consul.dogstatsdAaddressInfo" -}} +{{- if (and .Values.global.metrics.datadog.enabled .Values.global.metrics.datadog.dogstatsd.enabled) }} + "dogstatsd_addr": "{{- if eq .Values.global.metrics.datadog.dogstatsd.socketTransportType "UDS" }}unix://{{ .Values.global.metrics.datadog.dogstatsd.dogstatsdAddr }}{{- else }}{{ .Values.global.metrics.datadog.dogstatsd.dogstatsdAddr | trimAll "\"" }}{{- if ne ( .Values.global.metrics.datadog.dogstatsd.dogstatsdPort | int ) 0 }}:{{ .Values.global.metrics.datadog.dogstatsd.dogstatsdPort | toString }}{{- end }}{{- end }}",{{- end }} +{{- end -}} + +{{/* +Configures the metrics prefix filter used to either allow or disallow metrics for certain RPC or gRPC server calls: + +Usage: {{ template "consul.prefixFilter" . 
}} +*/}} +{{- define "consul.prefixFilter" -}} +{{- $allowList := .Values.global.metrics.prefixFilter.allowList }} +{{- $blockList := .Values.global.metrics.prefixFilter.blockList }} +{{- if and (not (empty $allowList)) (not (empty $blockList)) }} + "prefix_filter": [{{- range $index, $value := concat $allowList $blockList -}} + "{{- if (has $value $allowList) }}{{ printf "+%s" ($value | trimAll "\"") }}{{- else }}{{ printf "-%s" ($value | trimAll "\"") }}{{- end }}"{{- if lt $index (sub (len (concat $allowList $blockList)) 1) -}},{{- end -}} + {{- end -}}], +{{- else if not (empty $allowList) }} + "prefix_filter": [{{- range $index, $value := $allowList -}} + "{{ printf "+%s" ($value | trimAll "\"") }}"{{- if lt $index (sub (len $allowList) 1) -}},{{- end -}} + {{- end -}}], +{{- else if not (empty $blockList) }} + "prefix_filter": [{{- range $index, $value := $blockList -}} + "{{ printf "-%s" ($value | trimAll "\"") }}"{{- if lt $index (sub (len $blockList) 1) -}},{{- end -}} + {{- end -}}], +{{- end }} +{{- end -}} + +{{/* +Retrieves the global consul/consul-enterprise version string for use with labels or tags. +Requirements for valid labels: + - a valid label must be an empty string or consist of + => alphanumeric characters + => '-', '_' or '.' + => must start and end with an alphanumeric character + (e.g. 
'MyValue', or 'my_value', or '12345', regex used for validation is + '(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?') + +Usage: {{ template "consul.versionInfo" . }} +*/}} +{{- define "consul.versionInfo" -}} +{{- $imageVersion := regexSplit ":" .Values.global.image -1 }} +{{- $versionInfo := printf "%s" (last $imageVersion) | trimSuffix "\"" }} +{{- $sanitizedVersion := "" }} +{{- $pattern := "^([A-Za-z0-9][-A-Za-z0-9_.]*[A-Za-z0-9])?$" }} +{{- if not (regexMatch $pattern $versionInfo) -}} + {{- $sanitizedVersion = regexReplaceAll "[^A-Za-z0-9-_.]|sha256" $versionInfo "" }} + {{- $sanitizedVersion = printf "%s" (trimSuffix "-" (trimPrefix "-" $sanitizedVersion)) -}} +{{- else }} + {{- $sanitizedVersion = $versionInfo }} +{{- end -}} +{{- printf "%s" $sanitizedVersion | quote }} +{{- end -}} \ No newline at end of file diff --git a/charts/consul/templates/datadog-agent-role.yaml b/charts/consul/templates/datadog-agent-role.yaml new file mode 100644 index 0000000000..191e6433c6 --- /dev/null +++ b/charts/consul/templates/datadog-agent-role.yaml @@ -0,0 +1,38 @@ +{{- if .Values.global.metrics.datadog.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ template "consul.fullname" . }}-datadog-metrics + namespace: {{ .Release.Namespace }} + labels: + app: datadog + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + component: agent +{{- if (or (and .Values.global.openshift.enabled .Values.server.exposeGossipAndRPCPorts) .Values.global.enablePodSecurityPolicies) }} +rules: +{{- if .Values.global.enablePodSecurityPolicies }} + - apiGroups: ["policy"] + resources: ["podsecuritypolicies"] + resourceNames: + - {{ template "consul.fullname" . }}-datadog-metrics + verbs: + - use +{{- end }} +{{- if (and .Values.global.openshift.enabled .Values.server.exposeGossipAndRPCPorts ) }} + - apiGroups: ["security.openshift.io"] + resources: ["securitycontextconstraints"] + resourceNames: + - {{ template "consul.fullname" . 
}}-datadog-metrics + verbs: + - use +{{- end }} +{{- else}} +rules: + - apiGroups: [ "" ] + resources: [ "secrets" ] + resourceNames: + - {{ .Release.Namespace }}-datadog-agent-metrics-acl-token + verbs: [ "get", "watch", "list" ] +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/consul/templates/datadog-agent-rolebinding.yaml b/charts/consul/templates/datadog-agent-rolebinding.yaml new file mode 100644 index 0000000000..5fc3fdf545 --- /dev/null +++ b/charts/consul/templates/datadog-agent-rolebinding.yaml @@ -0,0 +1,26 @@ +{{- if .Values.global.metrics.datadog.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ template "consul.fullname" . }}-datadog-metrics + namespace: {{ .Release.Namespace }} + labels: + app: {{ template "consul.name" . }} + chart: {{ template "consul.chart" . }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + component: agent +subjects: + - kind: ServiceAccount + apiGroup: "" + name: datadog-agent + namespace: datadog + - kind: ServiceAccount + apiGroup: "" + name: datadog-cluster-agent + namespace: datadog +roleRef: + kind: Role + name: {{ template "consul.fullname" . 
}}-datadog-metrics + apiGroup: "" +{{- end }} \ No newline at end of file diff --git a/charts/consul/templates/server-acl-init-job.yaml b/charts/consul/templates/server-acl-init-job.yaml index 7d56116d8d..f6ac8f8a6c 100644 --- a/charts/consul/templates/server-acl-init-job.yaml +++ b/charts/consul/templates/server-acl-init-job.yaml @@ -268,6 +268,10 @@ spec: -create-enterprise-license-token=true \ {{- end }} + {{- if (and (not .Values.global.metrics.datadog.dogstatsd.enabled) .Values.global.metrics.datadog.enabled .Values.global.acls.manageSystemACLs) }} + -create-dd-agent-token=true \ + {{- end }} + {{- if .Values.server.snapshotAgent.enabled }} -snapshot-agent=true \ {{- end }} diff --git a/charts/consul/templates/server-config-configmap.yaml b/charts/consul/templates/server-config-configmap.yaml index 8cd726f445..423eeac60a 100644 --- a/charts/consul/templates/server-config-configmap.yaml +++ b/charts/consul/templates/server-config-configmap.yaml @@ -30,6 +30,7 @@ data: {{- if .Values.server.logLevel }} "log_level": "{{ .Values.server.logLevel | upper }}", {{- end }} + "enable_debug": {{ .Values.server.enableAgentDebug }}, "domain": "{{ .Values.global.domain }}", "limits": { "request_limits": { @@ -187,7 +188,13 @@ data: telemetry-config.json: |- { "telemetry": { - "prometheus_retention_time": "{{ .Values.global.metrics.agentMetricsRetentionTime }}" + "prometheus_retention_time": "{{ .Values.global.metrics.agentMetricsRetentionTime }}", + "disable_hostname": {{ .Values.global.metrics.disableAgentHostName }},{{ template "consul.prefixFilter" . }} + "enable_host_metrics": {{ .Values.global.metrics.enableHostMetrics }}{{- if .Values.global.metrics.datadog.dogstatsd.enabled }},{{ template "consul.dogstatsdAaddressInfo" . 
}} + {{- if .Values.global.metrics.datadog.dogstatsd.enabled }} + "dogstatsd_tags": {{ .Values.global.metrics.datadog.dogstatsd.dogstatsdTags | toJson }} + {{- end }} + {{- end }} } } {{- end }} diff --git a/charts/consul/templates/server-disruptionbudget.yaml b/charts/consul/templates/server-disruptionbudget.yaml index edf9c1c57f..56805edc2a 100644 --- a/charts/consul/templates/server-disruptionbudget.yaml +++ b/charts/consul/templates/server-disruptionbudget.yaml @@ -17,7 +17,7 @@ metadata: release: {{ .Release.Name }} component: server spec: - maxUnavailable: {{ template "consul.pdb.maxUnavailable" . }} + maxUnavailable: {{ template "consul.server.pdb.maxUnavailable" . }} selector: matchLabels: app: {{ template "consul.name" . }} diff --git a/charts/consul/templates/server-statefulset.yaml b/charts/consul/templates/server-statefulset.yaml index 048d259197..a0fef7a811 100644 --- a/charts/consul/templates/server-statefulset.yaml +++ b/charts/consul/templates/server-statefulset.yaml @@ -19,6 +19,9 @@ {{- end -}} {{ template "consul.validateRequiredCloudSecretsExist" . }} {{ template "consul.validateCloudSecretKeys" . }} +{{ template "consul.validateMetricsConfig" . }} +{{ template "consul.validateDatadogConfiguration" . }} +{{ template "consul.validateExtraConfig" . }} # StatefulSet to run the actual Consul server cluster. apiVersion: apps/v1 kind: StatefulSet @@ -62,6 +65,11 @@ spec: release: {{ .Release.Name }} component: server hasDNS: "true" + {{- if .Values.global.metrics.datadog.enabled }} + "tags.datadoghq.com/version": {{ template "consul.versionInfo" . }} + "tags.datadoghq.com/env": {{ template "consul.name" . }} + "tags.datadoghq.com/service": "consul-server" + {{- end }} {{- if .Values.server.extraLabels }} {{- toYaml .Values.server.extraLabels | nindent 8 }} {{- end }} @@ -123,6 +131,7 @@ spec: {{- tpl .Values.server.annotations . 
| nindent 8 }} {{- end }} {{- if (and .Values.global.metrics.enabled .Values.global.metrics.enableAgentMetrics) }} + {{- if not .Values.global.metrics.datadog.openMetricsPrometheus.enabled }} "prometheus.io/scrape": "true" "prometheus.io/path": "/v1/agent/metrics" {{- if .Values.global.tls.enabled }} @@ -133,6 +142,67 @@ spec: "prometheus.io/scheme": "http" {{- end }} {{- end }} + {{- if .Values.global.metrics.datadog.enabled }} + "ad.datadoghq.com/tolerate-unready": "true" + "ad.datadoghq.com/consul.logs": {{ .Values.global.metrics.datadog.dogstatsd.dogstatsdTags | toJson | replace "[" "[{" | replace "]" "}]" | replace ":" "\": \"" | join "\",\"" | squote }} + {{- if .Values.global.metrics.datadog.openMetricsPrometheus.enabled }} + "ad.datadoghq.com/consul.checks": | + { + "openmetrics": { + "init_config": {}, + "instances": [ + { + {{- if .Values.global.tls.enabled }} + "openmetrics_endpoint": "https://consul-server.{{ .Release.Namespace }}.svc:8501/v1/agent/metrics?format=prometheus", + "tls_cert": "/etc/datadog-agent/conf.d/consul.d/certs/tls.crt", + "tls_private_key": "/etc/datadog-agent/conf.d/consul.d/certs/tls.key", + "tls_ca_cert": "/etc/datadog-agent/conf.d/consul.d/ca/tls.crt", + {{- else }} + "openmetrics_endpoint": "http://consul-server.{{ .Release.Namespace }}.svc:8500/v1/agent/metrics?format=prometheus", + {{- end }} + {{- if ( .Values.global.acls.manageSystemACLs) }} + "headers": { + "X-Consul-Token": "ENC[k8s_secret@{{ .Release.Namespace }}/{{ .Release.Namespace }}-datadog-agent-metrics-acl-token/token]" + }, + {{- end }} + "namespace": "{{ .Release.Namespace }}", + "metrics": [ ".*" ] + } + ] + } + } + {{- else if (not .Values.global.metrics.datadog.dogstatsd.enabled) }} + "ad.datadoghq.com/consul.checks": | + { + "consul": { + "init_config": {}, + "instances": [ + { + {{- if .Values.global.tls.enabled }} + "url": "https://consul-server.{{ .Release.Namespace }}.svc:8501", + "tls_cert": "/etc/datadog-agent/conf.d/consul.d/certs/tls.crt", + 
"tls_private_key": "/etc/datadog-agent/conf.d/consul.d/certs/tls.key", + "tls_ca_cert": "/etc/datadog-agent/conf.d/consul.d/ca/tls.crt", + {{- else }} + "url": "http://consul-server.{{ .Release.Namespace }}.svc:8500", + {{- end }} + "use_prometheus_endpoint": true, + {{- if ( .Values.global.acls.manageSystemACLs) }} + "acl_token": "ENC[k8s_secret@{{ .Release.Namespace }}/{{ .Release.Namespace }}-datadog-agent-metrics-acl-token/token]", + {{- end }} + "new_leader_checks": true, + "network_latency_checks": true, + "catalog_checks": true, + "auth_type": "basic" + } + ] + } + } + {{- else }} + "ad.datadoghq.com/consul.metrics_exclude": "true" + {{- end }} + {{- end }} + {{- end }} spec: {{- if .Values.server.affinity }} affinity: @@ -218,6 +288,12 @@ spec: emptyDir: medium: "Memory" {{- end }} + {{- if and .Values.global.metrics.datadog.enabled .Values.global.metrics.datadog.dogstatsd.enabled (eq .Values.global.metrics.datadog.dogstatsd.socketTransportType "UDS" ) }} + - name: dsdsocket + hostPath: + path: /var/run/datadog + type: DirectoryOrCreate + {{- end }} {{- range .Values.server.extraVolumes }} - name: userconfig-{{ .name }} {{ .type }}: @@ -450,6 +526,11 @@ spec: mountPath: /consul/license readOnly: true {{- end }} + {{- if and .Values.global.metrics.datadog.enabled .Values.global.metrics.datadog.dogstatsd.enabled (eq .Values.global.metrics.datadog.dogstatsd.socketTransportType "UDS" ) }} + - name: dsdsocket + mountPath: /var/run/datadog + readOnly: true + {{- end }} {{- range .Values.server.extraVolumes }} - name: userconfig-{{ .name }} readOnly: true diff --git a/charts/consul/templates/telemetry-collector-deployment.yaml b/charts/consul/templates/telemetry-collector-deployment.yaml index 45216600a6..f18c54f615 100644 --- a/charts/consul/templates/telemetry-collector-deployment.yaml +++ b/charts/consul/templates/telemetry-collector-deployment.yaml @@ -248,6 +248,19 @@ spec: - name: SSL_CERT_DIR value: "/etc/ssl/certs:/trusted-cas" {{- end }} + {{- if 
.Values.global.metrics.datadog.otlp.enabled }} + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + {{- if eq (.Values.global.metrics.datadog.otlp.protocol | lower ) "http" }} + - name: CO_OTEL_HTTP_ENDPOINT + value: "http://$(HOST_IP):4318" + {{- else if eq (.Values.global.metrics.datadog.otlp.protocol | lower) "grpc" }} + - name: CO_OTEL_HTTP_ENDPOINT + value: "grpc://$(HOST_IP):4317" + {{- end }} + {{- end }} {{- include "consul.extraEnvironmentVars" .Values.telemetryCollector | nindent 12 }} command: - "/bin/sh" diff --git a/charts/consul/test/unit/server-acl-init-job.bats b/charts/consul/test/unit/server-acl-init-job.bats index a9873a8e61..99fc6b9a9e 100644 --- a/charts/consul/test/unit/server-acl-init-job.bats +++ b/charts/consul/test/unit/server-acl-init-job.bats @@ -1081,6 +1081,7 @@ load _helpers local expected=$(echo '{ "consul.hashicorp.com/connect-inject": "false", + "consul.hashicorp.com/mesh-inject": "false", "vault.hashicorp.com/agent-inject": "true", "vault.hashicorp.com/agent-pre-populate": "true", "vault.hashicorp.com/agent-pre-populate-only": "false", @@ -2356,7 +2357,11 @@ load _helpers -s templates/server-acl-init-job.yaml \ --set 'global.acls.manageSystemACLs=true' \ . 
| tee /dev/stderr | - yq -r '.spec.template.metadata.annotations | del(."consul.hashicorp.com/connect-inject") | del(."consul.hashicorp.com/config-checksum")' | tee /dev/stderr) + yq -r '.spec.template.metadata.annotations | + del(."consul.hashicorp.com/connect-inject") | + del(."consul.hashicorp.com/mesh-inject") | + del(."consul.hashicorp.com/config-checksum")' | + tee /dev/stderr) [ "${actual}" = "{}" ] } @@ -2406,3 +2411,85 @@ load _helpers yq -r '.spec.template.metadata.annotations["argocd.argoproj.io/hook-delete-policy"]' | tee /dev/stderr) [ "${actual}" = null ] } + +#-------------------------------------------------------------------- +# resource-apis + +@test "serverACLInit/Job: resource-apis is not set by default" { + cd `chart_dir` + local object=$(helm template \ + -s templates/server-acl-init-job.yaml \ + --set 'global.acls.manageSystemACLs=true' \ + . | tee /dev/stderr | + yq '.spec.template.spec.containers[0].command' | tee /dev/stderr) + + local actual=$(echo $object | + yq 'any(contains("-enable-resource-apis"))' | tee /dev/stderr) + [ "${actual}" = "false" ] +} + +@test "serverACLInit/Job: -enable-resource-apis=true is set when global.experiments contains [\"resource-apis\"] " { + cd `chart_dir` + local object=$(helm template \ + -s templates/server-acl-init-job.yaml \ + --set 'global.acls.manageSystemACLs=true' \ + --set 'global.tls.enabled=true' \ + --set 'connectInject.enabled=true' \ + --set 'global.experiments[0]=resource-apis' \ + --set 'ui.enabled=false' \ + . 
| tee /dev/stderr | + yq '.spec.template.spec.containers[0].command' | tee /dev/stderr) + + local actual=$(echo $object | + yq 'any(contains("-enable-resource-apis=true"))' | tee /dev/stderr) + [ "${actual}" = "true" ] +} + +#-------------------------------------------------------------------- +# global.metrics.datadog + +@test "serverACLInit/Job: -create-dd-agent-token not set when datadog=false and manageSystemACLs=true" { + cd `chart_dir` + local command=$(helm template \ + -s templates/server-acl-init-job.yaml \ + --set 'global.acls.manageSystemACLs=true' \ + . | tee /dev/stderr | + yq '.spec.template.spec.containers[0].command' | tee /dev/stderr) + + local actual=$( echo "$command" | + yq 'any(contains("-create-dd-agent-token"))' | tee /dev/stderr) + [ "${actual}" = "false" ] +} + +@test "serverACLInit/Job: -create-dd-agent-token set when global.metrics.datadog=true and global.acls.manageSystemACLs=true" { + cd `chart_dir` + local command=$(helm template \ + -s templates/server-acl-init-job.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.datadog.enabled=true' \ + --set 'global.acls.manageSystemACLs=true' \ + . | tee /dev/stderr | + yq '.spec.template.spec.containers[0].command' | tee /dev/stderr) + + local actual=$( echo "$command" | + yq 'any(contains("-create-dd-agent-token"))' | tee /dev/stderr) + [ "${actual}" = "true" ] +} + +@test "serverACLInit/Job: -create-dd-agent-token NOT set when global.metrics.datadog=true, global.metrics.datadog.dogstatsd.enabled=true, and global.acls.manageSystemACLs=true" { + cd `chart_dir` + local command=$(helm template \ + -s templates/server-acl-init-job.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.datadog.enabled=true' \ + --set 'global.metrics.datadog.dogstatsd.enabled=true' \ + --set 'global.acls.manageSystemACLs=true' \ + . 
| tee /dev/stderr | + yq '.spec.template.spec.containers[0].command' | tee /dev/stderr) + + local actual=$( echo "$command" | + yq 'any(contains("-create-dd-agent-token"))' | tee /dev/stderr) + [ "${actual}" = "false" ] +} \ No newline at end of file diff --git a/charts/consul/test/unit/server-config-configmap.bats b/charts/consul/test/unit/server-config-configmap.bats index 5a9d07c7de..d75e2fd799 100755 --- a/charts/consul/test/unit/server-config-configmap.bats +++ b/charts/consul/test/unit/server-config-configmap.bats @@ -1256,19 +1256,19 @@ load _helpers local actual=$(echo $object | jq -r .audit.sink.MySink1.path | tee /dev/stderr) [ "${actual}" = "/tmp/audit.json" ] - + local actual=$(echo $object | jq -r .audit.sink.MySink3.path | tee /dev/stderr) [ "${actual}" = "/tmp/audit-3.json" ] - local actual=$(echo $object | jq -r .audit.sink.MySink1.rotate_max_files | tee /dev/stderr) - [ ${actual} = 15 ] - local actual=$(echo $object | jq -r .audit.sink.MySink2.path | tee /dev/stderr) [ "${actual}" = "/tmp/audit-2.json" ] local actual=$(echo $object | jq -r .audit.sink.MySink1.name | tee /dev/stderr) [ "${actual}" = "null" ] + local actual=$(echo $object | jq -r .audit.sink.MySink1.rotate_max_files | tee /dev/stderr) + [ "${actual}" = 15 ] + local actual=$(echo $object | jq -r .audit.sink.MySink3.delivery_guarantee | tee /dev/stderr) [ "${actual}" = "best-effort" ] @@ -1308,3 +1308,275 @@ load _helpers [ "${configmap}" = "DEBUG" ] } + +#-------------------------------------------------------------------- +# Datadog + +@test "server/ConfigMap: when global.metrics.datadog.enabled=true, sets default telemetry.dogstatsd_addr config" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.datadog.enabled=true' \ + --set 'global.metrics.datadog.dogstatsd.enabled=true' \ + . 
| tee /dev/stderr | + yq -r '.data["telemetry-config.json"]' | jq -r .telemetry.dogstatsd_addr | tee /dev/stderr) + + [ "${actual}" = "unix:///var/run/datadog/dsd.socket" ] +} + +@test "server/ConfigMap: when global.metrics.datadog.enabled=true, sets non-default telemetry.dogstatsd_addr config" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.datadog.enabled=true' \ + --set 'global.metrics.datadog.dogstatsd.enabled=true' \ + --set 'global.metrics.datadog.dogstatsd.socketTransportType="UDP"' \ + --set 'global.metrics.datadog.dogstatsd.dogstatsdAddr="datadog-agent.default.svc.cluster.local"' \ + . | tee /dev/stderr | + yq -r '.data["telemetry-config.json"]' | jq -r .telemetry.dogstatsd_addr | tee /dev/stderr) + + [ "${actual}" = "datadog-agent.default.svc.cluster.local" ] +} + +@test "server/ConfigMap: when global.metrics.datadog.enabled=true, sets non-default namespace telemetry.dogstatsd_addr with non-default port config" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.datadog.enabled=true' \ + --set 'global.metrics.datadog.dogstatsd.enabled=true' \ + --set 'global.metrics.datadog.dogstatsd.socketTransportType="UDP"' \ + --set 'global.metrics.datadog.dogstatsd.dogstatsdAddr="127.0.0.1"' \ + --set 'global.metrics.datadog.dogstatsd.dogstatsdPort=8000' \ + . 
| tee /dev/stderr | + yq -r '.data["telemetry-config.json"]' | jq -r .telemetry.dogstatsd_addr | tee /dev/stderr) + + [ "${actual}" = "127.0.0.1:8000" ] +} + +@test "server/ConfigMap: when global.metrics.datadog.enabled=true, sets default telemetry.dogstatsd_tags config" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.datadog.enabled=true' \ + --set 'global.metrics.datadog.dogstatsd.enabled=true' \ + . | tee /dev/stderr | + yq -r '.data["telemetry-config.json"]' | jq -r .telemetry.dogstatsd_tags | jq -r '[ .[] ]| join (" ")' | tee /dev/stderr) + + [ "${actual}" = "source:consul consul_service:consul-server" ] +} + +@test "server/ConfigMap: when global.metrics.datadog.enabled=true, sets non-default telemetry.dogstatsd_tags config" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.datadog.enabled=true' \ + --set 'global.metrics.datadog.dogstatsd.enabled=true' \ + --set 'global.metrics.datadog.dogstatsd.dogstatsdTags'='[\"source:consul-dataplane\"\,\"service:consul-server-connection-manager\"]' \ + . | tee /dev/stderr | + yq -r '.data["telemetry-config.json"]' | jq -r .telemetry.dogstatsd_tags | jq -r '[ .[] ]| join (" ")' | tee /dev/stderr) + + [ "${actual}" = "source:consul-dataplane service:consul-server-connection-manager" ] +} + +#-------------------------------------------------------------------- +# Consul Agent Metrics Prefix Filtering + +@test "server/ConfigMap: when global.metrics.prefixFilter default, empty telemetry.prefix_filter string list" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + . 
| tee /dev/stderr | + yq -r '.data["telemetry-config.json"]' | jq -r .telemetry.prefix_filter | jq -r '[ .[] ]| join (" ")' | tee /dev/stderr) + + [ "${actual}" = "" ] +} + +@test "server/ConfigMap: when global.metrics.prefixFilter.allowList, sets correctly prepended telemetry.prefix_filter string list" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.prefixFilter.allowList'={'"consul.rpc.server.call"'\,'"consul.grpc.server.call"'} \ + . | tee /dev/stderr | + yq -r '.data["telemetry-config.json"]' | jq -r .telemetry.prefix_filter | jq -r '[ .[] ]| join (" ")' | tee /dev/stderr) + + [ "${actual}" = "+consul.rpc.server.call +consul.grpc.server.call" ] +} + +@test "server/ConfigMap: when global.metrics.prefixFilter.blockList, sets correctly prepended telemetry.prefix_filter string list" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.prefixFilter.blockList'={'"consul.rpc.server.call"'\,'"consul.grpc.server.call"'} \ + . 
| tee /dev/stderr | + yq -r '.data["telemetry-config.json"]' | jq -r .telemetry.prefix_filter | jq -r '[ .[] ]| join (" ")' | tee /dev/stderr) + + [ "${actual}" = "-consul.rpc.server.call -consul.grpc.server.call" ] +} + +@test "server/ConfigMap: when global.metrics.prefixFilter.blockList and allowList, sets correctly prepended telemetry.prefix_filter string list" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.prefixFilter.allowList'={'"consul.rpc.server.call"'\,'"consul.http.GET"'} \ + --set 'global.metrics.prefixFilter.blockList'={'"consul.http"'\,'"consul.raft.apply"'} \ + . | tee /dev/stderr | + yq -r '.data["telemetry-config.json"]' | jq -r .telemetry.prefix_filter | jq -r '[ .[] ]| join (" ")' | tee /dev/stderr) + + [ "${actual}" = "+consul.rpc.server.call +consul.http.GET -consul.http -consul.raft.apply" ] +} + +#-------------------------------------------------------------------- +# Consul Agent Debug (PPROF) + +@test "server/ConfigMap: global.server.enableAgentDebug default, sets default enable_debug = false in server agent config" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + . | tee /dev/stderr | + yq -r '.data["server.json"]' | jq -r .enable_debug | tee /dev/stderr) + + [ "${actual}" = "false" ] +} + +@test "server/ConfigMap: when global.server.enableAgentDebug=true, sets enable_debug = true in server agent config" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + --set 'server.enableAgentDebug=true' \ + . 
| tee /dev/stderr | + yq -r '.data["server.json"]' | jq -r .enable_debug | tee /dev/stderr) + + [ "${actual}" = "true" ] +} + +#-------------------------------------------------------------------- +# Consul Agent Telemetry Host Metrics + +@test "server/ConfigMap: when global.metrics.enableHostMetrics is default, telemetry.enable_host_metrics = false in agent config" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + . | tee /dev/stderr | + yq -r '.data["telemetry-config.json"]' | jq -r .telemetry.enable_host_metrics | tee /dev/stderr) + + [ "${actual}" = "false" ] +} + +@test "server/ConfigMap: when global.metrics.enableHostMetrics=true, sets telemetry.enable_host_metrics = true in agent config" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.enableHostMetrics=true' \ + . | tee /dev/stderr | + yq -r '.data["telemetry-config.json"]' | jq -r .telemetry.enable_host_metrics | tee /dev/stderr) + + [ "${actual}" = "true" ] +} + +#-------------------------------------------------------------------- +# Consul Agent Telemetry Hostname Disable + +@test "server/ConfigMap: when global.metrics.disableAgentHostName is default, telemetry.disableAgentHostName = false in agent config" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + . 
| tee /dev/stderr | + yq -r '.data["telemetry-config.json"]' | jq -r .telemetry.disable_hostname | tee /dev/stderr) + + [ "${actual}" = "false" ] +} + +@test "server/ConfigMap: when global.metrics.disableAgentHostName=true, sets telemetry.disableAgentHostName = true in agent config" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.disableAgentHostName=true' \ + . | tee /dev/stderr | + yq -r '.data["telemetry-config.json"]' | jq -r .telemetry.disable_hostname | tee /dev/stderr) + + [ "${actual}" = "true" ] +} + +#-------------------------------------------------------------------- +# server.autopilot.min_quorum + +@test "server/ConfigMap: autopilot.min_quorum=1 when replicas=1" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + --set 'server.replicas=1' \ + . | tee /dev/stderr | + yq -r '.data["server.json"]' | jq -r .autopilot.min_quorum | tee /dev/stderr) + + [ "${actual}" = "1" ] +} + +@test "server/ConfigMap: autopilot.min_quorum=2 when replicas=2" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + --set 'server.replicas=2' \ + . | tee /dev/stderr | + yq -r '.data["server.json"]' | jq -r .autopilot.min_quorum | tee /dev/stderr) + + [ "${actual}" = "2" ] +} + +@test "server/ConfigMap: autopilot.min_quorum=2 when replicas=3" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + --set 'server.replicas=3' \ + . | tee /dev/stderr | + yq -r '.data["server.json"]' | jq -r .autopilot.min_quorum | tee /dev/stderr) + + [ "${actual}" = "2" ] +} + +@test "server/ConfigMap: autopilot.min_quorum=3 when replicas=4" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + --set 'server.replicas=4' \ + . 
| tee /dev/stderr | + yq -r '.data["server.json"]' | jq -r .autopilot.min_quorum | tee /dev/stderr) + + [ "${actual}" = "3" ] +} + +@test "server/ConfigMap: autopilot.min_quorum=3 when replicas=5" { + cd `chart_dir` + local actual=$(helm template \ + -s templates/server-config-configmap.yaml \ + --set 'server.replicas=5' \ + . | tee /dev/stderr | + yq -r '.data["server.json"]' | jq -r .autopilot.min_quorum | tee /dev/stderr) + + [ "${actual}" = "3" ] +} diff --git a/charts/consul/test/unit/server-statefulset.bats b/charts/consul/test/unit/server-statefulset.bats index 124c83306c..8fceb5c474 100755 --- a/charts/consul/test/unit/server-statefulset.bats +++ b/charts/consul/test/unit/server-statefulset.bats @@ -694,7 +694,11 @@ load _helpers local actual=$(helm template \ -s templates/server-statefulset.yaml \ . | tee /dev/stderr | - yq -r '.spec.template.metadata.annotations | del(."consul.hashicorp.com/connect-inject") | del(."consul.hashicorp.com/config-checksum")' | tee /dev/stderr) + yq -r '.spec.template.metadata.annotations | + del(."consul.hashicorp.com/connect-inject") | + del(."consul.hashicorp.com/mesh-inject") | + del(."consul.hashicorp.com/config-checksum")' | + tee /dev/stderr) [ "${actual}" = "{}" ] } @@ -779,6 +783,293 @@ load _helpers [ "${actual}" = "https" ] } +@test "server/StatefulSet: when global.metrics.datadog.enabled=true, adds ad.datadoghq.com annotations" { + cd `chart_dir` + local annotations=$(helm template \ + -s templates/server-statefulset.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.datadog.enabled=true' \ + . 
| tee /dev/stderr | + yq -r '.spec.template.metadata.annotations' | tee /dev/stderr) + + local actual=$(echo "$annotations" | \ + yq -r '."ad.datadoghq.com/tolerate-unready"' | tee /dev/stderr) + [ "${actual}" = "true" ] + + local actual=$(echo "$annotations" | \ + yq -r '."ad.datadoghq.com/consul.logs"' | tee /dev/stderr) + [ "${actual}" = '[{"source": "consul","consul_service": "consul-server"}]' ] + + local consul_checks=$(echo "$annotations" | \ + yq -r '."ad.datadoghq.com/consul.checks"' | tee /dev/stderr) + + local actual="$( echo "$consul_checks" | \ + jq -r .consul.init_config | tee /dev/stderr)" + [ "${actual}" = "{}" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .consul.instances | jq -r .[0].url | tee /dev/stderr)" + [ "${actual}" = "http://consul-server.consul.svc:8500" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .consul.instances | jq -r .[0].new_leader_checks | tee /dev/stderr)" + [ "${actual}" = "true" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .consul.instances | jq -r .[0].catalog_checks | tee /dev/stderr)" + [ "${actual}" = "true" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .consul.instances | jq -r .[0].auth_type | tee /dev/stderr)" + [ "${actual}" = "basic" ] +} + +@test "server/StatefulSet: when global.metrics.datadog.enabled=true and global.tls.enabled, adds tls altered ad.datadoghq.com annotations" { + cd `chart_dir` + local annotations=$(helm template \ + -s templates/server-statefulset.yaml \ + --set 'global.tls.enabled=true' \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.datadog.enabled=true' \ + . 
| tee /dev/stderr | + yq -r '.spec.template.metadata.annotations' | tee /dev/stderr) + + local actual=$(echo "$annotations" | \ + yq -r '."ad.datadoghq.com/tolerate-unready"' | tee /dev/stderr) + [ "${actual}" = "true" ] + + local actual=$(echo "$annotations" | \ + yq -r '."ad.datadoghq.com/consul.logs"' | tee /dev/stderr) + [ "${actual}" = '[{"source": "consul","consul_service": "consul-server"}]' ] + + local consul_checks=$(echo "$annotations" | \ + yq -r '."ad.datadoghq.com/consul.checks"' | tee /dev/stderr) + + local actual="$( echo "$consul_checks" | \ + jq -r .consul.init_config | tee /dev/stderr)" + [ "${actual}" = "{}" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .consul.instances | jq -r .[0].url | tee /dev/stderr)" + [ "${actual}" = "https://consul-server.default.svc:8501" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .consul.instances | jq -r .[0].tls_cert | tee /dev/stderr)" + [ "${actual}" = "/etc/datadog-agent/conf.d/consul.d/certs/tls.crt" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .consul.instances | jq -r .[0].tls_private_key | tee /dev/stderr)" + [ "${actual}" = "/etc/datadog-agent/conf.d/consul.d/certs/tls.key" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .consul.instances | jq -r .[0].tls_ca_cert | tee /dev/stderr)" + [ "${actual}" = "/etc/datadog-agent/conf.d/consul.d/ca/tls.crt" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .consul.instances | jq -r .[0].new_leader_checks | tee /dev/stderr)" + [ "${actual}" = "true" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .consul.instances | jq -r .[0].catalog_checks | tee /dev/stderr)" + [ "${actual}" = "true" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .consul.instances | jq -r .[0].auth_type | tee /dev/stderr)" + [ "${actual}" = "basic" ] +} + +@test "server/StatefulSet: when global.metrics.datadog.enabled=true and global.acls.manageSystemACLs=true, adds ad.datadoghq.com annotations for datadog-agent-metrics-acl-token 
secret rendering" { + cd `chart_dir` + local annotations=$(helm template \ + -s templates/server-statefulset.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.datadog.enabled=true' \ + --set 'global.acls.manageSystemACLs=true' \ + . | tee /dev/stderr | + yq -r '.spec.template.metadata.annotations' | tee /dev/stderr) + + local consul_checks=$(echo "$annotations" | \ + yq -r '."ad.datadoghq.com/consul.checks"' | tee /dev/stderr) + + local actual="$( echo "$consul_checks" | \ + jq -r .consul.instances | jq -r .[0].acl_token | tee /dev/stderr)" + [ "${actual}" = "ENC[k8s_secret@default/default-datadog-agent-metrics-acl-token/token]" ] +} + +@test "server/StatefulSet: when global.metrics.datadog.openMetricsPrometheus.enabled, applicable openmetrics annotation is set" { + cd `chart_dir` + local annotations=$(helm template \ + -s templates/server-statefulset.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'telemetryCollector.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.datadog.enabled=true' \ + --set 'global.metrics.datadog.openMetricsPrometheus.enabled=true' \ + . 
| tee /dev/stderr | + yq -r '.spec.template.metadata.annotations' | tee /dev/stderr) + + local consul_checks=$(echo "$annotations" | \ + yq -r '."ad.datadoghq.com/consul.checks"' | tee /dev/stderr) + + local actual="$( echo "$consul_checks" | \ + jq -r .openmetrics.init_config | tee /dev/stderr)" + [ "${actual}" = "{}" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .openmetrics.instances | jq -r .[0].openmetrics_endpoint | tee /dev/stderr)" + [ "${actual}" = "http://consul-server.default.svc:8500/v1/agent/metrics?format=prometheus" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .openmetrics.instances | jq -r .[0].headers | tee /dev/stderr)" + [ -n "${actual}" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .openmetrics.instances | jq -r .[0].namespace | tee /dev/stderr)" + [ "${actual}" = "default" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .openmetrics.instances | jq -r .[0].metrics[0] | tee /dev/stderr)" + [ "${actual}" = ".*" ] + +} + +@test "server/StatefulSet: when datadog.openMetricsPrometheus.enabled, applicable openmetrics annotation is set with tls url" { + cd `chart_dir` + local annotations=$(helm template \ + -s templates/server-statefulset.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'global.tls.enabled=true' \ + --set 'telemetryCollector.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.datadog.enabled=true' \ + --set 'global.metrics.datadog.openMetricsPrometheus.enabled=true' \ + . 
| tee /dev/stderr | + yq -r '.spec.template.metadata.annotations' | tee /dev/stderr) + + local consul_checks=$(echo "$annotations" | \ + yq -r '."ad.datadoghq.com/consul.checks"' | tee /dev/stderr) + + local actual="$( echo "$consul_checks" | \ + jq -r .openmetrics.init_config | tee /dev/stderr)" + [ "${actual}" = "{}" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .openmetrics.instances | jq -r .[0].openmetrics_endpoint | tee /dev/stderr)" + [ "${actual}" = "https://consul-server.default.svc:8501/v1/agent/metrics?format=prometheus" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .openmetrics.instances | jq -r .[0].headers | tee /dev/stderr)" + [ -n "${actual}" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .openmetrics.instances | jq -r .[0].tls_cert | tee /dev/stderr)" + [ "${actual}" = "/etc/datadog-agent/conf.d/consul.d/certs/tls.crt" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .openmetrics.instances | jq -r .[0].tls_private_key | tee /dev/stderr)" + [ "${actual}" = "/etc/datadog-agent/conf.d/consul.d/certs/tls.key" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .openmetrics.instances | jq -r .[0].tls_ca_cert | tee /dev/stderr)" + [ "${actual}" = "/etc/datadog-agent/conf.d/consul.d/ca/tls.crt" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .openmetrics.instances | jq -r .[0].namespace | tee /dev/stderr)" + [ "${actual}" = "default" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .openmetrics.instances | jq -r .[0].metrics[0] | tee /dev/stderr)" + [ "${actual}" = ".*" ] +} + +@test "server/StatefulSet: when global.metrics.datadog.openMetricsPrometheus.enabled, applicable openmetrics annotation is set with acls.manageSystemACLs enabled" { + cd `chart_dir` + local annotations=$(helm template \ + -s templates/server-statefulset.yaml \ + --set 'global.metrics.enabled=true' \ + --set 'telemetryCollector.enabled=true' \ + --set 'global.acls.manageSystemACLs=true' \ + --set 
'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.datadog.enabled=true' \ + --set 'global.metrics.datadog.openMetricsPrometheus.enabled=true' \ + . | tee /dev/stderr | + yq -r '.spec.template.metadata.annotations' | tee /dev/stderr) + + local consul_checks=$(echo "$annotations" | \ + yq -r '."ad.datadoghq.com/consul.checks"' | tee /dev/stderr) + + local actual="$( echo "$consul_checks" | \ + jq -r .openmetrics.init_config | tee /dev/stderr)" + [ "${actual}" = "{}" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .openmetrics.instances | jq -r .[0].openmetrics_endpoint | tee /dev/stderr)" + [ "${actual}" = "http://consul-server.default.svc:8500/v1/agent/metrics?format=prometheus" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .openmetrics.instances | jq -r '.[0].headers["X-Consul-Token"]' | tee /dev/stderr)" + [ "${actual}" = "ENC[k8s_secret@default/default-datadog-agent-metrics-acl-token/token]" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .openmetrics.instances | jq -r .[0].namespace | tee /dev/stderr)" + [ "${actual}" = "default" ] + + local actual="$( echo "$consul_checks" | \ + jq -r .openmetrics.instances | jq -r .[0].metrics[0] | tee /dev/stderr)" + [ "${actual}" = ".*" ] + +} + +@test "server/StatefulSet: consul metrics exclusion annotation when using metrics.datadog.dogstatsd.enabled=true" { + cd `chart_dir` + local annotations=$(helm template \ + -s templates/server-statefulset.yaml \ + --set 'global.image=hashicorp/consul-enterprise:1.17.0-ent' \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.datadog.enabled=true' \ + --set 'global.metrics.datadog.dogstatsd.enabled=true' \ + . 
| tee /dev/stderr | + yq -r '.spec.template.metadata.annotations' | tee /dev/stderr) + + local actual=$( echo "$annotations" | \ + yq -r '."ad.datadoghq.com/consul.checks"' | tee /dev/stderr ) + [ -n "${actual}" ] + + local actual=$( echo "$annotations" | \ + yq -r '."ad.datadoghq.com/consul.metrics_exclude"' | tee /dev/stderr ) + [ "${actual}" = "true" ] +} + + +@test "server/StatefulSet: datadog unified tagging labels get added when global.metrics.datadog.enabled=true" { + cd `chart_dir` + local labels=$(helm template \ + -s templates/server-statefulset.yaml \ + --set 'global.image=hashicorp/consul-enterprise:1.17.0-ent' \ + --set 'global.metrics.enabled=true' \ + --set 'telemetryCollector.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.datadog.enabled=true' \ + . | tee /dev/stderr | + yq -r '.spec.template.metadata.labels' | tee /dev/stderr) + + local actual=$( echo "$labels" | \ + yq -r '."tags.datadoghq.com/version"' | tee /dev/stderr ) + [ "${actual}" = "1.17.0-ent" ] + + local actual=$( echo "$labels" | \ + yq -r '."tags.datadoghq.com/env"' | tee /dev/stderr ) + [ "${actual}" = "consul" ] + + local actual=$( echo "$labels" | \ + yq -r '."tags.datadoghq.com/service"' | tee /dev/stderr ) + [ "${actual}" = "consul-server" ] +} + #-------------------------------------------------------------------- # config-configmap @@ -788,7 +1079,7 @@ load _helpers -s templates/server-statefulset.yaml \ . | tee /dev/stderr | yq -r '.spec.template.metadata.annotations."consul.hashicorp.com/config-checksum"' | tee /dev/stderr) - [ "${actual}" = 0e599137f8357c786d46e1b694d7d867c541cb34d6056241a037afd0de14866b ] + [ "${actual}" = a4771bea366d4a6ee9037572665dc4040519dc22e9b0ff3463a263aab13675b8 ] } @test "server/StatefulSet: adds config-checksum annotation when extraConfig is provided" { @@ -798,7 +1089,7 @@ load _helpers --set 'server.extraConfig="{\"hello\": \"world\"}"' \ . 
| tee /dev/stderr | yq -r '.spec.template.metadata.annotations."consul.hashicorp.com/config-checksum"' | tee /dev/stderr) - [ "${actual}" = 3f54c51be3473d7ae4cb91c24ba03263b7700d9a3dc3196f624ce3c6c8e93b8f ] + [ "${actual}" = c6b872933263bf5fe847d61e638035637d2db89edf31ad25d0aaeaa5261649c9 ] } @test "server/StatefulSet: adds config-checksum annotation when config is updated" { @@ -808,7 +1099,84 @@ load _helpers --set 'global.acls.manageSystemACLs=true' \ . | tee /dev/stderr | yq -r '.spec.template.metadata.annotations."consul.hashicorp.com/config-checksum"' | tee /dev/stderr) - [ "${actual}" = b44c82c9e4732433f54eeed8a299f11de0bad82a920047c8a3ad039e512ba281 ] + [ "${actual}" = 576044232d6181bca69628af87c12f15311ebd3f0ab700e112b3e1dea9225125 ] +} + +#-------------------------------------------------------------------- +# server extraConfig validation + +@test "server/Statefulset: Validate enable_debug extraConfig for Consul Helm chart" { + cd `chart_dir` + run helm template \ + -s templates/server-statefulset.yaml \ + --set global.metrics.enabled=true \ + --set global.metrics.enableAgentMetrics=true \ + --set server.extraConfig=enable_debug=true \ + . + [ "$status" -eq 1 ] + [[ "$output" =~ "The enable_debug key is present in extra-from-values.json. Use server.enableAgentDebug to set this value." ]] +} + +@test "server/Statefulset: Validate disable_hostname extraConfig for Consul Helm chart" { + cd `chart_dir` + run helm template \ + -s templates/server-statefulset.yaml \ + --set global.metrics.enabled=true \ + --set global.metrics.enableAgentMetrics=true \ + --set server.extraConfig=telemetry.disable_hostname=true \ + . + [ "$status" -eq 1 ] + [[ "$output" =~ "The disable_hostname key is present in extra-from-values.json. Use global.metrics.disableAgentHostName to set this value." 
]] +} + +@test "server/Statefulset: Validate enable_host_metrics extraConfig for Consul Helm chart" { + cd `chart_dir` + run helm template \ + -s templates/server-statefulset.yaml \ + --set global.metrics.enabled=true \ + --set global.metrics.enableAgentMetrics=true \ + --set server.extraConfig=telemetry.enable_host_metrics=true \ + . + [ "$status" -eq 1 ] + [[ "$output" =~ "The enable_host_metrics key is present in extra-from-values.json. Use global.metrics.enableHostMetrics to set this value." ]] +} + +@test "server/Statefulset: Validate prefix_filter extraConfig for Consul Helm chart" { + cd `chart_dir` + run helm template \ + -s templates/server-statefulset.yaml \ + --set global.metrics.enabled=true \ + --set global.metrics.enableAgentMetrics=true \ + --set server.extraConfig=telemetry.prefix_filter=["+consul.rpc.server.call"] \ + . + [ "$status" -eq 1 ] + [[ "$output" =~ "The prefix_filter key is present in extra-from-values.json. Use global.metrics.prefix_filter to set this value." ]] +} + +@test "server/Statefulset: Validate dogstatsd_tags extraConfig for Consul Helm chart" { + cd `chart_dir` + run helm template \ + -s templates/server-statefulset.yaml \ + --set global.metrics.enabled=true \ + --set global.metrics.enableAgentMetrics=true \ + --set global.metrics.datadog.dogstatsd.enabled=true \ + --set server.extraConfig=telemetry.dogstatsd_tags='[\"source:consul-server\"\,\"consul_service:consul\"]' \ + . + [ "$status" -eq 1 ] + [[ "$output" =~ "The dogstatsd_tags key is present in extra-from-values.json. Use global.metrics.datadog.dogstatsd.dogstatsdTags to set this value." 
]] +} + +@test "server/Statefulset: Validate dogstatsd_addr extraConfig for Consul Helm chart" { + cd `chart_dir` + run helm template \ + -s templates/server-statefulset.yaml \ + --set global.metrics.enabled=true \ + --set global.metrics.enableAgentMetrics=true \ + --set global.metrics.datadog.dogstatsd.enabled=true \ + --set server.extraConfig=telemetry.dogstatsd_addr="localhost:8125" \ + . + [ "$status" -eq 1 ] + [[ "$output" =~ "The dogstatsd_addr key is present in extra-from-values.json. Use global.metrics.datadog.dogstatsd.dogstatsd_addr to set this value." ]] } #-------------------------------------------------------------------- @@ -887,16 +1255,16 @@ load _helpers . | tee /dev/stderr | yq -r '.spec.template.spec.securityContext' | tee /dev/stderr) - local actual=$(echo $security_context | jq -r .runAsNonRoot) + local actual=$(echo "$security_context" | yq -r .runAsNonRoot) [ "${actual}" = "true" ] - local actual=$(echo $security_context | jq -r .fsGroup) + local actual=$(echo "$security_context" | yq -r .fsGroup) [ "${actual}" = "1000" ] - local actual=$(echo $security_context | jq -r .runAsUser) + local actual=$(echo "$security_context" | yq -r .runAsUser) [ "${actual}" = "100" ] - local actual=$(echo $security_context | jq -r .runAsGroup) + local actual=$(echo "$security_context" | yq -r .runAsGroup) [ "${actual}" = "1000" ] } @@ -906,14 +1274,26 @@ load _helpers -s templates/server-statefulset.yaml \ --set 'server.securityContext.runAsNonRoot=false' \ --set 'server.securityContext.privileged=true' \ + --set 'server.securityContext.runAsGroup=0' \ + --set 'server.securityContext.runAsUser=0' \ + --set 'server.securityContext.fsGroup=0' \ . 
| tee /dev/stderr | yq -r '.spec.template.spec.securityContext' | tee /dev/stderr) - local actual=$(echo $security_context | jq -r .runAsNonRoot) + local actual=$(echo "$security_context" | yq -r .runAsNonRoot) [ "${actual}" = "false" ] - local actual=$(echo $security_context | jq -r .privileged) + local actual=$(echo "$security_context" | yq -r .privileged) [ "${actual}" = "true" ] + + local actual=$(echo "$security_context" | yq -r .fsGroup) + [ "${actual}" = "0" ] + + local actual=$(echo "$security_context" | yq -r .runAsUser) + [ "${actual}" = "0" ] + + local actual=$(echo "$security_context" | yq -r .runAsGroup) + [ "${actual}" = "0" ] } #-------------------------------------------------------------------- @@ -2046,7 +2426,13 @@ load _helpers --set 'global.secretsBackend.vault.consulClientRole=test' \ --set 'global.secretsBackend.vault.consulServerRole=foo' \ . | tee /dev/stderr | - yq -r '.spec.template.metadata.annotations | del(."consul.hashicorp.com/connect-inject") | del(."consul.hashicorp.com/config-checksum") | del(."vault.hashicorp.com/agent-inject") | del(."vault.hashicorp.com/role")' | tee /dev/stderr) + yq -r '.spec.template.metadata.annotations | + del(."consul.hashicorp.com/connect-inject") | + del(."consul.hashicorp.com/mesh-inject") | + del(."consul.hashicorp.com/config-checksum") | + del(."vault.hashicorp.com/agent-inject") | + del(."vault.hashicorp.com/role")' | + tee /dev/stderr) [ "${actual}" = "{}" ] } @@ -3065,3 +3451,31 @@ MIICFjCCAZsCCQCdwLtdjbzlYzAKBggqhkjOPQQDAjB0MQswCQYDVQQGEwJDQTEL' \ yq -r '.spec.template.spec.containers[1].command[2] | contains("-interval=10h34m5s")' | tee /dev/stderr) [ "${actual}" = "true" ] } + +#-------------------------------------------------------------------- +# global.experiments=["resource-apis"] + +@test "server/StatefulSet: experiments=[\"resource-apis\"] is not set in command when global.experiments is empty" { + cd `chart_dir` + local object=$(helm template \ + -s templates/server-statefulset.yaml \ + 
. | tee /dev/stderr) + + # Test the flag is not set. + local actual=$(echo "$object" | + yq '.spec.template.spec.containers[] | select(.name == "consul") | .command | any(contains("-hcl=\"experiments=[\\\"resource-apis\\\"]\""))' | tee /dev/stderr) + [ "${actual}" = "false" ] +} + +@test "server/StatefulSet: experiments=[\"resource-apis\"] is set in command when global.experiments contains \"resource-apis\"" { + cd `chart_dir` + local object=$(helm template \ + -s templates/server-statefulset.yaml \ + --set 'global.experiments[0]=resource-apis' \ + --set 'ui.enabled=false' \ + . | tee /dev/stderr) + + local actual=$(echo "$object" | + yq '.spec.template.spec.containers[] | select(.name == "consul") | .command | any(contains("-hcl=\"experiments=[\\\"resource-apis\\\"]\""))' | tee /dev/stderr) + [ "${actual}" = "true" ] +} \ No newline at end of file diff --git a/charts/consul/test/unit/telemetry-collector-deployment.bats b/charts/consul/test/unit/telemetry-collector-deployment.bats index 57d6b84b27..36591d2ec9 100755 --- a/charts/consul/test/unit/telemetry-collector-deployment.bats +++ b/charts/consul/test/unit/telemetry-collector-deployment.bats @@ -1357,3 +1357,82 @@ MIICFjCCAZsCCQCdwLtdjbzlYzAKBggqhkjOPQQDAjB0MQswCQYDVQQGEwJDQTEL' \ local actual=$(echo $object | jq -r '.[1].args | any(contains("-service-namespace=fakenamespace"))' | tee /dev/stderr) [ "${actual}" = 'true' ] } + +#-------------------------------------------------------------------- +# global.metrics.datadog.otlp + +@test "telemetryCollector/Deployment: DataDog OTLP Collector HTTP protocol verification" { + cd `chart_dir` + local object=$(helm template \ + -s templates/telemetry-collector-deployment.yaml \ + --set 'telemetryCollector.enabled=true' \ + --set 'telemetryCollector.cloud.enabled=false' \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.datadog.enabled=true' \ + --set 'global.metrics.datadog.otlp.enabled=true' \ + --set 
'global.metrics.datadog.otlp.protocol'="http" \ + . | tee /dev/stderr | + yq -r '.spec.template.spec.containers[0].env' | tee /dev/stderr) + + local actual=$(echo "$object" | + yq -r '.[] | select(.name=="CO_OTEL_HTTP_ENDPOINT").value' | tee /dev/stderr) + [ "${actual}" = 'http://$(HOST_IP):4318' ] +} + +@test "telemetryCollector/Deployment: DataDog OTLP Collector HTTP protocol verification, case-insensitive" { + cd `chart_dir` + local object=$(helm template \ + -s templates/telemetry-collector-deployment.yaml \ + --set 'telemetryCollector.enabled=true' \ + --set 'telemetryCollector.cloud.enabled=false' \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.datadog.enabled=true' \ + --set 'global.metrics.datadog.otlp.enabled=true' \ + --set 'global.metrics.datadog.otlp.protocol'="HTTP" \ + . | tee /dev/stderr | + yq -r '.spec.template.spec.containers[0].env' | tee /dev/stderr) + + local actual=$(echo "$object" | + yq -r '.[] | select(.name=="CO_OTEL_HTTP_ENDPOINT").value' | tee /dev/stderr) + [ "${actual}" = 'http://$(HOST_IP):4318' ] +} + +@test "telemetryCollector/Deployment: DataDog OTLP Collector gRPC protocol verification" { + cd `chart_dir` + local object=$(helm template \ + -s templates/telemetry-collector-deployment.yaml \ + --set 'telemetryCollector.enabled=true' \ + --set 'telemetryCollector.cloud.enabled=false' \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.datadog.enabled=true' \ + --set 'global.metrics.datadog.otlp.enabled=true' \ + --set 'global.metrics.datadog.otlp.protocol'="grpc" \ + . 
| tee /dev/stderr | + yq -r '.spec.template.spec.containers[0].env' | tee /dev/stderr) + + local actual=$(echo "$object" | + yq -r '.[] | select(.name=="CO_OTEL_HTTP_ENDPOINT").value' | tee /dev/stderr) + [ "${actual}" = 'grpc://$(HOST_IP):4317' ] +} + +@test "telemetryCollector/Deployment: DataDog OTLP Collector gRPC protocol verification, case-insensitive" { + cd `chart_dir` + local object=$(helm template \ + -s templates/telemetry-collector-deployment.yaml \ + --set 'telemetryCollector.enabled=true' \ + --set 'telemetryCollector.cloud.enabled=false' \ + --set 'global.metrics.enabled=true' \ + --set 'global.metrics.enableAgentMetrics=true' \ + --set 'global.metrics.datadog.enabled=true' \ + --set 'global.metrics.datadog.otlp.enabled=true' \ + --set 'global.metrics.datadog.otlp.protocol'="gRPC" \ + . | tee /dev/stderr | + yq -r '.spec.template.spec.containers[0].env' | tee /dev/stderr) + + local actual=$(echo "$object" | + yq -r '.[] | select(.name=="CO_OTEL_HTTP_ENDPOINT").value' | tee /dev/stderr) + [ "${actual}" = 'grpc://$(HOST_IP):4317' ] +} \ No newline at end of file diff --git a/charts/consul/values.yaml b/charts/consul/values.yaml index 0aa1571fce..d25b185e5f 100644 --- a/charts/consul/values.yaml +++ b/charts/consul/values.yaml @@ -618,6 +618,16 @@ global: # @type: boolean enableAgentMetrics: false + # Set to true to stop prepending the machine's hostname to gauge-type metrics. Default is false. + # Only applicable if `global.metrics.enabled` and `global.metrics.enableAgentMetrics` is true. + # @type: boolean + disableAgentHostName: false + + # Configures consul agent underlying host metrics. + # Only applicable if `global.metrics.enabled` and `global.metrics.enableAgentMetrics` is true. + # @type: boolean + enableHostMetrics: false + # Configures the retention time for metrics in Consul clients and # servers. This must be greater than 0 for Consul clients and servers # to expose any metrics at all. 
@@ -636,6 +646,148 @@ global: # @type: boolean enableTelemetryCollector: false + # This configures the list of filter rules to apply for allowing/blocking + # metrics by prefix in the following format: + # + # A leading "+" will enable any metrics with the given prefix, and a leading "-" will block them. + # If there is overlap between two rules, the more specific rule will take precedence. + # Blocking will take priority if the same prefix is listed multiple times. + # + # - allowList: + prefixFilter: + # @type: array + allowList: [] + # @type: array + blockList: [] + + # Configures consul integration configurations for datadog on kubernetes. + # Only applicable if `global.metrics.enabled` and `global.metrics.enableAgentMetrics` is true. + datadog: + # Enables datadog [Consul Autodiscovery Integration](https://docs.datadoghq.com/integrations/consul/?tab=containerized#metric-collection) + # by configuring the required `ad.datadoghq.com/consul.checks` annotation. The following _Consul_ agent metrics/health statuses + # are monitored by Datadog unless monitoring via OpenMetrics (Prometheus) or DogStatsD: + # - Serf events and member flaps + # - The Raft protocol + # - DNS performance + # - API Endpoints scraped: + # - `/v1/agent/metrics?format=prometheus` + # - `/v1/agent/self` + # - `/v1/status/leader` + # - `/v1/status/peers` + # - `/v1/catalog/services` + # - `/v1/health/service` + # - `/v1/health/state/any` + # - `/v1/coordinate/datacenters` + # - `/v1/coordinate/nodes` + # + # Setting either `global.metrics.datadog.otlp.enabled=true` or `global.metrics.datadog.dogstatsd.enabled=true` disables the above checks + # in lieu of metrics data collection via DogStatsD or by a customer OpenMetrics (Prometheus) collection endpoint. 
+ # + # ~> **Note:** If you have a [dogstatsd_mapper_profile](https://docs.datadoghq.com/integrations/consul/?tab=host#dogstatsd) configured for Consul + # residing on either your Datadog NodeAgent or ClusterAgent the default Consul agent metrics/health status checks will fail. If you do not desire + # to utilize DogStatsD metrics emission from Consul, remove this configuration file, and restart your Datadog agent to permit the checks to run. + # + # @default: false + # @type: boolean + enabled: false + + # Configures Kubernetes Prometheus/OpenMetrics auto-discovery annotations for use with Datadog. + # This configuration is less common and more for advanced usage with custom metrics monitoring + # configurations. See https://docs.datadoghq.com/containers/kubernetes/prometheus/?tab=kubernetesadv2 for more details + # surrounding further configuration. + openMetricsPrometheus: + # @default: false + # @type: boolean + enabled: false + + otlp: + # Enables forwarding of Consul's Telemetry Collector OTLP metrics for + # ingestion by Datadog Agent. + # @default: false + # @type: boolean + enabled: false + # Protocol used for DataDog Endpoint OTLP ingestion. + # + # Valid protocol options are one of either: + # + # - "http": will forward to DataDog HTTP OTLP Node Agent Endpoint default - "0.0.0.0:4318" + # - "grpc": will forward to DataDog gRPC OTLP Node Agent Endpoint default - "0.0.0.0:4317" + # + # @default: "http" + # @type: string + protocol: "http" + + # Configuration settings for DogStatsD metrics aggregation service + # that is bundled with the Datadog Agent. 
+ # DogStatsD implements the StatsD protocol and adds a few Datadog-specific extensions: + # - Histogram metric type + # - Service checks + # - Events + # - Tagging + dogstatsd: + enabled: false + # Sets the socket transport type for dogstatsd: + # - "UDS" (Unix Domain Socket): prefixes `unix://` to URL and appends path to socket (i.e., "unix:///var/run/datadog/dsd.socket") + # If set, this will create the required [hostPath](https://kubernetes.io/docs/concepts/storage/volumes/#hostpath) mount for + # managing [DogStatsD with Unix Domain Socket on Kubernetes](https://docs.datadoghq.com/developers/dogstatsd/unix_socket/?tab=kubernetes). + # The volume is mounted using the `DirectoryOrCreate` type, thereby setting `0755` permissions with the same kubelet group ownership. + # + # Applies the following `volumes` and `volumeMounts` to the consul-server stateful set consul containers: + # + # ```yaml + # volumes: + # - name: dsdsocket + # hostPath: + # path: /var/run/datadog + # type: DirectoryOrCreate + # volumeMounts: + # - name: dsdsocket + # mountPath: /var/run/datadog + # readOnly: true + # ``` + # - "UDP" (User Datagram Protocol): assigns address to use `hostname/IP:Port` formatted URL for UDP transport to hostIP based + # dogstatsd sink (i.e., 127.0.0.1:8125). HostIP of Datadog agent must be reachable and known to Consul server emitting metrics. + # + # @default: "UDS" + # @type: string + socketTransportType: "UDS" + # Sets URL path for dogstatsd: + # + # Can be either a path to unix domain socket or an IP Address or Hostname that's reachable from the + # consul-server service, server containers. When using "UDS" the path will be appended. When using "UDP" + # the path will be prepended to the specified `dogstatsdPort`. + # + # @default: "/var/run/datadog/dsd.socket" + # @type: string + dogstatsdAddr: "/var/run/datadog/dsd.socket" + # Configures IP based dogstatsd designated port that will be appended to "UDP" based transport socket IP/Hostname URL. 
+ # + # If using a kubernetes service based address (i.e., datadog.default.svc.cluster.local), set this to 0 to + # mitigate appending a port value to the dogstatsd address field. Resultant address would be "datadog.default.svc.cluster.local" with + # default port setting, while appending a non-zero port would result in "172.10.23.6:8125" with a dogstatsdAddr value + # of "172.10.23.6". + # + # @default: 0 + # @type: integer + dogstatsdPort: 0 + # Configures datadog [autodiscovery](https://docs.datadoghq.com/containers/kubernetes/log/?tab=operator#autodiscovery) + # style [log integration](https://docs.datadoghq.com/integrations/consul/?tab=containerized#log-collection) + # configuration for Consul. + # + # The default settings should handle most Consul Kubernetes deployment schemes. The resultant annotation + # will reside on the consul-server statefulset as autodiscovery annotations. + # (i.e., ad.datadoghq.com/consul.logs: ["source:consul","consul_service:consul-server", ""]) + # + # @default: ["source:consul","consul_service:consul-server"] + # @type: array + dogstatsdTags: ["source:consul","consul_service:consul-server"] + # Namespace + # + # @default: "default" + # @type: string + namespace: "default" + + # The name (and tag) of the consul-dataplane Docker image used for the # connect-injected sidecar proxies and mesh, terminating, and ingress gateways. # @default: hashicorp/consul-dataplane: @@ -890,6 +1042,12 @@ server: # by setting the `server.extraConfig` value or by applying [configuration entries](https://developer.hashicorp.com/consul/docs/connect/config-entries). connect: true + # When set to true, enables Consul to report additional debugging information, including runtime profiling (pprof) data. + # This setting is only required for clusters without ACL enabled. Sets `enable_debug` in server agent config to `true`. + # If you change this setting, you must restart the agent for the change to take effect. Default is false. 
+ # @type: boolean + enableAgentDebug: false + serviceAccount: # This value defines additional annotations for the server service account. This should be formatted as a multi-line # string. diff --git a/control-plane/subcommand/common/common.go b/control-plane/subcommand/common/common.go index 1636c0b10e..37ef5bcafe 100644 --- a/control-plane/subcommand/common/common.go +++ b/control-plane/subcommand/common/common.go @@ -27,6 +27,8 @@ const ( // create-federation-secret commands and so lives in this common package. ACLReplicationTokenName = "acl-replication" + DatadogAgentTokenName = "datadog-agent-metrics" + // ACLTokenSecretKey is the key that we store the ACL tokens in when we // create Kubernetes secrets. ACLTokenSecretKey = "token" diff --git a/control-plane/subcommand/server-acl-init/command.go b/control-plane/subcommand/server-acl-init/command.go index 0d162b18b5..9c39d21ead 100644 --- a/control-plane/subcommand/server-acl-init/command.go +++ b/control-plane/subcommand/server-acl-init/command.go @@ -57,6 +57,7 @@ type Command struct { flagBindingRuleSelector string flagCreateEntLicenseToken bool + flagCreateDDAgentToken bool flagSnapshotAgent bool @@ -206,11 +207,15 @@ func (c *Command) init() { c.flags.StringVar((*string)(&c.flagSecretsBackend), "secrets-backend", "kubernetes", `The secrets backend to use. Either "vault" or "kubernetes". Defaults to "kubernetes"`) c.flags.StringVar(&c.flagBootstrapTokenSecretName, "bootstrap-token-secret-name", "", - "The name of the Vault or Kuberenetes secret for the bootstrap token. This token must have `ac::write` permission "+ + "The name of the Vault or Kubernetes secret for the bootstrap token. This token must have `ac::write` permission "+ "in order to create policies and tokens. 
If not provided or if the secret is empty, then this command will "+ "bootstrap ACLs and write the bootstrap token to this secret.") c.flags.StringVar(&c.flagBootstrapTokenSecretKey, "bootstrap-token-secret-key", "", - "The key within the Vault or Kuberenetes secret containing the bootstrap token.") + "The key within the Vault or Kubernetes secret containing the bootstrap token.") + c.flags.BoolVar(&c.flagCreateDDAgentToken, "create-dd-agent-token", false, + "Enable ACL token creation for datadog agent integration"+ + "Configures the following permissions to grant datadog agent metrics scraping permissions with Consul ACLs enabled"+ + "agent_prefix \"\" {\n policy = \"read\"\n}\nservice_prefix \"\" {\n policy = \"read\"\n}\nnode_prefix \"\" {\n policy = \"read\"\n}") c.flags.DurationVar(&c.flagTimeout, "timeout", 10*time.Minute, "How long we'll try to bootstrap ACLs for before timing out, e.g. 1ms, 2s, 3m") @@ -674,6 +679,20 @@ func (c *Command) Run(args []string) int { } } + if c.flagCreateDDAgentToken { + var err error + rules, err := c.datadogAgentRules() + if err != nil { + c.log.Error("Error templating datadog agent metrics token rules", "err", err) + return 1 + } + err = c.createLocalACL(common.DatadogAgentTokenName, rules, consulDC, primary, dynamicClient) + if err != nil { + c.log.Error(err.Error()) + return 1 + } + } + c.log.Info("server-acl-init completed successfully") return 0 } diff --git a/control-plane/subcommand/server-acl-init/command_test.go b/control-plane/subcommand/server-acl-init/command_test.go index d974e370c5..7fac051265 100644 --- a/control-plane/subcommand/server-acl-init/command_test.go +++ b/control-plane/subcommand/server-acl-init/command_test.go @@ -175,6 +175,14 @@ func TestRun_TokensPrimaryDC(t *testing.T) { SecretNames: []string{resourcePrefix + "-acl-replication-acl-token"}, LocalToken: false, }, + { + TestName: "Datadog Agent Token", + TokenFlags: []string{"-create-dd-agent-token"}, + PolicyNames: 
[]string{"datadog-agent-metrics-token"}, + PolicyDCs: []string{"dc1"}, + SecretNames: []string{resourcePrefix + "-datadog-agent-metrics-acl-token"}, + LocalToken: true, + }, } for _, c := range cases { t.Run(c.TestName, func(t *testing.T) { @@ -324,6 +332,14 @@ func TestRun_TokensReplicatedDC(t *testing.T) { SecretNames: []string{resourcePrefix + "-enterprise-license-acl-token"}, LocalToken: true, }, + { + TestName: "Datadog Agent Token", + TokenFlags: []string{"-create-dd-agent-token"}, + PolicyNames: []string{"datadog-agent-metrics-token-dc2"}, + PolicyDCs: []string{"dc2"}, + SecretNames: []string{resourcePrefix + "-datadog-agent-metrics-acl-token"}, + LocalToken: true, + }, } for _, c := range cases { t.Run(c.TestName, func(t *testing.T) { @@ -400,6 +416,12 @@ func TestRun_TokensWithProvidedBootstrapToken(t *testing.T) { PolicyNames: []string{"acl-replication-token"}, SecretNames: []string{resourcePrefix + "-acl-replication-acl-token"}, }, + { + TestName: "Datadog Agent Token", + TokenFlags: []string{"-create-dd-agent-token"}, + PolicyNames: []string{"datadog-agent-metrics-token"}, + SecretNames: []string{resourcePrefix + "-datadog-agent-metrics-acl-token"}, + }, } for _, c := range cases { t.Run(c.TestName, func(t *testing.T) { diff --git a/control-plane/subcommand/server-acl-init/rules.go b/control-plane/subcommand/server-acl-init/rules.go index 5f65b6c75c..015090d997 100644 --- a/control-plane/subcommand/server-acl-init/rules.go +++ b/control-plane/subcommand/server-acl-init/rules.go @@ -397,6 +397,32 @@ partition "default" { return c.renderRules(aclReplicationRulesTpl) } +func (c *Command) datadogAgentRules() (string, error) { + ddAgentRulesTpl := `{{- if .EnablePartitions }} +partition "{{ .PartitionName }}" { +{{- end }} + agent_prefix "" { + policy = "read" + } + node_prefix "" { + policy = "read" + } +{{- if .EnableNamespaces }} + namespace_prefix "" { +{{- end }} + service_prefix "" { + policy = "read" + } +{{- if .EnableNamespaces }} + } +{{- end }} 
+{{- if .EnablePartitions }} +} +{{- end }} +` + return c.renderRules(ddAgentRulesTpl) +} + func (c *Command) rulesData() rulesData { return rulesData{ EnablePartitions: c.consulFlags.Partition != "",