From a8280cf9ad6dedc7dbe1181459a9b4c522e8e269 Mon Sep 17 00:00:00 2001 From: natemollica-dev <57850649+natemollica-nm@users.noreply.github.com> Date: Fri, 19 Apr 2024 09:07:07 -0700 Subject: [PATCH 1/2] Datadog Integration Acceptance Tests / Bug fixes (#3685) * datadog: acceptance tests - initial commit (not fully working yet) * server-statefulset: update logic for prometheus annotations (only enabled if using dogstatsd, otherwise disabled) * datadog: acceptance test working with dd-client api and operator deployment framework * datadog-acceptance: main branch rebase merge conflict cherry-pick * datadog: acceptance testing update to metric name matching using regex * datadog: acceptance testing helper update for backoff retry * datadog: acceptance testing working timeseries query verification udp + uds * datadog: update helpers for /v1/query * server-statefulset.yaml: update to correct release name prepend to consul-server URL * datadog: acceptance testing consul integration checks working * server-statefulset: yaml and bats updates for datadog openmetrics and consul integration check URLs to use consul.fullname-server * PR3685: changelog update * datadog: openmetrics acceptance test update * datadog: added OTEL_EXPORTER_OTLP_ENDPOINT to consul telemetry collector deployment for dd-agent ingestion (passes tag info to DD) * otlp: datadog otlp acceptance test updates for telemetry-collector (grpc => http prefix) | staged otlp acceptance test * datadog-acceptance: fake-intake fixture addition * datadog-acceptance: update _helpers.tpl for consul version sanitization (truncate to <64) * datadog-acceptance: update base fixture for fake-intake * datadog-acceptance: add DogstatsD stats enablement (required for curling agent local endpoint) * datadog-acceptance: add DogstatsD stats enablement (required for curling agent local endpoint) * datadog-acceptance: first-round fake-intake testing - works but is inaccurate * datadog-acceptance: datadog framework - remove dd client 
agent requirement (fake-intake) * datadog-acceptance: update flags to not require API and APP key (fake-intake) * datadog-acceptance: go mod updates for uuid downgrade * acceptance-test: remove otlp acceptance test -- no fake-intake or agent endpoint to verify * datadog-acceptance: acceptance test lint fixes * acceptance-test: update control-plane/cni/main.go l:272 comment with period for lint testing. * acceptance-test: retry lint fixes * acceptance-test: correct telemetry collector URL from grpc:// to http:// --- .changelog/3685.txt | 6 + acceptance/framework/config/config.go | 3 + acceptance/framework/consul/helm_cluster.go | 1 - acceptance/framework/datadog/datadog.go | 190 ++++++++ acceptance/framework/flags/flags.go | 9 +- acceptance/tests/datadog/datadog_test.go | 420 ++++++++++++++++++ acceptance/tests/datadog/main_test.go | 16 + .../tests/fixtures/bases/datadog/datadog.yaml | 69 +++ .../fixtures/bases/datadog/fake-intake.yaml | 57 +++ .../fixtures/bases/datadog/kustomization.yaml | 4 + .../datadog-dogstatsd-udp/kustomization.yaml | 8 + .../cases/datadog-dogstatsd-udp/patch.yaml | 55 +++ .../datadog-dogstatsd-uds/kustomization.yaml | 8 + .../cases/datadog-dogstatsd-uds/patch.yaml | 62 +++ .../datadog-openmetrics/kustomization.yaml | 8 + .../cases/datadog-openmetrics/patch.yaml | 9 + .../datadog-otlp-grpc/kustomization.yaml | 8 + .../cases/datadog-otlp-grpc/patch.yaml | 16 + .../cases/datadog-otlp/kustomization.yaml | 8 + .../fixtures/cases/datadog-otlp/patch.yaml | 16 + charts/consul/templates/_helpers.tpl | 2 +- .../consul/templates/server-statefulset.yaml | 10 +- .../telemetry-collector-deployment.yaml | 6 +- .../consul/test/unit/server-statefulset.bats | 10 +- .../unit/telemetry-collector-deployment.bats | 4 +- control-plane/cni/main.go | 2 +- 26 files changed, 989 insertions(+), 18 deletions(-) create mode 100644 .changelog/3685.txt create mode 100644 acceptance/framework/datadog/datadog.go create mode 100644 acceptance/tests/datadog/datadog_test.go 
create mode 100644 acceptance/tests/datadog/main_test.go create mode 100644 acceptance/tests/fixtures/bases/datadog/datadog.yaml create mode 100644 acceptance/tests/fixtures/bases/datadog/fake-intake.yaml create mode 100644 acceptance/tests/fixtures/bases/datadog/kustomization.yaml create mode 100644 acceptance/tests/fixtures/cases/datadog-dogstatsd-udp/kustomization.yaml create mode 100644 acceptance/tests/fixtures/cases/datadog-dogstatsd-udp/patch.yaml create mode 100644 acceptance/tests/fixtures/cases/datadog-dogstatsd-uds/kustomization.yaml create mode 100644 acceptance/tests/fixtures/cases/datadog-dogstatsd-uds/patch.yaml create mode 100644 acceptance/tests/fixtures/cases/datadog-openmetrics/kustomization.yaml create mode 100644 acceptance/tests/fixtures/cases/datadog-openmetrics/patch.yaml create mode 100644 acceptance/tests/fixtures/cases/datadog-otlp-grpc/kustomization.yaml create mode 100644 acceptance/tests/fixtures/cases/datadog-otlp-grpc/patch.yaml create mode 100644 acceptance/tests/fixtures/cases/datadog-otlp/kustomization.yaml create mode 100644 acceptance/tests/fixtures/cases/datadog-otlp/patch.yaml diff --git a/.changelog/3685.txt b/.changelog/3685.txt new file mode 100644 index 0000000000..05241d820d --- /dev/null +++ b/.changelog/3685.txt @@ -0,0 +1,6 @@ +```release-note:bug +helm: corrected datadog openmetrics and consul-checks consul server URLs set during automation to use full consul deployment release name +``` +```release-note:bug +helm: bug fix for `prometheus.io` annotation omission while using datadog integration with openmetrics/prometheus and consul integration checks +``` \ No newline at end of file diff --git a/acceptance/framework/config/config.go b/acceptance/framework/config/config.go index 4f9a8648c2..370e276bc7 100644 --- a/acceptance/framework/config/config.go +++ b/acceptance/framework/config/config.go @@ -70,6 +70,9 @@ type TestConfig struct { EnableEnterprise bool EnterpriseLicense string + SkipDataDogTests bool + 
DatadogHelmChartVersion string + EnableOpenshift bool EnablePodSecurityPolicies bool diff --git a/acceptance/framework/consul/helm_cluster.go b/acceptance/framework/consul/helm_cluster.go index fafaceaca1..46ed501162 100644 --- a/acceptance/framework/consul/helm_cluster.go +++ b/acceptance/framework/consul/helm_cluster.go @@ -158,7 +158,6 @@ func (h *HelmCluster) Create(t *testing.T) { if h.ChartPath != "" { chartName = h.ChartPath } - // Retry the install in case previous tests have not finished cleaning up. retry.RunWith(&retry.Counter{Wait: 2 * time.Second, Count: 30}, t, func(r *retry.R) { err := helm.InstallE(r, h.helmOptions, chartName, h.releaseName) diff --git a/acceptance/framework/datadog/datadog.go b/acceptance/framework/datadog/datadog.go new file mode 100644 index 0000000000..49f6e53af0 --- /dev/null +++ b/acceptance/framework/datadog/datadog.go @@ -0,0 +1,190 @@ +package datadog + +import ( + "context" + "fmt" + "github.com/hashicorp/consul-k8s/acceptance/framework/k8s" + "github.com/hashicorp/consul/sdk/testutil/retry" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "testing" + "time" + + "github.com/hashicorp/consul-k8s/acceptance/framework/config" + "github.com/hashicorp/consul-k8s/acceptance/framework/helpers" + "github.com/hashicorp/consul-k8s/acceptance/framework/logger" + + "github.com/gruntwork-io/terratest/modules/helm" + terratestk8s "github.com/gruntwork-io/terratest/modules/k8s" + terratestLogger "github.com/gruntwork-io/terratest/modules/logger" + "github.com/hashicorp/consul-k8s/acceptance/framework/environment" + "k8s.io/client-go/kubernetes" +) + +const ( + releaseLabel = "app.kubernetes.io/name" + OperatorReleaseName = "datadog-operator" + DefaultHelmChartVersion = "1.4.0" + datadogSecretName = "datadog-secret" + datadogAPIKey = "api-key" + datadogAppKey = "app-key" + datadogFakeAPIKey = "DD_FAKEAPIKEY" + datadogFakeAPPKey = 
"DD_FAKEAPPKEY" +) + +type DatadogCluster struct { + ctx environment.TestContext + + helmOptions *helm.Options + releaseName string + + kubectlOptions *terratestk8s.KubectlOptions + + kubernetesClient kubernetes.Interface + + noCleanupOnFailure bool + noCleanup bool + debugDirectory string + logger terratestLogger.TestLogger +} + +// releaseLabelSelector returns label selector that selects all pods +// from a Datadog installation. +func (d *DatadogCluster) releaseLabelSelector() string { + return fmt.Sprintf("%s=%s", releaseLabel, d.releaseName) +} + +func NewDatadogCluster(t *testing.T, ctx environment.TestContext, cfg *config.TestConfig, releaseName string, releaseNamespace string, helmValues map[string]string) *DatadogCluster { + logger := terratestLogger.New(logger.TestLogger{}) + + configureNamespace(t, ctx.KubernetesClient(t), cfg, releaseNamespace) + + createOrUpdateDatadogSecret(t, ctx.KubernetesClient(t), cfg, releaseNamespace) + + kopts := ctx.KubectlOptionsForNamespace(releaseNamespace) + + values := defaultHelmValues() + + ddHelmChartVersion := DefaultHelmChartVersion + if cfg.DatadogHelmChartVersion != "" { + ddHelmChartVersion = cfg.DatadogHelmChartVersion + } + + helpers.MergeMaps(values, helmValues) + datadogHelmOpts := &helm.Options{ + SetValues: values, + KubectlOptions: kopts, + Logger: logger, + Version: ddHelmChartVersion, + } + + helm.AddRepo(t, datadogHelmOpts, "datadog", "https://helm.datadoghq.com") + // Ignoring the error from `helm repo update` as it could fail due to stale cache or unreachable servers and we're + // asserting a chart version on Install which would fail in an obvious way should this not succeed. 
+ _, err := helm.RunHelmCommandAndGetOutputE(t, &helm.Options{}, "repo", "update") + if err != nil { + logger.Logf(t, "Unable to update helm repository, proceeding anyway: %s.", err) + } + + return &DatadogCluster{ + ctx: ctx, + helmOptions: datadogHelmOpts, + kubectlOptions: kopts, + kubernetesClient: ctx.KubernetesClient(t), + noCleanupOnFailure: cfg.NoCleanupOnFailure, + noCleanup: cfg.NoCleanup, + debugDirectory: cfg.DebugDirectory, + logger: logger, + releaseName: releaseName, + } +} + +func (d *DatadogCluster) Create(t *testing.T) { + t.Helper() + + helpers.Cleanup(t, d.noCleanupOnFailure, d.noCleanup, func() { + d.Destroy(t) + }) + + helm.Install(t, d.helmOptions, "datadog/datadog-operator", d.releaseName) + // Wait for the datadog-operator to become ready + k8s.WaitForAllPodsToBeReady(t, d.kubernetesClient, d.helmOptions.KubectlOptions.Namespace, d.releaseLabelSelector()) +} + +func (d *DatadogCluster) Destroy(t *testing.T) { + t.Helper() + + k8s.WritePodsDebugInfoIfFailed(t, d.kubectlOptions, d.debugDirectory, d.releaseLabelSelector()) + // Ignore the error returned by the helm delete here so that we can + // always idempotent clean up resources in the cluster. 
+ _ = helm.DeleteE(t, d.helmOptions, d.releaseName, true) +} + +func defaultHelmValues() map[string]string { + return map[string]string{ + "replicaCount": "1", + "image.tag": DefaultHelmChartVersion, + "image.repository": "gcr.io/datadoghq/operator", + } +} + +func configureNamespace(t *testing.T, client kubernetes.Interface, cfg *config.TestConfig, namespace string) { + ctx := context.Background() + + ns := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: namespace, + Labels: map[string]string{}, + }, + } + if cfg.EnableRestrictedPSAEnforcement { + ns.ObjectMeta.Labels["pod-security.kubernetes.io/enforce"] = "restricted" + ns.ObjectMeta.Labels["pod-security.kubernetes.io/enforce-version"] = "latest" + } + + _, createErr := client.CoreV1().Namespaces().Create(ctx, ns, metav1.CreateOptions{}) + if createErr == nil { + logger.Logf(t, "Created namespace %s", namespace) + return + } + + _, updateErr := client.CoreV1().Namespaces().Update(ctx, ns, metav1.UpdateOptions{}) + if updateErr == nil { + logger.Logf(t, "Updated namespace %s", namespace) + return + } + + require.Failf(t, "Failed to create or update namespace", "Namespace=%s, CreateError=%s, UpdateError=%s", namespace, createErr, updateErr) +} + +func createOrUpdateDatadogSecret(t *testing.T, client kubernetes.Interface, cfg *config.TestConfig, namespace string) { + secretMap := map[string]string{ + datadogAPIKey: datadogFakeAPIKey, + datadogAppKey: datadogFakeAPPKey, + } + createMultiKeyK8sSecret(t, client, cfg, namespace, datadogSecretName, secretMap) +} + +func createMultiKeyK8sSecret(t *testing.T, client kubernetes.Interface, cfg *config.TestConfig, namespace, secretName string, secretMap map[string]string) { + retry.RunWith(&retry.Counter{Wait: 2 * time.Second, Count: 15}, t, func(r *retry.R) { + _, err := client.CoreV1().Secrets(namespace).Get(context.Background(), secretName, metav1.GetOptions{}) + if errors.IsNotFound(err) { + _, err := 
client.CoreV1().Secrets(namespace).Create(context.Background(), &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + }, + StringData: secretMap, + Type: corev1.SecretTypeOpaque, + }, metav1.CreateOptions{}) + require.NoError(r, err) + } else { + require.NoError(r, err) + } + }) + + helpers.Cleanup(t, cfg.NoCleanupOnFailure, cfg.NoCleanup, func() { + _ = client.CoreV1().Secrets(namespace).Delete(context.Background(), secretName, metav1.DeleteOptions{}) + }) +} diff --git a/acceptance/framework/flags/flags.go b/acceptance/framework/flags/flags.go index c68983fe8c..c956c3f7e3 100644 --- a/acceptance/framework/flags/flags.go +++ b/acceptance/framework/flags/flags.go @@ -25,6 +25,8 @@ type TestFlags struct { flagEnableOpenshift bool + flagSkipDatadogTests bool + flagEnablePodSecurityPolicies bool flagEnableCNI bool @@ -155,6 +157,9 @@ func (t *TestFlags) init() { flag.BoolVar(&t.flagDisablePeering, "disable-peering", false, "If true, the peering tests will not run.") + flag.BoolVar(&t.flagSkipDatadogTests, "skip-datadog", false, + "If true, datadog acceptance tests will not run.") + if t.flagEnterpriseLicense == "" { t.flagEnterpriseLicense = os.Getenv("CONSUL_ENT_LICENSE") } @@ -198,11 +203,9 @@ func (t *TestFlags) TestConfigFromFlags() *config.TestConfig { // if the Version is empty consulVersion will be nil consulVersion, _ := version.NewVersion(t.flagConsulVersion) consulDataplaneVersion, _ := version.NewVersion(t.flagConsulDataplaneVersion) - //vaultserverVersion, _ := version.NewVersion(t.flagVaultServerVersion) kubeEnvs := config.NewKubeTestConfigList(t.flagKubeconfigs, t.flagKubecontexts, t.flagKubeNamespaces) c := &config.TestConfig{ - EnableEnterprise: t.flagEnableEnterprise, EnterpriseLicense: t.flagEnterpriseLicense, @@ -211,6 +214,8 @@ func (t *TestFlags) TestConfigFromFlags() *config.TestConfig { EnableOpenshift: t.flagEnableOpenshift, + SkipDataDogTests: t.flagSkipDatadogTests, + EnablePodSecurityPolicies: 
t.flagEnablePodSecurityPolicies, EnableCNI: t.flagEnableCNI, diff --git a/acceptance/tests/datadog/datadog_test.go b/acceptance/tests/datadog/datadog_test.go new file mode 100644 index 0000000000..aa6be4f360 --- /dev/null +++ b/acceptance/tests/datadog/datadog_test.go @@ -0,0 +1,420 @@ +package datadog + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "testing" + "time" + + "github.com/hashicorp/consul-k8s/acceptance/framework/consul" + "github.com/hashicorp/consul-k8s/acceptance/framework/datadog" + "github.com/hashicorp/consul-k8s/acceptance/framework/helpers" + "github.com/hashicorp/consul-k8s/acceptance/framework/k8s" + "github.com/hashicorp/consul-k8s/acceptance/framework/logger" + "github.com/hashicorp/consul/sdk/testutil/retry" + "github.com/stretchr/testify/require" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + consulDogstatsDMetricQuery = "consul.memberlist.gossip" +) + +// TODO: Refactor test cases into single function that accepts Helm Values, Fixture Name, and Validation Callback +// TestDatadogDogstatsDUnixDomainSocket +// Acceptance test to verify e2e metrics configuration works as expected +// with live datadog API using histogram formatted metric +// +// Method: DogstatsD + Unix Domain Socket. 
+func TestDatadogDogstatsDUnixDomainSocket(t *testing.T) { + env := suite.Environment() + cfg := suite.Config() + ctx := env.DefaultContext(t) + + if cfg.SkipDataDogTests { + t.Skipf("skipping this test because -skip-datadog is set") + } + + acceptanceTestingTags := "acceptance_test:unix_domain_sockets" + helmValues := map[string]string{ + "global.datacenter": "dc1", + "global.metrics.enabled": "true", + "global.metrics.enableAgentMetrics": "true", + "global.metrics.disableAgentHostName": "true", + "global.metrics.enableHostMetrics": "true", + "global.metrics.datadog.enabled": "true", + "global.metrics.datadog.namespace": "datadog", + "global.metrics.datadog.dogstatsd.enabled": "true", + "global.metrics.datadog.dogstatsd.socketTransportType": "UDS", + "global.metrics.datadog.dogstatsd.dogstatsdTags[0]": "source:consul", + "global.metrics.datadog.dogstatsd.dogstatsdTags[1]": "consul_service:consul-server", + "global.metrics.datadog.dogstatsd.dogstatsdTags[2]": acceptanceTestingTags, + } + + releaseName := helpers.RandomName() + datadogOperatorRelease := datadog.OperatorReleaseName + + // Install the consul cluster in the default kubernetes ctx. 
+ consulCluster := consul.NewHelmCluster(t, helmValues, ctx, cfg, releaseName) + consulCluster.Create(t) + + // Deploy Datadog Agent via Datadog Operator and apply dogstatsd overlay + datadogNamespace := helmValues["global.metrics.datadog.namespace"] + logger.Log(t, fmt.Sprintf("deploying datadog-operator via helm | namespace: %s | release-name: %s", datadogNamespace, datadogOperatorRelease)) + datadogCluster := datadog.NewDatadogCluster(t, ctx, cfg, datadogOperatorRelease, datadogNamespace, map[string]string{}) + datadogCluster.Create(t) + + logger.Log(t, fmt.Sprintf("applying dogstatd over unix domain sockets kustomization patch to datadog-agent | namespace: %s", datadogNamespace)) + k8s.DeployKustomize(t, ctx.KubectlOptionsForNamespace(datadogNamespace), cfg.NoCleanupOnFailure, cfg.NoCleanup, cfg.DebugDirectory, "../fixtures/cases/datadog-dogstatsd-uds") + k8s.WaitForAllPodsToBeReady(t, ctx.KubernetesClient(t), datadogNamespace, "agent.datadoghq.com/component=agent") + + // Retrieve datadog-agent pod name for exec + podList, err := ctx.KubernetesClient(t).CoreV1().Pods(datadogNamespace).List(context.Background(), metav1.ListOptions{LabelSelector: "agent.datadoghq.com/component=agent"}) + require.NoError(t, err) + require.Len(t, podList.Items, 1) + ddAgentName := podList.Items[0].Name + + // Check the dogstats-stats of the local cluster agent to see if consul metrics + // are being seen by the agent + logger.Log(t, fmt.Sprintf("retrieving datadog-agent control api auth token from pod %s", ddAgentName)) + bearerToken, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptionsForNamespace(datadogNamespace), "exec", "pod/"+ddAgentName, "-c", "agent", "--", "cat", "/etc/datadog-agent/auth_token") + require.NoError(t, err) + // Retry because sometimes the merged metrics server takes a couple hundred milliseconds + // to start. 
+ logger.Log(t, fmt.Sprintf("scraping datadog-agent /agent/dogstatsd-stats endpoint for %s | auth-token: %s", consulDogstatsDMetricQuery, bearerToken)) + retry.RunWith(&retry.Counter{Count: 20, Wait: 2 * time.Second}, t, func(r *retry.R) { + metricsOutput, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptionsForNamespace(datadogNamespace), "exec", "pod/"+ddAgentName, "-c", "agent", "--", "curl", "--silent", "--insecure", "--show-error", "--header", fmt.Sprintf("authorization: Bearer %s", bearerToken), "https://localhost:5001/agent/dogstatsd-stats") + require.NoError(r, err) + require.Contains(r, metricsOutput, consulDogstatsDMetricQuery) + }) +} + +// TestDatadogDogstatsDUDP +// Acceptance test to verify e2e metrics configuration works as expected +// with live datadog API using histogram formatted metric +// +// Method: DogstatsD + UDP to Kube Service DNS name on port 8125. +func TestDatadogDogstatsDUDP(t *testing.T) { + env := suite.Environment() + cfg := suite.Config() + ctx := env.DefaultContext(t) + + if cfg.SkipDataDogTests { + t.Skipf("skipping this test because -skip-datadog is set") + } + + acceptanceTestingTags := "acceptance_test:dogstatsd_udp" + helmValues := map[string]string{ + "global.datacenter": "dc1", + "global.metrics.enabled": "true", + "global.metrics.enableAgentMetrics": "true", + "global.metrics.disableAgentHostName": "true", + "global.metrics.enableHostMetrics": "true", + "global.metrics.datadog.enabled": "true", + "global.metrics.datadog.namespace": "datadog", + "global.metrics.datadog.dogstatsd.enabled": "true", + "global.metrics.datadog.dogstatsd.socketTransportType": "UDP", + "global.metrics.datadog.dogstatsd.dogstatsdAddr": "datadog-agent.datadog.svc.cluster.local", + "global.metrics.datadog.dogstatsd.dogstatsdTags[0]": "source:consul", + "global.metrics.datadog.dogstatsd.dogstatsdTags[1]": "consul_service:consul-server", + "global.metrics.datadog.dogstatsd.dogstatsdTags[2]": acceptanceTestingTags, + } + + releaseName := 
helpers.RandomName() + datadogOperatorRelease := datadog.OperatorReleaseName + + // Install the consul cluster in the default kubernetes ctx. + consulCluster := consul.NewHelmCluster(t, helmValues, ctx, cfg, releaseName) + consulCluster.Create(t) + + // Deploy Datadog Agent via Datadog Operator and apply dogstatsd overlay. + datadogNamespace := helmValues["global.metrics.datadog.namespace"] + logger.Log(t, fmt.Sprintf("deploying datadog-operator via helm | namespace: %s | release-name: %s", datadogNamespace, datadogOperatorRelease)) + datadogCluster := datadog.NewDatadogCluster(t, ctx, cfg, datadogOperatorRelease, datadogNamespace, map[string]string{}) + datadogCluster.Create(t) + + logger.Log(t, fmt.Sprintf("applying dogstatd over UDP kustomization patch to datadog-agent | namespace: %s", datadogNamespace)) + k8s.DeployKustomize(t, ctx.KubectlOptionsForNamespace(datadogNamespace), cfg.NoCleanupOnFailure, cfg.NoCleanup, cfg.DebugDirectory, "../fixtures/cases/datadog-dogstatsd-udp") + k8s.WaitForAllPodsToBeReady(t, ctx.KubernetesClient(t), datadogNamespace, "agent.datadoghq.com/component=agent") + + // Retrieve datadog-agent pod name for exec + podList, err := ctx.KubernetesClient(t).CoreV1().Pods(datadogNamespace).List(context.Background(), metav1.ListOptions{LabelSelector: "agent.datadoghq.com/component=agent"}) + require.NoError(t, err) + require.Len(t, podList.Items, 1) + ddAgentName := podList.Items[0].Name + + // Check the dogstats-stats of the local cluster agent to see if consul metrics + // are being seen by the agent + logger.Log(t, fmt.Sprintf("retrieving datadog-agent control api auth token from pod %s", ddAgentName)) + bearerToken, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptionsForNamespace(datadogNamespace), "exec", "pod/"+ddAgentName, "-c", "agent", "--", "cat", "/etc/datadog-agent/auth_token") + require.NoError(t, err) + // Retry because sometimes the merged metrics server takes a couple hundred milliseconds + // to start. 
+ logger.Log(t, fmt.Sprintf("scraping datadog-agent /agent/dogstatsd-stats endpoint for %s | auth-token: %s", consulDogstatsDMetricQuery, bearerToken)) + retry.RunWith(&retry.Counter{Count: 20, Wait: 2 * time.Second}, t, func(r *retry.R) { + metricsOutput, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptionsForNamespace(datadogNamespace), "exec", "pod/"+ddAgentName, "-c", "agent", "--", "curl", "--silent", "--insecure", "--show-error", "--header", fmt.Sprintf("authorization: Bearer %s", bearerToken), "https://localhost:5001/agent/dogstatsd-stats") + require.NoError(r, err) + require.Contains(r, metricsOutput, consulDogstatsDMetricQuery) + }) +} + +// TestDatadogConsulChecks +// Acceptance test to verify e2e metrics configuration works as expected +// with live datadog API using histogram formatted metric +// +// Method: Consul Integrated Datadog Checks. +func TestDatadogConsulChecks(t *testing.T) { + env := suite.Environment() + cfg := suite.Config() + ctx := env.DefaultContext(t) + + if cfg.SkipDataDogTests { + t.Skipf("skipping this test because -skip-datadog is set") + } + + helmValues := map[string]string{ + "global.datacenter": "dc1", + "global.metrics.enabled": "true", + "global.metrics.enableAgentMetrics": "true", + "global.metrics.disableAgentHostName": "true", + "global.metrics.enableHostMetrics": "true", + "global.metrics.datadog.enabled": "true", + "global.metrics.datadog.namespace": "datadog", + } + + releaseName := helpers.RandomName() + datadogOperatorRelease := datadog.OperatorReleaseName + + // Install the consul cluster in the default kubernetes ctx. + consulCluster := consul.NewHelmCluster(t, helmValues, ctx, cfg, releaseName) + consulCluster.Create(t) + + // Deploy Datadog Agent via Datadog Operator and apply dogstatsd overlay. 
+ datadogNamespace := helmValues["global.metrics.datadog.namespace"] + logger.Log(t, fmt.Sprintf("deploying datadog-operator via helm | namespace: %s | release-name: %s", datadogNamespace, datadogOperatorRelease)) + datadogCluster := datadog.NewDatadogCluster(t, ctx, cfg, datadogOperatorRelease, datadogNamespace, map[string]string{}) + datadogCluster.Create(t) + + logger.Log(t, fmt.Sprintf("applying datadog consul integration patch to datadog-agent | namespace: %s", datadogNamespace)) + k8s.DeployKustomize(t, ctx.KubectlOptionsForNamespace(datadogNamespace), cfg.NoCleanupOnFailure, cfg.NoCleanup, cfg.DebugDirectory, "../fixtures/bases/datadog") + k8s.WaitForAllPodsToBeReady(t, ctx.KubernetesClient(t), datadogNamespace, "agent.datadoghq.com/component=agent") + + // Retrieve datadog-agent pod name for exec + podList, err := ctx.KubernetesClient(t).CoreV1().Pods(datadogNamespace).List(context.Background(), metav1.ListOptions{LabelSelector: "agent.datadoghq.com/component=agent"}) + require.NoError(t, err) + require.Len(t, podList.Items, 1) + ddAgentName := podList.Items[0].Name + + // Check the dogstats-stats of the local cluster agent to see if consul metrics + // are being seen by the agent + logger.Log(t, fmt.Sprintf("retrieving datadog-agent control api auth token from pod %s", ddAgentName)) + bearerToken, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptionsForNamespace(datadogNamespace), "exec", "pod/"+ddAgentName, "-c", "agent", "--", "cat", "/etc/datadog-agent/auth_token") + // Retry because sometimes the merged metrics server takes a couple hundred milliseconds + // to start. 
+ logger.Log(t, fmt.Sprintf("scraping datadog-agent /agent/status endpoint | auth-token: %s", bearerToken)) + var metricsOutput string + retry.RunWith(&retry.Counter{Count: 20, Wait: 2 * time.Second}, t, func(r *retry.R) { + metricsOutput, err = k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptionsForNamespace(datadogNamespace), "exec", "pod/"+ddAgentName, "-c", "agent", "--", "curl", "--silent", "--insecure", "--show-error", "--header", fmt.Sprintf("authorization: Bearer %s", bearerToken), "https://localhost:5001/agent/status") + require.NoError(r, err) + }) + var root Root + err = json.Unmarshal([]byte(metricsOutput), &root) + require.NoError(t, err) + for _, check := range root.RunnerStats.Checks.Consul { + require.Equal(t, ``, check.LastError) + } +} + +// TestDatadogOpenmetrics +// Acceptance test to verify e2e metrics configuration works as expected +// with live datadog API using histogram formatted metric +// +// Method: Datadog Openmetrics Prometheus Metrics Collection. +func TestDatadogOpenmetrics(t *testing.T) { + env := suite.Environment() + cfg := suite.Config() + ctx := env.DefaultContext(t) + + if cfg.SkipDataDogTests { + t.Skipf("skipping this test because -skip-datadog is set") + } + + helmValues := map[string]string{ + "global.datacenter": "dc1", + "global.metrics.enabled": "true", + "global.metrics.enableAgentMetrics": "true", + "global.metrics.disableAgentHostName": "true", + "global.metrics.enableHostMetrics": "true", + "global.metrics.datadog.enabled": "true", + "global.metrics.datadog.namespace": "datadog", + "global.metrics.datadog.openMetricsPrometheus.enabled": "true", + } + + releaseName := helpers.RandomName() + datadogOperatorRelease := datadog.OperatorReleaseName + + // Install the consul cluster in the default kubernetes ctx. 
+ consulCluster := consul.NewHelmCluster(t, helmValues, ctx, cfg, releaseName) + consulCluster.Create(t) + + // Deploy Datadog Agent via Datadog Operator and apply dogstatsd overlay + datadogNamespace := helmValues["global.metrics.datadog.namespace"] + logger.Log(t, fmt.Sprintf("deploying datadog-operator via helm | namespace: %s | release-name: %s", datadogNamespace, datadogOperatorRelease)) + datadogCluster := datadog.NewDatadogCluster(t, ctx, cfg, datadogOperatorRelease, datadogNamespace, map[string]string{}) + datadogCluster.Create(t) + + logger.Log(t, fmt.Sprintf("applying datadog openmetrics patch to datadog-agent | namespace: %s", datadogNamespace)) + k8s.DeployKustomize(t, ctx.KubectlOptionsForNamespace(datadogNamespace), cfg.NoCleanupOnFailure, cfg.NoCleanup, cfg.DebugDirectory, "../fixtures/cases/datadog-openmetrics") + k8s.WaitForAllPodsToBeReady(t, ctx.KubernetesClient(t), datadogNamespace, "agent.datadoghq.com/component=agent") + + // Retrieve datadog-agent pod name for exec + podList, err := ctx.KubernetesClient(t).CoreV1().Pods(datadogNamespace).List(context.Background(), metav1.ListOptions{LabelSelector: "agent.datadoghq.com/component=agent"}) + require.NoError(t, err) + require.Len(t, podList.Items, 1) + ddAgentName := podList.Items[0].Name + + // Check the dogstats-stats of the local cluster agent to see if consul metrics + // are being seen by the agent + logger.Log(t, fmt.Sprintf("retrieving datadog-agent control api auth token from pod %s", ddAgentName)) + bearerToken, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptionsForNamespace(datadogNamespace), "exec", "pod/"+ddAgentName, "-c", "agent", "--", "cat", "/etc/datadog-agent/auth_token") + // Retry because sometimes the merged metrics server takes a couple hundred milliseconds + // to start. 
+ logger.Log(t, fmt.Sprintf("scraping datadog-agent /agent/status endpoint | auth-token: %s", bearerToken)) + var metricsOutput string + retry.RunWith(&retry.Counter{Count: 20, Wait: 2 * time.Second}, t, func(r *retry.R) { + metricsOutput, err = k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptionsForNamespace(datadogNamespace), "exec", "pod/"+ddAgentName, "-c", "agent", "--", "curl", "--silent", "--insecure", "--show-error", "--header", fmt.Sprintf("authorization: Bearer %s", bearerToken), "https://localhost:5001/agent/status") + require.NoError(r, err) + }) + var root Root + err = json.Unmarshal([]byte(metricsOutput), &root) + require.NoError(t, err) + for _, check := range root.RunnerStats.Checks.Openmetrics { + if strings.Contains(check.CheckID, "consul") { + require.Equal(t, ``, check.LastError) + break + } + continue + } +} + +// TestDatadogOTLPCollection +// Acceptance test to verify e2e metrics configuration works as expected +// with live datadog API using histogram formatted metric +// +// Method: Datadog otlp metrics collection via consul-telemetry collector using dd-agent gRPC receiver. 
+//func TestDatadogOTLPCollection(t *testing.T) { +// env := suite.Environment() +// cfg := suite.Config() +// ctx := env.DefaultContext(t) +// // ns := ctx.KubectlOptions(t).Namespace +// +// helmValues := map[string]string{ +// "global.datacenter": "dc1", +// "global.metrics.enabled": "true", +// "global.metrics.enableAgentMetrics": "true", +// "global.metrics.disableAgentHostName": "true", +// "global.metrics.enableHostMetrics": "true", +// "global.metrics.datadog.enabled": "true", +// "global.metrics.datadog.namespace": "datadog", +// "global.metrics.datadog.otlp.enabled": "true", +// "global.metrics.datadog.otlp.protocol": "http", +// "telemetryCollector.enabled": "true", +// } +// +// datadogOperatorHelmValues := map[string]string{ +// "replicaCount": "1", +// "image.tag": datadog.DefaultHelmChartVersion, +// "image.repository": "gcr.io/datadoghq/operator", +// } +// +// releaseName := helpers.RandomName() +// datadogOperatorRelease := datadog.OperatorReleaseName +// +// // Install the consul cluster in the default kubernetes ctx. 
+// consulCluster := consul.NewHelmCluster(t, helmValues, ctx, cfg, releaseName) +// consulCluster.Create(t) +// +// // Deploy Datadog Agent via Datadog Operator and apply dogstatsd overlay +// datadogNamespace := helmValues["global.metrics.datadog.namespace"] +// logger.Log(t, fmt.Sprintf("deploying datadog-operator via helm | namespace: %s | release-name: %s", datadogNamespace, datadogOperatorRelease)) +// datadogCluster := datadog.NewDatadogCluster(t, ctx, cfg, datadogOperatorRelease, datadogNamespace, datadogOperatorHelmValues) +// datadogCluster.Create(t) +// +// logger.Log(t, fmt.Sprintf("applying datadog otlp HTTP endpoint collector patch to datadog-agent | namespace: %s", datadogNamespace)) +// k8s.DeployKustomize(t, ctx.KubectlOptionsForNamespace(datadogNamespace), cfg.NoCleanupOnFailure, cfg.NoCleanup, cfg.DebugDirectory, "../fixtures/cases/datadog-otlp") +// k8s.WaitForAllPodsToBeReady(t, ctx.KubernetesClient(t), datadogNamespace, "agent.datadoghq.com/component=agent") +// +// // Retrieve datadog-agent pod name for exec +// podList, err := ctx.KubernetesClient(t).CoreV1().Pods(datadogNamespace).List(context.Background(), metav1.ListOptions{LabelSelector: "agent.datadoghq.com/component=agent"}) +// require.NoError(t, err) +// require.Len(t, podList.Items, 1) +// ddAgentName := podList.Items[0].Name +// +// // Check the dogstats-stats of the local cluster agent to see if consul metrics +// // are being seen by the agent +// bearerToken, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptionsForNamespace(datadogNamespace), "exec", "pod/"+ddAgentName, "-c", "agent", "--", "cat /etc/datadog-agent/auth_token") +// metricsOutput, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptionsForNamespace(datadogNamespace), "exec", "pod/"+ddAgentName, "-c", "agent", "--", "curl", "--silent", "--insecure", "--show-error", "--header", fmt.Sprintf("authorization: Bearer %s", bearerToken), "https://localhost:5001/agent/dogstatsd-stats") +// require.NoError(t, err) +// 
require.Contains(t, metricsOutput, consulOTLPMetricQuery) +//} + +type ConsulCheck struct { + AverageExecutionTime int `json:"AverageExecutionTime"` + CheckConfigSource string `json:"CheckConfigSource"` + CheckID string `json:"CheckID"` + CheckName string `json:"CheckName"` + CheckVersion string `json:"CheckVersion"` + Events int `json:"Events"` + ExecutionTimes []int `json:"ExecutionTimes"` + LastError string `json:"LastError"` + LastExecutionTime int `json:"LastExecutionTime"` + LastSuccessDate int `json:"LastSuccessDate"` + MetricSamples int `json:"MetricSamples"` + ServiceChecks int `json:"ServiceChecks"` + TotalErrors int `json:"TotalErrors"` + TotalEvents int `json:"TotalEvents"` + TotalMetricSamples int `json:"TotalMetricSamples"` + TotalRuns int `json:"TotalRuns"` + TotalServiceChecks int `json:"TotalServiceChecks"` + TotalWarnings int `json:"TotalWarnings"` + UpdateTimestamp int `json:"UpdateTimestamp"` +} + +type OpenmetricsCheck struct { + AverageExecutionTime int `json:"AverageExecutionTime"` + CheckConfigSource string `json:"CheckConfigSource"` + CheckID string `json:"CheckID"` + CheckName string `json:"CheckName"` + CheckVersion string `json:"CheckVersion"` + Events int `json:"Events"` + ExecutionTimes []int `json:"ExecutionTimes"` + LastError string `json:"LastError"` + LastExecutionTime int `json:"LastExecutionTime"` + LastSuccessDate int64 `json:"LastSuccessDate"` + MetricSamples int `json:"MetricSamples"` + ServiceChecks int `json:"ServiceChecks"` + TotalErrors int `json:"TotalErrors"` + TotalEventPlatformEvents map[string]interface{} `json:"TotalEventPlatformEvents"` + TotalEvents int `json:"TotalEvents"` + TotalHistogramBuckets int `json:"TotalHistogramBuckets"` + TotalMetricSamples int `json:"TotalMetricSamples"` + TotalRuns int `json:"TotalRuns"` + TotalServiceChecks int `json:"TotalServiceChecks"` + TotalWarnings int `json:"TotalWarnings"` + UpdateTimestamp int64 `json:"UpdateTimestamp"` +} + +type Checks struct { + Consul 
map[string]ConsulCheck `json:"consul"` + Openmetrics map[string]OpenmetricsCheck `json:"openmetrics"` +} + +type RunnerStats struct { + Checks Checks `json:"Checks"` +} + +type Root struct { + RunnerStats RunnerStats `json:"runnerStats"` +} diff --git a/acceptance/tests/datadog/main_test.go b/acceptance/tests/datadog/main_test.go new file mode 100644 index 0000000000..03033336a0 --- /dev/null +++ b/acceptance/tests/datadog/main_test.go @@ -0,0 +1,16 @@ +package datadog + +import ( + "os" + "testing" + + testsuite "github.com/hashicorp/consul-k8s/acceptance/framework/suite" +) + +var suite testsuite.Suite + +func TestMain(m *testing.M) { + suite = testsuite.NewSuite(m) + os.Exit(suite.Run()) + +} diff --git a/acceptance/tests/fixtures/bases/datadog/datadog.yaml b/acceptance/tests/fixtures/bases/datadog/datadog.yaml new file mode 100644 index 0000000000..f0169640d1 --- /dev/null +++ b/acceptance/tests/fixtures/bases/datadog/datadog.yaml @@ -0,0 +1,69 @@ +# https://github.com/DataDog/datadog-operator/blob/main/docs/configuration.v2alpha1.md +apiVersion: datadoghq.com/v2alpha1 +kind: DatadogAgent +metadata: + name: datadog +spec: + global: + clusterName: dc1 + registry: gcr.io/datadoghq + logLevel: debug + # Site is the Datadog intake site Agent data are sent to. Set to 'datadoghq.com' to + # send data to the US1 site (default). Set to 'datadoghq.eu' to send data to the EU site. + # fake-intake image is datadog spoof site URL used for testing. 
+ # Default: 'datadoghq.com' + site: http://fake-intake.datadog.svc.cluster.local + credentials: + apiSecret: + secretName: datadog-secret + keyName: api-key + appSecret: + secretName: datadog-secret + keyName: app-key + # Requirement for kind cluster as tls verification prevents the agent from + # being able to obtain hostname from hostnameFile + # ref: https://docs.datadoghq.com/agent/troubleshooting/hostname_containers/?tab=operator + kubelet: + tlsVerify: false + features: + dogstatsd: + unixDomainSocketConfig: + enabled: false + hostPortConfig: + enabled: false + clusterChecks: + enabled: false + useClusterChecksRunners: false + admissionController: + enabled: false + mutateUnlabelled: false + apm: + enabled: false + # features.npm.enabled: false + # required as the /etc/passwd rootfs is mounted for this + # see: https://github.com/DataDog/helm-charts/issues/273 + npm: + enabled: false + logCollection: + enabled: false + containerCollectAll: false + # features.processDiscovery.enabled: false + # required as the /etc/passwd rootfs is mounted for this + # see: https://github.com/DataDog/helm-charts/issues/273 + processDiscovery: + enabled: false + # features.liveProcessCollection.enabled: false + # required as the /etc/passwd rootfs is mounted for this + # see: https://github.com/DataDog/helm-charts/issues/273 + liveProcessCollection: + enabled: false + liveContainerCollection: + enabled: false + orchestratorExplorer: + enabled: false + prometheusScrape: + enabled: false + enableServiceEndpoints: false + override: + clusterAgent: + replicas: 0 \ No newline at end of file diff --git a/acceptance/tests/fixtures/bases/datadog/fake-intake.yaml b/acceptance/tests/fixtures/bases/datadog/fake-intake.yaml new file mode 100644 index 0000000000..122125e495 --- /dev/null +++ b/acceptance/tests/fixtures/bases/datadog/fake-intake.yaml @@ -0,0 +1,57 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: fake-intake + namespace: datadog +--- +apiVersion: v1 +kind: 
Service +metadata: + name: fake-intake + namespace: datadog +spec: + selector: + app: fake-intake + ports: + - port: 80 + targetPort: 80 + protocol: TCP + name: http +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: fake-intake + namespace: datadog +spec: + replicas: 1 + selector: + matchLabels: + app: fake-intake + template: + metadata: + name: fake-intake + namespace: datadog + labels: + app: fake-intake + tags.datadoghq.com/env: "dev" + tags.datadoghq.com/service: "fake-intake" + tags.datadoghq.com/version: "latest" + annotations: + 'consul.hashicorp.com/connect-inject': 'false' + 'consul.hashicorp.com/transparent-proxy': 'false' + 'consul.hashicorp.com/enable-metrics-merging': 'false' + 'consul.hashicorp.com/transparent-proxy-overwrite-probes': 'false' + spec: + serviceAccountName: fake-intake + containers: + - name: fake-intake + image: datadog/fakeintake:latest + ports: + - name: http + containerPort: 80 + protocol: TCP + securityContext: + privileged: true + runAsUser: 0 \ No newline at end of file diff --git a/acceptance/tests/fixtures/bases/datadog/kustomization.yaml b/acceptance/tests/fixtures/bases/datadog/kustomization.yaml new file mode 100644 index 0000000000..d67f01fb9f --- /dev/null +++ b/acceptance/tests/fixtures/bases/datadog/kustomization.yaml @@ -0,0 +1,4 @@ + +resources: + - fake-intake.yaml + - datadog.yaml diff --git a/acceptance/tests/fixtures/cases/datadog-dogstatsd-udp/kustomization.yaml b/acceptance/tests/fixtures/cases/datadog-dogstatsd-udp/kustomization.yaml new file mode 100644 index 0000000000..dcfce4e9f8 --- /dev/null +++ b/acceptance/tests/fixtures/cases/datadog-dogstatsd-udp/kustomization.yaml @@ -0,0 +1,8 @@ +# Copyright (c) HashiCorp, Inc. 
+# SPDX-License-Identifier: MPL-2.0 +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ../../bases/datadog +patches: + - path: patch.yaml \ No newline at end of file diff --git a/acceptance/tests/fixtures/cases/datadog-dogstatsd-udp/patch.yaml b/acceptance/tests/fixtures/cases/datadog-dogstatsd-udp/patch.yaml new file mode 100644 index 0000000000..28e6acdfa3 --- /dev/null +++ b/acceptance/tests/fixtures/cases/datadog-dogstatsd-udp/patch.yaml @@ -0,0 +1,55 @@ +apiVersion: datadoghq.com/v2alpha1 +kind: DatadogAgent +metadata: + name: datadog +spec: + features: + dogstatsd: + unixDomainSocketConfig: + enabled: false + hostPortConfig: + enabled: true + hostPort: 8125 + mapperProfiles: + configData: |- + - name: consul + prefix: "consul." + mappings: + - match: 'consul\.raft\.replication\.appendEntries\.logs\.([0-9a-f-]+)' + match_type: "regex" + name: "consul.raft.replication.appendEntries.logs" + tags: + peer_id: "$1" + - match: 'consul\.raft\.replication\.appendEntries\.rpc\.([0-9a-f-]+)' + match_type: "regex" + name: "consul.raft.replication.appendEntries.rpc" + tags: + peer_id: "$1" + - match: 'consul\.raft\.replication\.heartbeat\.([0-9a-f-]+)' + match_type: "regex" + name: "consul.raft.replication.heartbeat" + tags: + peer_id: "$1" + override: + nodeAgent: + annotations: + 'consul.hashicorp.com/connect-inject': 'false' + 'consul.hashicorp.com/transparent-proxy': 'false' + tolerations: + - operator: Exists + env: + - name: DD_HISTOGRAM_PERCENTILES + value: '0.10 0.20 0.30 0.40 0.50 0.60 0.70 0.80 0.90 0.95 0.99' + - name: DD_SECRET_BACKEND_COMMAND + value: /readsecret_multiple_providers.sh + containers: + agent: + env: + - name: DD_DOGSTATSD_METRICS_STATS_ENABLE + value: "true" + - name: DD_OTLP_CONFIG_LOGS_ENABLED + value: "true" + - name: DD_DOGSTATSD_NON_LOCAL_TRAFFIC + value: "true" + - name: DD_USE_V2_API_SERIES + value: "true" \ No newline at end of file diff --git 
a/acceptance/tests/fixtures/cases/datadog-dogstatsd-uds/kustomization.yaml b/acceptance/tests/fixtures/cases/datadog-dogstatsd-uds/kustomization.yaml new file mode 100644 index 0000000000..dcfce4e9f8 --- /dev/null +++ b/acceptance/tests/fixtures/cases/datadog-dogstatsd-uds/kustomization.yaml @@ -0,0 +1,8 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: MPL-2.0 +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ../../bases/datadog +patches: + - path: patch.yaml \ No newline at end of file diff --git a/acceptance/tests/fixtures/cases/datadog-dogstatsd-uds/patch.yaml b/acceptance/tests/fixtures/cases/datadog-dogstatsd-uds/patch.yaml new file mode 100644 index 0000000000..ad555dc68b --- /dev/null +++ b/acceptance/tests/fixtures/cases/datadog-dogstatsd-uds/patch.yaml @@ -0,0 +1,62 @@ +apiVersion: datadoghq.com/v2alpha1 +kind: DatadogAgent +metadata: + name: datadog +spec: + features: + dogstatsd: + unixDomainSocketConfig: + enabled: true + path: "/var/run/datadog/dsd.socket" + hostPortConfig: + enabled: false + mapperProfiles: + configData: |- + - name: consul + prefix: "consul." 
+ mappings: + - match: 'consul\.raft\.replication\.appendEntries\.logs\.([0-9a-f-]+)' + match_type: "regex" + name: "consul.raft.replication.appendEntries.logs" + tags: + peer_id: "$1" + - match: 'consul\.raft\.replication\.appendEntries\.rpc\.([0-9a-f-]+)' + match_type: "regex" + name: "consul.raft.replication.appendEntries.rpc" + tags: + peer_id: "$1" + - match: 'consul\.raft\.replication\.heartbeat\.([0-9a-f-]+)' + match_type: "regex" + name: "consul.raft.replication.heartbeat" + tags: + peer_id: "$1" + override: + nodeAgent: + annotations: + 'consul.hashicorp.com/connect-inject': 'false' + 'consul.hashicorp.com/transparent-proxy': 'false' + volumes: + - hostPath: + path: /var/run/datadog/ + name: dsdsocket + tolerations: + - operator: Exists + env: + - name: DD_HISTOGRAM_PERCENTILES + value: '0.10 0.20 0.30 0.40 0.50 0.60 0.70 0.80 0.90 0.95 0.99' + - name: DD_SECRET_BACKEND_COMMAND + value: /readsecret_multiple_providers.sh + containers: + agent: + env: + - name: DD_DOGSTATSD_METRICS_STATS_ENABLE + value: "true" + - name: DD_OTLP_CONFIG_LOGS_ENABLED + value: "true" + - name: DD_DOGSTATSD_NON_LOCAL_TRAFFIC + value: "true" + - name: DD_USE_V2_API_SERIES + value: "true" + volumeMounts: + - name: dsdsocket + mountPath: /var/run/datadog diff --git a/acceptance/tests/fixtures/cases/datadog-openmetrics/kustomization.yaml b/acceptance/tests/fixtures/cases/datadog-openmetrics/kustomization.yaml new file mode 100644 index 0000000000..dcfce4e9f8 --- /dev/null +++ b/acceptance/tests/fixtures/cases/datadog-openmetrics/kustomization.yaml @@ -0,0 +1,8 @@ +# Copyright (c) HashiCorp, Inc. 
+# SPDX-License-Identifier: MPL-2.0 +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ../../bases/datadog +patches: + - path: patch.yaml \ No newline at end of file diff --git a/acceptance/tests/fixtures/cases/datadog-openmetrics/patch.yaml b/acceptance/tests/fixtures/cases/datadog-openmetrics/patch.yaml new file mode 100644 index 0000000000..fd9d358fb1 --- /dev/null +++ b/acceptance/tests/fixtures/cases/datadog-openmetrics/patch.yaml @@ -0,0 +1,9 @@ +apiVersion: datadoghq.com/v2alpha1 +kind: DatadogAgent +metadata: + name: datadog +spec: + features: + prometheusScrape: + enabled: true + enableServiceEndpoints: true diff --git a/acceptance/tests/fixtures/cases/datadog-otlp-grpc/kustomization.yaml b/acceptance/tests/fixtures/cases/datadog-otlp-grpc/kustomization.yaml new file mode 100644 index 0000000000..dcfce4e9f8 --- /dev/null +++ b/acceptance/tests/fixtures/cases/datadog-otlp-grpc/kustomization.yaml @@ -0,0 +1,8 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: MPL-2.0 +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ../../bases/datadog +patches: + - path: patch.yaml \ No newline at end of file diff --git a/acceptance/tests/fixtures/cases/datadog-otlp-grpc/patch.yaml b/acceptance/tests/fixtures/cases/datadog-otlp-grpc/patch.yaml new file mode 100644 index 0000000000..c550923ea2 --- /dev/null +++ b/acceptance/tests/fixtures/cases/datadog-otlp-grpc/patch.yaml @@ -0,0 +1,16 @@ +apiVersion: datadoghq.com/v2alpha1 +kind: DatadogAgent +metadata: + name: datadog +spec: + features: + # Sets: DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_GRPC_ENDPOINT: 0.0.0.0:4317 │ + # DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_HTTP_ENDPOINT: 0.0.0.0:4318 + otlp: + receiver: + protocols: + # Set to "0.0.0.0" as per the below reference docs + # ref: https://docs.datadoghq.com/opentelemetry/otlp_ingest_in_the_agent/?tab=host#enabling-otlp-ingestion-on-the-datadog-agent + grpc: + enabled: true + endpoint: "0.0.0.0:4317" \ No 
newline at end of file diff --git a/acceptance/tests/fixtures/cases/datadog-otlp/kustomization.yaml b/acceptance/tests/fixtures/cases/datadog-otlp/kustomization.yaml new file mode 100644 index 0000000000..dcfce4e9f8 --- /dev/null +++ b/acceptance/tests/fixtures/cases/datadog-otlp/kustomization.yaml @@ -0,0 +1,8 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: MPL-2.0 +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ../../bases/datadog +patches: + - path: patch.yaml \ No newline at end of file diff --git a/acceptance/tests/fixtures/cases/datadog-otlp/patch.yaml b/acceptance/tests/fixtures/cases/datadog-otlp/patch.yaml new file mode 100644 index 0000000000..4cf8f81dda --- /dev/null +++ b/acceptance/tests/fixtures/cases/datadog-otlp/patch.yaml @@ -0,0 +1,16 @@ +apiVersion: datadoghq.com/v2alpha1 +kind: DatadogAgent +metadata: + name: datadog +spec: + features: + # Sets: DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_GRPC_ENDPOINT: 0.0.0.0:4317 │ + # DD_OTLP_CONFIG_RECEIVER_PROTOCOLS_HTTP_ENDPOINT: 0.0.0.0:4318 + otlp: + receiver: + protocols: + # Set to "0.0.0.0" as per the below reference docs + # ref: https://docs.datadoghq.com/opentelemetry/otlp_ingest_in_the_agent/?tab=host#enabling-otlp-ingestion-on-the-datadog-agent + http: + enabled: true + endpoint: "0.0.0.0:4318" \ No newline at end of file diff --git a/charts/consul/templates/_helpers.tpl b/charts/consul/templates/_helpers.tpl index f830e18c26..368d4d2f9a 100644 --- a/charts/consul/templates/_helpers.tpl +++ b/charts/consul/templates/_helpers.tpl @@ -680,5 +680,5 @@ Usage: {{ template "consul.versionInfo" }} {{- else }} {{- $sanitizedVersion = $versionInfo }} {{- end -}} -{{- printf "%s" $sanitizedVersion | quote }} +{{- printf "%s" $sanitizedVersion | trunc 63 | quote }} {{- end -}} \ No newline at end of file diff --git a/charts/consul/templates/server-statefulset.yaml b/charts/consul/templates/server-statefulset.yaml index b1028e754a..2b25182b96 100644 --- 
a/charts/consul/templates/server-statefulset.yaml +++ b/charts/consul/templates/server-statefulset.yaml @@ -132,7 +132,7 @@ spec: {{- tpl .Values.server.annotations . | nindent 8 }} {{- end }} {{- if (and .Values.global.metrics.enabled .Values.global.metrics.enableAgentMetrics) }} - {{- if not .Values.global.metrics.datadog.openMetricsPrometheus.enabled }} + {{- if (or (not .Values.global.metrics.datadog.enabled) (and .Values.global.metrics.datadog.enabled (.Values.global.metrics.datadog.dogstatsd.enabled))) }} "prometheus.io/scrape": "true" {{- if not (hasKey (default "" .Values.server.annotations | fromYaml) "prometheus.io/path")}} "prometheus.io/path": "/v1/agent/metrics" @@ -156,12 +156,12 @@ spec: "instances": [ { {{- if .Values.global.tls.enabled }} - "openmetrics_endpoint": "https://consul-server.{{ .Release.Namespace }}.svc:8501/v1/agent/metrics?format=prometheus", + "openmetrics_endpoint": "https://{{ template "consul.fullname" . }}-server.{{ .Release.Namespace }}.svc:8501/v1/agent/metrics?format=prometheus", "tls_cert": "/etc/datadog-agent/conf.d/consul.d/certs/tls.crt", "tls_private_key": "/etc/datadog-agent/conf.d/consul.d/certs/tls.key", "tls_ca_cert": "/etc/datadog-agent/conf.d/consul.d/ca/tls.crt", {{- else }} - "openmetrics_endpoint": "http://consul-server.{{ .Release.Namespace }}.svc:8500/v1/agent/metrics?format=prometheus", + "openmetrics_endpoint": "http://{{ template "consul.fullname" . }}-server.{{ .Release.Namespace }}.svc:8500/v1/agent/metrics?format=prometheus", {{- end }} {{- if ( .Values.global.acls.manageSystemACLs) }} "headers": { @@ -182,12 +182,12 @@ spec: "instances": [ { {{- if .Values.global.tls.enabled }} - "url": "https://consul-server.{{ .Release.Namespace }}.svc:8501", + "url": "https://{{ template "consul.fullname" . 
}}-server.{{ .Release.Namespace }}.svc:8501", "tls_cert": "/etc/datadog-agent/conf.d/consul.d/certs/tls.crt", "tls_private_key": "/etc/datadog-agent/conf.d/consul.d/certs/tls.key", "tls_ca_cert": "/etc/datadog-agent/conf.d/consul.d/ca/tls.crt", {{- else }} - "url": "http://consul-server.consul.svc:8500", + "url": "http://{{ template "consul.fullname" . }}-server.{{ .Release.Namespace }}.svc:8500", {{- end }} "use_prometheus_endpoint": true, {{- if ( .Values.global.acls.manageSystemACLs) }} diff --git a/charts/consul/templates/telemetry-collector-deployment.yaml b/charts/consul/templates/telemetry-collector-deployment.yaml index f7b6d7bd2e..e03f6b9a4f 100644 --- a/charts/consul/templates/telemetry-collector-deployment.yaml +++ b/charts/consul/templates/telemetry-collector-deployment.yaml @@ -256,9 +256,13 @@ spec: {{- if eq (.Values.global.metrics.datadog.otlp.protocol | lower ) "http" }} - name: CO_OTEL_HTTP_ENDPOINT value: "http://$(HOST_IP):4318" + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: "http://$(HOST_IP):4318" {{- else if eq (.Values.global.metrics.datadog.otlp.protocol | lower) "grpc" }} - name: CO_OTEL_HTTP_ENDPOINT - value: "grpc://$(HOST_IP):4317" + value: "http://$(HOST_IP):4317" + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: "http://$(HOST_IP):4317" {{- end }} {{- end }} {{- include "consul.extraEnvironmentVars" .Values.telemetryCollector | nindent 12 }} diff --git a/charts/consul/test/unit/server-statefulset.bats b/charts/consul/test/unit/server-statefulset.bats index 3ef40a3141..afe946e7b3 100755 --- a/charts/consul/test/unit/server-statefulset.bats +++ b/charts/consul/test/unit/server-statefulset.bats @@ -823,7 +823,7 @@ load _helpers local actual="$( echo "$consul_checks" | \ jq -r .consul.instances | jq -r .[0].url | tee /dev/stderr)" - [ "${actual}" = "http://consul-server.consul.svc:8500" ] + [ "${actual}" = http://release-name-consul-server.default.svc:8500 ] local actual="$( echo "$consul_checks" | \ jq -r .consul.instances | jq -r 
.[0].new_leader_checks | tee /dev/stderr)" @@ -866,7 +866,7 @@ load _helpers local actual="$( echo "$consul_checks" | \ jq -r .consul.instances | jq -r .[0].url | tee /dev/stderr)" - [ "${actual}" = "https://consul-server.default.svc:8501" ] + [ "${actual}" = "https://release-name-consul-server.default.svc:8501" ] local actual="$( echo "$consul_checks" | \ jq -r .consul.instances | jq -r .[0].tls_cert | tee /dev/stderr)" @@ -933,7 +933,7 @@ load _helpers local actual="$( echo "$consul_checks" | \ jq -r .openmetrics.instances | jq -r .[0].openmetrics_endpoint | tee /dev/stderr)" - [ "${actual}" = "http://consul-server.default.svc:8500/v1/agent/metrics?format=prometheus" ] + [ "${actual}" = "http://release-name-consul-server.default.svc:8500/v1/agent/metrics?format=prometheus" ] local actual="$( echo "$consul_checks" | \ jq -r .openmetrics.instances | jq -r .[0].headers | tee /dev/stderr)" @@ -971,7 +971,7 @@ load _helpers local actual="$( echo "$consul_checks" | \ jq -r .openmetrics.instances | jq -r .[0].openmetrics_endpoint | tee /dev/stderr)" - [ "${actual}" = "https://consul-server.default.svc:8501/v1/agent/metrics?format=prometheus" ] + [ "${actual}" = "https://release-name-consul-server.default.svc:8501/v1/agent/metrics?format=prometheus" ] local actual="$( echo "$consul_checks" | \ jq -r .openmetrics.instances | jq -r .[0].headers | tee /dev/stderr)" @@ -1020,7 +1020,7 @@ load _helpers local actual="$( echo "$consul_checks" | \ jq -r .openmetrics.instances | jq -r .[0].openmetrics_endpoint | tee /dev/stderr)" - [ "${actual}" = "http://consul-server.default.svc:8500/v1/agent/metrics?format=prometheus" ] + [ "${actual}" = "http://release-name-consul-server.default.svc:8500/v1/agent/metrics?format=prometheus" ] local actual="$( echo "$consul_checks" | \ jq -r .openmetrics.instances | jq -r '.[0].headers["X-Consul-Token"]' | tee /dev/stderr)" diff --git a/charts/consul/test/unit/telemetry-collector-deployment.bats 
b/charts/consul/test/unit/telemetry-collector-deployment.bats index 71f10d3934..949a5e8cd4 100755 --- a/charts/consul/test/unit/telemetry-collector-deployment.bats +++ b/charts/consul/test/unit/telemetry-collector-deployment.bats @@ -1429,7 +1429,7 @@ MIICFjCCAZsCCQCdwLtdjbzlYzAKBggqhkjOPQQDAjB0MQswCQYDVQQGEwJDQTEL' \ local actual=$(echo "$object" | yq -r '.[] | select(.name=="CO_OTEL_HTTP_ENDPOINT").value' | tee /dev/stderr) - [ "${actual}" = 'grpc://$(HOST_IP):4317' ] + [ "${actual}" = 'http://$(HOST_IP):4317' ] } @test "telemetryCollector/Deployment: DataDog OTLP Collector gRPC protocol verification, case-insensitive" { @@ -1448,5 +1448,5 @@ MIICFjCCAZsCCQCdwLtdjbzlYzAKBggqhkjOPQQDAjB0MQswCQYDVQQGEwJDQTEL' \ local actual=$(echo "$object" | yq -r '.[] | select(.name=="CO_OTEL_HTTP_ENDPOINT").value' | tee /dev/stderr) - [ "${actual}" = 'grpc://$(HOST_IP):4317' ] + [ "${actual}" = 'http://$(HOST_IP):4317' ] } \ No newline at end of file diff --git a/control-plane/cni/main.go b/control-plane/cni/main.go index eb710ff9cb..0f9a32b265 100644 --- a/control-plane/cni/main.go +++ b/control-plane/cni/main.go @@ -269,7 +269,7 @@ func main() { } // createK8sClient configures the command's Kubernetes API client if it doesn't -// already exist +// already exist. 
func (c *Command) createK8sClient(cfg *PluginConf) error { restConfig, err := clientcmd.BuildConfigFromFlags("", filepath.Join(cfg.CNINetDir, cfg.Kubeconfig)) if err != nil { From 6dfbadf7bcaed8281d7f8ba2bade1621016a3802 Mon Sep 17 00:00:00 2001 From: John Maguire Date: Fri, 19 Apr 2024 13:08:22 -0400 Subject: [PATCH 2/2] [NET-8412] Fix APIGW policy creation ordering for upgrade path (#3918) * fix policy creation for upgrading * Added changelog --- .changelog/3918.txt | 3 +++ control-plane/api-gateway/cache/consul.go | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 .changelog/3918.txt diff --git a/.changelog/3918.txt b/.changelog/3918.txt new file mode 100644 index 0000000000..dad22ff2db --- /dev/null +++ b/.changelog/3918.txt @@ -0,0 +1,3 @@ +```release-note:bug +api-gateway: Fix order of initialization for creating ACL role/policy to avoid error logs in consul when upgrading between versions. +``` diff --git a/control-plane/api-gateway/cache/consul.go b/control-plane/api-gateway/cache/consul.go index 0b0d067df7..984f6db7b4 100644 --- a/control-plane/api-gateway/cache/consul.go +++ b/control-plane/api-gateway/cache/consul.go @@ -362,6 +362,9 @@ func (c *Cache) ensurePolicy(client *api.Client, gatewayName string) (string, er if err != nil { return "", err } + + // on an upgrade the cache will be empty so we need to write the policy to the cache + c.gatewayNameToPolicy[gatewayName] = existing return existing.ID, nil } @@ -389,6 +392,8 @@ func (c *Cache) ensurePolicy(client *api.Client, gatewayName string) (string, er return "", err } + // update cache with existing policy + c.gatewayNameToPolicy[gatewayName] = existing return existing.ID, nil } @@ -429,7 +434,8 @@ func (c *Cache) ensureRole(client *api.Client, gatewayName string) (string, erro } if aclRole != nil { - return cachedRole.Name, nil + c.gatewayNameToRole[gatewayName] = aclRole + return aclRole.Name, nil } return createRole()