Skip to content

Commit

Permalink
[exporter/datadog]: add ignore_resources configuration option (#3245)
Browse files Browse the repository at this point in the history
* [exporter/datadog]: wip add blocklist for resources. wip test and documentation

* [exporter/datadog]: clean up ignore resources defaults

* [exporter/datadog]: remove unused helpers

* [exporter/datadog]: add yaml config details

* [exporter/datadog]: add links to datadog agent funcs

* [exporter/datadog]: separate blocklist into its own file and tests

* [exporter/datadog]: fix copyrights

* [exporter/datadog]: pr feedback update code comments and update naming conventions, clean up code comments

* [exporter/datadog]: add validation block to config

* [exporter/datadog]: update config

* [exporter/datadog]: linting

* [exporter/datadog]: wip add helper tests

* [exporter/datadog]: add helper tests

* [exporter/datadog]: linting

* [exporter/datadog]: use mustcompile for denylist

* [exporter/datadog]: fix tests for new config/denylist behavior

* [exporter/datadog]: add ignore resources validation test for config
  • Loading branch information
ericmustin authored Apr 30, 2021
1 parent 64a5d55 commit f853a99
Show file tree
Hide file tree
Showing 13 changed files with 363 additions and 23 deletions.
19 changes: 19 additions & 0 deletions exporter/datadogexporter/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package config
import (
"errors"
"fmt"
"regexp"
"strings"
"sync"

Expand Down Expand Up @@ -97,6 +98,12 @@ type TracesConfig struct {
// meaning no sampling. If you want to send one event out of every 250
// times Send() is called, you would specify 250 here.
SampleRate uint `mapstructure:"sample_rate"`

// IgnoreResources is a denylist of regular expressions that can be provided
// to drop certain traces based on their resource name.
// In YAML, all entries must be surrounded by double quotes and separated by commas, e.g.:
// ignore_resources: ["(GET|POST) /healthcheck"]
IgnoreResources []string `mapstructure:"ignore_resources"`
}

// TagsConfig defines the tag-related configuration
Expand Down Expand Up @@ -226,3 +233,15 @@ func (c *Config) Sanitize() error {

return nil
}

// Validate checks that every entry of Traces.IgnoreResources compiles as a
// regular expression.
//
// It returns nil when all entries compile (including when the list is nil or
// empty). Otherwise it returns an error that names the first offending entry
// and wraps the underlying regexp compile error, so callers can see why the
// expression was rejected.
func (c *Config) Validate() error {
	// Ranging over a nil slice is a no-op, so no explicit nil check is needed.
	for _, entry := range c.Traces.IgnoreResources {
		if _, err := regexp.Compile(entry); err != nil {
			return fmt.Errorf("%q is not a valid resource filter regular expression: %w", entry, err)
		}
	}
	return nil
}
10 changes: 10 additions & 0 deletions exporter/datadogexporter/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,13 @@ func TestCensorAPIKey(t *testing.T) {
cfg.GetCensoredKey(),
)
}

// TestIgnoreResourcesValidation verifies that Validate accepts a well-formed
// ignore_resources expression and rejects a malformed one.
func TestIgnoreResourcesValidation(t *testing.T) {
	accepted := Config{Traces: TracesConfig{IgnoreResources: []string{"[123]"}}}
	rejected := Config{Traces: TracesConfig{IgnoreResources: []string{"[123"}}}

	// Run both validations before asserting on either result.
	acceptedErr, rejectedErr := accepted.Validate(), rejected.Validate()

	require.NoError(t, acceptedErr)
	require.Error(t, rejectedErr)
}
57 changes: 57 additions & 0 deletions exporter/datadogexporter/denylister.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright The OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package datadogexporter

import (
"regexp"

"github.com/DataDog/datadog-agent/pkg/trace/exportable/pb"
)

// Denylister holds a list of regular expressions which will match resources
// on spans that should be dropped.
// From: https://github.com/DataDog/datadog-agent/blob/a6872e436681ea2136cf8a67465e99fdb4450519/pkg/trace/filters/blacklister.go#L15-L19
type Denylister struct {
	// list holds the compiled expressions that are matched against a span's Resource.
	list []*regexp.Regexp
}

// Allows reports whether the Denylister permits this span: it returns false as
// soon as any expression in the list matches the span's Resource, and true
// when none of them match (including when the list is empty).
// From: https://github.com/DataDog/datadog-agent/blob/a6872e436681ea2136cf8a67465e99fdb4450519/pkg/trace/filters/blacklister.go#L21-L29
func (f *Denylister) Allows(span *pb.Span) bool {
	for i := 0; i < len(f.list); i++ {
		if denied := f.list[i].MatchString(span.Resource); denied {
			return false
		}
	}
	return true
}

// NewDenylister builds a Denylister whose list is the result of passing the
// given regular expressions to compileRules.
// From: https://github.com/DataDog/datadog-agent/blob/a6872e436681ea2136cf8a67465e99fdb4450519/pkg/trace/filters/blacklister.go#L41-L45
func NewDenylister(exprs []string) *Denylister {
	compiled := compileRules(exprs)
	denylister := &Denylister{list: compiled}
	return denylister
}

// compileRules compiles every expression in exprs, in order, and returns the
// compiled rules as a non-nil slice of the same length.
// Each expression is compiled with regexp.MustCompile, so an expression that
// is not a valid regular expression causes a panic.
// From: https://github.com/DataDog/datadog-agent/blob/a6872e436681ea2136cf8a67465e99fdb4450519/pkg/trace/filters/blacklister.go#L47-L59
func compileRules(exprs []string) []*regexp.Regexp {
	rules := make([]*regexp.Regexp, len(exprs))
	for i, expr := range exprs {
		rules[i] = regexp.MustCompile(expr)
	}
	return rules
}
83 changes: 83 additions & 0 deletions exporter/datadogexporter/denylister_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright The OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package datadogexporter

import (
"testing"

"github.com/DataDog/datadog-agent/pkg/trace/exportable/pb"
"github.com/stretchr/testify/assert"
)

// testSpan returns a fixed span with hardcoded info, useful for reproducible tests.
// https://github.com/DataDog/datadog-agent/blob/d3a4cd66314d70162e2c76d2481f4b93455cf717/pkg/trace/test/testutil/span.go#L372
func testSpan() *pb.Span {
	meta := map[string]string{
		"user": "eric",
		"pool": "fondue",
	}
	metrics := map[string]float64{
		"cheese_weight": 100000.0,
	}

	span := &pb.Span{
		// identity
		TraceID:  424242,
		SpanID:   42,
		ParentID: 1111,
		// naming
		Service:  "django",
		Name:     "django.controller",
		Resource: "GET /some/raclette",
		Type:     "http",
		// timing and status
		Start:    1472732573337575936,
		Duration: 10000000,
		Error:    0,
		// tags
		Meta:    meta,
		Metrics: metrics,
	}
	return span
}

// TestDenylister checks, for a table of expression lists and resource names,
// whether a Denylister built from the expressions allows a span carrying
// that resource.
func TestDenylister(t *testing.T) {
	cases := []struct {
		exprs    []string
		resource string
		allowed  bool
	}{
		{exprs: []string{"/foo/bar"}, resource: "/foo/bar", allowed: false},
		{exprs: []string{"/foo/b.r"}, resource: "/foo/bar", allowed: false},
		{exprs: []string{"[0-9]+"}, resource: "/abcde", allowed: true},
		{exprs: []string{"[0-9]+"}, resource: "/abcde123", allowed: false},
		{exprs: []string{"\\(foobar\\)"}, resource: "(foobar)", allowed: false},
		{exprs: []string{"\\(foobar\\)"}, resource: "(bar)", allowed: true},
		{exprs: []string{"(GET|POST) /healthcheck"}, resource: "GET /foobar", allowed: true},
		{exprs: []string{"(GET|POST) /healthcheck"}, resource: "GET /healthcheck", allowed: false},
		{exprs: []string{"(GET|POST) /healthcheck"}, resource: "POST /healthcheck", allowed: false},
		{exprs: []string{"SELECT COUNT\\(\\*\\) FROM BAR"}, resource: "SELECT COUNT(*) FROM BAR", allowed: false},
		{exprs: []string{"ABC+", "W+"}, resource: "ABCCCC", allowed: false},
		{exprs: []string{"ABC+", "W+"}, resource: "WWW", allowed: false},
	}

	for i := range cases {
		tc := cases[i]

		span := testSpan()
		span.Resource = tc.resource

		got := NewDenylister(tc.exprs).Allows(span)
		assert.Equal(t, tc.allowed, got)
	}
}

// TestCompileRules builds a Denylister from a single expression and checks
// that it allows the fixed test span on each of 100 repeated calls.
func TestCompileRules(t *testing.T) {
	denylister := NewDenylister([]string{"\n{6}"})
	for remaining := 100; remaining > 0; remaining-- {
		allowed := denylister.Allows(testSpan())
		assert.True(t, allowed)
	}
}
6 changes: 6 additions & 0 deletions exporter/datadogexporter/example/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,12 @@ exporters:
#
# endpoint: https://api.datadoghq.com

## @param ignore_resources - list of strings - optional
## A blacklist of regular expressions can be provided to disable certain traces based on their resource name
## all entries must be surrounded by double quotes and separated by commas.
#
# ignore_resources: ["(GET|POST) /healthcheck"]

service:
pipelines:
traces:
Expand Down
1 change: 1 addition & 0 deletions exporter/datadogexporter/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ func createDefaultConfig() config.Exporter {
TCPAddr: confignet.TCPAddr{
Endpoint: "$DD_APM_URL", // If not provided, set during config sanitization
},
IgnoreResources: []string{},
},

SendMetadata: true,
Expand Down
5 changes: 5 additions & 0 deletions exporter/datadogexporter/factory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ func TestCreateDefaultConfig(t *testing.T) {
TCPAddr: confignet.TCPAddr{
Endpoint: "$DD_APM_URL",
},
IgnoreResources: []string{},
},

TagsConfig: ddconfig.TagsConfig{
Expand Down Expand Up @@ -131,6 +132,7 @@ func TestLoadConfig(t *testing.T) {
TCPAddr: confignet.TCPAddr{
Endpoint: "https://trace.agent.datadoghq.eu",
},
IgnoreResources: []string{},
},
SendMetadata: true,
OnlyMetadata: false,
Expand Down Expand Up @@ -173,6 +175,7 @@ func TestLoadConfig(t *testing.T) {
TCPAddr: confignet.TCPAddr{
Endpoint: "https://trace.agent.datadoghq.com",
},
IgnoreResources: []string{},
},
SendMetadata: true,
OnlyMetadata: false,
Expand Down Expand Up @@ -257,6 +260,7 @@ func TestLoadConfigEnvVariables(t *testing.T) {
TCPAddr: confignet.TCPAddr{
Endpoint: "https://trace.agent.datadoghq.test",
},
IgnoreResources: []string{},
},
SendMetadata: true,
OnlyMetadata: false,
Expand Down Expand Up @@ -302,6 +306,7 @@ func TestLoadConfigEnvVariables(t *testing.T) {
TCPAddr: confignet.TCPAddr{
Endpoint: "https://trace.agent.datadoghq.com",
},
IgnoreResources: []string{},
},
SendMetadata: true,
OnlyMetadata: false,
Expand Down
7 changes: 6 additions & 1 deletion exporter/datadogexporter/traces_exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ type traceExporter struct {
obfuscator *obfuscate.Obfuscator
calculator *sublayerCalculator
client *datadog.Client
denylister *Denylister
}

var (
Expand Down Expand Up @@ -85,6 +86,9 @@ func newTracesExporter(ctx context.Context, params component.ExporterCreateParam
// https://github.com/DataDog/datadog-serverless-functions/blob/11f170eac105d66be30f18eda09eca791bc0d31b/aws/logs_monitoring/trace_forwarder/cmd/trace/main.go#L43
obfuscator := obfuscate.NewObfuscator(obfuscatorConfig)

// a denylist for dropping ignored resources
denylister := NewDenylister(cfg.Traces.IgnoreResources)

calculator := &sublayerCalculator{sc: stats.NewSublayerCalculator()}
exporter := &traceExporter{
params: params,
Expand All @@ -94,6 +98,7 @@ func newTracesExporter(ctx context.Context, params component.ExporterCreateParam
obfuscator: obfuscator,
calculator: calculator,
client: client,
denylister: denylister,
}

return exporter
Expand Down Expand Up @@ -131,7 +136,7 @@ func (exp *traceExporter) pushTraceData(
// convert traces to datadog traces and group trace payloads by env
// we largely apply the same logic as the serverless implementation, simplified a bit
// https://github.com/DataDog/datadog-serverless-functions/blob/f5c3aedfec5ba223b11b76a4239fcbf35ec7d045/aws/logs_monitoring/trace_forwarder/cmd/trace/main.go#L61-L83
ddTraces, ms := convertToDatadogTd(td, exp.calculator, exp.cfg)
ddTraces, ms := convertToDatadogTd(td, exp.calculator, exp.cfg, exp.denylister)

// group the traces by env to reduce the number of flushes
aggregatedTraces := aggregateTracePayloadsByEnv(ddTraces)
Expand Down
1 change: 1 addition & 0 deletions exporter/datadogexporter/traces_exporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ func testTracesExporterHelper(td pdata.Traces, t *testing.T) []string {
TCPAddr: confignet.TCPAddr{
Endpoint: server.URL,
},
IgnoreResources: []string{},
},
}

Expand Down
31 changes: 27 additions & 4 deletions exporter/datadogexporter/translate_traces.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ const (
)

// converts Traces into an array of datadog trace payloads grouped by env
func convertToDatadogTd(td pdata.Traces, calculator *sublayerCalculator, cfg *config.Config) ([]*pb.TracePayload, []datadog.Metric) {
func convertToDatadogTd(td pdata.Traces, calculator *sublayerCalculator, cfg *config.Config, blk *Denylister) ([]*pb.TracePayload, []datadog.Metric) {
// TODO:
// do we apply other global tags, like version+service, to every span or only root spans of a service
// should globalTags['service'] take precedence over a trace's resource.service.name? I don't believe so, need to confirm
Expand All @@ -80,7 +80,7 @@ func convertToDatadogTd(td pdata.Traces, calculator *sublayerCalculator, cfg *co
hostname = resHostname
}

payload := resourceSpansToDatadogSpans(rs, calculator, hostname, cfg)
payload := resourceSpansToDatadogSpans(rs, calculator, hostname, cfg, blk)
traces = append(traces, &payload)

ms := metrics.DefaultMetrics("traces", uint64(pushTime))
Expand Down Expand Up @@ -118,7 +118,7 @@ func aggregateTracePayloadsByEnv(tracePayloads []*pb.TracePayload) []*pb.TracePa
}

// converts a Trace's resource spans into a trace payload
func resourceSpansToDatadogSpans(rs pdata.ResourceSpans, calculator *sublayerCalculator, hostname string, cfg *config.Config) pb.TracePayload {
func resourceSpansToDatadogSpans(rs pdata.ResourceSpans, calculator *sublayerCalculator, hostname string, cfg *config.Config, blk *Denylister) pb.TracePayload {
// get env tag
env := utils.NormalizeTag(cfg.Env)

Expand Down Expand Up @@ -172,6 +172,27 @@ func resourceSpansToDatadogSpans(rs pdata.ResourceSpans, calculator *sublayerCal
}

for _, apiTrace := range apiTraces {
// first drop trace if root span exists in trace chunk that is on denylist (drop trace no stats).
// In the dd-agent, the denylist/blacklist behavior can be performed before most of the expensive
// operations on the span.
// See: https://github.com/DataDog/datadog-agent/blob/a6872e436681ea2136cf8a67465e99fdb4450519/pkg/trace/agent/agent.go#L200
// However, in our case, the span must be converted from otlp span into a dd span first. This is for 2 reasons.
// First, DD trace chunks rec'd by datadog-agent v0.4+ endpoint are expected as arrays of spans grouped by trace id.
// But, since OTLP groups by arrays of spans from the same instrumentation library, not trace-id,
// (contrib-instrumention-redis, contrib-instrumention-rails, etc), we have to iterate
// over batch and group all spans by trace id.
// Second, otlp->dd conversion is what creates the resource name that we are checking in the denylist.
// Note: OTLP also groups by ResourceSpans but practically speaking a payload will usually only
// contain 1 ResourceSpan array.

// Root span is used to carry some trace-level metadata, such as sampling rate and priority.
rootSpan := utils.GetRoot(apiTrace)

if !blk.Allows(rootSpan) {
// drop trace by not adding to payload if its root span matches denylist
continue
}

// calculates analyzed spans for use in trace search and app analytics
// appends a specific piece of metadata to these spans marking them as analyzed
// TODO: allow users to configure specific spans to be marked as an analyzed spans for app analytics
Expand Down Expand Up @@ -249,11 +270,13 @@ func spanToDatadogSpan(s pdata.Span,
// we can then set Error field when creating and predefine a max meta capacity
isSpanError := getSpanErrorAndSetTags(s, tags)

resourceName := getDatadogResourceName(s, tags)

span := &pb.Span{
TraceID: decodeAPMTraceID(s.TraceID().Bytes()),
SpanID: decodeAPMSpanID(s.SpanID().Bytes()),
Name: getDatadogSpanName(s, tags),
Resource: getDatadogResourceName(s, tags),
Resource: resourceName,
Service: normalizedServiceName,
Start: int64(startTime),
Duration: duration,
Expand Down
Loading

0 comments on commit f853a99

Please sign in to comment.