From 7044c894de9f453ef6a5506ef1e921f08392ea5a Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Mon, 27 Jun 2022 13:39:23 +0200 Subject: [PATCH 01/27] pkg/tracer: added glob matcher for span sampling --- ddtrace/tracer/util.go | 13 ++++++++++ ddtrace/tracer/util_test.go | 51 +++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/ddtrace/tracer/util.go b/ddtrace/tracer/util.go index 116d0aef4d..51684db537 100644 --- a/ddtrace/tracer/util.go +++ b/ddtrace/tracer/util.go @@ -8,6 +8,7 @@ package tracer import ( "encoding/base64" "fmt" + "regexp" "strconv" "strings" @@ -129,3 +130,15 @@ var b64 = base64.StdEncoding.WithPadding(base64.NoPadding) func b64Encode(s string) string { return b64.EncodeToString([]byte(s)) } + +// globMatch compiles pattern string into glob format, i.e. regular expressions with only '?' +// and '*' treated as regex metacharacters. +func globMatch(pattern string) (*regexp.Regexp, error) { + // escaping regex characters + pattern = regexp.QuoteMeta(pattern) + // replacing '?' and '*' with regex characters + pattern = strings.Replace(pattern, "\\?", ".", -1) + pattern = strings.Replace(pattern, "\\*", ".*", -1) + //pattern must match an entire string + return regexp.Compile(fmt.Sprintf("^%s$", pattern)) +} diff --git a/ddtrace/tracer/util_test.go b/ddtrace/tracer/util_test.go index 42cc0a5f39..7325b7a29c 100644 --- a/ddtrace/tracer/util_test.go +++ b/ddtrace/tracer/util_test.go @@ -115,3 +115,54 @@ func TestParsePropagatableTraceTags(t *testing.T) { }) } } + +func TestGlobMatch(t *testing.T) { + for i, tt := range []struct { + pattern string + input string + shouldMatch bool + }{ + // pattern with * + {"test*", "test", true}, + {"test*", "test-case", true}, + {"test*", "a-test", false}, + {"*test", "a-test", true}, + {"a*case", "acase", true}, + {"a*case", "a-test-case", true}, + {"a*test*case", "a-test-case", true}, + {"a*test*case", "atestcase", true}, + {"a*test*case", "abadcase", false}, + // pattern with ? + {"test?", "test", false}, + {"test?", "test-case", false}, + {"test?", "a-test", false}, + {"?test", "a-test", false}, + {"a?case", "acase", false}, + {"a?case", "a-case", true}, + {"a?test?case", "a-test-case", true}, + {"a?test?case", "a-test--case", false}, + // pattern with ? and * + {"?test*", "atest", true}, + {"?test*", "atestcase", true}, + {"?test*", "testcase", false}, + {"?test*", "testcase", false}, + {"test*case", "testcase", true}, + {"a?test*", "a-test-case", true}, + {"a?test*", "atestcase", false}, + {"a*test?", "a-test-", true}, + {"a*test?", "atestcase", false}, + {"a*test?case", "a--test-case", true}, + {"a*test?case", "a--test--case", false}, + {"a?test*case", "a-testing--case", true}, + {"the?test*case", "the-test-cases", false}, + } { + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + rg, _ := globMatch(tt.pattern) + if tt.shouldMatch { + assert.Regexp(t, rg, tt.input) + } else { + assert.NotRegexp(t, rg, tt.input) + } + }) + } +} From d8fa045eab6042c0e4d084f170f4ba9bf02ddc71 Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Mon, 27 Jun 2022 15:15:59 +0200 Subject: [PATCH 02/27] pkg/tracer: added single span sampling rules & updated the matcher --- ddtrace/ext/tags.go | 13 +++ ddtrace/tracer/log_test.go | 4 +- ddtrace/tracer/sampler.go | 113 +++++++++++++++--- ddtrace/tracer/sampler_test.go | 201 ++++++++++++++++++++++++++++++++- ddtrace/tracer/tracer.go | 2 +- 5 files changed, 314 insertions(+), 19 deletions(-) diff --git a/ddtrace/ext/tags.go b/ddtrace/ext/tags.go index e7337d7ae7..5ceee8e4ed 100644 --- a/ddtrace/ext/tags.go +++ b/ddtrace/ext/tags.go @@ -79,6 +79,19 @@ const ( // Search & Analytics event. AnalyticsEvent = "analytics.event" + // SpanSamplingMechanism is the metric key holding a span sampling rule that a span was kept on + SpanSamplingMechanism = "_dd.span_sampling.mechanism" + + // SingleSpanSamplingMechanism is a constant reserved to indicate that a span was kept on account of a span sampling rule. + SingleSpanSamplingMechanism = 8 + + // SingleSpanSamplingRuleRate is the metric key containing the configured sampling probability for the span sampling rule. + SingleSpanSamplingRuleRate = "_dd.span_sampling.rule_rate" + + // SingleSpanSamplingMPS is the metric key contains the configured limit for the span sampling rule that the span matched. + // // If there is no configured limit, then this tag is omitted. + SingleSpanSamplingMPS = "_dd.span_sampling.max_per_second" + // ManualKeep is a tag which specifies that the trace to which this span // belongs to should be kept when set to true. ManualKeep = "manual.keep" diff --git a/ddtrace/tracer/log_test.go b/ddtrace/tracer/log_test.go index 39f869256b..3962eeaa72 100644 --- a/ddtrace/tracer/log_test.go +++ b/ddtrace/tracer/log_test.go @@ -126,8 +126,8 @@ func TestLogSamplingRules(t *testing.T) { lines := removeAppSec(tp.Lines()) assert.Len(lines, 2) - assert.Contains(lines[0], "WARN: at index 4: ignoring rule {Service: Name: Rate:9.10}: rate is out of [0.0, 1.0] range") - assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ WARN: DIAGNOSTICS Error\(s\) parsing DD_TRACE_SAMPLING_RULES: found errors:\n\tat index 1: rate not provided\n\tat index 3: rate not provided$`, lines[1]) + assert.Contains(lines[0], "WARN: at index 4: ignoring rule {Service: Name: Rate:9.10 MaxPerSecond:0}: rate is out of [0.0, 1.0] range") + assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ WARN: DIAGNOSTICS Error\(s\) parsing sampling rules: found errors:\n\tat index 1: rate not provided\n\tat index 3: rate not provided$`, lines[1]) } func TestLogAgentReachable(t *testing.T) { diff --git a/ddtrace/tracer/sampler.go b/ddtrace/tracer/sampler.go index 2069436a1b..b0ee284ea5 100644 --- a/ddtrace/tracer/sampler.go +++ b/ddtrace/tracer/sampler.go @@ -188,19 +188,62 @@ func newRulesSampler(rules []SamplingRule) *rulesSampler { } } -// samplingRulesFromEnv parses sampling rules from the DD_TRACE_SAMPLING_RULES -// environment variable. +// samplingRulesFromEnv parses sampling rules from the DD_TRACE_SAMPLING_RULES environment variable +// and combines them with spanSamplingRules func samplingRulesFromEnv() ([]SamplingRule, error) { - rulesFromEnv := os.Getenv("DD_TRACE_SAMPLING_RULES") - if rulesFromEnv == "" { + errs := []string{} + spanRules, err := spanSamplingRulesFromEnv() + if err != nil { + errs = append(errs, err.Error()) + } + traceRules, err := processSamplingRulesFromEnv([]byte(os.Getenv("DD_TRACE_SAMPLING_RULES")), TraceSamplingRuleType) + if err != nil { + errs = append(errs, err.Error()) + } + + rules := append(spanRules, traceRules...) + if len(errs) != 0 { + return rules, fmt.Errorf("%s", strings.Join(errs, "\n\t")) + } + return rules, nil +} + +// spanSamplingRulesFromEnv parses sampling rules from the DD_SPAN_SAMPLING_RULES and +// DD_SPAN_SAMPLING_RULES_FILE environment variables. +func spanSamplingRulesFromEnv() ([]SamplingRule, error) { + errs := []string{} + rules, err := processSamplingRulesFromEnv([]byte(os.Getenv("DD_SPAN_SAMPLING_RULES")), SingleSpanSamplingType) + if err != nil { + errs = append(errs, err.Error()) + } + if len(rules) != 0 { + if os.Getenv("DD_SPAN_SAMPLING_RULES_FILE") != "" { + log.Warn("DIAGNOSTICS Error(s): DD_SPAN_SAMPLING_RULES is available and will take precedence over DD_SPAN_SAMPLING_RULES_FILE") + } + return rules, err + } + rulesFile := os.Getenv("DD_SPAN_SAMPLING_RULES_FILE") + if rulesFile == "" { + return rules, err + } + rulesFromEnvFile, err := os.ReadFile(rulesFile) + if err != nil { + log.Warn("Couldn't read file from DD_SPAN_SAMPLING_RULES_FILE") + } + return processSamplingRulesFromEnv(rulesFromEnvFile, SingleSpanSamplingType) +} + +func processSamplingRulesFromEnv(b []byte, ruleType SamplingRuleType) ([]SamplingRule, error) { + if len(b) == 0 { return nil, nil } - jsonRules := []struct { - Service string `json:"service"` - Name string `json:"name"` - Rate json.Number `json:"sample_rate"` - }{} - err := json.Unmarshal([]byte(rulesFromEnv), &jsonRules) + var jsonRules []struct { + Service string `json:"service"` + Name string `json:"name"` + Rate json.Number `json:"sample_rate"` + MaxPerSecond float64 `json:"max_per_second"` + } + err := json.Unmarshal(b, &jsonRules) if err != nil { return nil, fmt.Errorf("error unmarshalling JSON: %v", err) } @@ -220,6 +263,32 @@ func samplingRulesFromEnv() ([]SamplingRule, error) { log.Warn("at index %d: ignoring rule %+v: rate is out of [0.0, 1.0] range", i, v) continue } + if ruleType == SingleSpanSamplingType { + if v.Service == "" { + v.Service = "*" + } + srvGlob, err := globMatch(v.Service) + if err != nil { + log.Warn("at index %d: ignoring rule %+v: service name regex pattern can't be compiled", i, v) + continue + } + if v.Name == "" { + v.Name = "*" + } + opGlob, err := globMatch(v.Name) + if err != nil { + log.Warn("at index %d: ignoring rule %+v: operation name regex pattern can't be compiled", i, v) + continue + } + rules = append(rules, SamplingRule{ + Service: srvGlob, + Name: opGlob, + Rate: rate, + MaxPerSecond: v.MaxPerSecond, + Type: SingleSpanSamplingType, + }) + continue + } switch { case v.Service != "" && v.Name != "": rules = append(rules, NameServiceRule(v.Name, v.Service, rate)) @@ -230,7 +299,7 @@ func samplingRulesFromEnv() ([]SamplingRule, error) { } } if len(errs) != 0 { - return rules, fmt.Errorf("found errors:\n\t%s", strings.Join(errs, "\n\t")) + return rules, fmt.Errorf("\n\t%s", strings.Join(errs, "\n\t")) } return rules, nil } @@ -300,6 +369,13 @@ func (rs *rulesSampler) apply(span *span) bool { if rule.match(span) { matched = true rate = rule.Rate + if rule.Type == SingleSpanSamplingType { + span.setMetric(ext.SpanSamplingMechanism, ext.SingleSpanSamplingMechanism) + span.setMetric(ext.SingleSpanSamplingRuleRate, rule.Rate) + if rule.MaxPerSecond != 0 { + span.setMetric(ext.SingleSpanSamplingMPS, rule.MaxPerSecond) + } + } break } } @@ -338,13 +414,22 @@ func (rs *rulesSampler) limit() (float64, bool) { return math.NaN(), false } +type SamplingRuleType int + +const ( + TraceSamplingRuleType = iota + SingleSpanSamplingType +) + // SamplingRule is used for applying sampling rates to spans that match // the service name, operation name or both. // For basic usage, consider using the helper functions ServiceRule, NameRule, etc. type SamplingRule struct { - Service *regexp.Regexp - Name *regexp.Regexp - Rate float64 + Service *regexp.Regexp + Name *regexp.Regexp + Rate float64 + MaxPerSecond float64 + Type SamplingRuleType exactService string exactName string diff --git a/ddtrace/tracer/sampler_test.go b/ddtrace/tracer/sampler_test.go index 98b9edf915..3b22080495 100644 --- a/ddtrace/tracer/sampler_test.go +++ b/ddtrace/tracer/sampler_test.go @@ -231,7 +231,7 @@ func TestRuleEnvVars(t *testing.T) { } }) - t.Run("sampling-rules", func(t *testing.T) { + t.Run("trace-sampling-rules", func(t *testing.T) { assert := assert.New(t) defer os.Unsetenv("DD_TRACE_SAMPLING_RULES") @@ -264,12 +264,89 @@ func TestRuleEnvVars(t *testing.T) { if tt.errStr == "" { assert.NoError(err) } else { - assert.Equal(err.Error(), tt.errStr) + assert.Equal(tt.errStr, err.Error()) } assert.Len(rules, tt.ruleN) }) } }) + t.Run("span-sampling-rules", func(t *testing.T) { + assert := assert.New(t) + defer os.Unsetenv("DD_SPAN_SAMPLING_RULES") + + for i, tt := range []struct { + value string + ruleN int + errStr string + }{ + { + value: "[]", + ruleN: 0, + }, { + value: `[{"service": "abcd", "sample_rate": 1.0}]`, + ruleN: 1, + }, { + value: `[{"service": "abcd", "sample_rate": 1.0},{"name": "wxyz", "sample_rate": 0.9},{"service": "efgh", "name": "lmnop", "sample_rate": 0.42}]`, + ruleN: 3, + }, { + // invalid rule ignored + value: `[{"service": "abcd", "sample_rate": 42.0}, {"service": "abcd", "sample_rate": 0.2}]`, + ruleN: 1, + }, { + value: `not JSON at all`, + errStr: `error unmarshalling JSON: invalid character 'o' in literal null (expecting 'u')`, + }, + } { + t.Run(fmt.Sprintf("%v", i), func(t *testing.T) { + os.Setenv("DD_SPAN_SAMPLING_RULES", tt.value) + rules, err := samplingRulesFromEnv() + if tt.errStr == "" { + assert.NoError(err) + } else { + assert.Equal(tt.errStr, err.Error()) + } + assert.Len(rules, tt.ruleN) + }) + } + }) + t.Run("span-sampling-rules-regex", func(t *testing.T) { + assert := assert.New(t) + defer os.Unsetenv("DD_SPAN_SAMPLING_RULES") + + for i, tt := range []struct { + rules string + srvRegex string + nameRegex string + }{ + { + rules: `[{"name": "abcd?", "sample_rate": 1.0}]`, + srvRegex: "^.*$", + nameRegex: "^abcd.$", + }, + { + rules: `[{"service": "*abcd", "sample_rate": 1.0}]`, + nameRegex: "^.*$", + srvRegex: "^.*abcd$", + }, + { + rules: `[{"service": "*abcd", "sample_rate": 1.0}]`, + nameRegex: "^.*$", + srvRegex: "^.*abcd$", + }, { + rules: `[{"sample_rate": 1.0},{"name": "wxyz", "sample_rate": 0.9}]`, + nameRegex: "^.*$", + srvRegex: "^.*$", + }, + } { + t.Run(fmt.Sprintf("%v", i), func(t *testing.T) { + os.Setenv("DD_SPAN_SAMPLING_RULES", tt.rules) + rules, err := samplingRulesFromEnv() + assert.NoError(err) + assert.Equal(tt.srvRegex, rules[0].Service.String()) + assert.Equal(tt.nameRegex, rules[0].Name.String()) + }) + } + }) } func TestRulesSampler(t *testing.T) { @@ -329,6 +406,126 @@ func TestRulesSampler(t *testing.T) { } }) + t.Run("matching-span-rules", func(t *testing.T) { + defer os.Unsetenv("DD_SPAN_SAMPLING_RULES") + for _, tt := range []struct { + rules string + spanSrv string + spanName string + }{ + { + rules: `[{"name": "abcd?", "sample_rate": 1.0, "max_per_second":100}]`, + spanSrv: "test-service", + spanName: "abcde", + }, + { + rules: `[{"service": "*abcd","max_per_second":100, "sample_rate": 1.0}]`, + spanSrv: "xyzabcd", + spanName: "abcde", + }, + { + rules: `[{"service": "?*", "sample_rate": 1.0, "max_per_second":100}]`, + spanSrv: "test-service", + spanName: "abcde", + }, + } { + t.Run("", func(t *testing.T) { + os.Setenv("DD_SPAN_SAMPLING_RULES", tt.rules) + rules, _ := samplingRulesFromEnv() + + assert := assert.New(t) + rs := newRulesSampler(rules) + + span := makeSpan(tt.spanName, tt.spanSrv) + result := rs.apply(span) + assert.True(result) + assert.Contains(span.Metrics, ext.SpanSamplingMechanism) + assert.Contains(span.Metrics, ext.SingleSpanSamplingRuleRate) + assert.Contains(span.Metrics, ext.SingleSpanSamplingMPS) + }) + } + }) + t.Run("not-matching-span-rules", func(t *testing.T) { + defer os.Unsetenv("DD_SPAN_SAMPLING_RULES") + for _, tt := range []struct { + rules string + spanSrv string + spanName string + }{ + { + //first matching rule takes precedence + rules: `[{"name": "abcd?", "sample_rate": 0.0},{"name": "abcd?", "sample_rate": 1.0}]`, + spanSrv: "test-service", + spanName: "abcdef", + }, + { + rules: `[{"service": "abcd", "sample_rate": 1.0}]`, + spanSrv: "xyzabcd", + spanName: "abcde", + }, + { + rules: `[{"service": "?", "sample_rate": 1.0}]`, + spanSrv: "test-service", + spanName: "abcde", + }, + } { + t.Run("", func(t *testing.T) { + os.Setenv("DD_SPAN_SAMPLING_RULES", tt.rules) + rules, _ := samplingRulesFromEnv() + + assert := assert.New(t) + rs := newRulesSampler(rules) + + span := makeSpan(tt.spanName, tt.spanSrv) + result := rs.apply(span) + assert.False(result) + assert.NotContains(span.Metrics, ext.SpanSamplingMechanism) + assert.NotContains(span.Metrics, ext.SingleSpanSamplingRuleRate) + assert.NotContains(span.Metrics, ext.SingleSpanSamplingMPS) + }) + } + }) + t.Run("not-matching-span-rules/matching-trace-rules", func(t *testing.T) { + defer os.Unsetenv("DD_SPAN_SAMPLING_RULES") + defer os.Unsetenv("DD_TRACE_SAMPLING_RULES") + for _, tt := range []struct { + spanRules string + traceRules string + spanSrv string + spanName string + }{ + { + //first matching rule takes precedence + spanRules: `[{"name": "abcdef?", "sample_rate": 1.0}]`, + traceRules: `[{"name": "abcdef", "sample_rate": 1.0},{"name": "abcd?", "sample_rate": 1.0}]`, + spanName: "abcdef", + spanSrv: "test-service", + }, + { + spanRules: `[{"service": "abcde", "sample_rate": 1.0}]`, + traceRules: `[{"service": "abcd", "sample_rate": 1.0}]`, + spanSrv: "abcd", + spanName: "abcde", + }, + } { + t.Run("", func(t *testing.T) { + os.Setenv("DD_SPAN_SAMPLING_RULES", tt.spanRules) + os.Setenv("DD_TRACE_SAMPLING_RULES", tt.traceRules) + rules, _ := samplingRulesFromEnv() + + assert := assert.New(t) + rs := newRulesSampler(rules) + + span := makeSpan(tt.spanName, tt.spanSrv) + result := rs.apply(span) + assert.True(result) + assert.NotContains(span.Metrics, ext.SpanSamplingMechanism) + assert.NotContains(span.Metrics, ext.SingleSpanSamplingRuleRate) + assert.NotContains(span.Metrics, ext.SingleSpanSamplingMPS) + }) + } + }) + t.Run("default-rate", func(t *testing.T) { ruleSets := [][]SamplingRule{ {}, diff --git a/ddtrace/tracer/tracer.go b/ddtrace/tracer/tracer.go index ee32b00efb..7edeafd3e8 100644 --- a/ddtrace/tracer/tracer.go +++ b/ddtrace/tracer/tracer.go @@ -178,7 +178,7 @@ func newUnstartedTracer(opts ...StartOption) *tracer { c := newConfig(opts...) envRules, err := samplingRulesFromEnv() if err != nil { - log.Warn("DIAGNOSTICS Error(s) parsing DD_TRACE_SAMPLING_RULES: %s", err) + log.Warn("DIAGNOSTICS Error(s) parsing sampling rules: found errors: %s", err) } if envRules != nil { c.samplingRules = envRules From 1f9b7592767abdbb62a28114f723ddce6452e61d Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Wed, 29 Jun 2022 13:59:31 +0200 Subject: [PATCH 03/27] pkg/tracer: added single span sampler --- ddtrace/tracer/option.go | 16 ++++-- ddtrace/tracer/sampler.go | 39 +++++++++----- ddtrace/tracer/sampler_test.go | 2 +- ddtrace/tracer/single_sampler.go | 88 ++++++++++++++++++++++++++++++++ ddtrace/tracer/spancontext.go | 2 +- ddtrace/tracer/tracer.go | 40 ++++++++++----- 6 files changed, 154 insertions(+), 33 deletions(-) create mode 100644 ddtrace/tracer/single_sampler.go diff --git a/ddtrace/tracer/option.go b/ddtrace/tracer/option.go index f74232ad0d..c789fadbbb 100644 --- a/ddtrace/tracer/option.go +++ b/ddtrace/tracer/option.go @@ -118,9 +118,13 @@ type config struct { // statsd is used for tracking metrics associated with the runtime and the tracer. statsd statsdClient - // samplingRules contains user-defined rules determine the sampling rate to apply - // to spans. - samplingRules []SamplingRule + // traceSamplingRules contains user-defined rules determine the sampling rate to apply + // to trace spans. + traceSamplingRules []SamplingRule + + // spanSamplingRules contains user-defined rules determine the sampling rate to apply + // to single spans, regardless of the trace sampling decision. + spanSamplingRules []SamplingRule // tickChan specifies a channel which will receive the time every time the tracer must flush. // It defaults to time.Ticker; replaced in tests. @@ -654,11 +658,13 @@ func WithDogstatsdAddress(addr string) StartOption { } } -// WithSamplingRules specifies the sampling rates to apply to spans based on the +// WithSamplingRules specifies the sampling rates to apply to trace spans based on the // provided rules. +// todo(shevchenko): there might be a need to add an analogous option for +// single span sampling rules func WithSamplingRules(rules []SamplingRule) StartOption { return func(cfg *config) { - cfg.samplingRules = rules + cfg.traceSamplingRules = rules } } diff --git a/ddtrace/tracer/sampler.go b/ddtrace/tracer/sampler.go index b0ee284ea5..0e624a66a1 100644 --- a/ddtrace/tracer/sampler.go +++ b/ddtrace/tracer/sampler.go @@ -196,18 +196,32 @@ func samplingRulesFromEnv() ([]SamplingRule, error) { if err != nil { errs = append(errs, err.Error()) } - traceRules, err := processSamplingRulesFromEnv([]byte(os.Getenv("DD_TRACE_SAMPLING_RULES")), TraceSamplingRuleType) + traceRules, err := traceSamplingRulesFromEnv() if err != nil { errs = append(errs, err.Error()) } - - rules := append(spanRules, traceRules...) + rules := []SamplingRule{} + if traceRules != nil { + rules = traceRules + } + if spanRules != nil { + rules = append(rules, spanRules...) + } if len(errs) != 0 { return rules, fmt.Errorf("%s", strings.Join(errs, "\n\t")) } return rules, nil } +// traceSamplingRulesFromEnv parses sampling rules from the DD_TRACE_SAMPLING_RULES environment variable. +func traceSamplingRulesFromEnv() ([]SamplingRule, error) { + rulesFromEnv := os.Getenv("DD_TRACE_SAMPLING_RULES") + if rulesFromEnv == "" { + return nil, nil + } + return processSamplingRulesFromEnv([]byte(rulesFromEnv), TraceSamplingRuleType) +} + // spanSamplingRulesFromEnv parses sampling rules from the DD_SPAN_SAMPLING_RULES and // DD_SPAN_SAMPLING_RULES_FILE environment variables. func spanSamplingRulesFromEnv() ([]SamplingRule, error) { @@ -286,6 +300,7 @@ func processSamplingRulesFromEnv(b []byte, ruleType SamplingRuleType) ([]Samplin Rate: rate, MaxPerSecond: v.MaxPerSecond, Type: SingleSpanSamplingType, + limiter: newSingleSpanRateLimiter(v.MaxPerSecond), }) continue } @@ -369,13 +384,6 @@ func (rs *rulesSampler) apply(span *span) bool { if rule.match(span) { matched = true rate = rule.Rate - if rule.Type == SingleSpanSamplingType { - span.setMetric(ext.SpanSamplingMechanism, ext.SingleSpanSamplingMechanism) - span.setMetric(ext.SingleSpanSamplingRuleRate, rule.Rate) - if rule.MaxPerSecond != 0 { - span.setMetric(ext.SingleSpanSamplingMPS, rule.MaxPerSecond) - } - } break } } @@ -433,6 +441,7 @@ type SamplingRule struct { exactService string exactName string + limiter *rateLimiter } // ServiceRule returns a SamplingRule that applies the provided sampling rate @@ -488,9 +497,10 @@ func (sr *SamplingRule) match(s *span) bool { // MarshalJSON implements the json.Marshaler interface. func (sr *SamplingRule) MarshalJSON() ([]byte, error) { s := struct { - Service string `json:"service"` - Name string `json:"name"` - Rate float64 `json:"sample_rate"` + Service string `json:"service"` + Name string `json:"name"` + Rate float64 `json:"sample_rate"` + MaxPerSecond *float64 `json:"max_per_second,omitempty"` }{} if sr.exactService != "" { s.Service = sr.exactService @@ -503,6 +513,9 @@ func (sr *SamplingRule) MarshalJSON() ([]byte, error) { s.Name = fmt.Sprintf("%s", sr.Name) } s.Rate = sr.Rate + if sr.MaxPerSecond != 0 { + s.MaxPerSecond = &sr.MaxPerSecond + } return json.Marshal(&s) } diff --git a/ddtrace/tracer/sampler_test.go b/ddtrace/tracer/sampler_test.go index 3b22080495..6a74d7d3b5 100644 --- a/ddtrace/tracer/sampler_test.go +++ b/ddtrace/tracer/sampler_test.go @@ -434,7 +434,7 @@ func TestRulesSampler(t *testing.T) { rules, _ := samplingRulesFromEnv() assert := assert.New(t) - rs := newRulesSampler(rules) + rs := newSingleSpanRulesSampler(rules) span := makeSpan(tt.spanName, tt.spanSrv) result := rs.apply(span) diff --git a/ddtrace/tracer/single_sampler.go b/ddtrace/tracer/single_sampler.go new file mode 100644 index 0000000000..f8528ad466 --- /dev/null +++ b/ddtrace/tracer/single_sampler.go @@ -0,0 +1,88 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016 Datadog, Inc. + +package tracer + +import ( + "golang.org/x/time/rate" + "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ext" + "gopkg.in/DataDog/dd-trace-go.v1/internal/samplernames" + "math" + "time" +) + +// singleSpanRulesSampler allows a user-defined list of rules to apply to spans +// to sample single spans. +// These rules match based on the span's Service and Name. If empty value is supplied +// to either Service or Name field, it will default to "*", allow all +// When making a sampling decision, the rules are checked in order until +// a match is found. +// If a match is found, the rate from that rule is used. +// If no match is found, no changes or further sampling is applied to the spans. +// The rate is used to determine if the span should be sampled, but an upper +// limit can be defined using the max_per_second field when supplying the rule. +// If such value is absent in the rule, the default is allow all. +// Its value is the max number of spans to sample per second. +// Spans that matched the rules but exceeded the rate limit are not sampled. +type singleSpanRulesSampler struct { + rules []SamplingRule // the rules to match spans with +} + +// newRulesSampler configures a *rulesSampler instance using the given set of rules. +// Invalid rules or environment variable values are tolerated, by logging warnings and then ignoring them. +func newSingleSpanRulesSampler(rules []SamplingRule) *singleSpanRulesSampler { + return &singleSpanRulesSampler{ + rules: rules, + } +} + +// apply uses the sampling rules to determine the sampling rate for the +// provided span. If the rules don't match, then it returns false and the span is not +// modified. +func (rs *singleSpanRulesSampler) apply(span *span) bool { + var matched bool + for _, rule := range rs.rules { + if rule.match(span) { + matched = true + rs.applyRate(span, rule, rule.Rate, time.Now()) + break + } + } + return matched +} + +func (rs *singleSpanRulesSampler) applyRate(span *span, rule SamplingRule, rate float64, now time.Time) { + span.SetTag(keyRulesSamplerAppliedRate, rate) + if !sampledByRate(span.SpanID, rate) { + span.setSamplingPriority(ext.PriorityUserReject, samplernames.RuleRate, rate) + return + } + + sampled := true + if rule.limiter != nil { + sampled, rate = rule.limiter.allowOne(now) + if !sampled { + return + } + } + span.setMetric(ext.SpanSamplingMechanism, ext.SingleSpanSamplingMechanism) + span.setMetric(ext.SingleSpanSamplingRuleRate, rate) + if rule.MaxPerSecond != 0 { + span.setMetric(ext.SingleSpanSamplingMPS, rule.MaxPerSecond) + } +} + +// newRateLimiter returns a rate limiter which restricts the number of single spans sampled per second. +// This defaults to infinite, allow all behaviour. The MaxPerSecond value of the rule may override the default. +func newSingleSpanRateLimiter(mps float64) *rateLimiter { + limit := math.MaxFloat64 + if mps > 0 { + limit = mps + } + return &rateLimiter{ + limiter: rate.NewLimiter(rate.Limit(limit), int(math.Ceil(limit))), + prevTime: time.Now(), + } +} diff --git a/ddtrace/tracer/spancontext.go b/ddtrace/tracer/spancontext.go index 0349ef1618..ab32f76b23 100644 --- a/ddtrace/tracer/spancontext.go +++ b/ddtrace/tracer/spancontext.go @@ -262,7 +262,7 @@ func (t *trace) push(sp *span) { } } -// finishedOne aknowledges that another span in the trace has finished, and checks +// finishedOne acknowledges that another span in the trace has finished, and checks // if the trace is complete, in which case it calls the onFinish function. It uses // the given priority, if non-nil, to mark the root span. func (t *trace) finishedOne(s *span) { diff --git a/ddtrace/tracer/tracer.go b/ddtrace/tracer/tracer.go index 7edeafd3e8..bfd90d8ab9 100644 --- a/ddtrace/tracer/tracer.go +++ b/ddtrace/tracer/tracer.go @@ -79,6 +79,12 @@ type tracer struct { // or operation name. rulesSampling *rulesSampler + // singleSpanRulesSampling holds an instance of the single span rules sampler. These are user-defined + // rules for applying a sampling rate to spans that match the designated service + // or operation name. Such spans are sampled if trace sampling decision is 'drop' and + // may be sent separately + singleSpanRulesSampling *singleSpanRulesSampler + // obfuscator holds the obfuscator used to obfuscate resources in aggregated stats. // obfuscator may be nil if disabled. obfuscator *obfuscate.Obfuscator @@ -176,12 +182,19 @@ const payloadQueueSize = 1000 func newUnstartedTracer(opts ...StartOption) *tracer { c := newConfig(opts...) - envRules, err := samplingRulesFromEnv() + traceRules, err := traceSamplingRulesFromEnv() + if err != nil { + log.Warn("DIAGNOSTICS Error(s) parsing DD_TRACE_SAMPLING_RULES: found errors: %s", err) + } + if traceRules != nil { + c.traceSamplingRules = traceRules + } + spanRules, err := spanSamplingRulesFromEnv() if err != nil { - log.Warn("DIAGNOSTICS Error(s) parsing sampling rules: found errors: %s", err) + log.Warn("DIAGNOSTICS Error(s) parsing DD_SPAN_SAMPLING_RULES: found errors: %s", err) } - if envRules != nil { - c.samplingRules = envRules + if spanRules != nil { + c.spanSamplingRules = spanRules } sampler := newPrioritySampler() var writer traceWriter @@ -191,15 +204,16 @@ func newUnstartedTracer(opts ...StartOption) *tracer { writer = newAgentTraceWriter(c, sampler) } t := &tracer{ - config: c, - traceWriter: writer, - out: make(chan []*span, payloadQueueSize), - stop: make(chan struct{}), - flush: make(chan chan<- struct{}), - rulesSampling: newRulesSampler(c.samplingRules), - prioritySampling: sampler, - pid: strconv.Itoa(os.Getpid()), - stats: newConcentrator(c, defaultStatsBucketSize), + config: c, + traceWriter: writer, + out: make(chan []*span, payloadQueueSize), + stop: make(chan struct{}), + flush: make(chan chan<- struct{}), + rulesSampling: newRulesSampler(c.traceSamplingRules), + singleSpanRulesSampling: newSingleSpanRulesSampler(c.spanSamplingRules), + prioritySampling: sampler, + pid: strconv.Itoa(os.Getpid()), + stats: newConcentrator(c, defaultStatsBucketSize), obfuscator: obfuscate.NewObfuscator(obfuscate.Config{ SQL: obfuscate.SQLConfig{ TableNames: c.agent.HasFlag("table_names"), From d056ab198f158dde3c3d1e377a5904f1107777e7 Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Wed, 29 Jun 2022 17:19:41 +0200 Subject: [PATCH 04/27] pkg/tracer: added single span sampling at trace finish --- ddtrace/tracer/single_sampler.go | 8 +++++--- ddtrace/tracer/spancontext.go | 3 +++ ddtrace/tracer/tracer_test.go | 22 ++++++++++++++++++++++ 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/ddtrace/tracer/single_sampler.go b/ddtrace/tracer/single_sampler.go index f8528ad466..81c8fdd4c3 100644 --- a/ddtrace/tracer/single_sampler.go +++ b/ddtrace/tracer/single_sampler.go @@ -6,11 +6,13 @@ package tracer import ( + "math" + "time" + "golang.org/x/time/rate" + "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ext" "gopkg.in/DataDog/dd-trace-go.v1/internal/samplernames" - "math" - "time" ) // singleSpanRulesSampler allows a user-defined list of rules to apply to spans @@ -54,7 +56,7 @@ func (rs *singleSpanRulesSampler) apply(span *span) bool { } func (rs *singleSpanRulesSampler) applyRate(span *span, rule SamplingRule, rate float64, now time.Time) { - span.SetTag(keyRulesSamplerAppliedRate, rate) + span.setMetric(keyRulesSamplerAppliedRate, rate) if !sampledByRate(span.SpanID, rate) { span.setSamplingPriority(ext.PriorityUserReject, samplernames.RuleRate, rate) return diff --git a/ddtrace/tracer/spancontext.go b/ddtrace/tracer/spancontext.go index ab32f76b23..edd31ac698 100644 --- a/ddtrace/tracer/spancontext.go +++ b/ddtrace/tracer/spancontext.go @@ -304,6 +304,9 @@ func (t *trace) finishedOne(s *span) { if !ok { return } + for i := range t.spans { + tr.singleSpanRulesSampling.apply(t.spans[i]) + } // we have a tracer that can receive completed traces. atomic.AddInt64(&tr.spansFinished, int64(len(t.spans))) sd := samplingDecision(atomic.LoadInt64((*int64)(&t.samplingDecision))) diff --git a/ddtrace/tracer/tracer_test.go b/ddtrace/tracer/tracer_test.go index 09bf702217..c014f42776 100644 --- a/ddtrace/tracer/tracer_test.go +++ b/ddtrace/tracer/tracer_test.go @@ -367,6 +367,28 @@ func TestSamplingDecision(t *testing.T) { assert.Equal(t, "", span.context.trace.tags[keyUpstreamServices]) assert.Equal(t, decisionDrop, span.context.trace.samplingDecision) }) + t.Run("client_dropped", func(t *testing.T) { + os.Setenv("DD_SPAN_SAMPLING_RULES", `[{"service": "test_*","name":"*_1", "sample_rate": 1.0, "max_per_second": 15.0}]`) + defer os.Unsetenv("DD_SPAN_SAMPLING_RULES") + tracer, _, _, stop := startTestTracer(t) + defer stop() + tracer.config.agent.DropP0s = true + tracer.config.sampler = NewRateSampler(0) + tracer.prioritySampling.defaultRate = 0 + tracer.config.serviceName = "test_service" + span := tracer.StartSpan("name_1").(*span) + child := tracer.StartSpan("name_2", ChildOf(span.context)) + child.Finish() + span.Finish() + assert.Equal(t, float64(ext.PriorityAutoReject), span.Metrics[keySamplingPriority]) + // this trace won't be sent to the agent, + // therefore not necessary to populate keyUpstreamServices + assert.Equal(t, "", span.context.trace.tags[keyUpstreamServices]) + assert.Equal(t, decisionDrop, span.context.trace.samplingDecision) + assert.Equal(t, 8.0, span.Metrics[ext.SpanSamplingMechanism]) + assert.Equal(t, 1.0, span.Metrics[ext.SingleSpanSamplingRuleRate]) + assert.Equal(t, 15.0, span.Metrics[ext.SingleSpanSamplingMPS]) + }) } func TestTracerRuntimeMetrics(t *testing.T) { From bd20964575c07f00d616972129c540f47b5dcd81 Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Wed, 29 Jun 2022 18:05:24 +0200 Subject: [PATCH 05/27] pkg/tracer: filter out single spans to send to the agent if the decision isn't keep --- ddtrace/tracer/sampler.go | 4 ++++ ddtrace/tracer/spancontext.go | 12 +++++++++++- ddtrace/tracer/tracer_test.go | 3 ++- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/ddtrace/tracer/sampler.go b/ddtrace/tracer/sampler.go index 0e624a66a1..673db0b59c 100644 --- a/ddtrace/tracer/sampler.go +++ b/ddtrace/tracer/sampler.go @@ -422,10 +422,14 @@ func (rs *rulesSampler) limit() (float64, bool) { return math.NaN(), false } +// SamplingRuleType represents a sampling rule type spans are matched against. type SamplingRuleType int const ( + // TraceSamplingRuleType rules match spans and influence sampling decision. TraceSamplingRuleType = iota + // SingleSpanSamplingType rules are matched against all spans regardless of + // sampling decision. Such spans might be sent separately. SingleSpanSamplingType ) diff --git a/ddtrace/tracer/spancontext.go b/ddtrace/tracer/spancontext.go index edd31ac698..e9e2e335b4 100644 --- a/ddtrace/tracer/spancontext.go +++ b/ddtrace/tracer/spancontext.go @@ -315,7 +315,17 @@ func (t *trace) finishedOne(s *span) { atomic.AddUint64(&tr.droppedP0Spans, uint64(len(t.spans))) atomic.AddUint64(&tr.droppedP0Traces, 1) } - return + //if trace sampling decision is drop, we still want to send single spans + singleSpans := []*span{} + for i, span := range t.spans { + if _, ok := span.Metrics[ext.SpanSamplingMechanism]; ok { + singleSpans = append(singleSpans, t.spans[i]) + } + } + t.spans = singleSpans + if len(t.spans) == 0 { + return + } } tr.pushTrace(t.spans) } diff --git a/ddtrace/tracer/tracer_test.go b/ddtrace/tracer/tracer_test.go index c014f42776..e5719671b3 100644 --- a/ddtrace/tracer/tracer_test.go +++ b/ddtrace/tracer/tracer_test.go @@ -367,7 +367,8 @@ func TestSamplingDecision(t *testing.T) { assert.Equal(t, "", span.context.trace.tags[keyUpstreamServices]) assert.Equal(t, decisionDrop, span.context.trace.samplingDecision) }) - t.Run("client_dropped", func(t *testing.T) { + //todo rename + t.Run("client_dropped_with_single_spans", func(t *testing.T) { os.Setenv("DD_SPAN_SAMPLING_RULES", `[{"service": "test_*","name":"*_1", "sample_rate": 1.0, "max_per_second": 15.0}]`) defer os.Unsetenv("DD_SPAN_SAMPLING_RULES") tracer, _, _, stop := startTestTracer(t) From 408f38d27c1637083b5b2b1951cfa9ef4e426b8a Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Thu, 30 Jun 2022 16:58:29 +0200 Subject: [PATCH 06/27] updated according to comments --- ddtrace/ext/tags.go | 13 ----- ddtrace/tracer/log.go | 12 ++++- ddtrace/tracer/option.go | 12 +++-- ddtrace/tracer/sampler.go | 61 ++++++----------------- ddtrace/tracer/sampler_test.go | 83 ++++++++++++++++++++++++++------ ddtrace/tracer/single_sampler.go | 53 +++++++++++++++----- ddtrace/tracer/spancontext.go | 8 +-- ddtrace/tracer/tracer_test.go | 7 ++- ddtrace/tracer/util.go | 13 ----- ddtrace/tracer/util_test.go | 51 -------------------- 10 files changed, 147 insertions(+), 166 deletions(-) diff --git a/ddtrace/ext/tags.go b/ddtrace/ext/tags.go index 5ceee8e4ed..e7337d7ae7 100644 --- a/ddtrace/ext/tags.go +++ b/ddtrace/ext/tags.go @@ -79,19 +79,6 @@ const ( // Search & Analytics event. AnalyticsEvent = "analytics.event" - // SpanSamplingMechanism is the metric key holding a span sampling rule that a span was kept on - SpanSamplingMechanism = "_dd.span_sampling.mechanism" - - // SingleSpanSamplingMechanism is a constant reserved to indicate that a span was kept on account of a span sampling rule. - SingleSpanSamplingMechanism = 8 - - // SingleSpanSamplingRuleRate is the metric key containing the configured sampling probability for the span sampling rule. - SingleSpanSamplingRuleRate = "_dd.span_sampling.rule_rate" - - // SingleSpanSamplingMPS is the metric key contains the configured limit for the span sampling rule that the span matched. - // // If there is no configured limit, then this tag is omitted. - SingleSpanSamplingMPS = "_dd.span_sampling.max_per_second" - // ManualKeep is a tag which specifies that the trace to which this span // belongs to should be kept when set to true. ManualKeep = "manual.keep" diff --git a/ddtrace/tracer/log.go b/ddtrace/tracer/log.go index c696b9109b..9bc30351e1 100644 --- a/ddtrace/tracer/log.go +++ b/ddtrace/tracer/log.go @@ -12,6 +12,7 @@ import ( "math" "net/http" "runtime" + "strings" "time" "gopkg.in/DataDog/dd-trace-go.v1/internal/appsec" @@ -106,8 +107,15 @@ func logStartup(t *tracer) { AgentFeatures: t.config.agent, AppSec: appsec.Enabled(), } - if _, err := samplingRulesFromEnv(); err != nil { - info.SamplingRulesError = fmt.Sprintf("%s", err) + var errs []string + if _, err := traceSamplingRulesFromEnv(); err != nil { + errs = append(errs, err.Error()) + } + if _, err := spanSamplingRulesFromEnv(); err != nil { + errs = append(errs, err.Error()) + } + if errs != nil { + info.SamplingRulesError = fmt.Sprintf("%s", strings.Join(errs, "\n\t")) } if limit, ok := t.rulesSampling.limit(); ok { diff --git a/ddtrace/tracer/option.go b/ddtrace/tracer/option.go index c789fadbbb..9f82ce05a2 100644 --- a/ddtrace/tracer/option.go +++ b/ddtrace/tracer/option.go @@ -118,12 +118,14 @@ type config struct { // statsd is used for tracking metrics associated with the runtime and the tracer. statsd statsdClient - // traceSamplingRules contains user-defined rules determine the sampling rate to apply - // to trace spans. + // traceSamplingRules contains user-defined rules to determine the sampling rate to apply + // to trace spans. If a span matches a rule, it will impact the trace sampling decision. traceSamplingRules []SamplingRule - // spanSamplingRules contains user-defined rules determine the sampling rate to apply - // to single spans, regardless of the trace sampling decision. + // spanSamplingRules contains user-defined rules to determine the sampling rate to apply + // to single spans. If a span matches a rule, it will NOT impact the trace sampling decision. + // In the case that a trace is dropped and thus not sent to the Agent, spans kept on account + // of matching span sampling rules must be conveyed separately. spanSamplingRules []SamplingRule // tickChan specifies a channel which will receive the time every time the tracer must flush. @@ -660,7 +662,7 @@ func WithDogstatsdAddress(addr string) StartOption { // WithSamplingRules specifies the sampling rates to apply to trace spans based on the // provided rules. -// todo(shevchenko): there might be a need to add an analogous option for +// TODO(shevchenko): there might be a need to add an analogous option for // single span sampling rules func WithSamplingRules(rules []SamplingRule) StartOption { return func(cfg *config) { diff --git a/ddtrace/tracer/sampler.go b/ddtrace/tracer/sampler.go index 673db0b59c..72820de28d 100644 --- a/ddtrace/tracer/sampler.go +++ b/ddtrace/tracer/sampler.go @@ -188,66 +188,46 @@ func newRulesSampler(rules []SamplingRule) *rulesSampler { } } -// samplingRulesFromEnv parses sampling rules from the DD_TRACE_SAMPLING_RULES environment variable -// and combines them with spanSamplingRules -func samplingRulesFromEnv() ([]SamplingRule, error) { - errs := []string{} - spanRules, err := spanSamplingRulesFromEnv() - if err != nil { - errs = append(errs, err.Error()) - } - traceRules, err := traceSamplingRulesFromEnv() - if err != nil { - errs = append(errs, err.Error()) - } - rules := []SamplingRule{} - if traceRules != nil { - rules = traceRules - } - if spanRules != nil { - rules = append(rules, spanRules...) - } - if len(errs) != 0 { - return rules, fmt.Errorf("%s", strings.Join(errs, "\n\t")) - } - return rules, nil -} - -// traceSamplingRulesFromEnv parses sampling rules from the DD_TRACE_SAMPLING_RULES environment variable. +// traceSamplingRulesFromEnv parses sampling rules from the DD_TRACE_SAMPLING_RULES environment variable func traceSamplingRulesFromEnv() ([]SamplingRule, error) { rulesFromEnv := os.Getenv("DD_TRACE_SAMPLING_RULES") if rulesFromEnv == "" { return nil, nil } - return processSamplingRulesFromEnv([]byte(rulesFromEnv), TraceSamplingRuleType) + return processSamplingRules([]byte(rulesFromEnv), false) } // spanSamplingRulesFromEnv parses sampling rules from the DD_SPAN_SAMPLING_RULES and // DD_SPAN_SAMPLING_RULES_FILE environment variables. func spanSamplingRulesFromEnv() ([]SamplingRule, error) { - errs := []string{} - rules, err := processSamplingRulesFromEnv([]byte(os.Getenv("DD_SPAN_SAMPLING_RULES")), SingleSpanSamplingType) + var errs []string + rulesFromEnv := os.Getenv("DD_SPAN_SAMPLING_RULES") + if rulesFromEnv == "" { + return nil, nil + } + rules, err := processSamplingRules([]byte(rulesFromEnv), true) if err != nil { errs = append(errs, err.Error()) } + rulesFile := os.Getenv("DD_SPAN_SAMPLING_RULES_FILE") if len(rules) != 0 { - if os.Getenv("DD_SPAN_SAMPLING_RULES_FILE") != "" { + if rulesFile != "" { log.Warn("DIAGNOSTICS Error(s): DD_SPAN_SAMPLING_RULES is available and will take precedence over DD_SPAN_SAMPLING_RULES_FILE") } return rules, err } - rulesFile := os.Getenv("DD_SPAN_SAMPLING_RULES_FILE") if rulesFile == "" { return rules, err } rulesFromEnvFile, err := os.ReadFile(rulesFile) if err != nil { log.Warn("Couldn't read file from DD_SPAN_SAMPLING_RULES_FILE") + return nil, err } - return processSamplingRulesFromEnv(rulesFromEnvFile, SingleSpanSamplingType) + return processSamplingRules(rulesFromEnvFile, true) } -func processSamplingRulesFromEnv(b []byte, ruleType SamplingRuleType) ([]SamplingRule, error) { +func processSamplingRules(b []byte, isIndividualSamplingRule bool) ([]SamplingRule, error) { if len(b) == 0 { return nil, nil } @@ -277,7 +257,7 @@ func processSamplingRulesFromEnv(b []byte, ruleType SamplingRuleType) ([]Samplin log.Warn("at index %d: ignoring rule %+v: rate is out of [0.0, 1.0] range", i, v) continue } - if ruleType == SingleSpanSamplingType { + if isIndividualSamplingRule { if v.Service == "" { v.Service = "*" } @@ -299,7 +279,6 @@ func processSamplingRulesFromEnv(b []byte, ruleType SamplingRuleType) ([]Samplin Name: opGlob, Rate: rate, MaxPerSecond: v.MaxPerSecond, - Type: SingleSpanSamplingType, limiter: newSingleSpanRateLimiter(v.MaxPerSecond), }) continue @@ -422,17 +401,6 @@ func (rs *rulesSampler) limit() (float64, bool) { return math.NaN(), false } -// SamplingRuleType represents a sampling rule type spans are matched against. -type SamplingRuleType int - -const ( - // TraceSamplingRuleType rules match spans and influence sampling decision. - TraceSamplingRuleType = iota - // SingleSpanSamplingType rules are matched against all spans regardless of - // sampling decision. Such spans might be sent separately. - SingleSpanSamplingType -) - // SamplingRule is used for applying sampling rates to spans that match // the service name, operation name or both. // For basic usage, consider using the helper functions ServiceRule, NameRule, etc. @@ -441,7 +409,6 @@ type SamplingRule struct { Name *regexp.Regexp Rate float64 MaxPerSecond float64 - Type SamplingRuleType exactService string exactName string diff --git a/ddtrace/tracer/sampler_test.go b/ddtrace/tracer/sampler_test.go index 6a74d7d3b5..0402648485 100644 --- a/ddtrace/tracer/sampler_test.go +++ b/ddtrace/tracer/sampler_test.go @@ -260,7 +260,7 @@ func TestRuleEnvVars(t *testing.T) { } { t.Run("", func(t *testing.T) { os.Setenv("DD_TRACE_SAMPLING_RULES", tt.value) - rules, err := samplingRulesFromEnv() + rules, err := traceSamplingRulesFromEnv() if tt.errStr == "" { assert.NoError(err) } else { @@ -270,6 +270,7 @@ func TestRuleEnvVars(t *testing.T) { }) } }) + t.Run("span-sampling-rules", func(t *testing.T) { assert := assert.New(t) defer os.Unsetenv("DD_SPAN_SAMPLING_RULES") @@ -299,7 +300,7 @@ func TestRuleEnvVars(t *testing.T) { } { t.Run(fmt.Sprintf("%v", i), func(t *testing.T) { os.Setenv("DD_SPAN_SAMPLING_RULES", tt.value) - rules, err := samplingRulesFromEnv() + rules, err := spanSamplingRulesFromEnv() if tt.errStr == "" { assert.NoError(err) } else { @@ -340,7 +341,7 @@ func TestRuleEnvVars(t *testing.T) { } { t.Run(fmt.Sprintf("%v", i), func(t *testing.T) { os.Setenv("DD_SPAN_SAMPLING_RULES", tt.rules) - rules, err := samplingRulesFromEnv() + rules, err := spanSamplingRulesFromEnv() assert.NoError(err) assert.Equal(tt.srvRegex, rules[0].Service.String()) assert.Equal(tt.nameRegex, rules[0].Name.String()) @@ -431,7 +432,7 @@ func TestRulesSampler(t *testing.T) { } { t.Run("", func(t *testing.T) { os.Setenv("DD_SPAN_SAMPLING_RULES", tt.rules) - rules, _ := samplingRulesFromEnv() + rules, _ := spanSamplingRulesFromEnv() assert := assert.New(t) rs := newSingleSpanRulesSampler(rules) @@ -439,12 +440,13 @@ func TestRulesSampler(t *testing.T) { span := makeSpan(tt.spanName, tt.spanSrv) result := rs.apply(span) assert.True(result) - assert.Contains(span.Metrics, ext.SpanSamplingMechanism) - assert.Contains(span.Metrics, ext.SingleSpanSamplingRuleRate) - assert.Contains(span.Metrics, ext.SingleSpanSamplingMPS) + assert.Contains(span.Metrics, spanSamplingMechanism) + assert.Contains(span.Metrics, singleSpanSamplingRuleRate) + assert.Contains(span.Metrics, singleSpanSamplingMPS) }) } }) + t.Run("not-matching-span-rules", func(t *testing.T) { defer os.Unsetenv("DD_SPAN_SAMPLING_RULES") for _, tt := range []struct { @@ -471,7 +473,7 @@ func TestRulesSampler(t *testing.T) { } { t.Run("", func(t *testing.T) { os.Setenv("DD_SPAN_SAMPLING_RULES", tt.rules) - rules, _ := samplingRulesFromEnv() + rules, _ := spanSamplingRulesFromEnv() assert := assert.New(t) rs := newRulesSampler(rules) @@ -479,9 +481,9 @@ func TestRulesSampler(t *testing.T) { span := makeSpan(tt.spanName, tt.spanSrv) result := rs.apply(span) assert.False(result) - assert.NotContains(span.Metrics, ext.SpanSamplingMechanism) - assert.NotContains(span.Metrics, ext.SingleSpanSamplingRuleRate) - assert.NotContains(span.Metrics, ext.SingleSpanSamplingMPS) + assert.NotContains(span.Metrics, spanSamplingMechanism) + assert.NotContains(span.Metrics, singleSpanSamplingRuleRate) + assert.NotContains(span.Metrics, singleSpanSamplingMPS) }) } }) @@ -511,7 +513,7 @@ func TestRulesSampler(t *testing.T) { t.Run("", func(t *testing.T) { os.Setenv("DD_SPAN_SAMPLING_RULES", tt.spanRules) os.Setenv("DD_TRACE_SAMPLING_RULES", tt.traceRules) - rules, _ := samplingRulesFromEnv() + rules, _ := spanSamplingRulesFromEnv() assert := assert.New(t) rs := newRulesSampler(rules) @@ -519,9 +521,9 @@ func TestRulesSampler(t *testing.T) { span := makeSpan(tt.spanName, tt.spanSrv) result := rs.apply(span) assert.True(result) - assert.NotContains(span.Metrics, ext.SpanSamplingMechanism) - assert.NotContains(span.Metrics, ext.SingleSpanSamplingRuleRate) - assert.NotContains(span.Metrics, ext.SingleSpanSamplingMPS) + assert.NotContains(span.Metrics, spanSamplingMechanism) + assert.NotContains(span.Metrics, singleSpanSamplingRuleRate) + assert.NotContains(span.Metrics, singleSpanSamplingMPS) }) } }) @@ -788,3 +790,54 @@ func BenchmarkRulesSampler(b *testing.B) { benchmarkStartSpan(b, tracer) }) } + +func TestGlobMatch(t *testing.T) { + for i, tt := range []struct { + pattern string + input string + shouldMatch bool + }{ + // pattern with * + {"test*", "test", true}, + {"test*", "test-case", true}, + {"test*", "a-test", false}, + {"*test", "a-test", true}, + {"a*case", "acase", true}, + {"a*case", "a-test-case", true}, + {"a*test*case", "a-test-case", true}, + {"a*test*case", "atestcase", true}, + {"a*test*case", "abadcase", false}, + // pattern with ? + {"test?", "test", false}, + {"test?", "test-case", false}, + {"test?", "a-test", false}, + {"?test", "a-test", false}, + {"a?case", "acase", false}, + {"a?case", "a-case", true}, + {"a?test?case", "a-test-case", true}, + {"a?test?case", "a-test--case", false}, + // pattern with ? and * + {"?test*", "atest", true}, + {"?test*", "atestcase", true}, + {"?test*", "testcase", false}, + {"?test*", "testcase", false}, + {"test*case", "testcase", true}, + {"a?test*", "a-test-case", true}, + {"a?test*", "atestcase", false}, + {"a*test?", "a-test-", true}, + {"a*test?", "atestcase", false}, + {"a*test?case", "a--test-case", true}, + {"a*test?case", "a--test--case", false}, + {"a?test*case", "a-testing--case", true}, + {"the?test*case", "the-test-cases", false}, + } { + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + rg, _ := globMatch(tt.pattern) + if tt.shouldMatch { + assert.Regexp(t, rg, tt.input) + } else { + assert.NotRegexp(t, rg, tt.input) + } + }) + } +} diff --git a/ddtrace/tracer/single_sampler.go b/ddtrace/tracer/single_sampler.go index 81c8fdd4c3..39b37a4f73 100644 --- a/ddtrace/tracer/single_sampler.go +++ b/ddtrace/tracer/single_sampler.go @@ -6,7 +6,10 @@ package tracer import ( + "fmt" "math" + "regexp" + "strings" "time" "golang.org/x/time/rate" @@ -18,21 +21,21 @@ import ( // singleSpanRulesSampler allows a user-defined list of rules to apply to spans // to sample single spans. // These rules match based on the span's Service and Name. If empty value is supplied -// to either Service or Name field, it will default to "*", allow all +// to either Service or Name field, it will default to "*", allow all. // When making a sampling decision, the rules are checked in order until // a match is found. // If a match is found, the rate from that rule is used. // If no match is found, no changes or further sampling is applied to the spans. // The rate is used to determine if the span should be sampled, but an upper // limit can be defined using the max_per_second field when supplying the rule. -// If such value is absent in the rule, the default is allow all. +// If max_per_second is absent in the rule, the default is allow all. // Its value is the max number of spans to sample per second. // Spans that matched the rules but exceeded the rate limit are not sampled. type singleSpanRulesSampler struct { rules []SamplingRule // the rules to match spans with } -// newRulesSampler configures a *rulesSampler instance using the given set of rules. +// newSingleSpanRulesSampler configures a *singleSpanRulesSampler instance using the given set of rules. // Invalid rules or environment variable values are tolerated, by logging warnings and then ignoring them. func newSingleSpanRulesSampler(rules []SamplingRule) *singleSpanRulesSampler { return &singleSpanRulesSampler{ @@ -40,19 +43,33 @@ func newSingleSpanRulesSampler(rules []SamplingRule) *singleSpanRulesSampler { } } +const ( + // spanSamplingMechanism specifies the sampling mechanism by which an individual span was sampled + spanSamplingMechanism = "_dd.span_sampling.mechanism" + + // singleSpanSamplingMechanism specifies value reserved to indicate that a span was kept + // on account of a single span sampling rule. + singleSpanSamplingMechanism = 8 + + // singleSpanSamplingRuleRate specifies the configured sampling probability for the single span sampling rule. + singleSpanSamplingRuleRate = "_dd.span_sampling.rule_rate" + + // singleSpanSamplingMPS specifies the configured limit for the single span sampling rule + // that the span matched. If there is no configured limit, then this tag is omitted. + singleSpanSamplingMPS = "_dd.span_sampling.max_per_second" +) + // apply uses the sampling rules to determine the sampling rate for the // provided span. If the rules don't match, then it returns false and the span is not // modified. func (rs *singleSpanRulesSampler) apply(span *span) bool { - var matched bool for _, rule := range rs.rules { if rule.match(span) { - matched = true rs.applyRate(span, rule, rule.Rate, time.Now()) - break + return true } } - return matched + return false } func (rs *singleSpanRulesSampler) applyRate(span *span, rule SamplingRule, rate float64, now time.Time) { @@ -62,21 +79,21 @@ func (rs *singleSpanRulesSampler) applyRate(span *span, rule SamplingRule, rate return } - sampled := true + var sampled bool if rule.limiter != nil { sampled, rate = rule.limiter.allowOne(now) if !sampled { return } } - span.setMetric(ext.SpanSamplingMechanism, ext.SingleSpanSamplingMechanism) - span.setMetric(ext.SingleSpanSamplingRuleRate, rate) + span.setMetric(spanSamplingMechanism, singleSpanSamplingMechanism) + span.setMetric(singleSpanSamplingRuleRate, rate) if rule.MaxPerSecond != 0 { - span.setMetric(ext.SingleSpanSamplingMPS, rule.MaxPerSecond) + span.setMetric(singleSpanSamplingMPS, rule.MaxPerSecond) } } -// newRateLimiter returns a rate limiter which restricts the number of single spans sampled per second. +// newSingleSpanRateLimiter returns a rate limiter which restricts the number of single spans sampled per second. // This defaults to infinite, allow all behaviour. The MaxPerSecond value of the rule may override the default. func newSingleSpanRateLimiter(mps float64) *rateLimiter { limit := math.MaxFloat64 @@ -88,3 +105,15 @@ func newSingleSpanRateLimiter(mps float64) *rateLimiter { prevTime: time.Now(), } } + +// globMatch compiles pattern string into glob format, i.e. regular expressions with only '?' +// and '*' treated as regex metacharacters. +func globMatch(pattern string) (*regexp.Regexp, error) { + // escaping regex characters + pattern = regexp.QuoteMeta(pattern) + // replacing '?' and '*' with regex characters + pattern = strings.Replace(pattern, "\\?", ".", -1) + pattern = strings.Replace(pattern, "\\*", ".*", -1) + //pattern must match an entire string + return regexp.Compile(fmt.Sprintf("^%s$", pattern)) +} diff --git a/ddtrace/tracer/spancontext.go b/ddtrace/tracer/spancontext.go index e9e2e335b4..45ef312aa2 100644 --- a/ddtrace/tracer/spancontext.go +++ b/ddtrace/tracer/spancontext.go @@ -304,8 +304,8 @@ func (t *trace) finishedOne(s *span) { if !ok { return } - for i := range t.spans { - tr.singleSpanRulesSampling.apply(t.spans[i]) + for _, span := range t.spans { + tr.singleSpanRulesSampling.apply(span) } // we have a tracer that can receive completed traces. atomic.AddInt64(&tr.spansFinished, int64(len(t.spans))) @@ -316,9 +316,9 @@ func (t *trace) finishedOne(s *span) { atomic.AddUint64(&tr.droppedP0Traces, 1) } //if trace sampling decision is drop, we still want to send single spans - singleSpans := []*span{} + var singleSpans []*span for i, span := range t.spans { - if _, ok := span.Metrics[ext.SpanSamplingMechanism]; ok { + if _, ok := span.Metrics[spanSamplingMechanism]; ok { singleSpans = append(singleSpans, t.spans[i]) } } diff --git a/ddtrace/tracer/tracer_test.go b/ddtrace/tracer/tracer_test.go index e5719671b3..8da918eaf2 100644 --- a/ddtrace/tracer/tracer_test.go +++ b/ddtrace/tracer/tracer_test.go @@ -367,7 +367,6 @@ func TestSamplingDecision(t *testing.T) { assert.Equal(t, "", span.context.trace.tags[keyUpstreamServices]) assert.Equal(t, decisionDrop, span.context.trace.samplingDecision) }) - //todo rename t.Run("client_dropped_with_single_spans", func(t *testing.T) { os.Setenv("DD_SPAN_SAMPLING_RULES", `[{"service": "test_*","name":"*_1", "sample_rate": 1.0, "max_per_second": 15.0}]`) defer os.Unsetenv("DD_SPAN_SAMPLING_RULES") @@ -386,9 +385,9 @@ func TestSamplingDecision(t *testing.T) { // therefore not necessary to populate keyUpstreamServices assert.Equal(t, "", span.context.trace.tags[keyUpstreamServices]) assert.Equal(t, decisionDrop, span.context.trace.samplingDecision) - assert.Equal(t, 8.0, span.Metrics[ext.SpanSamplingMechanism]) - assert.Equal(t, 1.0, span.Metrics[ext.SingleSpanSamplingRuleRate]) - assert.Equal(t, 15.0, span.Metrics[ext.SingleSpanSamplingMPS]) + assert.Equal(t, 8.0, span.Metrics[spanSamplingMechanism]) + assert.Equal(t, 1.0, span.Metrics[singleSpanSamplingRuleRate]) + assert.Equal(t, 15.0, span.Metrics[singleSpanSamplingMPS]) }) } diff --git a/ddtrace/tracer/util.go b/ddtrace/tracer/util.go index 51684db537..116d0aef4d 100644 --- a/ddtrace/tracer/util.go +++ b/ddtrace/tracer/util.go @@ -8,7 +8,6 @@ package tracer import ( "encoding/base64" "fmt" - "regexp" "strconv" "strings" @@ -130,15 +129,3 @@ var b64 = base64.StdEncoding.WithPadding(base64.NoPadding) func b64Encode(s string) string { return b64.EncodeToString([]byte(s)) } - -// globMatch compiles pattern string into glob format, i.e. regular expressions with only '?' -// and '*' treated as regex metacharacters. -func globMatch(pattern string) (*regexp.Regexp, error) { - // escaping regex characters - pattern = regexp.QuoteMeta(pattern) - // replacing '?' and '*' with regex characters - pattern = strings.Replace(pattern, "\\?", ".", -1) - pattern = strings.Replace(pattern, "\\*", ".*", -1) - //pattern must match an entire string - return regexp.Compile(fmt.Sprintf("^%s$", pattern)) -} diff --git a/ddtrace/tracer/util_test.go b/ddtrace/tracer/util_test.go index 7325b7a29c..42cc0a5f39 100644 --- a/ddtrace/tracer/util_test.go +++ b/ddtrace/tracer/util_test.go @@ -115,54 +115,3 @@ func TestParsePropagatableTraceTags(t *testing.T) { }) } } - -func TestGlobMatch(t *testing.T) { - for i, tt := range []struct { - pattern string - input string - shouldMatch bool - }{ - // pattern with * - {"test*", "test", true}, - {"test*", "test-case", true}, - {"test*", "a-test", false}, - {"*test", "a-test", true}, - {"a*case", "acase", true}, - {"a*case", "a-test-case", true}, - {"a*test*case", "a-test-case", true}, - {"a*test*case", "atestcase", true}, - {"a*test*case", "abadcase", false}, - // pattern with ? - {"test?", "test", false}, - {"test?", "test-case", false}, - {"test?", "a-test", false}, - {"?test", "a-test", false}, - {"a?case", "acase", false}, - {"a?case", "a-case", true}, - {"a?test?case", "a-test-case", true}, - {"a?test?case", "a-test--case", false}, - // pattern with ? and * - {"?test*", "atest", true}, - {"?test*", "atestcase", true}, - {"?test*", "testcase", false}, - {"?test*", "testcase", false}, - {"test*case", "testcase", true}, - {"a?test*", "a-test-case", true}, - {"a?test*", "atestcase", false}, - {"a*test?", "a-test-", true}, - {"a*test?", "atestcase", false}, - {"a*test?case", "a--test-case", true}, - {"a*test?case", "a--test--case", false}, - {"a?test*case", "a-testing--case", true}, - {"the?test*case", "the-test-cases", false}, - } { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { - rg, _ := globMatch(tt.pattern) - if tt.shouldMatch { - assert.Regexp(t, rg, tt.input) - } else { - assert.NotRegexp(t, rg, tt.input) - } - }) - } -} From f3de96c789d20d83b9184ecd646b0523d274fd66 Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Thu, 30 Jun 2022 17:02:36 +0200 Subject: [PATCH 07/27] removed outdated test --- ddtrace/tracer/sampler_test.go | 40 ---------------------------------- 1 file changed, 40 deletions(-) diff --git a/ddtrace/tracer/sampler_test.go b/ddtrace/tracer/sampler_test.go index 0402648485..b9f62554a9 100644 --- a/ddtrace/tracer/sampler_test.go +++ b/ddtrace/tracer/sampler_test.go @@ -487,46 +487,6 @@ func TestRulesSampler(t *testing.T) { }) } }) - t.Run("not-matching-span-rules/matching-trace-rules", func(t *testing.T) { - defer os.Unsetenv("DD_SPAN_SAMPLING_RULES") - defer os.Unsetenv("DD_TRACE_SAMPLING_RULES") - for _, tt := range []struct { - spanRules string - traceRules string - spanSrv string - spanName string - }{ - { - //first matching rule takes precedence - spanRules: `[{"name": "abcdef?", "sample_rate": 1.0}]`, - traceRules: `[{"name": "abcdef", "sample_rate": 1.0},{"name": "abcd?", "sample_rate": 1.0}]`, - spanName: "abcdef", - spanSrv: "test-service", - }, - { - spanRules: `[{"service": "abcde", "sample_rate": 1.0}]`, - traceRules: `[{"service": "abcd", "sample_rate": 1.0}]`, - spanSrv: "abcd", - spanName: "abcde", - }, - } { - t.Run("", func(t *testing.T) { - os.Setenv("DD_SPAN_SAMPLING_RULES", tt.spanRules) - os.Setenv("DD_TRACE_SAMPLING_RULES", tt.traceRules) - rules, _ := spanSamplingRulesFromEnv() - - assert := assert.New(t) - rs := newRulesSampler(rules) - - span := makeSpan(tt.spanName, tt.spanSrv) - result := rs.apply(span) - assert.True(result) - assert.NotContains(span.Metrics, spanSamplingMechanism) - assert.NotContains(span.Metrics, singleSpanSamplingRuleRate) - assert.NotContains(span.Metrics, singleSpanSamplingMPS) - }) - } - }) t.Run("default-rate", func(t *testing.T) { ruleSets := [][]SamplingRule{ From a975fdec0eb322d9c8e6a60b116d8549bed3acbb Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Tue, 5 Jul 2022 16:51:36 +0200 Subject: [PATCH 08/27] refactored according to comments Moved traceSampler and singleSpanSampler to a new struct; moved traceSampler code to a separate file; updated sampler to include rules from WithSamplingRules option where both types of rules can be set; --- ddtrace/tracer/log.go | 19 +--- ddtrace/tracer/log_test.go | 2 +- ddtrace/tracer/option.go | 24 ++--- ddtrace/tracer/rules_sampler.go | 132 +++++++++++++++++++++++ ddtrace/tracer/sampler.go | 184 ++++++++++++-------------------- ddtrace/tracer/sampler_test.go | 46 ++++---- ddtrace/tracer/spancontext.go | 11 +- ddtrace/tracer/tracer.go | 51 ++++----- 8 files changed, 262 insertions(+), 207 deletions(-) create mode 100644 ddtrace/tracer/rules_sampler.go diff --git a/ddtrace/tracer/log.go b/ddtrace/tracer/log.go index 9bc30351e1..1a6085d969 100644 --- a/ddtrace/tracer/log.go +++ b/ddtrace/tracer/log.go @@ -12,7 +12,6 @@ import ( "math" "net/http" "runtime" - "strings" "time" "gopkg.in/DataDog/dd-trace-go.v1/internal/appsec" @@ -91,9 +90,9 @@ func logStartup(t *tracer) { AgentURL: t.config.transport.endpoint(), Debug: t.config.debug, AnalyticsEnabled: !math.IsNaN(globalconfig.AnalyticsRate()), - SampleRate: fmt.Sprintf("%f", t.rulesSampling.globalRate), + SampleRate: fmt.Sprintf("%f", t.rulesSampling.traceRulesSampler.globalRate), SampleRateLimit: "disabled", - SamplingRules: t.rulesSampling.rules, + SamplingRules: t.config.samplingRules, ServiceMappings: t.config.serviceMappings, Tags: tags, RuntimeMetricsEnabled: t.config.runtimeMetrics, @@ -107,18 +106,10 @@ func logStartup(t *tracer) { AgentFeatures: t.config.agent, AppSec: appsec.Enabled(), } - var errs []string - if _, err := traceSamplingRulesFromEnv(); err != nil { - errs = append(errs, err.Error()) + if _, err := samplingRulesFromEnv(); err != nil { + info.SamplingRulesError = fmt.Sprintf("%s", err) } - if _, err := spanSamplingRulesFromEnv(); err != nil { - errs = append(errs, err.Error()) - } - if errs != nil { - info.SamplingRulesError = fmt.Sprintf("%s", strings.Join(errs, "\n\t")) - } - - if limit, ok := t.rulesSampling.limit(); ok { + if limit, ok := t.rulesSampling.traceRulesSampler.limit(); ok { info.SampleRateLimit = fmt.Sprintf("%v", limit) } if !t.config.logToStdout { diff --git a/ddtrace/tracer/log_test.go b/ddtrace/tracer/log_test.go index 3962eeaa72..5469301177 100644 --- a/ddtrace/tracer/log_test.go +++ b/ddtrace/tracer/log_test.go @@ -100,7 +100,7 @@ func TestStartupLog(t *testing.T) { tp.Reset() logStartup(tracer) assert.Len(tp.Lines(), 2) - assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sample_rate_limit":"100","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234}\],"sampling_rules_error":"found errors:\\n\\tat index 1: rate not provided","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) + assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sample_rate_limit":"100","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234}\],"sampling_rules_error":"\\n\\tat index 1: rate not provided","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) }) t.Run("lambda", func(t *testing.T) { diff --git a/ddtrace/tracer/option.go b/ddtrace/tracer/option.go index 9f82ce05a2..f541fcb014 100644 --- a/ddtrace/tracer/option.go +++ b/ddtrace/tracer/option.go @@ -118,15 +118,15 @@ type config struct { // statsd is used for tracking metrics associated with the runtime and the tracer. statsd statsdClient - // traceSamplingRules contains user-defined rules to determine the sampling rate to apply - // to trace spans. If a span matches a rule, it will impact the trace sampling decision. - traceSamplingRules []SamplingRule - - // spanSamplingRules contains user-defined rules to determine the sampling rate to apply - // to single spans. If a span matches a rule, it will NOT impact the trace sampling decision. + // samplingRules contains user-defined rules to determine the sampling rate to apply + // to spans. Rules might be of a type SamplingRuleSingleSpan or SamplingRuleTrace. + // If a sampling rule is of type SamplingRuleTrace, such rule determines the sampling rate to apply + // to trace spans. If a span matches that rule, it will impact the trace sampling decision. + // If a sampling rule is of type SamplingRuleSingleSpan, such rule determines the sampling rate to apply + // to individual spans. If a span matches a rule, it will NOT impact the trace sampling decision. // In the case that a trace is dropped and thus not sent to the Agent, spans kept on account - // of matching span sampling rules must be conveyed separately. - spanSamplingRules []SamplingRule + // of matching SamplingRuleSingleSpan rules must be conveyed separately. + samplingRules []SamplingRule // tickChan specifies a channel which will receive the time every time the tracer must flush. // It defaults to time.Ticker; replaced in tests. @@ -661,12 +661,12 @@ func WithDogstatsdAddress(addr string) StartOption { } // WithSamplingRules specifies the sampling rates to apply to trace spans based on the -// provided rules. -// TODO(shevchenko): there might be a need to add an analogous option for -// single span sampling rules +// provided rules. Both trace sampling and single span sampling rules can be configured with this option. +// If one wants to configure single span rules, Type field of SamplingRule struct must be specified +// and be equal to SamplingRuleSingleSpan. func WithSamplingRules(rules []SamplingRule) StartOption { return func(cfg *config) { - cfg.traceSamplingRules = rules + cfg.samplingRules = rules } } diff --git a/ddtrace/tracer/rules_sampler.go b/ddtrace/tracer/rules_sampler.go new file mode 100644 index 0000000000..f13dc14705 --- /dev/null +++ b/ddtrace/tracer/rules_sampler.go @@ -0,0 +1,132 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016 Datadog, Inc. + +package tracer + +import ( + "math" + "os" + "strconv" + "time" + + "golang.org/x/time/rate" + + "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ext" + "gopkg.in/DataDog/dd-trace-go.v1/internal/log" + "gopkg.in/DataDog/dd-trace-go.v1/internal/samplernames" +) + +// traceRulesSampler allows a user-defined list of rules to apply to spans. +// These rules can match based on the span's Service, Name or both. +// When making a sampling decision, the rules are checked in order until +// a match is found. +// If a match is found, the rate from that rule is used. +// If no match is found, and the DD_TRACE_SAMPLE_RATE environment variable +// was set to a valid rate, that value is used. +// Otherwise, the rules sampler didn't apply to the span, and the decision +// is passed to the priority sampler. +// +// The rate is used to determine if the span should be sampled, but an upper +// limit can be defined using the DD_TRACE_RATE_LIMIT environment variable. +// Its value is the number of spans to sample per second. +// Spans that matched the rules but exceeded the rate limit are not sampled. +type traceRulesSampler struct { + rules []SamplingRule // the rules to match spans with + globalRate float64 // a rate to apply when no rules match a span + limiter *rateLimiter // used to limit the volume of spans sampled +} + +// newTraceRulesSampler configures a *traceRulesSampler instance using the given set of rules. +// Invalid rules or environment variable values are tolerated, by logging warnings and then ignoring them. +func newTraceRulesSampler(rules []SamplingRule) *traceRulesSampler { + return &traceRulesSampler{ + rules: rules, + globalRate: globalSampleRate(), + limiter: newRateLimiter(), + } +} + +func (rs *traceRulesSampler) enabled() bool { + return len(rs.rules) > 0 || !math.IsNaN(rs.globalRate) +} + +// apply uses the sampling rules to determine the sampling rate for the +// provided span. If the rules don't match, and a default rate hasn't been +// set using DD_TRACE_SAMPLE_RATE, then it returns false and the span is not +// modified. +func (rs *traceRulesSampler) apply(span *span) bool { + if !rs.enabled() { + // short path when disabled + return false + } + + var matched bool + rate := rs.globalRate + for _, rule := range rs.rules { + if rule.match(span) { + matched = true + rate = rule.Rate + break + } + } + if !matched && math.IsNaN(rate) { + // no matching rule or global rate, so we want to fall back + // to priority sampling + return false + } + + rs.applyRate(span, rate, time.Now()) + return true +} + +func (rs *traceRulesSampler) applyRate(span *span, rate float64, now time.Time) { + span.SetTag(keyRulesSamplerAppliedRate, rate) + if !sampledByRate(span.TraceID, rate) { + span.setSamplingPriority(ext.PriorityUserReject, samplernames.RuleRate, rate) + return + } + + sampled, rate := rs.limiter.allowOne(now) + if sampled { + span.setSamplingPriority(ext.PriorityUserKeep, samplernames.RuleRate, rate) + } else { + span.setSamplingPriority(ext.PriorityUserReject, samplernames.RuleRate, rate) + } + span.SetTag(keyRulesSamplerLimiterRate, rate) +} + +// limit returns the rate limit set in the rules sampler, controlled by DD_TRACE_RATE_LIMIT, and +// true if rules sampling is enabled. If not present it returns math.NaN() and false. +func (rs *traceRulesSampler) limit() (float64, bool) { + if rs.enabled() { + return float64(rs.limiter.limiter.Limit()), true + } + return math.NaN(), false +} + +// defaultRateLimit specifies the default trace rate limit used when DD_TRACE_RATE_LIMIT is not set. +const defaultRateLimit = 100.0 + +// newRateLimiter returns a rate limiter which restricts the number of traces sampled per second. +// This defaults to 100.0. The DD_TRACE_RATE_LIMIT environment variable may override the default. +func newRateLimiter() *rateLimiter { + limit := defaultRateLimit + v := os.Getenv("DD_TRACE_RATE_LIMIT") + if v != "" { + l, err := strconv.ParseFloat(v, 64) + if err != nil { + log.Warn("using default rate limit because DD_TRACE_RATE_LIMIT is invalid: %v", err) + } else if l < 0.0 { + log.Warn("using default rate limit because DD_TRACE_RATE_LIMIT is negative: %f", l) + } else { + // override the default limit + limit = l + } + } + return &rateLimiter{ + limiter: rate.NewLimiter(rate.Limit(limit), int(math.Ceil(limit))), + prevTime: time.Now(), + } +} diff --git a/ddtrace/tracer/sampler.go b/ddtrace/tracer/sampler.go index 72820de28d..a39c6f47d6 100644 --- a/ddtrace/tracer/sampler.go +++ b/ddtrace/tracer/sampler.go @@ -158,43 +158,66 @@ func (ps *prioritySampler) apply(spn *span) { spn.SetTag(keySamplingPriorityRate, rate) } -// rulesSampler allows a user-defined list of rules to apply to spans. -// These rules can match based on the span's Service, Name or both. -// When making a sampling decision, the rules are checked in order until -// a match is found. -// If a match is found, the rate from that rule is used. -// If no match is found, and the DD_TRACE_SAMPLE_RATE environment variable -// was set to a valid rate, that value is used. -// Otherwise, the rules sampler didn't apply to the span, and the decision -// is passed to the priority sampler. -// -// The rate is used to determine if the span should be sampled, but an upper -// limit can be defined using the DD_TRACE_RATE_LIMIT environment variable. -// Its value is the number of spans to sample per second. -// Spans that matched the rules but exceeded the rate limit are not sampled. +// rulesSampler holds instances of trace sampler and single span sampler, that are configured with the given set of rules. +// traceRulesSampler samples trace spans based on a user-defined set of rules and might impact sampling decision of the trace. +// singleSpanRulesSampler samples individual spans based on a separate user-defined set of rules and +// cannot impact the trace sampling decision. type rulesSampler struct { - rules []SamplingRule // the rules to match spans with - globalRate float64 // a rate to apply when no rules match a span - limiter *rateLimiter // used to limit the volume of spans sampled + traceRulesSampler *traceRulesSampler + singleSpanRulesSampler *singleSpanRulesSampler } // newRulesSampler configures a *rulesSampler instance using the given set of rules. +// Rules are split between trace and single span sampling rules according to their type. +// Such rules are user-defined through environment variable or WithSamplingRules option. // Invalid rules or environment variable values are tolerated, by logging warnings and then ignoring them. func newRulesSampler(rules []SamplingRule) *rulesSampler { + var spanRules, traceRules []SamplingRule + for _, rule := range rules { + if rule.Type == SamplingRuleSingleSpan { + rule.limiter = newSingleSpanRateLimiter(rule.MaxPerSecond) + spanRules = append(spanRules, rule) + } else { + traceRules = append(traceRules, rule) + } + } return &rulesSampler{ - rules: rules, - globalRate: globalSampleRate(), - limiter: newRateLimiter(), + traceRulesSampler: newTraceRulesSampler(traceRules), + singleSpanRulesSampler: newSingleSpanRulesSampler(spanRules), } } -// traceSamplingRulesFromEnv parses sampling rules from the DD_TRACE_SAMPLING_RULES environment variable -func traceSamplingRulesFromEnv() ([]SamplingRule, error) { +const ( + // SamplingRuleTrace specifies that if a span matches a sampling rule of this type, + // an entire trace might be kept based on the given span and sent to the agent. + SamplingRuleTrace = iota + // SamplingRuleSingleSpan specifies that if a span matches a sampling rule, + // an individual span might be kept and sent to the agent. + SamplingRuleSingleSpan +) + +// samplingRulesFromEnv parses sampling rules from the DD_TRACE_SAMPLING_RULES, +// DD_SPAN_SAMPLING_RULES and DD_SPAN_SAMPLING_RULES_FILE environment variables. +func samplingRulesFromEnv() ([]SamplingRule, error) { + var errs []string + var rules []SamplingRule rulesFromEnv := os.Getenv("DD_TRACE_SAMPLING_RULES") - if rulesFromEnv == "" { - return nil, nil + if rulesFromEnv != "" { + traceRules, err := processSamplingRules([]byte(rulesFromEnv), SamplingRuleTrace) + if err != nil { + errs = append(errs, err.Error()) + } + rules = append(rules, traceRules...) } - return processSamplingRules([]byte(rulesFromEnv), false) + spanRules, err := spanSamplingRulesFromEnv() + if err != nil { + errs = append(errs, err.Error()) + } + rules = append(rules, spanRules...) + if len(errs) != 0 { + return rules, fmt.Errorf("%s", strings.Join(errs, "\n\t")) + } + return rules, nil } // spanSamplingRulesFromEnv parses sampling rules from the DD_SPAN_SAMPLING_RULES and @@ -205,7 +228,7 @@ func spanSamplingRulesFromEnv() ([]SamplingRule, error) { if rulesFromEnv == "" { return nil, nil } - rules, err := processSamplingRules([]byte(rulesFromEnv), true) + rules, err := processSamplingRules([]byte(rulesFromEnv), SamplingRuleSingleSpan) if err != nil { errs = append(errs, err.Error()) } @@ -216,18 +239,24 @@ func spanSamplingRulesFromEnv() ([]SamplingRule, error) { } return rules, err } - if rulesFile == "" { - return rules, err + if rulesFile != "" { + rulesFromEnvFile, err := os.ReadFile(rulesFile) + if err != nil { + log.Warn("Couldn't read file from DD_SPAN_SAMPLING_RULES_FILE") + return nil, err + } + rules, err = processSamplingRules(rulesFromEnvFile, SamplingRuleSingleSpan) + if err != nil { + errs = append(errs, err.Error()) + } } - rulesFromEnvFile, err := os.ReadFile(rulesFile) - if err != nil { - log.Warn("Couldn't read file from DD_SPAN_SAMPLING_RULES_FILE") - return nil, err + if len(errs) != 0 { + return rules, fmt.Errorf("%s", strings.Join(errs, "\n\t")) } - return processSamplingRules(rulesFromEnvFile, true) + return rules, nil } -func processSamplingRules(b []byte, isIndividualSamplingRule bool) ([]SamplingRule, error) { +func processSamplingRules(b []byte, spanType int) ([]SamplingRule, error) { if len(b) == 0 { return nil, nil } @@ -257,7 +286,7 @@ func processSamplingRules(b []byte, isIndividualSamplingRule bool) ([]SamplingRu log.Warn("at index %d: ignoring rule %+v: rate is out of [0.0, 1.0] range", i, v) continue } - if isIndividualSamplingRule { + if spanType == SamplingRuleSingleSpan { if v.Service == "" { v.Service = "*" } @@ -280,6 +309,7 @@ func processSamplingRules(b []byte, isIndividualSamplingRule bool) ([]SamplingRu Rate: rate, MaxPerSecond: v.MaxPerSecond, limiter: newSingleSpanRateLimiter(v.MaxPerSecond), + Type: SamplingRuleSingleSpan, }) continue } @@ -318,89 +348,6 @@ func globalSampleRate() float64 { return defaultRate } -// defaultRateLimit specifies the default trace rate limit used when DD_TRACE_RATE_LIMIT is not set. -const defaultRateLimit = 100.0 - -// newRateLimiter returns a rate limiter which restricts the number of traces sampled per second. -// This defaults to 100.0. The DD_TRACE_RATE_LIMIT environment variable may override the default. -func newRateLimiter() *rateLimiter { - limit := defaultRateLimit - v := os.Getenv("DD_TRACE_RATE_LIMIT") - if v != "" { - l, err := strconv.ParseFloat(v, 64) - if err != nil { - log.Warn("using default rate limit because DD_TRACE_RATE_LIMIT is invalid: %v", err) - } else if l < 0.0 { - log.Warn("using default rate limit because DD_TRACE_RATE_LIMIT is negative: %f", l) - } else { - // override the default limit - limit = l - } - } - return &rateLimiter{ - limiter: rate.NewLimiter(rate.Limit(limit), int(math.Ceil(limit))), - prevTime: time.Now(), - } -} - -func (rs *rulesSampler) enabled() bool { - return len(rs.rules) > 0 || !math.IsNaN(rs.globalRate) -} - -// apply uses the sampling rules to determine the sampling rate for the -// provided span. If the rules don't match, and a default rate hasn't been -// set using DD_TRACE_SAMPLE_RATE, then it returns false and the span is not -// modified. -func (rs *rulesSampler) apply(span *span) bool { - if !rs.enabled() { - // short path when disabled - return false - } - - var matched bool - rate := rs.globalRate - for _, rule := range rs.rules { - if rule.match(span) { - matched = true - rate = rule.Rate - break - } - } - if !matched && math.IsNaN(rate) { - // no matching rule or global rate, so we want to fall back - // to priority sampling - return false - } - - rs.applyRate(span, rate, time.Now()) - return true -} - -func (rs *rulesSampler) applyRate(span *span, rate float64, now time.Time) { - span.SetTag(keyRulesSamplerAppliedRate, rate) - if !sampledByRate(span.TraceID, rate) { - span.setSamplingPriority(ext.PriorityUserReject, samplernames.RuleRate, rate) - return - } - - sampled, rate := rs.limiter.allowOne(now) - if sampled { - span.setSamplingPriority(ext.PriorityUserKeep, samplernames.RuleRate, rate) - } else { - span.setSamplingPriority(ext.PriorityUserReject, samplernames.RuleRate, rate) - } - span.SetTag(keyRulesSamplerLimiterRate, rate) -} - -// limit returns the rate limit set in the rules sampler, controlled by DD_TRACE_RATE_LIMIT, and -// true if rules sampling is enabled. If not present it returns math.NaN() and false. -func (rs *rulesSampler) limit() (float64, bool) { - if rs.enabled() { - return float64(rs.limiter.limiter.Limit()), true - } - return math.NaN(), false -} - // SamplingRule is used for applying sampling rates to spans that match // the service name, operation name or both. // For basic usage, consider using the helper functions ServiceRule, NameRule, etc. @@ -409,6 +356,7 @@ type SamplingRule struct { Name *regexp.Regexp Rate float64 MaxPerSecond float64 + Type int exactService string exactName string diff --git a/ddtrace/tracer/sampler_test.go b/ddtrace/tracer/sampler_test.go index b9f62554a9..1ce16845ed 100644 --- a/ddtrace/tracer/sampler_test.go +++ b/ddtrace/tracer/sampler_test.go @@ -260,7 +260,7 @@ func TestRuleEnvVars(t *testing.T) { } { t.Run("", func(t *testing.T) { os.Setenv("DD_TRACE_SAMPLING_RULES", tt.value) - rules, err := traceSamplingRulesFromEnv() + rules, err := samplingRulesFromEnv() if tt.errStr == "" { assert.NoError(err) } else { @@ -300,7 +300,7 @@ func TestRuleEnvVars(t *testing.T) { } { t.Run(fmt.Sprintf("%v", i), func(t *testing.T) { os.Setenv("DD_SPAN_SAMPLING_RULES", tt.value) - rules, err := spanSamplingRulesFromEnv() + rules, err := samplingRulesFromEnv() if tt.errStr == "" { assert.NoError(err) } else { @@ -341,7 +341,7 @@ func TestRuleEnvVars(t *testing.T) { } { t.Run(fmt.Sprintf("%v", i), func(t *testing.T) { os.Setenv("DD_SPAN_SAMPLING_RULES", tt.rules) - rules, err := spanSamplingRulesFromEnv() + rules, err := samplingRulesFromEnv() assert.NoError(err) assert.Equal(tt.srvRegex, rules[0].Service.String()) assert.Equal(tt.nameRegex, rules[0].Name.String()) @@ -360,7 +360,7 @@ func TestRulesSampler(t *testing.T) { rs := newRulesSampler(nil) span := makeSpan("http.request", "test-service") - result := rs.apply(span) + result := rs.traceRulesSampler.apply(span) assert.False(result) }) @@ -378,7 +378,7 @@ func TestRulesSampler(t *testing.T) { rs := newRulesSampler(v) span := makeSpan("http.request", "test-service") - result := rs.apply(span) + result := rs.traceRulesSampler.apply(span) assert.True(result) assert.Equal(1.0, span.Metrics["_dd.rule_psr"]) assert.Equal(1.0, span.Metrics["_dd.limit_psr"]) @@ -401,7 +401,7 @@ func TestRulesSampler(t *testing.T) { rs := newRulesSampler(v) span := makeSpan("http.request", "test-service") - result := rs.apply(span) + result := rs.traceRulesSampler.apply(span) assert.False(result) }) } @@ -432,13 +432,13 @@ func TestRulesSampler(t *testing.T) { } { t.Run("", func(t *testing.T) { os.Setenv("DD_SPAN_SAMPLING_RULES", tt.rules) - rules, _ := spanSamplingRulesFromEnv() + rules, _ := samplingRulesFromEnv() assert := assert.New(t) - rs := newSingleSpanRulesSampler(rules) + rs := newRulesSampler(rules) span := makeSpan(tt.spanName, tt.spanSrv) - result := rs.apply(span) + result := rs.singleSpanRulesSampler.apply(span) assert.True(result) assert.Contains(span.Metrics, spanSamplingMechanism) assert.Contains(span.Metrics, singleSpanSamplingRuleRate) @@ -473,13 +473,13 @@ func TestRulesSampler(t *testing.T) { } { t.Run("", func(t *testing.T) { os.Setenv("DD_SPAN_SAMPLING_RULES", tt.rules) - rules, _ := spanSamplingRulesFromEnv() + rules, _ := samplingRulesFromEnv() assert := assert.New(t) rs := newRulesSampler(rules) span := makeSpan(tt.spanName, tt.spanSrv) - result := rs.apply(span) + result := rs.singleSpanRulesSampler.apply(span) assert.False(result) assert.NotContains(span.Metrics, spanSamplingMechanism) assert.NotContains(span.Metrics, singleSpanSamplingRuleRate) @@ -507,7 +507,7 @@ func TestRulesSampler(t *testing.T) { rs := newRulesSampler(rules) span := makeSpan("http.request", "test-service") - result := rs.apply(span) + result := rs.singleSpanRulesSampler.apply(span) assert.True(result) assert.Equal(rate, span.Metrics["_dd.rule_psr"]) if rate > 0.0 { @@ -551,7 +551,7 @@ func TestRulesSamplerInternals(t *testing.T) { now := time.Now() rs := &rulesSampler{} span := makeSpanAt("http.request", "test-service", now) - rs.applyRate(span, 0.0, now) + rs.traceRulesSampler.applyRate(span, 0.0, now) assert.Equal(0.0, span.Metrics["_dd.rule_psr"]) _, ok := span.Metrics["_dd.limit_psr"] assert.False(ok) @@ -562,12 +562,12 @@ func TestRulesSamplerInternals(t *testing.T) { now := time.Now() rs := newRulesSampler(nil) // set samplingLimiter to specific state - rs.limiter.prevTime = now.Add(-1 * time.Second) - rs.limiter.allowed = 1 - rs.limiter.seen = 1 + rs.traceRulesSampler.limiter.prevTime = now.Add(-1 * time.Second) + rs.traceRulesSampler.limiter.allowed = 1 + rs.traceRulesSampler.limiter.seen = 1 span := makeSpanAt("http.request", "test-service", now) - rs.applyRate(span, 1.0, now) + rs.traceRulesSampler.applyRate(span, 1.0, now) assert.Equal(1.0, span.Metrics["_dd.rule_psr"]) assert.Equal(1.0, span.Metrics["_dd.limit_psr"]) }) @@ -577,18 +577,18 @@ func TestRulesSamplerInternals(t *testing.T) { now := time.Now() rs := newRulesSampler(nil) // force sampling limiter to 1.0 spans/sec - rs.limiter.limiter = rate.NewLimiter(rate.Limit(1.0), 1) - rs.limiter.prevTime = now.Add(-1 * time.Second) - rs.limiter.allowed = 2 - rs.limiter.seen = 2 + rs.traceRulesSampler.limiter.limiter = rate.NewLimiter(rate.Limit(1.0), 1) + rs.traceRulesSampler.limiter.prevTime = now.Add(-1 * time.Second) + rs.traceRulesSampler.limiter.allowed = 2 + rs.traceRulesSampler.limiter.seen = 2 // first span kept, second dropped span := makeSpanAt("http.request", "test-service", now) - rs.applyRate(span, 1.0, now) + rs.traceRulesSampler.applyRate(span, 1.0, now) assert.EqualValues(ext.PriorityUserKeep, span.Metrics[keySamplingPriority]) assert.Equal(1.0, span.Metrics["_dd.rule_psr"]) assert.Equal(1.0, span.Metrics["_dd.limit_psr"]) span = makeSpanAt("http.request", "test-service", now) - rs.applyRate(span, 1.0, now) + rs.traceRulesSampler.applyRate(span, 1.0, now) assert.EqualValues(ext.PriorityUserReject, span.Metrics[keySamplingPriority]) assert.Equal(1.0, span.Metrics["_dd.rule_psr"]) assert.Equal(0.75, span.Metrics["_dd.limit_psr"]) diff --git a/ddtrace/tracer/spancontext.go b/ddtrace/tracer/spancontext.go index 45ef312aa2..1eb8c9e15b 100644 --- a/ddtrace/tracer/spancontext.go +++ b/ddtrace/tracer/spancontext.go @@ -304,9 +304,6 @@ func (t *trace) finishedOne(s *span) { if !ok { return } - for _, span := range t.spans { - tr.singleSpanRulesSampling.apply(span) - } // we have a tracer that can receive completed traces. atomic.AddInt64(&tr.spansFinished, int64(len(t.spans))) sd := samplingDecision(atomic.LoadInt64((*int64)(&t.samplingDecision))) @@ -315,11 +312,11 @@ func (t *trace) finishedOne(s *span) { atomic.AddUint64(&tr.droppedP0Spans, uint64(len(t.spans))) atomic.AddUint64(&tr.droppedP0Traces, 1) } - //if trace sampling decision is drop, we still want to send single spans + // if trace sampling decision is drop, we still want to send single spans var singleSpans []*span - for i, span := range t.spans { - if _, ok := span.Metrics[spanSamplingMechanism]; ok { - singleSpans = append(singleSpans, t.spans[i]) + for _, span := range t.spans { + if tr.rulesSampling.singleSpanRulesSampler.apply(span) { + singleSpans = append(singleSpans, span) } } t.spans = singleSpans diff --git a/ddtrace/tracer/tracer.go b/ddtrace/tracer/tracer.go index bfd90d8ab9..177204f02a 100644 --- a/ddtrace/tracer/tracer.go +++ b/ddtrace/tracer/tracer.go @@ -74,17 +74,12 @@ type tracer struct { // Records the number of dropped P0 traces and spans. droppedP0Traces, droppedP0Spans uint64 - // rulesSampling holds an instance of the rules sampler. These are user-defined + // rulesSampling holds an instance of the rules sampler used to apply either trace sampling, + //or single span sampling rules on spans. These are user-defined // rules for applying a sampling rate to spans that match the designated service // or operation name. rulesSampling *rulesSampler - // singleSpanRulesSampling holds an instance of the single span rules sampler. These are user-defined - // rules for applying a sampling rate to spans that match the designated service - // or operation name. Such spans are sampled if trace sampling decision is 'drop' and - // may be sent separately - singleSpanRulesSampling *singleSpanRulesSampler - // obfuscator holds the obfuscator used to obfuscate resources in aggregated stats. // obfuscator may be nil if disabled. obfuscator *obfuscate.Obfuscator @@ -182,20 +177,6 @@ const payloadQueueSize = 1000 func newUnstartedTracer(opts ...StartOption) *tracer { c := newConfig(opts...) - traceRules, err := traceSamplingRulesFromEnv() - if err != nil { - log.Warn("DIAGNOSTICS Error(s) parsing DD_TRACE_SAMPLING_RULES: found errors: %s", err) - } - if traceRules != nil { - c.traceSamplingRules = traceRules - } - spanRules, err := spanSamplingRulesFromEnv() - if err != nil { - log.Warn("DIAGNOSTICS Error(s) parsing DD_SPAN_SAMPLING_RULES: found errors: %s", err) - } - if spanRules != nil { - c.spanSamplingRules = spanRules - } sampler := newPrioritySampler() var writer traceWriter if c.logToStdout { @@ -203,17 +184,23 @@ func newUnstartedTracer(opts ...StartOption) *tracer { } else { writer = newAgentTraceWriter(c, sampler) } + rules, err := samplingRulesFromEnv() + if err != nil { + log.Warn("DIAGNOSTICS Error(s) parsing sampling rules: found errors:%s", err) + } + if rules != nil { + c.samplingRules = rules + } t := &tracer{ - config: c, - traceWriter: writer, - out: make(chan []*span, payloadQueueSize), - stop: make(chan struct{}), - flush: make(chan chan<- struct{}), - rulesSampling: newRulesSampler(c.traceSamplingRules), - singleSpanRulesSampling: newSingleSpanRulesSampler(c.spanSamplingRules), - prioritySampling: sampler, - pid: strconv.Itoa(os.Getpid()), - stats: newConcentrator(c, defaultStatsBucketSize), + config: c, + traceWriter: writer, + out: make(chan []*span, payloadQueueSize), + stop: make(chan struct{}), + flush: make(chan chan<- struct{}), + rulesSampling: newRulesSampler(c.samplingRules), + prioritySampling: sampler, + pid: strconv.Itoa(os.Getpid()), + stats: newConcentrator(c, defaultStatsBucketSize), obfuscator: obfuscate.NewObfuscator(obfuscate.Config{ SQL: obfuscate.SQLConfig{ TableNames: c.agent.HasFlag("table_names"), @@ -536,7 +523,7 @@ func (t *tracer) sample(span *span) { if rs, ok := sampler.(RateSampler); ok && rs.Rate() < 1 { span.setMetric(sampleRateMetricKey, rs.Rate()) } - if t.rulesSampling.apply(span) { + if t.rulesSampling.traceRulesSampler.apply(span) { return } t.prioritySampling.apply(span) From c465caed438a5107b95852a4647d9ffc5aa96e07 Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Tue, 5 Jul 2022 16:55:20 +0200 Subject: [PATCH 09/27] fixed a test - wrong sampler method called --- ddtrace/tracer/sampler_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddtrace/tracer/sampler_test.go b/ddtrace/tracer/sampler_test.go index 1ce16845ed..53a04572c2 100644 --- a/ddtrace/tracer/sampler_test.go +++ b/ddtrace/tracer/sampler_test.go @@ -507,7 +507,7 @@ func TestRulesSampler(t *testing.T) { rs := newRulesSampler(rules) span := makeSpan("http.request", "test-service") - result := rs.singleSpanRulesSampler.apply(span) + result := rs.traceRulesSampler.apply(span) assert.True(result) assert.Equal(rate, span.Metrics["_dd.rule_psr"]) if rate > 0.0 { From db105b71ac5e1115d0e4416d956ed6e2b9078b2d Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Tue, 5 Jul 2022 17:24:30 +0200 Subject: [PATCH 10/27] reworded log message --- ddtrace/tracer/log.go | 2 +- ddtrace/tracer/log_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ddtrace/tracer/log.go b/ddtrace/tracer/log.go index 1a6085d969..ab3c213141 100644 --- a/ddtrace/tracer/log.go +++ b/ddtrace/tracer/log.go @@ -107,7 +107,7 @@ func logStartup(t *tracer) { AppSec: appsec.Enabled(), } if _, err := samplingRulesFromEnv(); err != nil { - info.SamplingRulesError = fmt.Sprintf("%s", err) + info.SamplingRulesError = fmt.Sprintf("found errors:%s", err) } if limit, ok := t.rulesSampling.traceRulesSampler.limit(); ok { info.SampleRateLimit = fmt.Sprintf("%v", limit) diff --git a/ddtrace/tracer/log_test.go b/ddtrace/tracer/log_test.go index 5469301177..3962eeaa72 100644 --- a/ddtrace/tracer/log_test.go +++ b/ddtrace/tracer/log_test.go @@ -100,7 +100,7 @@ func TestStartupLog(t *testing.T) { tp.Reset() logStartup(tracer) assert.Len(tp.Lines(), 2) - assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sample_rate_limit":"100","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234}\],"sampling_rules_error":"\\n\\tat index 1: rate not provided","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) + assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sample_rate_limit":"100","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234}\],"sampling_rules_error":"found errors:\\n\\tat index 1: rate not provided","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) }) t.Run("lambda", func(t *testing.T) { From 92506eb2ebc4a9869952ee28e1405c007d911193 Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Tue, 5 Jul 2022 17:35:24 +0200 Subject: [PATCH 11/27] removed outdated constant in a test --- ddtrace/tracer/tracer_test.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/ddtrace/tracer/tracer_test.go b/ddtrace/tracer/tracer_test.go index 5244ed6b2e..2ff54c318a 100644 --- a/ddtrace/tracer/tracer_test.go +++ b/ddtrace/tracer/tracer_test.go @@ -381,9 +381,6 @@ func TestSamplingDecision(t *testing.T) { child.Finish() span.Finish() assert.Equal(t, float64(ext.PriorityAutoReject), span.Metrics[keySamplingPriority]) - // this trace won't be sent to the agent, - // therefore not necessary to populate keyUpstreamServices - assert.Equal(t, "", span.context.trace.tags[keyUpstreamServices]) assert.Equal(t, decisionDrop, span.context.trace.samplingDecision) assert.Equal(t, 8.0, span.Metrics[spanSamplingMechanism]) assert.Equal(t, 1.0, span.Metrics[singleSpanSamplingRuleRate]) From 99390591c6ab1a1c061aef8bb9166c6882934734 Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Wed, 6 Jul 2022 14:47:57 +0200 Subject: [PATCH 12/27] updated according to comments --- ddtrace/tracer/log.go | 4 +- ddtrace/tracer/option.go | 9 +- ddtrace/tracer/rules_sampler.go | 444 ++++++++++++++++++++++++++++++- ddtrace/tracer/sampler.go | 334 ----------------------- ddtrace/tracer/sampler_test.go | 34 +-- ddtrace/tracer/single_sampler.go | 119 --------- ddtrace/tracer/spancontext.go | 6 +- ddtrace/tracer/tracer.go | 2 +- ddtrace/tracer/tracer_test.go | 1 + 9 files changed, 468 insertions(+), 485 deletions(-) delete mode 100644 ddtrace/tracer/single_sampler.go diff --git a/ddtrace/tracer/log.go b/ddtrace/tracer/log.go index ab3c213141..587a646912 100644 --- a/ddtrace/tracer/log.go +++ b/ddtrace/tracer/log.go @@ -90,7 +90,7 @@ func logStartup(t *tracer) { AgentURL: t.config.transport.endpoint(), Debug: t.config.debug, AnalyticsEnabled: !math.IsNaN(globalconfig.AnalyticsRate()), - SampleRate: fmt.Sprintf("%f", t.rulesSampling.traceRulesSampler.globalRate), + SampleRate: fmt.Sprintf("%f", t.rulesSampling.traces.globalRate), SampleRateLimit: "disabled", SamplingRules: t.config.samplingRules, ServiceMappings: t.config.serviceMappings, @@ -109,7 +109,7 @@ func logStartup(t *tracer) { if _, err := samplingRulesFromEnv(); err != nil { info.SamplingRulesError = fmt.Sprintf("found errors:%s", err) } - if limit, ok := t.rulesSampling.traceRulesSampler.limit(); ok { + if limit, ok := t.rulesSampling.traces.limit(); ok { info.SampleRateLimit = fmt.Sprintf("%v", limit) } if !t.config.logToStdout { diff --git a/ddtrace/tracer/option.go b/ddtrace/tracer/option.go index 2f5fa830b0..4b88cc0865 100644 --- a/ddtrace/tracer/option.go +++ b/ddtrace/tracer/option.go @@ -120,12 +120,6 @@ type config struct { // samplingRules contains user-defined rules to determine the sampling rate to apply // to spans. Rules might be of a type SamplingRuleSingleSpan or SamplingRuleTrace. - // If a sampling rule is of type SamplingRuleTrace, such rule determines the sampling rate to apply - // to trace spans. If a span matches that rule, it will impact the trace sampling decision. - // If a sampling rule is of type SamplingRuleSingleSpan, such rule determines the sampling rate to apply - // to individual spans. If a span matches a rule, it will NOT impact the trace sampling decision. - // In the case that a trace is dropped and thus not sent to the Agent, spans kept on account - // of matching SamplingRuleSingleSpan rules must be conveyed separately. samplingRules []SamplingRule // tickChan specifies a channel which will receive the time every time the tracer must flush. @@ -675,8 +669,7 @@ func WithDogstatsdAddress(addr string) StartOption { // WithSamplingRules specifies the sampling rates to apply to trace spans based on the // provided rules. Both trace sampling and single span sampling rules can be configured with this option. -// If one wants to configure single span rules, Type field of SamplingRule struct must be specified -// and be equal to SamplingRuleSingleSpan. +// The `Type` determines the scope of the sampling rules, defaulting to the entire trace. See `SamplingRule` for more details. func WithSamplingRules(rules []SamplingRule) StartOption { return func(cfg *config) { cfg.samplingRules = rules diff --git a/ddtrace/tracer/rules_sampler.go b/ddtrace/tracer/rules_sampler.go index f13dc14705..db0506f3cd 100644 --- a/ddtrace/tracer/rules_sampler.go +++ b/ddtrace/tracer/rules_sampler.go @@ -6,9 +6,14 @@ package tracer import ( + "encoding/json" + "fmt" "math" "os" + "regexp" "strconv" + "strings" + "sync" "time" "golang.org/x/time/rate" @@ -18,6 +23,110 @@ import ( "gopkg.in/DataDog/dd-trace-go.v1/internal/samplernames" ) +// rulesSampler holds instances of trace sampler and single span sampler, that are configured with the given set of rules. +// traceRulesSampler samples trace spans based on a user-defined set of rules and might impact sampling decision of the trace. +// singleSpanRulesSampler samples individual spans based on a separate user-defined set of rules and +// cannot impact the trace sampling decision. +type rulesSampler struct { + traces *traceRulesSampler + spans *singleSpanRulesSampler +} + +// newRulesSampler configures a *rulesSampler instance using the given set of rules. +// Rules are split between trace and single span sampling rules according to their type. +// Such rules are user-defined through environment variable or WithSamplingRules option. +// Invalid rules or environment variable values are tolerated, by logging warnings and then ignoring them. +func newRulesSampler(rules []SamplingRule) *rulesSampler { + var spanRules, traceRules []SamplingRule + for _, rule := range rules { + if rule.Type == SamplingRuleSpan { + rule.limiter = newSingleSpanRateLimiter(rule.MaxPerSecond) + spanRules = append(spanRules, rule) + } else { + traceRules = append(traceRules, rule) + } + } + return &rulesSampler{ + traces: newTraceRulesSampler(traceRules), + spans: newSingleSpanRulesSampler(spanRules), + } +} + +func (r *rulesSampler) sampleTrace(s *span) bool { + return r.traces.apply(s) +} + +func (r *rulesSampler) sampleSpan(s *span) bool { + return r.spans.apply(s) +} + +// SamplingRule is used for applying sampling rates to spans that match +// the service name, operation name or both. +// For basic usage, consider using the helper functions ServiceRule, NameRule, etc. +// See `SamplingRuleType` for more details. +type SamplingRule struct { + Service *regexp.Regexp + Name *regexp.Regexp + Rate float64 + MaxPerSecond float64 + Type SamplingRuleType + + exactService string + exactName string + limiter *rateLimiter +} + +// SamplingRuleType represents a type of sampling rule spans are matched against. +type SamplingRuleType int + +const ( + // SamplingRuleTrace specifies a sampling rule that applies to the entire trace if any spans satisfy the criteria. + // If a sampling rule is of type SamplingRuleTrace, such rule determines the sampling rate to apply + // to trace spans. If a span matches that rule, it will impact the trace sampling decision. + SamplingRuleTrace = iota + // SamplingRuleSpan specifies a sampling rule that applies to a single span without affecting the entire trace. + // If a sampling rule is of type SamplingRuleSingleSpan, such rule determines the sampling rate to apply + // to individual spans. If a span matches a rule, it will NOT impact the trace sampling decision. + // In the case that a trace is dropped and thus not sent to the Agent, spans kept on account + // of matching SamplingRuleSingleSpan rules must be conveyed separately. + SamplingRuleSpan +) + +// ServiceRule returns a SamplingRule that applies the provided sampling rate +// to spans that match the service name provided. +func ServiceRule(service string, rate float64) SamplingRule { + return SamplingRule{ + exactService: service, + Rate: rate, + } +} + +// NameRule returns a SamplingRule that applies the provided sampling rate +// to spans that match the operation name provided. +func NameRule(name string, rate float64) SamplingRule { + return SamplingRule{ + exactName: name, + Rate: rate, + } +} + +// NameServiceRule returns a SamplingRule that applies the provided sampling rate +// to spans matching both the operation and service names provided. +func NameServiceRule(name string, service string, rate float64) SamplingRule { + return SamplingRule{ + exactService: service, + exactName: name, + Rate: rate, + } +} + +// RateRule returns a SamplingRule that applies the provided sampling rate to all spans. +func RateRule(rate float64) SamplingRule { + return SamplingRule{ + Rate: rate, + } +} + // traceRulesSampler allows a user-defined list of rules to apply to spans. // These rules can match based on the span's Service, Name or both. // When making a sampling decision, the rules are checked in order until @@ -48,6 +157,26 @@ func newTraceRulesSampler(rules []SamplingRule) *traceRulesSampler { } } +// globalSampleRate returns the sampling rate found in the DD_TRACE_SAMPLE_RATE environment variable. +// If it is invalid or not within the 0-1 range, NaN is returned. +func globalSampleRate() float64 { + defaultRate := math.NaN() + v := os.Getenv("DD_TRACE_SAMPLE_RATE") + if v == "" { + return defaultRate + } + r, err := strconv.ParseFloat(v, 64) + if err != nil { + log.Warn("ignoring DD_TRACE_SAMPLE_RATE: error: %v", err) + return defaultRate + } + if r >= 0.0 && r <= 1.0 { + return r + } + log.Warn("ignoring DD_TRACE_SAMPLE_RATE: out of range %f", r) + return defaultRate +} + func (rs *traceRulesSampler) enabled() bool { return len(rs.rules) > 0 || !math.IsNaN(rs.globalRate) } @@ -81,6 +210,21 @@ func (rs *traceRulesSampler) apply(span *span) bool { return true } +// match returns true when the span's details match all the expected values in the rule. +func (sr *SamplingRule) match(s *span) bool { + if sr.Service != nil && !sr.Service.MatchString(s.Service) { + return false + } else if sr.exactService != "" && sr.exactService != s.Service { + return false + } + if sr.Name != nil && !sr.Name.MatchString(s.Name) { + return false + } else if sr.exactName != "" && sr.exactName != s.Name { + return false + } + return true +} + func (rs *traceRulesSampler) applyRate(span *span, rate float64, now time.Time) { span.SetTag(keyRulesSamplerAppliedRate, rate) if !sampledByRate(span.TraceID, rate) { @@ -110,16 +254,16 @@ func (rs *traceRulesSampler) limit() (float64, bool) { const defaultRateLimit = 100.0 // newRateLimiter returns a rate limiter which restricts the number of traces sampled per second. -// This defaults to 100.0. The DD_TRACE_RATE_LIMIT environment variable may override the default. +// The limit is DD_TRACE_RATE_LIMIT if set, `defaultRateLimit` otherwise. func newRateLimiter() *rateLimiter { limit := defaultRateLimit v := os.Getenv("DD_TRACE_RATE_LIMIT") if v != "" { l, err := strconv.ParseFloat(v, 64) if err != nil { - log.Warn("using default rate limit because DD_TRACE_RATE_LIMIT is invalid: %v", err) + log.Warn("DD_TRACE_RATE_LIMIT invalid, using default value %f: %v", limit, err) } else if l < 0.0 { - log.Warn("using default rate limit because DD_TRACE_RATE_LIMIT is negative: %f", l) + log.Warn("DD_TRACE_RATE_LIMIT negative, using default value %f", limit) } else { // override the default limit limit = l @@ -130,3 +274,297 @@ func newRateLimiter() *rateLimiter { prevTime: time.Now(), } } + +// singleSpanRulesSampler allows a user-defined list of rules to apply to spans +// to sample single spans. +// These rules match based on the span's Service and Name. If empty value is supplied +// to either Service or Name field, it will default to "*", allow all. +// When making a sampling decision, the rules are checked in order until +// a match is found. +// If a match is found, the rate from that rule is used. +// If no match is found, no changes or further sampling is applied to the spans. +// The rate is used to determine if the span should be sampled, but an upper +// limit can be defined using the max_per_second field when supplying the rule. +// If max_per_second is absent in the rule, the default is allow all. +// Its value is the max number of spans to sample per second. +// Spans that matched the rules but exceeded the rate limit are not sampled. +type singleSpanRulesSampler struct { + rules []SamplingRule // the rules to match spans with +} + +// newSingleSpanRulesSampler configures a *singleSpanRulesSampler instance using the given set of rules. +// Invalid rules or environment variable values are tolerated, by logging warnings and then ignoring them. +func newSingleSpanRulesSampler(rules []SamplingRule) *singleSpanRulesSampler { + return &singleSpanRulesSampler{ + rules: rules, + } +} + +func (rs *singleSpanRulesSampler) enabled() bool { + return len(rs.rules) > 0 +} + +const ( + // spanSamplingMechanism specifies the sampling mechanism by which an individual span was sampled + spanSamplingMechanism = "_dd.span_sampling.mechanism" + + // singleSpanSamplingMechanism specifies value reserved to indicate that a span was kept + // on account of a single span sampling rule. + singleSpanSamplingMechanism = 8 + + // singleSpanSamplingRuleRate specifies the configured sampling probability for the single span sampling rule. + singleSpanSamplingRuleRate = "_dd.span_sampling.rule_rate" + + // singleSpanSamplingMPS specifies the configured limit for the single span sampling rule + // that the span matched. If there is no configured limit, then this tag is omitted. + singleSpanSamplingMPS = "_dd.span_sampling.max_per_second" +) + +// apply uses the sampling rules to determine the sampling rate for the +// provided span. If the rules don't match, then it returns false and the span is not +// modified. +func (rs *singleSpanRulesSampler) apply(span *span) bool { + for _, rule := range rs.rules { + if rule.match(span) { + rs.applyRate(span, rule, rule.Rate, time.Now()) + return true + } + } + return false +} + +func (rs *singleSpanRulesSampler) applyRate(span *span, rule SamplingRule, rate float64, now time.Time) { + span.setMetric(keyRulesSamplerAppliedRate, rate) + if !sampledByRate(span.SpanID, rate) { + span.setSamplingPriority(ext.PriorityUserReject, samplernames.RuleRate, rate) + return + } + + var sampled bool + if rule.limiter != nil { + sampled, rate = rule.limiter.allowOne(now) + if !sampled { + return + } + } + span.setMetric(spanSamplingMechanism, singleSpanSamplingMechanism) + span.setMetric(singleSpanSamplingRuleRate, rate) + if rule.MaxPerSecond != 0 { + span.setMetric(singleSpanSamplingMPS, rule.MaxPerSecond) + } +} + +// rateLimiter is a wrapper on top of golang.org/x/time/rate which implements a rate limiter but also +// returns the effective rate of allowance. +type rateLimiter struct { + limiter *rate.Limiter + + mu sync.Mutex // guards below fields + prevTime time.Time // time at which prevAllowed and prevSeen were set + allowed float64 // number of spans allowed in the current period + seen float64 // number of spans seen in the current period + prevAllowed float64 // number of spans allowed in the previous period + prevSeen float64 // number of spans seen in the previous period +} + +// allowOne returns the rate limiter's decision to allow the span to be sampled, and the +// effective rate at the time it is called. The effective rate is computed by averaging the rate +// for the previous second with the current rate +func (r *rateLimiter) allowOne(now time.Time) (bool, float64) { + r.mu.Lock() + defer r.mu.Unlock() + if d := now.Sub(r.prevTime); d >= time.Second { + // enough time has passed to reset the counters + if d.Truncate(time.Second) == time.Second && r.seen > 0 { + // exactly one second, so update prev + r.prevAllowed = r.allowed + r.prevSeen = r.seen + } else { + // more than one second, so reset previous rate + r.prevAllowed = 0 + r.prevSeen = 0 + } + r.prevTime = now + r.allowed = 0 + r.seen = 0 + } + + r.seen++ + var sampled bool + if r.limiter.AllowN(now, 1) { + r.allowed++ + sampled = true + } + er := (r.prevAllowed + r.allowed) / (r.prevSeen + r.seen) + return sampled, er +} + +// newSingleSpanRateLimiter returns a rate limiter which restricts the number of single spans sampled per second. +// This defaults to infinite, allow all behaviour. The MaxPerSecond value of the rule may override the default. +func newSingleSpanRateLimiter(mps float64) *rateLimiter { + limit := math.MaxFloat64 + if mps > 0 { + limit = mps + } + return &rateLimiter{ + limiter: rate.NewLimiter(rate.Limit(limit), int(math.Ceil(limit))), + prevTime: time.Now(), + } +} + +// globMatch compiles pattern string into glob format, i.e. regular expressions with only '?' +// and '*' treated as regex metacharacters. +func globMatch(pattern string) (*regexp.Regexp, error) { + // escaping regex characters + pattern = regexp.QuoteMeta(pattern) + // replacing '?' and '*' with regex characters + pattern = strings.Replace(pattern, "\\?", ".", -1) + pattern = strings.Replace(pattern, "\\*", ".*", -1) + //pattern must match an entire string + return regexp.Compile(fmt.Sprintf("^%s$", pattern)) +} + +// samplingRulesFromEnv parses sampling rules from the DD_TRACE_SAMPLING_RULES, +// DD_SPAN_SAMPLING_RULES and DD_SPAN_SAMPLING_RULES_FILE environment variables. +func samplingRulesFromEnv() ([]SamplingRule, error) { + var errs []string + var rules []SamplingRule + rulesFromEnv := os.Getenv("DD_TRACE_SAMPLING_RULES") + if rulesFromEnv != "" { + traceRules, err := unmarshalSamplingRules([]byte(rulesFromEnv), SamplingRuleTrace) + if err != nil { + errs = append(errs, err.Error()) + } + rules = append(rules, traceRules...) + } + spanRules, err := unmarshalSamplingRules([]byte(os.Getenv("DD_SPAN_SAMPLING_RULES")), SamplingRuleSpan) + if err != nil { + errs = append(errs, err.Error()) + } + rules = append(rules, spanRules...) + rulesFile := os.Getenv("DD_SPAN_SAMPLING_RULES_FILE") + if len(rules) != 0 { + if rulesFile != "" { + log.Warn("DIAGNOSTICS Error(s): DD_SPAN_SAMPLING_RULES is available and will take precedence over DD_SPAN_SAMPLING_RULES_FILE") + } + if len(errs) != 0 { + return rules, fmt.Errorf("%s", strings.Join(errs, "\n\t")) + } + return rules, nil + } + if rulesFile != "" { + rulesFromEnvFile, err := os.ReadFile(rulesFile) + if err != nil { + errs = append(errs, fmt.Sprintf("Couldn't read file from DD_SPAN_SAMPLING_RULES_FILE: %v", err)) + } + spanRules, err = unmarshalSamplingRules(rulesFromEnvFile, SamplingRuleSpan) + if err != nil { + errs = append(errs, err.Error()) + } + rules = append(rules, spanRules...) + } + if len(errs) != 0 { + return rules, fmt.Errorf("%s", strings.Join(errs, "\n\t")) + } + return rules, nil +} + +// unmarshalSamplingRules unmarshals JSON from b and returns the sampling rules found, attributing +// the type t to them. If any errors are occurred, they are returned. +func unmarshalSamplingRules(b []byte, spanType SamplingRuleType) ([]SamplingRule, error) { + if len(b) == 0 { + return nil, nil + } + var jsonRules []struct { + Service string `json:"service"` + Name string `json:"name"` + Rate json.Number `json:"sample_rate"` + MaxPerSecond float64 `json:"max_per_second"` + } + err := json.Unmarshal(b, &jsonRules) + if err != nil { + return nil, fmt.Errorf("error unmarshalling JSON: %v", err) + } + rules := make([]SamplingRule, 0, len(jsonRules)) + var errs []string + for i, v := range jsonRules { + if v.Rate == "" { + errs = append(errs, fmt.Sprintf("at index %d: rate not provided", i)) + continue + } + rate, err := v.Rate.Float64() + if err != nil { + errs = append(errs, fmt.Sprintf("at index %d: %v", i, err)) + continue + } + if !(rate >= 0.0 && rate <= 1.0) { + log.Warn("at index %d: ignoring rule %+v: rate is out of [0.0, 1.0] range", i, v) + continue + } + switch spanType { + case SamplingRuleSpan: + if v.Service == "" { + v.Service = "*" + } + srvGlob, err := globMatch(v.Service) + if err != nil { + log.Warn("at index %d: ignoring rule %+v: service name regex pattern can't be compiled", i, v) + continue + } + if v.Name == "" { + v.Name = "*" + } + opGlob, err := globMatch(v.Name) + if err != nil { + log.Warn("at index %d: ignoring rule %+v: operation name regex pattern can't be compiled", i, v) + continue + } + rules = append(rules, SamplingRule{ + Service: srvGlob, + Name: opGlob, + Rate: rate, + MaxPerSecond: v.MaxPerSecond, + limiter: newSingleSpanRateLimiter(v.MaxPerSecond), + Type: SamplingRuleSpan, + }) + case SamplingRuleTrace: + switch { + case v.Service != "" && v.Name != "": + rules = append(rules, NameServiceRule(v.Name, v.Service, rate)) + case v.Service != "": + rules = append(rules, ServiceRule(v.Service, rate)) + case v.Name != "": + rules = append(rules, NameRule(v.Name, rate)) + } + } + } + if len(errs) != 0 { + return rules, fmt.Errorf("\n\t%s", strings.Join(errs, "\n\t")) + } + return rules, nil +} + +// MarshalJSON implements the json.Marshaler interface. +func (sr *SamplingRule) MarshalJSON() ([]byte, error) { + s := struct { + Service string `json:"service"` + Name string `json:"name"` + Rate float64 `json:"sample_rate"` + MaxPerSecond *float64 `json:"max_per_second,omitempty"` + }{} + if sr.exactService != "" { + s.Service = sr.exactService + } else if sr.Service != nil { + s.Service = fmt.Sprintf("%s", sr.Service) + } + if sr.exactName != "" { + s.Name = sr.exactName + } else if sr.Name != nil { + s.Name = fmt.Sprintf("%s", sr.Name) + } + s.Rate = sr.Rate + if sr.MaxPerSecond != 0 { + s.MaxPerSecond = &sr.MaxPerSecond + } + return json.Marshal(&s) +} diff --git a/ddtrace/tracer/sampler.go b/ddtrace/tracer/sampler.go index a39c6f47d6..ce44a18766 100644 --- a/ddtrace/tracer/sampler.go +++ b/ddtrace/tracer/sampler.go @@ -7,22 +7,13 @@ package tracer import ( "encoding/json" - "fmt" "io" "math" - "os" - "regexp" - "strconv" - "strings" "sync" - "time" "gopkg.in/DataDog/dd-trace-go.v1/ddtrace" "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ext" - "gopkg.in/DataDog/dd-trace-go.v1/internal/log" "gopkg.in/DataDog/dd-trace-go.v1/internal/samplernames" - - "golang.org/x/time/rate" ) // Sampler is the generic interface of any sampler. It must be safe for concurrent use. @@ -157,328 +148,3 @@ func (ps *prioritySampler) apply(spn *span) { } spn.SetTag(keySamplingPriorityRate, rate) } - -// rulesSampler holds instances of trace sampler and single span sampler, that are configured with the given set of rules. -// traceRulesSampler samples trace spans based on a user-defined set of rules and might impact sampling decision of the trace. -// singleSpanRulesSampler samples individual spans based on a separate user-defined set of rules and -// cannot impact the trace sampling decision. -type rulesSampler struct { - traceRulesSampler *traceRulesSampler - singleSpanRulesSampler *singleSpanRulesSampler -} - -// newRulesSampler configures a *rulesSampler instance using the given set of rules. -// Rules are split between trace and single span sampling rules according to their type. -// Such rules are user-defined through environment variable or WithSamplingRules option. -// Invalid rules or environment variable values are tolerated, by logging warnings and then ignoring them. -func newRulesSampler(rules []SamplingRule) *rulesSampler { - var spanRules, traceRules []SamplingRule - for _, rule := range rules { - if rule.Type == SamplingRuleSingleSpan { - rule.limiter = newSingleSpanRateLimiter(rule.MaxPerSecond) - spanRules = append(spanRules, rule) - } else { - traceRules = append(traceRules, rule) - } - } - return &rulesSampler{ - traceRulesSampler: newTraceRulesSampler(traceRules), - singleSpanRulesSampler: newSingleSpanRulesSampler(spanRules), - } -} - -const ( - // SamplingRuleTrace specifies that if a span matches a sampling rule of this type, - // an entire trace might be kept based on the given span and sent to the agent. - SamplingRuleTrace = iota - // SamplingRuleSingleSpan specifies that if a span matches a sampling rule, - // an individual span might be kept and sent to the agent. - SamplingRuleSingleSpan -) - -// samplingRulesFromEnv parses sampling rules from the DD_TRACE_SAMPLING_RULES, -// DD_SPAN_SAMPLING_RULES and DD_SPAN_SAMPLING_RULES_FILE environment variables. -func samplingRulesFromEnv() ([]SamplingRule, error) { - var errs []string - var rules []SamplingRule - rulesFromEnv := os.Getenv("DD_TRACE_SAMPLING_RULES") - if rulesFromEnv != "" { - traceRules, err := processSamplingRules([]byte(rulesFromEnv), SamplingRuleTrace) - if err != nil { - errs = append(errs, err.Error()) - } - rules = append(rules, traceRules...) - } - spanRules, err := spanSamplingRulesFromEnv() - if err != nil { - errs = append(errs, err.Error()) - } - rules = append(rules, spanRules...) - if len(errs) != 0 { - return rules, fmt.Errorf("%s", strings.Join(errs, "\n\t")) - } - return rules, nil -} - -// spanSamplingRulesFromEnv parses sampling rules from the DD_SPAN_SAMPLING_RULES and -// DD_SPAN_SAMPLING_RULES_FILE environment variables. -func spanSamplingRulesFromEnv() ([]SamplingRule, error) { - var errs []string - rulesFromEnv := os.Getenv("DD_SPAN_SAMPLING_RULES") - if rulesFromEnv == "" { - return nil, nil - } - rules, err := processSamplingRules([]byte(rulesFromEnv), SamplingRuleSingleSpan) - if err != nil { - errs = append(errs, err.Error()) - } - rulesFile := os.Getenv("DD_SPAN_SAMPLING_RULES_FILE") - if len(rules) != 0 { - if rulesFile != "" { - log.Warn("DIAGNOSTICS Error(s): DD_SPAN_SAMPLING_RULES is available and will take precedence over DD_SPAN_SAMPLING_RULES_FILE") - } - return rules, err - } - if rulesFile != "" { - rulesFromEnvFile, err := os.ReadFile(rulesFile) - if err != nil { - log.Warn("Couldn't read file from DD_SPAN_SAMPLING_RULES_FILE") - return nil, err - } - rules, err = processSamplingRules(rulesFromEnvFile, SamplingRuleSingleSpan) - if err != nil { - errs = append(errs, err.Error()) - } - } - if len(errs) != 0 { - return rules, fmt.Errorf("%s", strings.Join(errs, "\n\t")) - } - return rules, nil -} - -func processSamplingRules(b []byte, spanType int) ([]SamplingRule, error) { - if len(b) == 0 { - return nil, nil - } - var jsonRules []struct { - Service string `json:"service"` - Name string `json:"name"` - Rate json.Number `json:"sample_rate"` - MaxPerSecond float64 `json:"max_per_second"` - } - err := json.Unmarshal(b, &jsonRules) - if err != nil { - return nil, fmt.Errorf("error unmarshalling JSON: %v", err) - } - rules := make([]SamplingRule, 0, len(jsonRules)) - var errs []string - for i, v := range jsonRules { - if v.Rate == "" { - errs = append(errs, fmt.Sprintf("at index %d: rate not provided", i)) - continue - } - rate, err := v.Rate.Float64() - if err != nil { - errs = append(errs, fmt.Sprintf("at index %d: %v", i, err)) - continue - } - if !(rate >= 0.0 && rate <= 1.0) { - log.Warn("at index %d: ignoring rule %+v: rate is out of [0.0, 1.0] range", i, v) - continue - } - if spanType == SamplingRuleSingleSpan { - if v.Service == "" { - v.Service = "*" - } - srvGlob, err := globMatch(v.Service) - if err != nil { - log.Warn("at index %d: ignoring rule %+v: service name regex pattern can't be compiled", i, v) - continue - } - if v.Name == "" { - v.Name = "*" - } - opGlob, err := globMatch(v.Name) - if err != nil { - log.Warn("at index %d: ignoring rule %+v: operation name regex pattern can't be compiled", i, v) - continue - } - rules = append(rules, SamplingRule{ - Service: srvGlob, - Name: opGlob, - Rate: rate, - MaxPerSecond: v.MaxPerSecond, - limiter: newSingleSpanRateLimiter(v.MaxPerSecond), - Type: SamplingRuleSingleSpan, - }) - continue - } - switch { - case v.Service != "" && v.Name != "": - rules = append(rules, NameServiceRule(v.Name, v.Service, rate)) - case v.Service != "": - rules = append(rules, ServiceRule(v.Service, rate)) - case v.Name != "": - rules = append(rules, NameRule(v.Name, rate)) - } - } - if len(errs) != 0 { - return rules, fmt.Errorf("\n\t%s", strings.Join(errs, "\n\t")) - } - return rules, nil -} - -// globalSampleRate returns the sampling rate found in the DD_TRACE_SAMPLE_RATE environment variable. -// If it is invalid or not within the 0-1 range, NaN is returned. -func globalSampleRate() float64 { - defaultRate := math.NaN() - v := os.Getenv("DD_TRACE_SAMPLE_RATE") - if v == "" { - return defaultRate - } - r, err := strconv.ParseFloat(v, 64) - if err != nil { - log.Warn("ignoring DD_TRACE_SAMPLE_RATE: error: %v", err) - return defaultRate - } - if r >= 0.0 && r <= 1.0 { - return r - } - log.Warn("ignoring DD_TRACE_SAMPLE_RATE: out of range %f", r) - return defaultRate -} - -// SamplingRule is used for applying sampling rates to spans that match -// the service name, operation name or both. -// For basic usage, consider using the helper functions ServiceRule, NameRule, etc. -type SamplingRule struct { - Service *regexp.Regexp - Name *regexp.Regexp - Rate float64 - MaxPerSecond float64 - Type int - - exactService string - exactName string - limiter *rateLimiter -} - -// ServiceRule returns a SamplingRule that applies the provided sampling rate -// to spans that match the service name provided. -func ServiceRule(service string, rate float64) SamplingRule { - return SamplingRule{ - exactService: service, - Rate: rate, - } -} - -// NameRule returns a SamplingRule that applies the provided sampling rate -// to spans that match the operation name provided. -func NameRule(name string, rate float64) SamplingRule { - return SamplingRule{ - exactName: name, - Rate: rate, - } -} - -// NameServiceRule returns a SamplingRule that applies the provided sampling rate -// to spans matching both the operation and service names provided. -func NameServiceRule(name string, service string, rate float64) SamplingRule { - return SamplingRule{ - exactService: service, - exactName: name, - Rate: rate, - } -} - -// RateRule returns a SamplingRule that applies the provided sampling rate to all spans. -func RateRule(rate float64) SamplingRule { - return SamplingRule{ - Rate: rate, - } -} - -// match returns true when the span's details match all the expected values in the rule. -func (sr *SamplingRule) match(s *span) bool { - if sr.Service != nil && !sr.Service.MatchString(s.Service) { - return false - } else if sr.exactService != "" && sr.exactService != s.Service { - return false - } - if sr.Name != nil && !sr.Name.MatchString(s.Name) { - return false - } else if sr.exactName != "" && sr.exactName != s.Name { - return false - } - return true -} - -// MarshalJSON implements the json.Marshaler interface. -func (sr *SamplingRule) MarshalJSON() ([]byte, error) { - s := struct { - Service string `json:"service"` - Name string `json:"name"` - Rate float64 `json:"sample_rate"` - MaxPerSecond *float64 `json:"max_per_second,omitempty"` - }{} - if sr.exactService != "" { - s.Service = sr.exactService - } else if sr.Service != nil { - s.Service = fmt.Sprintf("%s", sr.Service) - } - if sr.exactName != "" { - s.Name = sr.exactName - } else if sr.Name != nil { - s.Name = fmt.Sprintf("%s", sr.Name) - } - s.Rate = sr.Rate - if sr.MaxPerSecond != 0 { - s.MaxPerSecond = &sr.MaxPerSecond - } - return json.Marshal(&s) -} - -// rateLimiter is a wrapper on top of golang.org/x/time/rate which implements a rate limiter but also -// returns the effective rate of allowance. -type rateLimiter struct { - limiter *rate.Limiter - - mu sync.Mutex // guards below fields - prevTime time.Time // time at which prevAllowed and prevSeen were set - allowed float64 // number of spans allowed in the current period - seen float64 // number of spans seen in the current period - prevAllowed float64 // number of spans allowed in the previous period - prevSeen float64 // number of spans seen in the previous period -} - -// allowOne returns the rate limiter's decision to allow the span to be sampled, and the -// effective rate at the time it is called. The effective rate is computed by averaging the rate -// for the previous second with the current rate -func (r *rateLimiter) allowOne(now time.Time) (bool, float64) { - r.mu.Lock() - defer r.mu.Unlock() - if d := now.Sub(r.prevTime); d >= time.Second { - // enough time has passed to reset the counters - if d.Truncate(time.Second) == time.Second && r.seen > 0 { - // exactly one second, so update prev - r.prevAllowed = r.allowed - r.prevSeen = r.seen - } else { - // more than one second, so reset previous rate - r.prevAllowed = 0 - r.prevSeen = 0 - } - r.prevTime = now - r.allowed = 0 - r.seen = 0 - } - - r.seen++ - var sampled bool - if r.limiter.AllowN(now, 1) { - r.allowed++ - sampled = true - } - er := (r.prevAllowed + r.allowed) / (r.prevSeen + r.seen) - return sampled, er -} diff --git a/ddtrace/tracer/sampler_test.go b/ddtrace/tracer/sampler_test.go index 53a04572c2..9059ed1432 100644 --- a/ddtrace/tracer/sampler_test.go +++ b/ddtrace/tracer/sampler_test.go @@ -360,7 +360,7 @@ func TestRulesSampler(t *testing.T) { rs := newRulesSampler(nil) span := makeSpan("http.request", "test-service") - result := rs.traceRulesSampler.apply(span) + result := rs.sampleTrace(span) assert.False(result) }) @@ -378,7 +378,7 @@ func TestRulesSampler(t *testing.T) { rs := newRulesSampler(v) span := makeSpan("http.request", "test-service") - result := rs.traceRulesSampler.apply(span) + result := rs.sampleTrace(span) assert.True(result) assert.Equal(1.0, span.Metrics["_dd.rule_psr"]) assert.Equal(1.0, span.Metrics["_dd.limit_psr"]) @@ -401,7 +401,7 @@ func TestRulesSampler(t *testing.T) { rs := newRulesSampler(v) span := makeSpan("http.request", "test-service") - result := rs.traceRulesSampler.apply(span) + result := rs.sampleTrace(span) assert.False(result) }) } @@ -438,7 +438,7 @@ func TestRulesSampler(t *testing.T) { rs := newRulesSampler(rules) span := makeSpan(tt.spanName, tt.spanSrv) - result := rs.singleSpanRulesSampler.apply(span) + result := rs.sampleSpan(span) assert.True(result) assert.Contains(span.Metrics, spanSamplingMechanism) assert.Contains(span.Metrics, singleSpanSamplingRuleRate) @@ -479,7 +479,7 @@ func TestRulesSampler(t *testing.T) { rs := newRulesSampler(rules) span := makeSpan(tt.spanName, tt.spanSrv) - result := rs.singleSpanRulesSampler.apply(span) + result := rs.sampleSpan(span) assert.False(result) assert.NotContains(span.Metrics, spanSamplingMechanism) assert.NotContains(span.Metrics, singleSpanSamplingRuleRate) @@ -507,7 +507,7 @@ func TestRulesSampler(t *testing.T) { rs := newRulesSampler(rules) span := makeSpan("http.request", "test-service") - result := rs.traceRulesSampler.apply(span) + result := rs.sampleTrace(span) assert.True(result) assert.Equal(rate, span.Metrics["_dd.rule_psr"]) if rate > 0.0 { @@ -551,7 +551,7 @@ func TestRulesSamplerInternals(t *testing.T) { now := time.Now() rs := &rulesSampler{} span := makeSpanAt("http.request", "test-service", now) - rs.traceRulesSampler.applyRate(span, 0.0, now) + rs.traces.applyRate(span, 0.0, now) assert.Equal(0.0, span.Metrics["_dd.rule_psr"]) _, ok := span.Metrics["_dd.limit_psr"] assert.False(ok) @@ -562,12 +562,12 @@ func TestRulesSamplerInternals(t *testing.T) { now := time.Now() rs := newRulesSampler(nil) // set samplingLimiter to specific state - rs.traceRulesSampler.limiter.prevTime = now.Add(-1 * time.Second) - rs.traceRulesSampler.limiter.allowed = 1 - rs.traceRulesSampler.limiter.seen = 1 + rs.traces.limiter.prevTime = now.Add(-1 * time.Second) + rs.traces.limiter.allowed = 1 + rs.traces.limiter.seen = 1 span := makeSpanAt("http.request", "test-service", now) - rs.traceRulesSampler.applyRate(span, 1.0, now) + rs.traces.applyRate(span, 1.0, now) assert.Equal(1.0, span.Metrics["_dd.rule_psr"]) assert.Equal(1.0, span.Metrics["_dd.limit_psr"]) }) @@ -577,18 +577,18 @@ func TestRulesSamplerInternals(t *testing.T) { now := time.Now() rs := newRulesSampler(nil) // force sampling limiter to 1.0 spans/sec - rs.traceRulesSampler.limiter.limiter = rate.NewLimiter(rate.Limit(1.0), 1) - rs.traceRulesSampler.limiter.prevTime = now.Add(-1 * time.Second) - rs.traceRulesSampler.limiter.allowed = 2 - rs.traceRulesSampler.limiter.seen = 2 + rs.traces.limiter.limiter = rate.NewLimiter(rate.Limit(1.0), 1) + rs.traces.limiter.prevTime = now.Add(-1 * time.Second) + rs.traces.limiter.allowed = 2 + rs.traces.limiter.seen = 2 // first span kept, second dropped span := makeSpanAt("http.request", "test-service", now) - rs.traceRulesSampler.applyRate(span, 1.0, now) + rs.traces.applyRate(span, 1.0, now) assert.EqualValues(ext.PriorityUserKeep, span.Metrics[keySamplingPriority]) assert.Equal(1.0, span.Metrics["_dd.rule_psr"]) assert.Equal(1.0, span.Metrics["_dd.limit_psr"]) span = makeSpanAt("http.request", "test-service", now) - rs.traceRulesSampler.applyRate(span, 1.0, now) + rs.traces.applyRate(span, 1.0, now) assert.EqualValues(ext.PriorityUserReject, span.Metrics[keySamplingPriority]) assert.Equal(1.0, span.Metrics["_dd.rule_psr"]) assert.Equal(0.75, span.Metrics["_dd.limit_psr"]) diff --git a/ddtrace/tracer/single_sampler.go b/ddtrace/tracer/single_sampler.go deleted file mode 100644 index 39b37a4f73..0000000000 --- a/ddtrace/tracer/single_sampler.go +++ /dev/null @@ -1,119 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2016 Datadog, Inc. - -package tracer - -import ( - "fmt" - "math" - "regexp" - "strings" - "time" - - "golang.org/x/time/rate" - - "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ext" - "gopkg.in/DataDog/dd-trace-go.v1/internal/samplernames" -) - -// singleSpanRulesSampler allows a user-defined list of rules to apply to spans -// to sample single spans. -// These rules match based on the span's Service and Name. If empty value is supplied -// to either Service or Name field, it will default to "*", allow all. -// When making a sampling decision, the rules are checked in order until -// a match is found. -// If a match is found, the rate from that rule is used. -// If no match is found, no changes or further sampling is applied to the spans. -// The rate is used to determine if the span should be sampled, but an upper -// limit can be defined using the max_per_second field when supplying the rule. -// If max_per_second is absent in the rule, the default is allow all. -// Its value is the max number of spans to sample per second. -// Spans that matched the rules but exceeded the rate limit are not sampled. -type singleSpanRulesSampler struct { - rules []SamplingRule // the rules to match spans with -} - -// newSingleSpanRulesSampler configures a *singleSpanRulesSampler instance using the given set of rules. -// Invalid rules or environment variable values are tolerated, by logging warnings and then ignoring them. -func newSingleSpanRulesSampler(rules []SamplingRule) *singleSpanRulesSampler { - return &singleSpanRulesSampler{ - rules: rules, - } -} - -const ( - // spanSamplingMechanism specifies the sampling mechanism by which an individual span was sampled - spanSamplingMechanism = "_dd.span_sampling.mechanism" - - // singleSpanSamplingMechanism specifies value reserved to indicate that a span was kept - // on account of a single span sampling rule. - singleSpanSamplingMechanism = 8 - - // singleSpanSamplingRuleRate specifies the configured sampling probability for the single span sampling rule. - singleSpanSamplingRuleRate = "_dd.span_sampling.rule_rate" - - // singleSpanSamplingMPS specifies the configured limit for the single span sampling rule - // that the span matched. If there is no configured limit, then this tag is omitted. - singleSpanSamplingMPS = "_dd.span_sampling.max_per_second" -) - -// apply uses the sampling rules to determine the sampling rate for the -// provided span. If the rules don't match, then it returns false and the span is not -// modified. -func (rs *singleSpanRulesSampler) apply(span *span) bool { - for _, rule := range rs.rules { - if rule.match(span) { - rs.applyRate(span, rule, rule.Rate, time.Now()) - return true - } - } - return false -} - -func (rs *singleSpanRulesSampler) applyRate(span *span, rule SamplingRule, rate float64, now time.Time) { - span.setMetric(keyRulesSamplerAppliedRate, rate) - if !sampledByRate(span.SpanID, rate) { - span.setSamplingPriority(ext.PriorityUserReject, samplernames.RuleRate, rate) - return - } - - var sampled bool - if rule.limiter != nil { - sampled, rate = rule.limiter.allowOne(now) - if !sampled { - return - } - } - span.setMetric(spanSamplingMechanism, singleSpanSamplingMechanism) - span.setMetric(singleSpanSamplingRuleRate, rate) - if rule.MaxPerSecond != 0 { - span.setMetric(singleSpanSamplingMPS, rule.MaxPerSecond) - } -} - -// newSingleSpanRateLimiter returns a rate limiter which restricts the number of single spans sampled per second. -// This defaults to infinite, allow all behaviour. The MaxPerSecond value of the rule may override the default. -func newSingleSpanRateLimiter(mps float64) *rateLimiter { - limit := math.MaxFloat64 - if mps > 0 { - limit = mps - } - return &rateLimiter{ - limiter: rate.NewLimiter(rate.Limit(limit), int(math.Ceil(limit))), - prevTime: time.Now(), - } -} - -// globMatch compiles pattern string into glob format, i.e. regular expressions with only '?' -// and '*' treated as regex metacharacters. -func globMatch(pattern string) (*regexp.Regexp, error) { - // escaping regex characters - pattern = regexp.QuoteMeta(pattern) - // replacing '?' and '*' with regex characters - pattern = strings.Replace(pattern, "\\?", ".", -1) - pattern = strings.Replace(pattern, "\\*", ".*", -1) - //pattern must match an entire string - return regexp.Compile(fmt.Sprintf("^%s$", pattern)) -} diff --git a/ddtrace/tracer/spancontext.go b/ddtrace/tracer/spancontext.go index 6174392eab..ba4bfd4894 100644 --- a/ddtrace/tracer/spancontext.go +++ b/ddtrace/tracer/spancontext.go @@ -331,9 +331,13 @@ func (t *trace) finishedOne(s *span) { atomic.AddUint64(&tr.droppedP0Traces, 1) } // if trace sampling decision is drop, we still want to send single spans + // unless there are no single span sampling rules defined + if !tr.rulesSampling.spans.enabled() { + return + } var singleSpans []*span for _, span := range t.spans { - if tr.rulesSampling.singleSpanRulesSampler.apply(span) { + if tr.rulesSampling.sampleSpan(span) { singleSpans = append(singleSpans, span) } } diff --git a/ddtrace/tracer/tracer.go b/ddtrace/tracer/tracer.go index 177204f02a..f29a36eba7 100644 --- a/ddtrace/tracer/tracer.go +++ b/ddtrace/tracer/tracer.go @@ -523,7 +523,7 @@ func (t *tracer) sample(span *span) { if rs, ok := sampler.(RateSampler); ok && rs.Rate() < 1 { span.setMetric(sampleRateMetricKey, rs.Rate()) } - if t.rulesSampling.traceRulesSampler.apply(span) { + if t.rulesSampling.sampleTrace(span) { return } t.prioritySampling.apply(span) diff --git a/ddtrace/tracer/tracer_test.go b/ddtrace/tracer/tracer_test.go index 2ff54c318a..80b32f277b 100644 --- a/ddtrace/tracer/tracer_test.go +++ b/ddtrace/tracer/tracer_test.go @@ -367,6 +367,7 @@ func TestSamplingDecision(t *testing.T) { assert.Equal(t, "", span.context.trace.propagatingTags[keyDecisionMaker]) assert.Equal(t, decisionDrop, span.context.trace.samplingDecision) }) + t.Run("client_dropped_with_single_spans", func(t *testing.T) { os.Setenv("DD_SPAN_SAMPLING_RULES", `[{"service": "test_*","name":"*_1", "sample_rate": 1.0, "max_per_second": 15.0}]`) defer os.Unsetenv("DD_SPAN_SAMPLING_RULES") From f554cda951eb611d2dbfc1ea9cfcd64ef5515af5 Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Wed, 6 Jul 2022 15:36:48 +0200 Subject: [PATCH 13/27] added type field to marshalling sampling rules --- ddtrace/tracer/log_test.go | 6 +++--- ddtrace/tracer/rules_sampler.go | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ddtrace/tracer/log_test.go b/ddtrace/tracer/log_test.go index 138719f97c..5aec1d054e 100644 --- a/ddtrace/tracer/log_test.go +++ b/ddtrace/tracer/log_test.go @@ -55,7 +55,7 @@ func TestStartupLog(t *testing.T) { tp.Reset() logStartup(tracer) assert.Len(tp.Lines(), 2) - assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sample_rate_limit":"100","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75}\],"sampling_rules_error":"","service_mappings":{"initial_service":"new_service"},"tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) + assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sample_rate_limit":"100","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75,"type":0}\],"sampling_rules_error":"","service_mappings":{"initial_service":"new_service"},"tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) }) t.Run("limit", func(t *testing.T) { @@ -86,7 +86,7 @@ func TestStartupLog(t *testing.T) { tp.Reset() logStartup(tracer) assert.Len(tp.Lines(), 2) - assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sample_rate_limit":"1000.001","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75}\],"sampling_rules_error":"","service_mappings":{"initial_service":"new_service"},"tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) + assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sample_rate_limit":"1000.001","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75,"type":0}\],"sampling_rules_error":"","service_mappings":{"initial_service":"new_service"},"tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) }) t.Run("errors", func(t *testing.T) { @@ -100,7 +100,7 @@ func TestStartupLog(t *testing.T) { tp.Reset() logStartup(tracer) assert.Len(tp.Lines(), 2) - assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sample_rate_limit":"100","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234}\],"sampling_rules_error":"found errors:\\n\\tat index 1: rate not provided","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) + assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sample_rate_limit":"100","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234,"type":0}\],"sampling_rules_error":"found errors:\\n\\tat index 1: rate not provided","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) }) t.Run("lambda", func(t *testing.T) { diff --git a/ddtrace/tracer/rules_sampler.go b/ddtrace/tracer/rules_sampler.go index db0506f3cd..fce0df72c4 100644 --- a/ddtrace/tracer/rules_sampler.go +++ b/ddtrace/tracer/rules_sampler.go @@ -550,6 +550,7 @@ func (sr *SamplingRule) MarshalJSON() ([]byte, error) { Service string `json:"service"` Name string `json:"name"` Rate float64 `json:"sample_rate"` + Type int `json:"type"` MaxPerSecond *float64 `json:"max_per_second,omitempty"` }{} if sr.exactService != "" { @@ -563,6 +564,7 @@ func (sr *SamplingRule) MarshalJSON() ([]byte, error) { s.Name = fmt.Sprintf("%s", sr.Name) } s.Rate = sr.Rate + s.Type = int(sr.Type) if sr.MaxPerSecond != 0 { s.MaxPerSecond = &sr.MaxPerSecond } From d7c149ace3214a1ab46f5c17ac1c447a5cd95533 Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Thu, 7 Jul 2022 18:29:12 +0200 Subject: [PATCH 14/27] fixed comments, error formatting, added glob matcher benchmark --- ddtrace/tracer/log_test.go | 5 +-- ddtrace/tracer/rules_sampler.go | 33 ++++++++------- ddtrace/tracer/sampler_test.go | 72 ++++++++++++++++++++++++++++----- ddtrace/tracer/spancontext.go | 4 +- ddtrace/tracer/tracer.go | 2 +- 5 files changed, 86 insertions(+), 30 deletions(-) diff --git a/ddtrace/tracer/log_test.go b/ddtrace/tracer/log_test.go index 5aec1d054e..2381e501da 100644 --- a/ddtrace/tracer/log_test.go +++ b/ddtrace/tracer/log_test.go @@ -125,9 +125,8 @@ func TestLogSamplingRules(t *testing.T) { defer stop() lines := removeAppSec(tp.Lines()) - assert.Len(lines, 2) - assert.Contains(lines[0], "WARN: at index 4: ignoring rule {Service: Name: Rate:9.10 MaxPerSecond:0}: rate is out of [0.0, 1.0] range") - assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? WARN: DIAGNOSTICS Error\(s\) parsing sampling rules: found errors:\n\tat index 1: rate not provided\n\tat index 3: rate not provided$`, lines[1]) + assert.Len(lines, 1) + assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? WARN: DIAGNOSTICS Error\(s\) parsing sampling rules: found errors:\n\tat index 1: rate not provided\n\tat index 3: rate not provided\n\tat index 4: ignoring rule {Service: Name: Rate:9\.10 MaxPerSecond:0}: rate is out of \[0\.0, 1\.0] range$`, lines[0]) } func TestLogAgentReachable(t *testing.T) { diff --git a/ddtrace/tracer/rules_sampler.go b/ddtrace/tracer/rules_sampler.go index fce0df72c4..06767aa338 100644 --- a/ddtrace/tracer/rules_sampler.go +++ b/ddtrace/tracer/rules_sampler.go @@ -420,15 +420,19 @@ func globMatch(pattern string) (*regexp.Regexp, error) { // replacing '?' and '*' with regex characters pattern = strings.Replace(pattern, "\\?", ".", -1) pattern = strings.Replace(pattern, "\\*", ".*", -1) - //pattern must match an entire string + // pattern must match an entire string return regexp.Compile(fmt.Sprintf("^%s$", pattern)) } // samplingRulesFromEnv parses sampling rules from the DD_TRACE_SAMPLING_RULES, // DD_SPAN_SAMPLING_RULES and DD_SPAN_SAMPLING_RULES_FILE environment variables. -func samplingRulesFromEnv() ([]SamplingRule, error) { +func samplingRulesFromEnv() (rules []SamplingRule, err error) { var errs []string - var rules []SamplingRule + defer func() { + if len(errs) != 0 { + err = fmt.Errorf("\n\t%s", strings.Join(errs, "\n\t")) + } + }() rulesFromEnv := os.Getenv("DD_TRACE_SAMPLING_RULES") if rulesFromEnv != "" { traceRules, err := unmarshalSamplingRules([]byte(rulesFromEnv), SamplingRuleTrace) @@ -448,7 +452,7 @@ func samplingRulesFromEnv() ([]SamplingRule, error) { log.Warn("DIAGNOSTICS Error(s): DD_SPAN_SAMPLING_RULES is available and will take precedence over DD_SPAN_SAMPLING_RULES_FILE") } if len(errs) != 0 { - return rules, fmt.Errorf("%s", strings.Join(errs, "\n\t")) + return rules, err } return rules, nil } @@ -457,16 +461,13 @@ func samplingRulesFromEnv() ([]SamplingRule, error) { if err != nil { errs = append(errs, fmt.Sprintf("Couldn't read file from DD_SPAN_SAMPLING_RULES_FILE: %v", err)) } - spanRules, err = unmarshalSamplingRules(rulesFromEnvFile, SamplingRuleSpan) + spanRules, err := unmarshalSamplingRules(rulesFromEnvFile, SamplingRuleSpan) if err != nil { errs = append(errs, err.Error()) } rules = append(rules, spanRules...) } - if len(errs) != 0 { - return rules, fmt.Errorf("%s", strings.Join(errs, "\n\t")) - } - return rules, nil + return rules, err } // unmarshalSamplingRules unmarshals JSON from b and returns the sampling rules found, attributing @@ -497,18 +498,22 @@ func unmarshalSamplingRules(b []byte, spanType SamplingRuleType) ([]SamplingRule errs = append(errs, fmt.Sprintf("at index %d: %v", i, err)) continue } - if !(rate >= 0.0 && rate <= 1.0) { - log.Warn("at index %d: ignoring rule %+v: rate is out of [0.0, 1.0] range", i, v) + if rate < 0.0 || rate > 1.0 { + errs = append(errs, fmt.Sprintf("at index %d: ignoring rule %+v: rate is out of [0.0, 1.0] range", i, v)) continue } switch spanType { case SamplingRuleSpan: + if v.Service == "" && v.Name == "" { + errs = append(errs, fmt.Sprintf("at index %d: ignoring rule %+v: service name and operation name are not provided", i, v)) + continue + } if v.Service == "" { v.Service = "*" } srvGlob, err := globMatch(v.Service) if err != nil { - log.Warn("at index %d: ignoring rule %+v: service name regex pattern can't be compiled", i, v) + errs = append(errs, fmt.Sprintf("at index %d: ignoring rule %+v: service name regex pattern can't be compiled", i, v)) continue } if v.Name == "" { @@ -516,7 +521,7 @@ func unmarshalSamplingRules(b []byte, spanType SamplingRuleType) ([]SamplingRule } opGlob, err := globMatch(v.Name) if err != nil { - log.Warn("at index %d: ignoring rule %+v: operation name regex pattern can't be compiled", i, v) + errs = append(errs, fmt.Sprintf("at index %d: ignoring rule %+v: operation name regex pattern can't be compiled", i, v)) continue } rules = append(rules, SamplingRule{ @@ -539,7 +544,7 @@ func unmarshalSamplingRules(b []byte, spanType SamplingRuleType) ([]SamplingRule } } if len(errs) != 0 { - return rules, fmt.Errorf("\n\t%s", strings.Join(errs, "\n\t")) + return rules, fmt.Errorf("%s", strings.Join(errs, "\n\t")) } return rules, nil } diff --git a/ddtrace/tracer/sampler_test.go b/ddtrace/tracer/sampler_test.go index 9059ed1432..934d5caab2 100644 --- a/ddtrace/tracer/sampler_test.go +++ b/ddtrace/tracer/sampler_test.go @@ -251,11 +251,12 @@ func TestRuleEnvVars(t *testing.T) { ruleN: 3, }, { // invalid rule ignored - value: `[{"service": "abcd", "sample_rate": 42.0}, {"service": "abcd", "sample_rate": 0.2}]`, - ruleN: 1, + value: `[{"service": "abcd", "sample_rate": 42.0}, {"service": "abcd", "sample_rate": 0.2}]`, + ruleN: 1, + errStr: "\n\tat index 0: ignoring rule {Service:abcd Name: Rate:42.0 MaxPerSecond:0}: rate is out of [0.0, 1.0] range", }, { value: `not JSON at all`, - errStr: `error unmarshalling JSON: invalid character 'o' in literal null (expecting 'u')`, + errStr: "\n\terror unmarshalling JSON: invalid character 'o' in literal null (expecting 'u')", }, } { t.Run("", func(t *testing.T) { @@ -291,11 +292,15 @@ func TestRuleEnvVars(t *testing.T) { ruleN: 3, }, { // invalid rule ignored - value: `[{"service": "abcd", "sample_rate": 42.0}, {"service": "abcd", "sample_rate": 0.2}]`, - ruleN: 1, + value: `[{"service": "abcd", "sample_rate": 42.0}, {"service": "abcd", "sample_rate": 0.2}]`, + ruleN: 1, + errStr: "\n\tat index 0: ignoring rule {Service:abcd Name: Rate:42.0 MaxPerSecond:0}: rate is out of [0.0, 1.0] range", }, { value: `not JSON at all`, - errStr: `error unmarshalling JSON: invalid character 'o' in literal null (expecting 'u')`, + errStr: "\n\terror unmarshalling JSON: invalid character 'o' in literal null (expecting 'u')", + }, { + value: `[{"sample_rate": 1.0}]`, + errStr: "\n\tat index 0: ignoring rule {Service: Name: Rate:1.0 MaxPerSecond:0}: service name and operation name are not provided", }, } { t.Run(fmt.Sprintf("%v", i), func(t *testing.T) { @@ -333,10 +338,6 @@ func TestRuleEnvVars(t *testing.T) { rules: `[{"service": "*abcd", "sample_rate": 1.0}]`, nameRegex: "^.*$", srvRegex: "^.*abcd$", - }, { - rules: `[{"sample_rate": 1.0},{"name": "wxyz", "sample_rate": 0.9}]`, - nameRegex: "^.*$", - srvRegex: "^.*$", }, } { t.Run(fmt.Sprintf("%v", i), func(t *testing.T) { @@ -801,3 +802,54 @@ func TestGlobMatch(t *testing.T) { }) } } + +func BenchmarkGlobMatchSpan(b *testing.B) { + var spans []*span + for i := 0; i < 1000; i++ { + spans = append(spans, newSpan("name.ops.date", "srv.name.ops.date", "", 0, 0, 0)) + } + + b.Run("no-regex", func(b *testing.B) { + os.Setenv("DD_SPAN_SAMPLING_RULES", `[{"service": "srv.name.ops.date", name:"name.ops.date?", sample_rate": 0.234}]`) + os.Unsetenv("DD_SPAN_SAMPLING_RULES") + rules, err := samplingRulesFromEnv() + assert.Nil(b, err) + rs := newSingleSpanRulesSampler(rules) + b.ResetTimer() + for n := 0; n < b.N; n++ { + for _, span := range spans { + rs.apply(span) + } + } + }) + + b.Run("glob-match-?", func(b *testing.B) { + os.Setenv("DD_SPAN_SAMPLING_RULES", `[{"service": "srv?name?ops?date", name:"name*ops*date*", sample_rate": 0.234}]`) + os.Unsetenv("DD_SPAN_SAMPLING_RULES") + rules, err := samplingRulesFromEnv() + assert.Nil(b, err) + rs := newSingleSpanRulesSampler(rules) + b.ResetTimer() + for n := 0; n < b.N; n++ { + for _, span := range spans { + rs.apply(span) + } + } + }) + + b.Run("glob-match-*", func(b *testing.B) { + os.Setenv("DD_SPAN_SAMPLING_RULES", `[{"service": "srv*name*ops*date", name:"name?ops?date?", sample_rate": 0.234}]`) + os.Unsetenv("DD_SPAN_SAMPLING_RULES") + + rules, err := samplingRulesFromEnv() + assert.Nil(b, err) + rs := newSingleSpanRulesSampler(rules) + + b.ResetTimer() + for n := 0; n < b.N; n++ { + for _, span := range spans { + rs.apply(span) + } + } + }) +} diff --git a/ddtrace/tracer/spancontext.go b/ddtrace/tracer/spancontext.go index ba4bfd4894..ca23ad9e90 100644 --- a/ddtrace/tracer/spancontext.go +++ b/ddtrace/tracer/spancontext.go @@ -341,10 +341,10 @@ func (t *trace) finishedOne(s *span) { singleSpans = append(singleSpans, span) } } - t.spans = singleSpans if len(t.spans) == 0 { - return + return // no spans matched the rules and were sampled } + t.spans = singleSpans } tr.pushTrace(t.spans) } diff --git a/ddtrace/tracer/tracer.go b/ddtrace/tracer/tracer.go index f29a36eba7..d330338039 100644 --- a/ddtrace/tracer/tracer.go +++ b/ddtrace/tracer/tracer.go @@ -75,7 +75,7 @@ type tracer struct { droppedP0Traces, droppedP0Spans uint64 // rulesSampling holds an instance of the rules sampler used to apply either trace sampling, - //or single span sampling rules on spans. These are user-defined + // or single span sampling rules on spans. These are user-defined // rules for applying a sampling rate to spans that match the designated service // or operation name. rulesSampling *rulesSampler From 4ab5b8cd600aea6aaeebe3e54d0687386efbd797 Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Fri, 8 Jul 2022 18:26:54 +0200 Subject: [PATCH 15/27] added test for a rule marshalling, simple benchmark for single span retention --- ddtrace/tracer/sampler_test.go | 23 ++++++++++++ ddtrace/tracer/tracer_test.go | 64 ++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) diff --git a/ddtrace/tracer/sampler_test.go b/ddtrace/tracer/sampler_test.go index 934d5caab2..474de13b27 100644 --- a/ddtrace/tracer/sampler_test.go +++ b/ddtrace/tracer/sampler_test.go @@ -803,6 +803,29 @@ func TestGlobMatch(t *testing.T) { } } +func TestSamplingRuleMarshall(t *testing.T) { + for _, tt := range []struct { + in SamplingRule + out string + }{ + {SamplingRule{nil, nil, 0, 0, 0, "srv", "ops", nil}, + `{"service":"srv","name":"ops","sample_rate":0,"type":0}`}, + {SamplingRule{regexp.MustCompile("srv.[0-9]+]"), nil, 0, 0, 0, "srv", "ops", nil}, + `{"service":"srv","name":"ops","sample_rate":0,"type":0}`}, + {SamplingRule{regexp.MustCompile("srv.*"), regexp.MustCompile("ops.[0-9]+]"), 0, 0, 0, "", "", nil}, + `{"service":"srv.*","name":"ops.[0-9]+]","sample_rate":0,"type":0}`}, + {SamplingRule{regexp.MustCompile("srv.[0-9]+]"), regexp.MustCompile("ops.[0-9]+]"), 0.55, 0, 0, "", "", nil}, + `{"service":"srv.[0-9]+]","name":"ops.[0-9]+]","sample_rate":0.55,"type":0}`}, + {SamplingRule{regexp.MustCompile("srv.[0-9]+]"), regexp.MustCompile("ops.[0-9]+]"), 0.55, 0, 1, "", "", nil}, + `{"service":"srv.[0-9]+]","name":"ops.[0-9]+]","sample_rate":0.55,"type":1}`}, + {SamplingRule{regexp.MustCompile("srv.[0-9]+]"), regexp.MustCompile("ops.[0-9]+]"), 0.55, 1000, 1, "", "", nil}, + `{"service":"srv.[0-9]+]","name":"ops.[0-9]+]","sample_rate":0.55,"type":1,"max_per_second":1000}`}, + } { + m, err := tt.in.MarshalJSON() + assert.Nil(t, err) + assert.Equal(t, tt.out, string(m)) + } +} func BenchmarkGlobMatchSpan(b *testing.B) { var spans []*span for i := 0; i < 1000; i++ { diff --git a/ddtrace/tracer/tracer_test.go b/ddtrace/tracer/tracer_test.go index 80b32f277b..b4413a18c0 100644 --- a/ddtrace/tracer/tracer_test.go +++ b/ddtrace/tracer/tracer_test.go @@ -1820,3 +1820,67 @@ func BenchmarkTracerStackFrames(b *testing.B) { span.Finish(StackFrames(64, 0)) } } + +func BenchmarkSingleSpanRetention(b *testing.B) { + b.Run("no-rules", func(b *testing.B) { + tracer, _, _, stop := startTestTracer(b) + defer stop() + tracer.config.agent.DropP0s = true + tracer.config.sampler = NewRateSampler(0) + tracer.prioritySampling.defaultRate = 0 + tracer.config.serviceName = "test_service" + b.ResetTimer() + for i := 0; i < b.N; i++ { + span := tracer.StartSpan("name_1").(*span) + for i := 0; i < 100; i++ { + child := tracer.StartSpan("name_2", ChildOf(span.context)) + child.Finish() + } + span.Finish() + } + }) + + b.Run("with-rules/match-half", func(b *testing.B) { + os.Setenv("DD_SPAN_SAMPLING_RULES", `[{"service": "test_*","name":"*_1", "sample_rate": 1.0, "max_per_second": 15.0}]`) + defer os.Unsetenv("DD_SPAN_SAMPLING_RULES") + tracer, _, _, stop := startTestTracer(b) + defer stop() + tracer.config.agent.DropP0s = true + tracer.config.sampler = NewRateSampler(0) + tracer.prioritySampling.defaultRate = 0 + tracer.config.serviceName = "test_service" + b.ResetTimer() + for i := 0; i < b.N; i++ { + span := tracer.StartSpan("name_1").(*span) + for i := 0; i < 50; i++ { + child := tracer.StartSpan("name_2", ChildOf(span.context)) + child.Finish() + } + for i := 0; i < 50; i++ { + child := tracer.StartSpan("name", ChildOf(span.context)) + child.Finish() + } + span.Finish() + } + }) + + b.Run("with-rules/match-all", func(b *testing.B) { + os.Setenv("DD_SPAN_SAMPLING_RULES", `[{"service": "test_*","name":"*_1", "sample_rate": 1.0, "max_per_second": 15.0}]`) + defer os.Unsetenv("DD_SPAN_SAMPLING_RULES") + tracer, _, _, stop := startTestTracer(b) + defer stop() + tracer.config.agent.DropP0s = true + tracer.config.sampler = NewRateSampler(0) + tracer.prioritySampling.defaultRate = 0 + tracer.config.serviceName = "test_service" + b.ResetTimer() + for i := 0; i < b.N; i++ { + span := tracer.StartSpan("name_1").(*span) + for i := 0; i < 100; i++ { + child := tracer.StartSpan("name_2", ChildOf(span.context)) + child.Finish() + } + span.Finish() + } + }) +} From 11394feedfa8d93c358a11c406f0b5e9d2515487 Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Wed, 13 Jul 2022 23:54:06 +0200 Subject: [PATCH 16/27] removed the top_level tag, added another glob match example --- ddtrace/tracer/sampler_test.go | 2 ++ ddtrace/tracer/spancontext.go | 6 ++++++ ddtrace/tracer/tracer_test.go | 9 +++++++++ 3 files changed, 17 insertions(+) diff --git a/ddtrace/tracer/sampler_test.go b/ddtrace/tracer/sampler_test.go index 474de13b27..4ca9d5e56a 100644 --- a/ddtrace/tracer/sampler_test.go +++ b/ddtrace/tracer/sampler_test.go @@ -768,6 +768,8 @@ func TestGlobMatch(t *testing.T) { {"a*test*case", "a-test-case", true}, {"a*test*case", "atestcase", true}, {"a*test*case", "abadcase", false}, + {"*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaax", false}, + {"*a*a*a*a*a*a", "aaaaaaaarrrrrrraaaraaarararaarararaarararaaa", true}, // pattern with ? {"test?", "test", false}, {"test?", "test-case", false}, diff --git a/ddtrace/tracer/spancontext.go b/ddtrace/tracer/spancontext.go index ca23ad9e90..98daaf1b30 100644 --- a/ddtrace/tracer/spancontext.go +++ b/ddtrace/tracer/spancontext.go @@ -336,8 +336,14 @@ func (t *trace) finishedOne(s *span) { return } var singleSpans []*span + canDropP0s := tr.config.canDropP0s() for _, span := range t.spans { if tr.rulesSampling.sampleSpan(span) { + // since stats are computed on the tracer side, the keyTopLevel tag + // must be removed to preserve stats correctness + if canDropP0s { + delete(span.Metrics, keyTopLevel) + } singleSpans = append(singleSpans, span) } } diff --git a/ddtrace/tracer/tracer_test.go b/ddtrace/tracer/tracer_test.go index b4413a18c0..bd11bd3e3a 100644 --- a/ddtrace/tracer/tracer_test.go +++ b/ddtrace/tracer/tracer_test.go @@ -374,6 +374,8 @@ func TestSamplingDecision(t *testing.T) { tracer, _, _, stop := startTestTracer(t) defer stop() tracer.config.agent.DropP0s = true + tracer.config.featureFlags = make(map[string]struct{}) + tracer.config.featureFlags["discovery"] = struct{}{} tracer.config.sampler = NewRateSampler(0) tracer.prioritySampling.defaultRate = 0 tracer.config.serviceName = "test_service" @@ -386,6 +388,7 @@ func TestSamplingDecision(t *testing.T) { assert.Equal(t, 8.0, span.Metrics[spanSamplingMechanism]) assert.Equal(t, 1.0, span.Metrics[singleSpanSamplingRuleRate]) assert.Equal(t, 15.0, span.Metrics[singleSpanSamplingMPS]) + assert.NotContains(t, span.Metrics, keyTopLevel) }) } @@ -1826,6 +1829,8 @@ func BenchmarkSingleSpanRetention(b *testing.B) { tracer, _, _, stop := startTestTracer(b) defer stop() tracer.config.agent.DropP0s = true + tracer.config.featureFlags = make(map[string]struct{}) + tracer.config.featureFlags["discovery"] = struct{}{} tracer.config.sampler = NewRateSampler(0) tracer.prioritySampling.defaultRate = 0 tracer.config.serviceName = "test_service" @@ -1846,6 +1851,8 @@ func BenchmarkSingleSpanRetention(b *testing.B) { tracer, _, _, stop := startTestTracer(b) defer stop() tracer.config.agent.DropP0s = true + tracer.config.featureFlags = make(map[string]struct{}) + tracer.config.featureFlags["discovery"] = struct{}{} tracer.config.sampler = NewRateSampler(0) tracer.prioritySampling.defaultRate = 0 tracer.config.serviceName = "test_service" @@ -1870,6 +1877,8 @@ func BenchmarkSingleSpanRetention(b *testing.B) { tracer, _, _, stop := startTestTracer(b) defer stop() tracer.config.agent.DropP0s = true + tracer.config.featureFlags = make(map[string]struct{}) + tracer.config.featureFlags["discovery"] = struct{}{} tracer.config.sampler = NewRateSampler(0) tracer.prioritySampling.defaultRate = 0 tracer.config.serviceName = "test_service" From b595aca79db0ad67768054701a8a95b95c3ca944 Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Tue, 19 Jul 2022 13:26:33 +0200 Subject: [PATCH 17/27] refactored according to comments --- ddtrace/tracer/log.go | 4 ++-- ddtrace/tracer/log_test.go | 2 +- ddtrace/tracer/option.go | 1 - ddtrace/tracer/rules_sampler.go | 31 ++++++++++++------------------- ddtrace/tracer/sampler_test.go | 12 ++++++------ ddtrace/tracer/span.go | 10 ++++++++++ ddtrace/tracer/spancontext.go | 31 +++++++++++++++---------------- ddtrace/tracer/tracer_test.go | 6 +++--- 8 files changed, 49 insertions(+), 48 deletions(-) diff --git a/ddtrace/tracer/log.go b/ddtrace/tracer/log.go index 587a646912..de0e32b021 100644 --- a/ddtrace/tracer/log.go +++ b/ddtrace/tracer/log.go @@ -107,9 +107,9 @@ func logStartup(t *tracer) { AppSec: appsec.Enabled(), } if _, err := samplingRulesFromEnv(); err != nil { - info.SamplingRulesError = fmt.Sprintf("found errors:%s", err) + info.SamplingRulesError = fmt.Sprintf("%s", err) } - if limit, ok := t.rulesSampling.traces.limit(); ok { + if limit, ok := t.rulesSampling.TraceRateLimit(); ok { info.SampleRateLimit = fmt.Sprintf("%v", limit) } if !t.config.logToStdout { diff --git a/ddtrace/tracer/log_test.go b/ddtrace/tracer/log_test.go index 2381e501da..ebc9df095b 100644 --- a/ddtrace/tracer/log_test.go +++ b/ddtrace/tracer/log_test.go @@ -100,7 +100,7 @@ func TestStartupLog(t *testing.T) { tp.Reset() logStartup(tracer) assert.Len(tp.Lines(), 2) - assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sample_rate_limit":"100","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234,"type":0}\],"sampling_rules_error":"found errors:\\n\\tat index 1: rate not provided","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) + assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sample_rate_limit":"100","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234,"type":0}\],"sampling_rules_error":"\\n\\tat index 1: rate not provided","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) }) t.Run("lambda", func(t *testing.T) { diff --git a/ddtrace/tracer/option.go b/ddtrace/tracer/option.go index 4b88cc0865..1717cdec7c 100644 --- a/ddtrace/tracer/option.go +++ b/ddtrace/tracer/option.go @@ -669,7 +669,6 @@ func WithDogstatsdAddress(addr string) StartOption { // WithSamplingRules specifies the sampling rates to apply to trace spans based on the // provided rules. Both trace sampling and single span sampling rules can be configured with this option. -// The `Type` determines the scope of the sampling rules, defaulting to the entire trace. See `SamplingRule` for more details. func WithSamplingRules(rules []SamplingRule) StartOption { return func(cfg *config) { cfg.samplingRules = rules diff --git a/ddtrace/tracer/rules_sampler.go b/ddtrace/tracer/rules_sampler.go index 06767aa338..6804258c1f 100644 --- a/ddtrace/tracer/rules_sampler.go +++ b/ddtrace/tracer/rules_sampler.go @@ -60,6 +60,14 @@ func (r *rulesSampler) sampleSpan(s *span) bool { return r.spans.apply(s) } +func (r *rulesSampler) HasSpanRules() bool { + return r.spans.enabled() +} + +func (r *rulesSampler) TraceRateLimit() (float64, bool) { + return r.traces.limit() +} + // SamplingRule is used for applying sampling rates to spans that match // the service name, operation name or both. // For basic usage, consider using the helper functions ServiceRule, NameRule, etc. @@ -84,6 +92,7 @@ const ( // If a sampling rule is of type SamplingRuleTrace, such rule determines the sampling rate to apply // to trace spans. If a span matches that rule, it will impact the trace sampling decision. SamplingRuleTrace = iota + // SamplingRuleSpan specifies a sampling rule that applies to a single span without affecting the entire trace. // If a sampling rule is of type SamplingRuleSingleSpan, such rule determines the sampling rate to apply // to individual spans. If a span matches a rule, it will NOT impact the trace sampling decision. @@ -304,22 +313,6 @@ func (rs *singleSpanRulesSampler) enabled() bool { return len(rs.rules) > 0 } -const ( - // spanSamplingMechanism specifies the sampling mechanism by which an individual span was sampled - spanSamplingMechanism = "_dd.span_sampling.mechanism" - - // singleSpanSamplingMechanism specifies value reserved to indicate that a span was kept - // on account of a single span sampling rule. - singleSpanSamplingMechanism = 8 - - // singleSpanSamplingRuleRate specifies the configured sampling probability for the single span sampling rule. - singleSpanSamplingRuleRate = "_dd.span_sampling.rule_rate" - - // singleSpanSamplingMPS specifies the configured limit for the single span sampling rule - // that the span matched. If there is no configured limit, then this tag is omitted. - singleSpanSamplingMPS = "_dd.span_sampling.max_per_second" -) - // apply uses the sampling rules to determine the sampling rate for the // provided span. If the rules don't match, then it returns false and the span is not // modified. @@ -347,10 +340,10 @@ func (rs *singleSpanRulesSampler) applyRate(span *span, rule SamplingRule, rate return } } - span.setMetric(spanSamplingMechanism, singleSpanSamplingMechanism) - span.setMetric(singleSpanSamplingRuleRate, rate) + span.setMetric(keySpanSamplingMechanism, samplingMechanismSingleSpan) + span.setMetric(keySingleSpanSamplingRuleRate, rate) if rule.MaxPerSecond != 0 { - span.setMetric(singleSpanSamplingMPS, rule.MaxPerSecond) + span.setMetric(keySingleSpanSamplingMPS, rule.MaxPerSecond) } } diff --git a/ddtrace/tracer/sampler_test.go b/ddtrace/tracer/sampler_test.go index 4ca9d5e56a..e5f6d9614e 100644 --- a/ddtrace/tracer/sampler_test.go +++ b/ddtrace/tracer/sampler_test.go @@ -441,9 +441,9 @@ func TestRulesSampler(t *testing.T) { span := makeSpan(tt.spanName, tt.spanSrv) result := rs.sampleSpan(span) assert.True(result) - assert.Contains(span.Metrics, spanSamplingMechanism) - assert.Contains(span.Metrics, singleSpanSamplingRuleRate) - assert.Contains(span.Metrics, singleSpanSamplingMPS) + assert.Contains(span.Metrics, keySpanSamplingMechanism) + assert.Contains(span.Metrics, keySingleSpanSamplingRuleRate) + assert.Contains(span.Metrics, keySingleSpanSamplingMPS) }) } }) @@ -482,9 +482,9 @@ func TestRulesSampler(t *testing.T) { span := makeSpan(tt.spanName, tt.spanSrv) result := rs.sampleSpan(span) assert.False(result) - assert.NotContains(span.Metrics, spanSamplingMechanism) - assert.NotContains(span.Metrics, singleSpanSamplingRuleRate) - assert.NotContains(span.Metrics, singleSpanSamplingMPS) + assert.NotContains(span.Metrics, keySpanSamplingMechanism) + assert.NotContains(span.Metrics, keySingleSpanSamplingRuleRate) + assert.NotContains(span.Metrics, keySingleSpanSamplingMPS) }) } }) diff --git a/ddtrace/tracer/span.go b/ddtrace/tracer/span.go index dc4dd8e7cf..b6b9d2e652 100644 --- a/ddtrace/tracer/span.go +++ b/ddtrace/tracer/span.go @@ -574,4 +574,14 @@ const ( keyTopLevel = "_dd.top_level" // keyPropagationError holds any error from propagated trace tags (if any) keyPropagationError = "_dd.propagation_error" + // keySpanSamplingMechanism specifies the sampling mechanism by which an individual span was sampled + keySpanSamplingMechanism = "_dd.span_sampling.mechanism" + // singleSpanSamplingMechanism specifies value reserved to indicate that a span was kept + // on account of a single span sampling rule. + samplingMechanismSingleSpan = 8 + // keySingleSpanSamplingRuleRate specifies the configured sampling probability for the single span sampling rule. + keySingleSpanSamplingRuleRate = "_dd.span_sampling.rule_rate" + // keySingleSpanSamplingMPS specifies the configured limit for the single span sampling rule + // that the span matched. If there is no configured limit, then this tag is omitted. + keySingleSpanSamplingMPS = "_dd.span_sampling.max_per_second" ) diff --git a/ddtrace/tracer/spancontext.go b/ddtrace/tracer/spancontext.go index 98daaf1b30..5a65ff365f 100644 --- a/ddtrace/tracer/spancontext.go +++ b/ddtrace/tracer/spancontext.go @@ -326,28 +326,27 @@ func (t *trace) finishedOne(s *span) { atomic.AddInt64(&tr.spansFinished, int64(len(t.spans))) sd := samplingDecision(atomic.LoadInt64((*int64)(&t.samplingDecision))) if sd != decisionKeep { - if p, ok := t.samplingPriorityLocked(); ok && p == ext.PriorityAutoReject { - atomic.AddUint64(&tr.droppedP0Spans, uint64(len(t.spans))) - atomic.AddUint64(&tr.droppedP0Traces, 1) - } // if trace sampling decision is drop, we still want to send single spans // unless there are no single span sampling rules defined - if !tr.rulesSampling.spans.enabled() { - return - } var singleSpans []*span - canDropP0s := tr.config.canDropP0s() - for _, span := range t.spans { - if tr.rulesSampling.sampleSpan(span) { - // since stats are computed on the tracer side, the keyTopLevel tag - // must be removed to preserve stats correctness - if canDropP0s { - delete(span.Metrics, keyTopLevel) + if tr.rulesSampling.HasSpanRules() { + canDropP0s := tr.config.canDropP0s() + for _, span := range t.spans { + if tr.rulesSampling.sampleSpan(span) { + // since stats are computed on the tracer side, the keyTopLevel tag + // must be removed to preserve stats correctness + if canDropP0s { + delete(span.Metrics, keyTopLevel) + } + singleSpans = append(singleSpans, span) } - singleSpans = append(singleSpans, span) } } - if len(t.spans) == 0 { + if p, ok := t.samplingPriorityLocked(); ok && p == ext.PriorityAutoReject { + atomic.AddUint64(&tr.droppedP0Spans, uint64(len(t.spans)-len(singleSpans))) + atomic.AddUint64(&tr.droppedP0Traces, 1) + } + if len(singleSpans) == 0 { return // no spans matched the rules and were sampled } t.spans = singleSpans diff --git a/ddtrace/tracer/tracer_test.go b/ddtrace/tracer/tracer_test.go index bd11bd3e3a..0362cbc9c4 100644 --- a/ddtrace/tracer/tracer_test.go +++ b/ddtrace/tracer/tracer_test.go @@ -385,9 +385,9 @@ func TestSamplingDecision(t *testing.T) { span.Finish() assert.Equal(t, float64(ext.PriorityAutoReject), span.Metrics[keySamplingPriority]) assert.Equal(t, decisionDrop, span.context.trace.samplingDecision) - assert.Equal(t, 8.0, span.Metrics[spanSamplingMechanism]) - assert.Equal(t, 1.0, span.Metrics[singleSpanSamplingRuleRate]) - assert.Equal(t, 15.0, span.Metrics[singleSpanSamplingMPS]) + assert.Equal(t, 8.0, span.Metrics[keySpanSamplingMechanism]) + assert.Equal(t, 1.0, span.Metrics[keySingleSpanSamplingRuleRate]) + assert.Equal(t, 15.0, span.Metrics[keySingleSpanSamplingMPS]) assert.NotContains(t, span.Metrics, keyTopLevel) }) } From 1348caaa8370340db2e333a3fd1aea1e38f96732 Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Tue, 19 Jul 2022 19:36:00 +0200 Subject: [PATCH 18/27] updated according to comments --- ddtrace/tracer/option.go | 6 ++-- ddtrace/tracer/rules_sampler.go | 57 +++++++++++++++------------------ ddtrace/tracer/sampler_test.go | 12 +++---- ddtrace/tracer/span.go | 9 ++++-- ddtrace/tracer/spancontext.go | 29 +++++++++-------- ddtrace/tracer/tracer.go | 2 +- 6 files changed, 56 insertions(+), 59 deletions(-) diff --git a/ddtrace/tracer/option.go b/ddtrace/tracer/option.go index 1717cdec7c..c7f0175c95 100644 --- a/ddtrace/tracer/option.go +++ b/ddtrace/tracer/option.go @@ -119,7 +119,7 @@ type config struct { statsd statsdClient // samplingRules contains user-defined rules to determine the sampling rate to apply - // to spans. Rules might be of a type SamplingRuleSingleSpan or SamplingRuleTrace. + // to spans. samplingRules []SamplingRule // tickChan specifies a channel which will receive the time every time the tracer must flush. @@ -667,8 +667,8 @@ func WithDogstatsdAddress(addr string) StartOption { } } -// WithSamplingRules specifies the sampling rates to apply to trace spans based on the -// provided rules. Both trace sampling and single span sampling rules can be configured with this option. +// WithSamplingRules specifies the sampling rates to apply to spans based on the +// provided rules. func WithSamplingRules(rules []SamplingRule) StartOption { return func(cfg *config) { cfg.samplingRules = rules diff --git a/ddtrace/tracer/rules_sampler.go b/ddtrace/tracer/rules_sampler.go index 6804258c1f..6d470a52e2 100644 --- a/ddtrace/tracer/rules_sampler.go +++ b/ddtrace/tracer/rules_sampler.go @@ -24,12 +24,13 @@ import ( ) // rulesSampler holds instances of trace sampler and single span sampler, that are configured with the given set of rules. -// traceRulesSampler samples trace spans based on a user-defined set of rules and might impact sampling decision of the trace. -// singleSpanRulesSampler samples individual spans based on a separate user-defined set of rules and -// cannot impact the trace sampling decision. type rulesSampler struct { + // traceRulesSampler samples trace spans based on a user-defined set of rules and might impact sampling decision of the trace. traces *traceRulesSampler - spans *singleSpanRulesSampler + + // singleSpanRulesSampler samples individual spans based on a separate user-defined set of rules and + // cannot impact the trace sampling decision. + spans *singleSpanRulesSampler } // newRulesSampler configures a *rulesSampler instance using the given set of rules. @@ -52,21 +53,13 @@ func newRulesSampler(rules []SamplingRule) *rulesSampler { } } -func (r *rulesSampler) sampleTrace(s *span) bool { - return r.traces.apply(s) -} +func (r *rulesSampler) SampleTrace(s *span) bool { return r.traces.apply(s) } -func (r *rulesSampler) sampleSpan(s *span) bool { - return r.spans.apply(s) -} +func (r *rulesSampler) SampleSpan(s *span) bool { return r.spans.apply(s) } -func (r *rulesSampler) HasSpanRules() bool { - return r.spans.enabled() -} +func (r *rulesSampler) HasSpanRules() bool { return r.spans.enabled() } -func (r *rulesSampler) TraceRateLimit() (float64, bool) { - return r.traces.limit() -} +func (r *rulesSampler) TraceRateLimit() (float64, bool) { return r.traces.limit() } // SamplingRule is used for applying sampling rates to spans that match // the service name, operation name or both. @@ -84,6 +77,21 @@ type SamplingRule struct { limiter *rateLimiter } +// match returns true when the span's details match all the expected values in the rule. +func (sr *SamplingRule) match(s *span) bool { + if sr.Service != nil && !sr.Service.MatchString(s.Service) { + return false + } else if sr.exactService != "" && sr.exactService != s.Service { + return false + } + if sr.Name != nil && !sr.Name.MatchString(s.Name) { + return false + } else if sr.exactName != "" && sr.exactName != s.Name { + return false + } + return true +} + // SamplingRuleType represents a type of sampling rule spans are matched against. type SamplingRuleType int @@ -136,7 +144,7 @@ func RateRule(rate float64) SamplingRule { } } -// traceRulesSampler allows a user-defined list of rules to apply to spans. +// traceRulesSampler allows a user-defined list of rules to apply to traces. // These rules can match based on the span's Service, Name or both. // When making a sampling decision, the rules are checked in order until // a match is found. @@ -219,21 +227,6 @@ func (rs *traceRulesSampler) apply(span *span) bool { return true } -// match returns true when the span's details match all the expected values in the rule. -func (sr *SamplingRule) match(s *span) bool { - if sr.Service != nil && !sr.Service.MatchString(s.Service) { - return false - } else if sr.exactService != "" && sr.exactService != s.Service { - return false - } - if sr.Name != nil && !sr.Name.MatchString(s.Name) { - return false - } else if sr.exactName != "" && sr.exactName != s.Name { - return false - } - return true -} - func (rs *traceRulesSampler) applyRate(span *span, rate float64, now time.Time) { span.SetTag(keyRulesSamplerAppliedRate, rate) if !sampledByRate(span.TraceID, rate) { diff --git a/ddtrace/tracer/sampler_test.go b/ddtrace/tracer/sampler_test.go index e5f6d9614e..72b9579801 100644 --- a/ddtrace/tracer/sampler_test.go +++ b/ddtrace/tracer/sampler_test.go @@ -361,7 +361,7 @@ func TestRulesSampler(t *testing.T) { rs := newRulesSampler(nil) span := makeSpan("http.request", "test-service") - result := rs.sampleTrace(span) + result := rs.SampleTrace(span) assert.False(result) }) @@ -379,7 +379,7 @@ func TestRulesSampler(t *testing.T) { rs := newRulesSampler(v) span := makeSpan("http.request", "test-service") - result := rs.sampleTrace(span) + result := rs.SampleTrace(span) assert.True(result) assert.Equal(1.0, span.Metrics["_dd.rule_psr"]) assert.Equal(1.0, span.Metrics["_dd.limit_psr"]) @@ -402,7 +402,7 @@ func TestRulesSampler(t *testing.T) { rs := newRulesSampler(v) span := makeSpan("http.request", "test-service") - result := rs.sampleTrace(span) + result := rs.SampleTrace(span) assert.False(result) }) } @@ -439,7 +439,7 @@ func TestRulesSampler(t *testing.T) { rs := newRulesSampler(rules) span := makeSpan(tt.spanName, tt.spanSrv) - result := rs.sampleSpan(span) + result := rs.SampleSpan(span) assert.True(result) assert.Contains(span.Metrics, keySpanSamplingMechanism) assert.Contains(span.Metrics, keySingleSpanSamplingRuleRate) @@ -480,7 +480,7 @@ func TestRulesSampler(t *testing.T) { rs := newRulesSampler(rules) span := makeSpan(tt.spanName, tt.spanSrv) - result := rs.sampleSpan(span) + result := rs.SampleSpan(span) assert.False(result) assert.NotContains(span.Metrics, keySpanSamplingMechanism) assert.NotContains(span.Metrics, keySingleSpanSamplingRuleRate) @@ -508,7 +508,7 @@ func TestRulesSampler(t *testing.T) { rs := newRulesSampler(rules) span := makeSpan("http.request", "test-service") - result := rs.sampleTrace(span) + result := rs.SampleTrace(span) assert.True(result) assert.Equal(rate, span.Metrics["_dd.rule_psr"]) if rate > 0.0 { diff --git a/ddtrace/tracer/span.go b/ddtrace/tracer/span.go index 65b41049f6..ca9be62568 100644 --- a/ddtrace/tracer/span.go +++ b/ddtrace/tracer/span.go @@ -582,12 +582,15 @@ const ( keyPropagationError = "_dd.propagation_error" // keySpanSamplingMechanism specifies the sampling mechanism by which an individual span was sampled keySpanSamplingMechanism = "_dd.span_sampling.mechanism" - // singleSpanSamplingMechanism specifies value reserved to indicate that a span was kept - // on account of a single span sampling rule. - samplingMechanismSingleSpan = 8 // keySingleSpanSamplingRuleRate specifies the configured sampling probability for the single span sampling rule. keySingleSpanSamplingRuleRate = "_dd.span_sampling.rule_rate" // keySingleSpanSamplingMPS specifies the configured limit for the single span sampling rule // that the span matched. If there is no configured limit, then this tag is omitted. keySingleSpanSamplingMPS = "_dd.span_sampling.max_per_second" ) + +const ( + // samplingMechanismSingleSpan specifies value reserved to indicate that a span was kept + // on account of a single span sampling rule. + samplingMechanismSingleSpan = 8 +) diff --git a/ddtrace/tracer/spancontext.go b/ddtrace/tracer/spancontext.go index 5a65ff365f..209d993042 100644 --- a/ddtrace/tracer/spancontext.go +++ b/ddtrace/tracer/spancontext.go @@ -328,28 +328,29 @@ func (t *trace) finishedOne(s *span) { if sd != decisionKeep { // if trace sampling decision is drop, we still want to send single spans // unless there are no single span sampling rules defined - var singleSpans []*span - if tr.rulesSampling.HasSpanRules() { - canDropP0s := tr.config.canDropP0s() - for _, span := range t.spans { - if tr.rulesSampling.sampleSpan(span) { - // since stats are computed on the tracer side, the keyTopLevel tag - // must be removed to preserve stats correctness - if canDropP0s { - delete(span.Metrics, keyTopLevel) - } - singleSpans = append(singleSpans, span) + if !tr.rulesSampling.HasSpanRules() { + return + } + var spans []*span + canDropP0s := tr.config.canDropP0s() + for _, span := range t.spans { + if tr.rulesSampling.SampleSpan(span) { + // since stats are computed on the tracer side, the keyTopLevel tag + // must be removed to preserve stats correctness + if canDropP0s { + delete(span.Metrics, keyTopLevel) } + spans = append(spans, span) } } if p, ok := t.samplingPriorityLocked(); ok && p == ext.PriorityAutoReject { - atomic.AddUint64(&tr.droppedP0Spans, uint64(len(t.spans)-len(singleSpans))) + atomic.AddUint64(&tr.droppedP0Spans, uint64(len(t.spans)-len(spans))) atomic.AddUint64(&tr.droppedP0Traces, 1) } - if len(singleSpans) == 0 { + if len(spans) == 0 { return // no spans matched the rules and were sampled } - t.spans = singleSpans + t.spans = spans } tr.pushTrace(t.spans) } diff --git a/ddtrace/tracer/tracer.go b/ddtrace/tracer/tracer.go index d330338039..e5437d30b2 100644 --- a/ddtrace/tracer/tracer.go +++ b/ddtrace/tracer/tracer.go @@ -523,7 +523,7 @@ func (t *tracer) sample(span *span) { if rs, ok := sampler.(RateSampler); ok && rs.Rate() < 1 { span.setMetric(sampleRateMetricKey, rs.Rate()) } - if t.rulesSampling.sampleTrace(span) { + if t.rulesSampling.SampleTrace(span) { return } t.prioritySampling.apply(span) From 4ce0635dfa25231e7956c8a8c2892d747b837b10 Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Wed, 20 Jul 2022 15:08:00 +0200 Subject: [PATCH 19/27] moved single span sampling to the worker part --- ddtrace/tracer/spancontext.go | 34 +++--------------- ddtrace/tracer/tracer.go | 67 +++++++++++++++++++++++++++++------ ddtrace/tracer/tracer_test.go | 35 +++++++++--------- 3 files changed, 79 insertions(+), 57 deletions(-) diff --git a/ddtrace/tracer/spancontext.go b/ddtrace/tracer/spancontext.go index 209d993042..18418d8e66 100644 --- a/ddtrace/tracer/spancontext.go +++ b/ddtrace/tracer/spancontext.go @@ -12,7 +12,6 @@ import ( "sync/atomic" "gopkg.in/DataDog/dd-trace-go.v1/ddtrace" - "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/ext" "gopkg.in/DataDog/dd-trace-go.v1/ddtrace/internal" "gopkg.in/DataDog/dd-trace-go.v1/internal/log" "gopkg.in/DataDog/dd-trace-go.v1/internal/samplernames" @@ -324,33 +323,8 @@ func (t *trace) finishedOne(s *span) { } // we have a tracer that can receive completed traces. atomic.AddInt64(&tr.spansFinished, int64(len(t.spans))) - sd := samplingDecision(atomic.LoadInt64((*int64)(&t.samplingDecision))) - if sd != decisionKeep { - // if trace sampling decision is drop, we still want to send single spans - // unless there are no single span sampling rules defined - if !tr.rulesSampling.HasSpanRules() { - return - } - var spans []*span - canDropP0s := tr.config.canDropP0s() - for _, span := range t.spans { - if tr.rulesSampling.SampleSpan(span) { - // since stats are computed on the tracer side, the keyTopLevel tag - // must be removed to preserve stats correctness - if canDropP0s { - delete(span.Metrics, keyTopLevel) - } - spans = append(spans, span) - } - } - if p, ok := t.samplingPriorityLocked(); ok && p == ext.PriorityAutoReject { - atomic.AddUint64(&tr.droppedP0Spans, uint64(len(t.spans)-len(spans))) - atomic.AddUint64(&tr.droppedP0Traces, 1) - } - if len(spans) == 0 { - return // no spans matched the rules and were sampled - } - t.spans = spans - } - tr.pushTrace(t.spans) + tr.pushTraceInfo(&traceInfo{ + spans: t.spans, + samplingDecision: samplingDecision(atomic.LoadInt64((*int64)(&t.samplingDecision))), + }) } diff --git a/ddtrace/tracer/tracer.go b/ddtrace/tracer/tracer.go index e5437d30b2..a44ebd6b80 100644 --- a/ddtrace/tracer/tracer.go +++ b/ddtrace/tracer/tracer.go @@ -12,6 +12,7 @@ import ( rt "runtime/trace" "strconv" "sync" + "sync/atomic" "time" "gopkg.in/DataDog/dd-trace-go.v1/ddtrace" @@ -45,8 +46,8 @@ type tracer struct { // destination, such as the Trace Agent or Datadog Forwarder. traceWriter traceWriter - // out receives traces to be added to the payload. - out chan []*span + // out receives traceInfo with spans to be added to the payload. + out chan *traceInfo // flush receives a channel onto which it will confirm after a flush has been // triggered and completed. @@ -194,7 +195,7 @@ func newUnstartedTracer(opts ...StartOption) *tracer { t := &tracer{ config: c, traceWriter: writer, - out: make(chan []*span, payloadQueueSize), + out: make(chan *traceInfo, payloadQueueSize), stop: make(chan struct{}), flush: make(chan chan<- struct{}), rulesSampling: newRulesSampler(c.samplingRules), @@ -275,9 +276,11 @@ func (t *tracer) flushSync() { func (t *tracer) worker(tick <-chan time.Time) { for { select { - case trace := <-t.out: - t.traceWriter.add(trace) - + case info := <-t.out: + t.processTraceInfo(info) + if len(info.spans) != 0 { + t.traceWriter.add(info.spans) + } case <-tick: t.config.statsd.Incr("datadog.tracer.flush_triggered", []string{"reason:scheduled"}, 1) t.traceWriter.flush() @@ -296,8 +299,11 @@ func (t *tracer) worker(tick <-chan time.Time) { // before the final flush to ensure no traces are lost (see #526) for { select { - case trace := <-t.out: - t.traceWriter.add(trace) + case info := <-t.out: + t.processTraceInfo(info) + if len(info.spans) != 0 { + t.traceWriter.add(info.spans) + } default: break loop } @@ -307,16 +313,55 @@ func (t *tracer) worker(tick <-chan time.Time) { } } -func (t *tracer) pushTrace(trace []*span) { +// traceInfo contains span list and sampling decision of the trace +// needed to run single span sampling. +type traceInfo struct { + spans []*span + samplingDecision samplingDecision +} + +// processTraceInfo runs single span sampling on the spans pushed to the worker thread. +// Sampling spans on the worker thread keeps any associated latency off the application thread. +func (t *tracer) processTraceInfo(info *traceInfo) { + if info.samplingDecision == decisionKeep { + return + } + if !t.rulesSampling.HasSpanRules() { + info.spans = nil + return + } + // if trace sampling decision is drop, we still want to send single spans + // unless there are no single span sampling rules defined + var kept []*span + canDropP0s := t.config.canDropP0s() + for _, span := range info.spans { + if t.rulesSampling.SampleSpan(span) { + // since stats are computed on the tracer side, the keyTopLevel tag + // must be removed to preserve stats correctness + if canDropP0s { + delete(span.Metrics, keyTopLevel) + } + kept = append(kept, span) + } + } + atomic.AddUint64(&t.droppedP0Spans, uint64(len(info.spans)-len(kept))) + info.spans = kept + if len(kept) == 0 { + atomic.AddUint64(&t.droppedP0Traces, 1) + return // no spans matched the rules and were sampled + } +} + +func (t *tracer) pushTraceInfo(info *traceInfo) { select { case <-t.stop: return default: } select { - case t.out <- trace: + case t.out <- info: default: - log.Error("payload queue full, dropping %d traces", len(trace)) + log.Error("payload queue full, dropping %d traces", len(info.spans)) } } diff --git a/ddtrace/tracer/tracer_test.go b/ddtrace/tracer/tracer_test.go index 0362cbc9c4..d874e4e818 100644 --- a/ddtrace/tracer/tracer_test.go +++ b/ddtrace/tracer/tracer_test.go @@ -196,7 +196,7 @@ func TestTracerStart(t *testing.T) { Start() // ensure at least one worker started and handles requests - internal.GetGlobalTracer().(*tracer).pushTrace([]*span{}) + internal.GetGlobalTracer().(*tracer).pushTraceInfo(&traceInfo{spans: []*span{}}) Stop() Stop() @@ -207,7 +207,7 @@ func TestTracerStart(t *testing.T) { t.Run("deadlock/direct", func(t *testing.T) { tr, _, _, stop := startTestTracer(t) defer stop() - tr.pushTrace([]*span{}) // blocks until worker is started + tr.pushTraceInfo(&traceInfo{spans: []*span{}}) // blocks until worker is started select { case <-tr.stop: t.Fatal("stopped channel should be open") @@ -379,16 +379,19 @@ func TestSamplingDecision(t *testing.T) { tracer.config.sampler = NewRateSampler(0) tracer.prioritySampling.defaultRate = 0 tracer.config.serviceName = "test_service" - span := tracer.StartSpan("name_1").(*span) - child := tracer.StartSpan("name_2", ChildOf(span.context)) + parent := tracer.StartSpan("name_1").(*span) + child := tracer.StartSpan("name_2", ChildOf(parent.context)).(*span) child.Finish() - span.Finish() - assert.Equal(t, float64(ext.PriorityAutoReject), span.Metrics[keySamplingPriority]) - assert.Equal(t, decisionDrop, span.context.trace.samplingDecision) - assert.Equal(t, 8.0, span.Metrics[keySpanSamplingMechanism]) - assert.Equal(t, 1.0, span.Metrics[keySingleSpanSamplingRuleRate]) - assert.Equal(t, 15.0, span.Metrics[keySingleSpanSamplingMPS]) - assert.NotContains(t, span.Metrics, keyTopLevel) + parent.Finish() + tracer.pushTraceInfo(&traceInfo{spans: []*span{parent, child}}) + // timing out since single span sampling runs in the worker thread + time.Sleep(time.Millisecond) + assert.Equal(t, float64(ext.PriorityAutoReject), parent.Metrics[keySamplingPriority]) + assert.Equal(t, decisionDrop, parent.context.trace.samplingDecision) + assert.Equal(t, 8.0, parent.Metrics[keySpanSamplingMechanism]) + assert.Equal(t, 1.0, parent.Metrics[keySingleSpanSamplingRuleRate]) + assert.Equal(t, 15.0, parent.Metrics[keySingleSpanSamplingMPS]) + assert.NotContains(t, parent.Metrics, keyTopLevel) }) } @@ -1212,11 +1215,11 @@ func TestPushPayload(t *testing.T) { s.Meta["key"] = strings.Repeat("X", payloadSizeLimit/2+10) // half payload size reached - tracer.pushTrace([]*span{s}) + tracer.pushTraceInfo(&traceInfo{[]*span{s}, decisionKeep}) tracer.awaitPayload(t, 1) // payload size exceeded - tracer.pushTrace([]*span{s}) + tracer.pushTraceInfo(&traceInfo{[]*span{s}, decisionKeep}) flush(2) } @@ -1238,16 +1241,16 @@ func TestPushTrace(t *testing.T) { Resource: "/foo", }, } - tracer.pushTrace(trace) + tracer.pushTraceInfo(&traceInfo{spans: trace}) assert.Len(tracer.out, 1) t0 := <-tracer.out - assert.Equal(trace, t0) + assert.Equal(&traceInfo{spans: trace}, t0) many := payloadQueueSize + 2 for i := 0; i < many; i++ { - tracer.pushTrace(make([]*span, i)) + tracer.pushTraceInfo(&traceInfo{spans: make([]*span, i)}) } assert.Len(tracer.out, payloadQueueSize) log.Flush() From f33308b8fc5028740cfa6b054ec421f89ec026ac Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Wed, 20 Jul 2022 19:30:47 +0200 Subject: [PATCH 20/27] added docs to sampling rule struct --- ddtrace/tracer/rules_sampler.go | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/ddtrace/tracer/rules_sampler.go b/ddtrace/tracer/rules_sampler.go index 6d470a52e2..93630290ba 100644 --- a/ddtrace/tracer/rules_sampler.go +++ b/ddtrace/tracer/rules_sampler.go @@ -64,13 +64,23 @@ func (r *rulesSampler) TraceRateLimit() (float64, bool) { return r.traces.limit( // SamplingRule is used for applying sampling rates to spans that match // the service name, operation name or both. // For basic usage, consider using the helper functions ServiceRule, NameRule, etc. -// See `SamplingRuleType` for more details. type SamplingRule struct { - Service *regexp.Regexp - Name *regexp.Regexp - Rate float64 + // Service specifies the regex expression span service name should match. + Service *regexp.Regexp + + // Name specifies the regex expression span operation name should match. + Name *regexp.Regexp + + // Rate specifies the sampling rate that should be applied to spans that match + // service and/or name of the rule. + Rate float64 + + // MaxPerSecond specifies max number of spans per second that can be sampled per the rule. + // If not specified, the default is no limit. MaxPerSecond float64 - Type SamplingRuleType + + // Type specifies type of the sampling rules. See `SamplingRuleType` for more details. + Type SamplingRuleType exactService string exactName string From e4e6f3cdd7bf289850b3ab2a5af3cb7dd65d196f Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Thu, 21 Jul 2022 14:11:59 +0200 Subject: [PATCH 21/27] added `partial` tag to dropped_p0_traces metric --- ddtrace/tracer/tracer.go | 4 ++++ ddtrace/tracer/transport.go | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/ddtrace/tracer/tracer.go b/ddtrace/tracer/tracer.go index a44ebd6b80..44920fccbc 100644 --- a/ddtrace/tracer/tracer.go +++ b/ddtrace/tracer/tracer.go @@ -75,6 +75,9 @@ type tracer struct { // Records the number of dropped P0 traces and spans. droppedP0Traces, droppedP0Spans uint64 + // partialTrace the number of partially dropped traces. + partialTraces uint64 + // rulesSampling holds an instance of the rules sampler used to apply either trace sampling, // or single span sampling rules on spans. These are user-defined // rules for applying a sampling rate to spans that match the designated service @@ -350,6 +353,7 @@ func (t *tracer) processTraceInfo(info *traceInfo) { atomic.AddUint64(&t.droppedP0Traces, 1) return // no spans matched the rules and were sampled } + atomic.AddUint64(&t.partialTraces, 1) } func (t *tracer) pushTraceInfo(info *traceInfo) { diff --git a/ddtrace/tracer/transport.go b/ddtrace/tracer/transport.go index 5c42347c54..85da05dd6f 100644 --- a/ddtrace/tracer/transport.go +++ b/ddtrace/tracer/transport.go @@ -152,9 +152,11 @@ func (t *httpTransport) send(p *payload) (body io.ReadCloser, err error) { req.Header.Set("Datadog-Client-Computed-Stats", "yes") } droppedTraces := int(atomic.SwapUint64(&t.droppedP0Traces, 0)) + partialTraces := int(atomic.SwapUint64(&t.partialTraces, 0)) droppedSpans := int(atomic.SwapUint64(&t.droppedP0Spans, 0)) if stats := t.config.statsd; stats != nil { - stats.Count("datadog.tracer.dropped_p0_traces", int64(droppedTraces), nil, 1) + stats.Count("datadog.tracer.dropped_p0_traces", int64(droppedTraces), + []string{fmt.Sprintf("partial:%s", strconv.FormatBool(partialTraces > 0))}, 1) stats.Count("datadog.tracer.dropped_p0_spans", int64(droppedSpans), nil, 1) } req.Header.Set("Datadog-Client-Dropped-P0-Traces", strconv.Itoa(droppedTraces)) From fbe94ea1bb249fafcdc9539c8328303e622b1786 Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Thu, 21 Jul 2022 17:18:17 +0200 Subject: [PATCH 22/27] fixed flaky sampling test --- ddtrace/tracer/tracer_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddtrace/tracer/tracer_test.go b/ddtrace/tracer/tracer_test.go index d874e4e818..ab66160e72 100644 --- a/ddtrace/tracer/tracer_test.go +++ b/ddtrace/tracer/tracer_test.go @@ -385,7 +385,7 @@ func TestSamplingDecision(t *testing.T) { parent.Finish() tracer.pushTraceInfo(&traceInfo{spans: []*span{parent, child}}) // timing out since single span sampling runs in the worker thread - time.Sleep(time.Millisecond) + time.Sleep(5 * time.Millisecond) assert.Equal(t, float64(ext.PriorityAutoReject), parent.Metrics[keySamplingPriority]) assert.Equal(t, decisionDrop, parent.context.trace.samplingDecision) assert.Equal(t, 8.0, parent.Metrics[keySpanSamplingMechanism]) From d295eeeaa3b5938438fe096f51fe3d36ba03bcf2 Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Mon, 1 Aug 2022 17:15:06 +0200 Subject: [PATCH 23/27] updated doc comments; set sampling priority to 2; do not remove the top level tag anymore --- ddtrace/tracer/rules_sampler.go | 5 +++-- ddtrace/tracer/sampler_test.go | 1 + ddtrace/tracer/tracer.go | 6 ------ ddtrace/tracer/tracer_test.go | 3 +-- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/ddtrace/tracer/rules_sampler.go b/ddtrace/tracer/rules_sampler.go index 93630290ba..a64e1d2f34 100644 --- a/ddtrace/tracer/rules_sampler.go +++ b/ddtrace/tracer/rules_sampler.go @@ -65,10 +65,10 @@ func (r *rulesSampler) TraceRateLimit() (float64, bool) { return r.traces.limit( // the service name, operation name or both. // For basic usage, consider using the helper functions ServiceRule, NameRule, etc. type SamplingRule struct { - // Service specifies the regex expression span service name should match. + // Service specifies the regex pattern that a span service name must match. Service *regexp.Regexp - // Name specifies the regex expression span operation name should match. + // Name specifies the regex pattern that a span operation name must match. Name *regexp.Regexp // Rate specifies the sampling rate that should be applied to spans that match @@ -343,6 +343,7 @@ func (rs *singleSpanRulesSampler) applyRate(span *span, rule SamplingRule, rate return } } + span.setSamplingPriority(ext.PriorityUserKeep, samplernames.RuleRate, rate) span.setMetric(keySpanSamplingMechanism, samplingMechanismSingleSpan) span.setMetric(keySingleSpanSamplingRuleRate, rate) if rule.MaxPerSecond != 0 { diff --git a/ddtrace/tracer/sampler_test.go b/ddtrace/tracer/sampler_test.go index 41b0042d0a..46a8f04c65 100644 --- a/ddtrace/tracer/sampler_test.go +++ b/ddtrace/tracer/sampler_test.go @@ -441,6 +441,7 @@ func TestRulesSampler(t *testing.T) { span := makeSpan(tt.spanName, tt.spanSrv) result := rs.SampleSpan(span) assert.True(result) + assert.Equal(float64(ext.PriorityUserKeep), span.Metrics[keySamplingPriority]) assert.Contains(span.Metrics, keySpanSamplingMechanism) assert.Contains(span.Metrics, keySingleSpanSamplingRuleRate) assert.Contains(span.Metrics, keySingleSpanSamplingMPS) diff --git a/ddtrace/tracer/tracer.go b/ddtrace/tracer/tracer.go index 44920fccbc..09bf3631ea 100644 --- a/ddtrace/tracer/tracer.go +++ b/ddtrace/tracer/tracer.go @@ -336,14 +336,8 @@ func (t *tracer) processTraceInfo(info *traceInfo) { // if trace sampling decision is drop, we still want to send single spans // unless there are no single span sampling rules defined var kept []*span - canDropP0s := t.config.canDropP0s() for _, span := range info.spans { if t.rulesSampling.SampleSpan(span) { - // since stats are computed on the tracer side, the keyTopLevel tag - // must be removed to preserve stats correctness - if canDropP0s { - delete(span.Metrics, keyTopLevel) - } kept = append(kept, span) } } diff --git a/ddtrace/tracer/tracer_test.go b/ddtrace/tracer/tracer_test.go index 2981bb0b4c..67a67f9233 100644 --- a/ddtrace/tracer/tracer_test.go +++ b/ddtrace/tracer/tracer_test.go @@ -383,14 +383,13 @@ func TestSamplingDecision(t *testing.T) { child.Finish() parent.Finish() tracer.pushTraceInfo(&traceInfo{spans: []*span{parent, child}}) - // timing out since single span sampling runs in the worker thread + tracer.Stop() time.Sleep(5 * time.Millisecond) assert.Equal(t, float64(ext.PriorityAutoReject), parent.Metrics[keySamplingPriority]) assert.Equal(t, decisionDrop, parent.context.trace.samplingDecision) assert.Equal(t, 8.0, parent.Metrics[keySpanSamplingMechanism]) assert.Equal(t, 1.0, parent.Metrics[keySingleSpanSamplingRuleRate]) assert.Equal(t, 15.0, parent.Metrics[keySingleSpanSamplingMPS]) - assert.NotContains(t, parent.Metrics, keyTopLevel) }) } From 58ff01920906a54115e32672ba9b6c9711ed9c24 Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Mon, 1 Aug 2022 18:51:23 +0200 Subject: [PATCH 24/27] changed Sampling Rule marshalling for better readability --- ddtrace/tracer/log_test.go | 6 +++--- ddtrace/tracer/rules_sampler.go | 15 +++++++++++++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/ddtrace/tracer/log_test.go b/ddtrace/tracer/log_test.go index ebc9df095b..c19a06d25b 100644 --- a/ddtrace/tracer/log_test.go +++ b/ddtrace/tracer/log_test.go @@ -55,7 +55,7 @@ func TestStartupLog(t *testing.T) { tp.Reset() logStartup(tracer) assert.Len(tp.Lines(), 2) - assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sample_rate_limit":"100","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75,"type":0}\],"sampling_rules_error":"","service_mappings":{"initial_service":"new_service"},"tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) + assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sample_rate_limit":"100","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75,"type":"trace(0)"}\],"sampling_rules_error":"","service_mappings":{"initial_service":"new_service"},"tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) }) t.Run("limit", func(t *testing.T) { @@ -86,7 +86,7 @@ func TestStartupLog(t *testing.T) { tp.Reset() logStartup(tracer) assert.Len(tp.Lines(), 2) - assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sample_rate_limit":"1000.001","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75,"type":0}\],"sampling_rules_error":"","service_mappings":{"initial_service":"new_service"},"tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) + assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sample_rate_limit":"1000.001","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75,"type":"trace(0)"}\],"sampling_rules_error":"","service_mappings":{"initial_service":"new_service"},"tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) }) t.Run("errors", func(t *testing.T) { @@ -100,7 +100,7 @@ func TestStartupLog(t *testing.T) { tp.Reset() logStartup(tracer) assert.Len(tp.Lines(), 2) - assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sample_rate_limit":"100","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234,"type":0}\],"sampling_rules_error":"\\n\\tat index 1: rate not provided","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) + assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sample_rate_limit":"100","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234,"type":"trace(0)"}\],"sampling_rules_error":"\\n\\tat index 1: rate not provided","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) }) t.Run("lambda", func(t *testing.T) { diff --git a/ddtrace/tracer/rules_sampler.go b/ddtrace/tracer/rules_sampler.go index a64e1d2f34..263f956876 100644 --- a/ddtrace/tracer/rules_sampler.go +++ b/ddtrace/tracer/rules_sampler.go @@ -119,6 +119,17 @@ const ( SamplingRuleSpan ) +func (sr SamplingRuleType) String() string { + switch sr { + case SamplingRuleTrace: + return "trace" + case SamplingRuleSpan: + return "span" + default: + return "" + } +} + // ServiceRule returns a SamplingRule that applies the provided sampling rate // to spans that match the service name provided. func ServiceRule(service string, rate float64) SamplingRule { @@ -552,7 +563,7 @@ func (sr *SamplingRule) MarshalJSON() ([]byte, error) { Service string `json:"service"` Name string `json:"name"` Rate float64 `json:"sample_rate"` - Type int `json:"type"` + Type string `json:"type"` MaxPerSecond *float64 `json:"max_per_second,omitempty"` }{} if sr.exactService != "" { @@ -566,7 +577,7 @@ func (sr *SamplingRule) MarshalJSON() ([]byte, error) { s.Name = fmt.Sprintf("%s", sr.Name) } s.Rate = sr.Rate - s.Type = int(sr.Type) + s.Type = fmt.Sprintf("%v(%d)", sr.Type.String(), sr.Type) if sr.MaxPerSecond != 0 { s.MaxPerSecond = &sr.MaxPerSecond } From 46b0fe693f556f9644410adf1e3ada4efa01e006 Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Mon, 1 Aug 2022 19:00:38 +0200 Subject: [PATCH 25/27] fixed regex --- ddtrace/tracer/log_test.go | 6 +++--- ddtrace/tracer/sampler_test.go | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ddtrace/tracer/log_test.go b/ddtrace/tracer/log_test.go index c19a06d25b..f8cdec0f69 100644 --- a/ddtrace/tracer/log_test.go +++ b/ddtrace/tracer/log_test.go @@ -55,7 +55,7 @@ func TestStartupLog(t *testing.T) { tp.Reset() logStartup(tracer) assert.Len(tp.Lines(), 2) - assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sample_rate_limit":"100","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75,"type":"trace(0)"}\],"sampling_rules_error":"","service_mappings":{"initial_service":"new_service"},"tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) + assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sample_rate_limit":"100","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75,"type":"trace\(0\)"}\],"sampling_rules_error":"","service_mappings":{"initial_service":"new_service"},"tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) }) t.Run("limit", func(t *testing.T) { @@ -86,7 +86,7 @@ func TestStartupLog(t *testing.T) { tp.Reset() logStartup(tracer) assert.Len(tp.Lines(), 2) - assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sample_rate_limit":"1000.001","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75,"type":"trace(0)"}\],"sampling_rules_error":"","service_mappings":{"initial_service":"new_service"},"tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) + assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sample_rate_limit":"1000.001","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75,"type":"trace\(0\)"}\],"sampling_rules_error":"","service_mappings":{"initial_service":"new_service"},"tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) }) t.Run("errors", func(t *testing.T) { @@ -100,7 +100,7 @@ func TestStartupLog(t *testing.T) { tp.Reset() logStartup(tracer) assert.Len(tp.Lines(), 2) - assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sample_rate_limit":"100","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234,"type":"trace(0)"}\],"sampling_rules_error":"\\n\\tat index 1: rate not provided","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) + assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)? INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sample_rate_limit":"100","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234,"type":"trace\(0\)"}\],"sampling_rules_error":"\\n\\tat index 1: rate not provided","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1]) }) t.Run("lambda", func(t *testing.T) { diff --git a/ddtrace/tracer/sampler_test.go b/ddtrace/tracer/sampler_test.go index 46a8f04c65..39e996ada9 100644 --- a/ddtrace/tracer/sampler_test.go +++ b/ddtrace/tracer/sampler_test.go @@ -812,17 +812,17 @@ func TestSamplingRuleMarshall(t *testing.T) { out string }{ {SamplingRule{nil, nil, 0, 0, 0, "srv", "ops", nil}, - `{"service":"srv","name":"ops","sample_rate":0,"type":0}`}, + `{"service":"srv","name":"ops","sample_rate":0,"type":"trace(0)"}`}, {SamplingRule{regexp.MustCompile("srv.[0-9]+]"), nil, 0, 0, 0, "srv", "ops", nil}, - `{"service":"srv","name":"ops","sample_rate":0,"type":0}`}, + `{"service":"srv","name":"ops","sample_rate":0,"type":"trace(0)"}`}, {SamplingRule{regexp.MustCompile("srv.*"), regexp.MustCompile("ops.[0-9]+]"), 0, 0, 0, "", "", nil}, - `{"service":"srv.*","name":"ops.[0-9]+]","sample_rate":0,"type":0}`}, + `{"service":"srv.*","name":"ops.[0-9]+]","sample_rate":0,"type":"trace(0)"}`}, {SamplingRule{regexp.MustCompile("srv.[0-9]+]"), regexp.MustCompile("ops.[0-9]+]"), 0.55, 0, 0, "", "", nil}, - `{"service":"srv.[0-9]+]","name":"ops.[0-9]+]","sample_rate":0.55,"type":0}`}, + `{"service":"srv.[0-9]+]","name":"ops.[0-9]+]","sample_rate":0.55,"type":"trace(0)"}`}, {SamplingRule{regexp.MustCompile("srv.[0-9]+]"), regexp.MustCompile("ops.[0-9]+]"), 0.55, 0, 1, "", "", nil}, - `{"service":"srv.[0-9]+]","name":"ops.[0-9]+]","sample_rate":0.55,"type":1}`}, + `{"service":"srv.[0-9]+]","name":"ops.[0-9]+]","sample_rate":0.55,"type":"span(1)"}`}, {SamplingRule{regexp.MustCompile("srv.[0-9]+]"), regexp.MustCompile("ops.[0-9]+]"), 0.55, 1000, 1, "", "", nil}, - `{"service":"srv.[0-9]+]","name":"ops.[0-9]+]","sample_rate":0.55,"type":1,"max_per_second":1000}`}, + `{"service":"srv.[0-9]+]","name":"ops.[0-9]+]","sample_rate":0.55,"type":"span(1)","max_per_second":1000}`}, } { m, err := tt.in.MarshalJSON() assert.Nil(t, err) From e8d62c722a8795b39c7c2db08081b8e359987e06 Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Tue, 2 Aug 2022 11:59:50 +0200 Subject: [PATCH 26/27] refactored according to comments --- ddtrace/tracer/rules_sampler.go | 46 +++++++++++++++------------------ ddtrace/tracer/sampler_test.go | 8 +++--- ddtrace/tracer/spancontext.go | 6 ++--- ddtrace/tracer/tracer.go | 44 +++++++++++++++---------------- ddtrace/tracer/tracer_test.go | 16 ++++++------ 5 files changed, 57 insertions(+), 63 deletions(-) diff --git a/ddtrace/tracer/rules_sampler.go b/ddtrace/tracer/rules_sampler.go index 263f956876..b5697402b7 100644 --- a/ddtrace/tracer/rules_sampler.go +++ b/ddtrace/tracer/rules_sampler.go @@ -244,11 +244,11 @@ func (rs *traceRulesSampler) apply(span *span) bool { return false } - rs.applyRate(span, rate, time.Now()) + rs.applyRule(span, rate, time.Now()) return true } -func (rs *traceRulesSampler) applyRate(span *span, rate float64, now time.Time) { +func (rs *traceRulesSampler) applyRule(span *span, rate float64, now time.Time) { span.SetTag(keyRulesSamplerAppliedRate, rate) if !sampledByRate(span.TraceID, rate) { span.setSamplingPriority(ext.PriorityUserReject, samplernames.RuleRate, rate) @@ -333,35 +333,31 @@ func (rs *singleSpanRulesSampler) enabled() bool { func (rs *singleSpanRulesSampler) apply(span *span) bool { for _, rule := range rs.rules { if rule.match(span) { - rs.applyRate(span, rule, rule.Rate, time.Now()) + rate := rule.Rate + span.setMetric(keyRulesSamplerAppliedRate, rate) + if !sampledByRate(span.SpanID, rate) { + span.setSamplingPriority(ext.PriorityUserReject, samplernames.RuleRate, rate) + return false + } + var sampled bool + if rule.limiter != nil { + sampled, rate = rule.limiter.allowOne(time.Now()) + if !sampled { + return false + } + } + span.setSamplingPriority(ext.PriorityUserKeep, samplernames.RuleRate, rate) + span.setMetric(keySpanSamplingMechanism, samplingMechanismSingleSpan) + span.setMetric(keySingleSpanSamplingRuleRate, rate) + if rule.MaxPerSecond != 0 { + span.setMetric(keySingleSpanSamplingMPS, rule.MaxPerSecond) + } return true } } return false } -func (rs *singleSpanRulesSampler) applyRate(span *span, rule SamplingRule, rate float64, now time.Time) { - span.setMetric(keyRulesSamplerAppliedRate, rate) - if !sampledByRate(span.SpanID, rate) { - span.setSamplingPriority(ext.PriorityUserReject, samplernames.RuleRate, rate) - return - } - - var sampled bool - if rule.limiter != nil { - sampled, rate = rule.limiter.allowOne(now) - if !sampled { - return - } - } - span.setSamplingPriority(ext.PriorityUserKeep, samplernames.RuleRate, rate) - span.setMetric(keySpanSamplingMechanism, samplingMechanismSingleSpan) - span.setMetric(keySingleSpanSamplingRuleRate, rate) - if rule.MaxPerSecond != 0 { - span.setMetric(keySingleSpanSamplingMPS, rule.MaxPerSecond) - } -} - // rateLimiter is a wrapper on top of golang.org/x/time/rate which implements a rate limiter but also // returns the effective rate of allowance. type rateLimiter struct { diff --git a/ddtrace/tracer/sampler_test.go b/ddtrace/tracer/sampler_test.go index 39e996ada9..8c4b2f3e2e 100644 --- a/ddtrace/tracer/sampler_test.go +++ b/ddtrace/tracer/sampler_test.go @@ -553,7 +553,7 @@ func TestRulesSamplerInternals(t *testing.T) { now := time.Now() rs := &rulesSampler{} span := makeSpanAt("http.request", "test-service", now) - rs.traces.applyRate(span, 0.0, now) + rs.traces.applyRule(span, 0.0, now) assert.Equal(0.0, span.Metrics["_dd.rule_psr"]) _, ok := span.Metrics["_dd.limit_psr"] assert.False(ok) @@ -569,7 +569,7 @@ func TestRulesSamplerInternals(t *testing.T) { rs.traces.limiter.seen = 1 span := makeSpanAt("http.request", "test-service", now) - rs.traces.applyRate(span, 1.0, now) + rs.traces.applyRule(span, 1.0, now) assert.Equal(1.0, span.Metrics["_dd.rule_psr"]) assert.Equal(1.0, span.Metrics["_dd.limit_psr"]) }) @@ -585,12 +585,12 @@ func TestRulesSamplerInternals(t *testing.T) { rs.traces.limiter.seen = 2 // first span kept, second dropped span := makeSpanAt("http.request", "test-service", now) - rs.traces.applyRate(span, 1.0, now) + rs.traces.applyRule(span, 1.0, now) assert.EqualValues(ext.PriorityUserKeep, span.Metrics[keySamplingPriority]) assert.Equal(1.0, span.Metrics["_dd.rule_psr"]) assert.Equal(1.0, span.Metrics["_dd.limit_psr"]) span = makeSpanAt("http.request", "test-service", now) - rs.traces.applyRate(span, 1.0, now) + rs.traces.applyRule(span, 1.0, now) assert.EqualValues(ext.PriorityUserReject, span.Metrics[keySamplingPriority]) assert.Equal(1.0, span.Metrics["_dd.rule_psr"]) assert.Equal(0.75, span.Metrics["_dd.limit_psr"]) diff --git a/ddtrace/tracer/spancontext.go b/ddtrace/tracer/spancontext.go index 18418d8e66..97c1658199 100644 --- a/ddtrace/tracer/spancontext.go +++ b/ddtrace/tracer/spancontext.go @@ -323,8 +323,8 @@ func (t *trace) finishedOne(s *span) { } // we have a tracer that can receive completed traces. atomic.AddInt64(&tr.spansFinished, int64(len(t.spans))) - tr.pushTraceInfo(&traceInfo{ - spans: t.spans, - samplingDecision: samplingDecision(atomic.LoadInt64((*int64)(&t.samplingDecision))), + tr.pushTrace(&finishedTrace{ + spans: t.spans, + decision: samplingDecision(atomic.LoadInt64((*int64)(&t.samplingDecision))), }) } diff --git a/ddtrace/tracer/tracer.go b/ddtrace/tracer/tracer.go index 09bf3631ea..1ad026b57d 100644 --- a/ddtrace/tracer/tracer.go +++ b/ddtrace/tracer/tracer.go @@ -46,8 +46,8 @@ type tracer struct { // destination, such as the Trace Agent or Datadog Forwarder. traceWriter traceWriter - // out receives traceInfo with spans to be added to the payload. - out chan *traceInfo + // out receives finishedTrace with spans to be added to the payload. + out chan *finishedTrace // flush receives a channel onto which it will confirm after a flush has been // triggered and completed. @@ -198,7 +198,7 @@ func newUnstartedTracer(opts ...StartOption) *tracer { t := &tracer{ config: c, traceWriter: writer, - out: make(chan *traceInfo, payloadQueueSize), + out: make(chan *finishedTrace, payloadQueueSize), stop: make(chan struct{}), flush: make(chan chan<- struct{}), rulesSampling: newRulesSampler(c.samplingRules), @@ -279,10 +279,10 @@ func (t *tracer) flushSync() { func (t *tracer) worker(tick <-chan time.Time) { for { select { - case info := <-t.out: - t.processTraceInfo(info) - if len(info.spans) != 0 { - t.traceWriter.add(info.spans) + case trace := <-t.out: + t.sampleFinishedTrace(trace) + if len(trace.spans) != 0 { + t.traceWriter.add(trace.spans) } case <-tick: t.config.statsd.Incr("datadog.tracer.flush_triggered", []string{"reason:scheduled"}, 1) @@ -302,10 +302,10 @@ func (t *tracer) worker(tick <-chan time.Time) { // before the final flush to ensure no traces are lost (see #526) for { select { - case info := <-t.out: - t.processTraceInfo(info) - if len(info.spans) != 0 { - t.traceWriter.add(info.spans) + case trace := <-t.out: + t.sampleFinishedTrace(trace) + if len(trace.spans) != 0 { + t.traceWriter.add(trace.spans) } default: break loop @@ -316,17 +316,15 @@ func (t *tracer) worker(tick <-chan time.Time) { } } -// traceInfo contains span list and sampling decision of the trace -// needed to run single span sampling. -type traceInfo struct { - spans []*span - samplingDecision samplingDecision +// finishedTrace holds information about a trace that has finished, including its spans. +type finishedTrace struct { + spans []*span + decision samplingDecision } -// processTraceInfo runs single span sampling on the spans pushed to the worker thread. -// Sampling spans on the worker thread keeps any associated latency off the application thread. -func (t *tracer) processTraceInfo(info *traceInfo) { - if info.samplingDecision == decisionKeep { +// sampleFinishedTrace applies single-span sampling to the provided trace, which is considered to be finished. +func (t *tracer) sampleFinishedTrace(info *finishedTrace) { + if info.decision == decisionKeep { return } if !t.rulesSampling.HasSpanRules() { @@ -350,16 +348,16 @@ func (t *tracer) processTraceInfo(info *traceInfo) { atomic.AddUint64(&t.partialTraces, 1) } -func (t *tracer) pushTraceInfo(info *traceInfo) { +func (t *tracer) pushTrace(trace *finishedTrace) { select { case <-t.stop: return default: } select { - case t.out <- info: + case t.out <- trace: default: - log.Error("payload queue full, dropping %d traces", len(info.spans)) + log.Error("payload queue full, dropping %d traces", len(trace.spans)) } } diff --git a/ddtrace/tracer/tracer_test.go b/ddtrace/tracer/tracer_test.go index 67a67f9233..753240e71c 100644 --- a/ddtrace/tracer/tracer_test.go +++ b/ddtrace/tracer/tracer_test.go @@ -195,7 +195,7 @@ func TestTracerStart(t *testing.T) { Start() // ensure at least one worker started and handles requests - internal.GetGlobalTracer().(*tracer).pushTraceInfo(&traceInfo{spans: []*span{}}) + internal.GetGlobalTracer().(*tracer).pushTrace(&finishedTrace{spans: []*span{}}) Stop() Stop() @@ -206,7 +206,7 @@ func TestTracerStart(t *testing.T) { t.Run("deadlock/direct", func(t *testing.T) { tr, _, _, stop := startTestTracer(t) defer stop() - tr.pushTraceInfo(&traceInfo{spans: []*span{}}) // blocks until worker is started + tr.pushTrace(&finishedTrace{spans: []*span{}}) // blocks until worker is started select { case <-tr.stop: t.Fatal("stopped channel should be open") @@ -382,7 +382,7 @@ func TestSamplingDecision(t *testing.T) { child := tracer.StartSpan("name_2", ChildOf(parent.context)).(*span) child.Finish() parent.Finish() - tracer.pushTraceInfo(&traceInfo{spans: []*span{parent, child}}) + tracer.pushTrace(&finishedTrace{spans: []*span{parent, child}}) tracer.Stop() time.Sleep(5 * time.Millisecond) assert.Equal(t, float64(ext.PriorityAutoReject), parent.Metrics[keySamplingPriority]) @@ -1213,11 +1213,11 @@ func TestPushPayload(t *testing.T) { s.Meta["key"] = strings.Repeat("X", payloadSizeLimit/2+10) // half payload size reached - tracer.pushTraceInfo(&traceInfo{[]*span{s}, decisionKeep}) + tracer.pushTrace(&finishedTrace{[]*span{s}, decisionKeep}) tracer.awaitPayload(t, 1) // payload size exceeded - tracer.pushTraceInfo(&traceInfo{[]*span{s}, decisionKeep}) + tracer.pushTrace(&finishedTrace{[]*span{s}, decisionKeep}) flush(2) } @@ -1239,16 +1239,16 @@ func TestPushTrace(t *testing.T) { Resource: "/foo", }, } - tracer.pushTraceInfo(&traceInfo{spans: trace}) + tracer.pushTrace(&finishedTrace{spans: trace}) assert.Len(tracer.out, 1) t0 := <-tracer.out - assert.Equal(&traceInfo{spans: trace}, t0) + assert.Equal(&finishedTrace{spans: trace}, t0) many := payloadQueueSize + 2 for i := 0; i < many; i++ { - tracer.pushTraceInfo(&traceInfo{spans: make([]*span, i)}) + tracer.pushTrace(&finishedTrace{spans: make([]*span, i)}) } assert.Len(tracer.out, payloadQueueSize) log.Flush() From bafbfd91f63debf673cebcd103527357d48233eb Mon Sep 17 00:00:00 2001 From: "diana.shevchenko" Date: Tue, 2 Aug 2022 14:26:37 +0200 Subject: [PATCH 27/27] removed unnecessary time.Sleep --- ddtrace/tracer/tracer_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/ddtrace/tracer/tracer_test.go b/ddtrace/tracer/tracer_test.go index 753240e71c..aeb6e23586 100644 --- a/ddtrace/tracer/tracer_test.go +++ b/ddtrace/tracer/tracer_test.go @@ -384,7 +384,6 @@ func TestSamplingDecision(t *testing.T) { parent.Finish() tracer.pushTrace(&finishedTrace{spans: []*span{parent, child}}) tracer.Stop() - time.Sleep(5 * time.Millisecond) assert.Equal(t, float64(ext.PriorityAutoReject), parent.Metrics[keySamplingPriority]) assert.Equal(t, decisionDrop, parent.context.trace.samplingDecision) assert.Equal(t, 8.0, parent.Metrics[keySpanSamplingMechanism])