diff --git a/ddtrace/ext/tags.go b/ddtrace/ext/tags.go index e7337d7ae7..5ceee8e4ed 100644 --- a/ddtrace/ext/tags.go +++ b/ddtrace/ext/tags.go @@ -79,6 +79,19 @@ const ( // Search & Analytics event. AnalyticsEvent = "analytics.event" + // SpanSamplingMechanism is the metric key holding a span sampling rule that a span was kept on + SpanSamplingMechanism = "_dd.span_sampling.mechanism" + + // SingleSpanSamplingMechanism is a constant reserved to indicate that a span was kept on account of a span sampling rule. + SingleSpanSamplingMechanism = 8 + + // SingleSpanSamplingRuleRate is the metric key containing the configured sampling probability for the span sampling rule. + SingleSpanSamplingRuleRate = "_dd.span_sampling.rule_rate" + + // SingleSpanSamplingMPS is the metric key contains the configured limit for the span sampling rule that the span matched. + // // If there is no configured limit, then this tag is omitted. + SingleSpanSamplingMPS = "_dd.span_sampling.max_per_second" + // ManualKeep is a tag which specifies that the trace to which this span // belongs to should be kept when set to true. ManualKeep = "manual.keep" diff --git a/ddtrace/tracer/log_test.go b/ddtrace/tracer/log_test.go index 39f869256b..3962eeaa72 100644 --- a/ddtrace/tracer/log_test.go +++ b/ddtrace/tracer/log_test.go @@ -126,8 +126,8 @@ func TestLogSamplingRules(t *testing.T) { lines := removeAppSec(tp.Lines()) assert.Len(lines, 2) - assert.Contains(lines[0], "WARN: at index 4: ignoring rule {Service: Name: Rate:9.10}: rate is out of [0.0, 1.0] range") - assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ WARN: DIAGNOSTICS Error\(s\) parsing DD_TRACE_SAMPLING_RULES: found errors:\n\tat index 1: rate not provided\n\tat index 3: rate not provided$`, lines[1]) + assert.Contains(lines[0], "WARN: at index 4: ignoring rule {Service: Name: Rate:9.10 MaxPerSecond:0}: rate is out of [0.0, 1.0] range") + assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ WARN: DIAGNOSTICS Error\(s\) parsing sampling rules: found errors:\n\tat index 1: rate not provided\n\tat index 3: rate not provided$`, lines[1]) } func TestLogAgentReachable(t *testing.T) { diff --git a/ddtrace/tracer/sampler.go b/ddtrace/tracer/sampler.go index 2069436a1b..b0ee284ea5 100644 --- a/ddtrace/tracer/sampler.go +++ b/ddtrace/tracer/sampler.go @@ -188,19 +188,62 @@ func newRulesSampler(rules []SamplingRule) *rulesSampler { } } -// samplingRulesFromEnv parses sampling rules from the DD_TRACE_SAMPLING_RULES -// environment variable. +// samplingRulesFromEnv parses sampling rules from the DD_TRACE_SAMPLING_RULES environment variable +// and combines them with spanSamplingRules func samplingRulesFromEnv() ([]SamplingRule, error) { - rulesFromEnv := os.Getenv("DD_TRACE_SAMPLING_RULES") - if rulesFromEnv == "" { + errs := []string{} + spanRules, err := spanSamplingRulesFromEnv() + if err != nil { + errs = append(errs, err.Error()) + } + traceRules, err := processSamplingRulesFromEnv([]byte(os.Getenv("DD_TRACE_SAMPLING_RULES")), TraceSamplingRuleType) + if err != nil { + errs = append(errs, err.Error()) + } + + rules := append(spanRules, traceRules...) + if len(errs) != 0 { + return rules, fmt.Errorf("%s", strings.Join(errs, "\n\t")) + } + return rules, nil +} + +// spanSamplingRulesFromEnv parses sampling rules from the DD_SPAN_SAMPLING_RULES and +// DD_SPAN_SAMPLING_RULES_FILE environment variables. +func spanSamplingRulesFromEnv() ([]SamplingRule, error) { + errs := []string{} + rules, err := processSamplingRulesFromEnv([]byte(os.Getenv("DD_SPAN_SAMPLING_RULES")), SingleSpanSamplingType) + if err != nil { + errs = append(errs, err.Error()) + } + if len(rules) != 0 { + if os.Getenv("DD_SPAN_SAMPLING_RULES_FILE") != "" { + log.Warn("DIAGNOSTICS Error(s): DD_SPAN_SAMPLING_RULES is available and will take precedence over DD_SPAN_SAMPLING_RULES_FILE") + } + return rules, err + } + rulesFile := os.Getenv("DD_SPAN_SAMPLING_RULES_FILE") + if rulesFile == "" { + return rules, err + } + rulesFromEnvFile, err := os.ReadFile(rulesFile) + if err != nil { + log.Warn("Couldn't read file from DD_SPAN_SAMPLING_RULES_FILE") + } + return processSamplingRulesFromEnv(rulesFromEnvFile, SingleSpanSamplingType) +} + +func processSamplingRulesFromEnv(b []byte, ruleType SamplingRuleType) ([]SamplingRule, error) { + if len(b) == 0 { return nil, nil } - jsonRules := []struct { - Service string `json:"service"` - Name string `json:"name"` - Rate json.Number `json:"sample_rate"` - }{} - err := json.Unmarshal([]byte(rulesFromEnv), &jsonRules) + var jsonRules []struct { + Service string `json:"service"` + Name string `json:"name"` + Rate json.Number `json:"sample_rate"` + MaxPerSecond float64 `json:"max_per_second"` + } + err := json.Unmarshal(b, &jsonRules) if err != nil { return nil, fmt.Errorf("error unmarshalling JSON: %v", err) } @@ -220,6 +263,32 @@ func samplingRulesFromEnv() ([]SamplingRule, error) { log.Warn("at index %d: ignoring rule %+v: rate is out of [0.0, 1.0] range", i, v) continue } + if ruleType == SingleSpanSamplingType { + if v.Service == "" { + v.Service = "*" + } + srvGlob, err := globMatch(v.Service) + if err != nil { + log.Warn("at index %d: ignoring rule %+v: service name regex pattern can't be compiled", i, v) + continue + } + if v.Name == "" { + v.Name = "*" + } + opGlob, err := globMatch(v.Name) + if err != nil { + log.Warn("at index %d: ignoring rule %+v: operation name regex pattern can't be compiled", i, v) + continue + } + rules = append(rules, SamplingRule{ + Service: srvGlob, + Name: opGlob, + Rate: rate, + MaxPerSecond: v.MaxPerSecond, + Type: SingleSpanSamplingType, + }) + continue + } switch { case v.Service != "" && v.Name != "": rules = append(rules, NameServiceRule(v.Name, v.Service, rate)) @@ -230,7 +299,7 @@ func samplingRulesFromEnv() ([]SamplingRule, error) { } } if len(errs) != 0 { - return rules, fmt.Errorf("found errors:\n\t%s", strings.Join(errs, "\n\t")) + return rules, fmt.Errorf("\n\t%s", strings.Join(errs, "\n\t")) } return rules, nil } @@ -300,6 +369,13 @@ func (rs *rulesSampler) apply(span *span) bool { if rule.match(span) { matched = true rate = rule.Rate + if rule.Type == SingleSpanSamplingType { + span.setMetric(ext.SpanSamplingMechanism, ext.SingleSpanSamplingMechanism) + span.setMetric(ext.SingleSpanSamplingRuleRate, rule.Rate) + if rule.MaxPerSecond != 0 { + span.setMetric(ext.SingleSpanSamplingMPS, rule.MaxPerSecond) + } + } break } } @@ -338,13 +414,22 @@ func (rs *rulesSampler) limit() (float64, bool) { return math.NaN(), false } +type SamplingRuleType int + +const ( + TraceSamplingRuleType = iota + SingleSpanSamplingType +) + // SamplingRule is used for applying sampling rates to spans that match // the service name, operation name or both. // For basic usage, consider using the helper functions ServiceRule, NameRule, etc. type SamplingRule struct { - Service *regexp.Regexp - Name *regexp.Regexp - Rate float64 + Service *regexp.Regexp + Name *regexp.Regexp + Rate float64 + MaxPerSecond float64 + Type SamplingRuleType exactService string exactName string diff --git a/ddtrace/tracer/sampler_test.go b/ddtrace/tracer/sampler_test.go index 98b9edf915..3b22080495 100644 --- a/ddtrace/tracer/sampler_test.go +++ b/ddtrace/tracer/sampler_test.go @@ -231,7 +231,7 @@ func TestRuleEnvVars(t *testing.T) { } }) - t.Run("sampling-rules", func(t *testing.T) { + t.Run("trace-sampling-rules", func(t *testing.T) { assert := assert.New(t) defer os.Unsetenv("DD_TRACE_SAMPLING_RULES") @@ -264,12 +264,89 @@ func TestRuleEnvVars(t *testing.T) { if tt.errStr == "" { assert.NoError(err) } else { - assert.Equal(err.Error(), tt.errStr) + assert.Equal(tt.errStr, err.Error()) } assert.Len(rules, tt.ruleN) }) } }) + t.Run("span-sampling-rules", func(t *testing.T) { + assert := assert.New(t) + defer os.Unsetenv("DD_SPAN_SAMPLING_RULES") + + for i, tt := range []struct { + value string + ruleN int + errStr string + }{ + { + value: "[]", + ruleN: 0, + }, { + value: `[{"service": "abcd", "sample_rate": 1.0}]`, + ruleN: 1, + }, { + value: `[{"service": "abcd", "sample_rate": 1.0},{"name": "wxyz", "sample_rate": 0.9},{"service": "efgh", "name": "lmnop", "sample_rate": 0.42}]`, + ruleN: 3, + }, { + // invalid rule ignored + value: `[{"service": "abcd", "sample_rate": 42.0}, {"service": "abcd", "sample_rate": 0.2}]`, + ruleN: 1, + }, { + value: `not JSON at all`, + errStr: `error unmarshalling JSON: invalid character 'o' in literal null (expecting 'u')`, + }, + } { + t.Run(fmt.Sprintf("%v", i), func(t *testing.T) { + os.Setenv("DD_SPAN_SAMPLING_RULES", tt.value) + rules, err := samplingRulesFromEnv() + if tt.errStr == "" { + assert.NoError(err) + } else { + assert.Equal(tt.errStr, err.Error()) + } + assert.Len(rules, tt.ruleN) + }) + } + }) + t.Run("span-sampling-rules-regex", func(t *testing.T) { + assert := assert.New(t) + defer os.Unsetenv("DD_SPAN_SAMPLING_RULES") + + for i, tt := range []struct { + rules string + srvRegex string + nameRegex string + }{ + { + rules: `[{"name": "abcd?", "sample_rate": 1.0}]`, + srvRegex: "^.*$", + nameRegex: "^abcd.$", + }, + { + rules: `[{"service": "*abcd", "sample_rate": 1.0}]`, + nameRegex: "^.*$", + srvRegex: "^.*abcd$", + }, + { + rules: `[{"service": "*abcd", "sample_rate": 1.0}]`, + nameRegex: "^.*$", + srvRegex: "^.*abcd$", + }, { + rules: `[{"sample_rate": 1.0},{"name": "wxyz", "sample_rate": 0.9}]`, + nameRegex: "^.*$", + srvRegex: "^.*$", + }, + } { + t.Run(fmt.Sprintf("%v", i), func(t *testing.T) { + os.Setenv("DD_SPAN_SAMPLING_RULES", tt.rules) + rules, err := samplingRulesFromEnv() + assert.NoError(err) + assert.Equal(tt.srvRegex, rules[0].Service.String()) + assert.Equal(tt.nameRegex, rules[0].Name.String()) + }) + } + }) } func TestRulesSampler(t *testing.T) { @@ -329,6 +406,126 @@ func TestRulesSampler(t *testing.T) { } }) + t.Run("matching-span-rules", func(t *testing.T) { + defer os.Unsetenv("DD_SPAN_SAMPLING_RULES") + for _, tt := range []struct { + rules string + spanSrv string + spanName string + }{ + { + rules: `[{"name": "abcd?", "sample_rate": 1.0, "max_per_second":100}]`, + spanSrv: "test-service", + spanName: "abcde", + }, + { + rules: `[{"service": "*abcd","max_per_second":100, "sample_rate": 1.0}]`, + spanSrv: "xyzabcd", + spanName: "abcde", + }, + { + rules: `[{"service": "?*", "sample_rate": 1.0, "max_per_second":100}]`, + spanSrv: "test-service", + spanName: "abcde", + }, + } { + t.Run("", func(t *testing.T) { + os.Setenv("DD_SPAN_SAMPLING_RULES", tt.rules) + rules, _ := samplingRulesFromEnv() + + assert := assert.New(t) + rs := newRulesSampler(rules) + + span := makeSpan(tt.spanName, tt.spanSrv) + result := rs.apply(span) + assert.True(result) + assert.Contains(span.Metrics, ext.SpanSamplingMechanism) + assert.Contains(span.Metrics, ext.SingleSpanSamplingRuleRate) + assert.Contains(span.Metrics, ext.SingleSpanSamplingMPS) + }) + } + }) + t.Run("not-matching-span-rules", func(t *testing.T) { + defer os.Unsetenv("DD_SPAN_SAMPLING_RULES") + for _, tt := range []struct { + rules string + spanSrv string + spanName string + }{ + { + //first matching rule takes precedence + rules: `[{"name": "abcd?", "sample_rate": 0.0},{"name": "abcd?", "sample_rate": 1.0}]`, + spanSrv: "test-service", + spanName: "abcdef", + }, + { + rules: `[{"service": "abcd", "sample_rate": 1.0}]`, + spanSrv: "xyzabcd", + spanName: "abcde", + }, + { + rules: `[{"service": "?", "sample_rate": 1.0}]`, + spanSrv: "test-service", + spanName: "abcde", + }, + } { + t.Run("", func(t *testing.T) { + os.Setenv("DD_SPAN_SAMPLING_RULES", tt.rules) + rules, _ := samplingRulesFromEnv() + + assert := assert.New(t) + rs := newRulesSampler(rules) + + span := makeSpan(tt.spanName, tt.spanSrv) + result := rs.apply(span) + assert.False(result) + assert.NotContains(span.Metrics, ext.SpanSamplingMechanism) + assert.NotContains(span.Metrics, ext.SingleSpanSamplingRuleRate) + assert.NotContains(span.Metrics, ext.SingleSpanSamplingMPS) + }) + } + }) + t.Run("not-matching-span-rules/matching-trace-rules", func(t *testing.T) { + defer os.Unsetenv("DD_SPAN_SAMPLING_RULES") + defer os.Unsetenv("DD_TRACE_SAMPLING_RULES") + for _, tt := range []struct { + spanRules string + traceRules string + spanSrv string + spanName string + }{ + { + //first matching rule takes precedence + spanRules: `[{"name": "abcdef?", "sample_rate": 1.0}]`, + traceRules: `[{"name": "abcdef", "sample_rate": 1.0},{"name": "abcd?", "sample_rate": 1.0}]`, + spanName: "abcdef", + spanSrv: "test-service", + }, + { + spanRules: `[{"service": "abcde", "sample_rate": 1.0}]`, + traceRules: `[{"service": "abcd", "sample_rate": 1.0}]`, + spanSrv: "abcd", + spanName: "abcde", + }, + } { + t.Run("", func(t *testing.T) { + os.Setenv("DD_SPAN_SAMPLING_RULES", tt.spanRules) + os.Setenv("DD_TRACE_SAMPLING_RULES", tt.traceRules) + rules, _ := samplingRulesFromEnv() + + assert := assert.New(t) + rs := newRulesSampler(rules) + + span := makeSpan(tt.spanName, tt.spanSrv) + result := rs.apply(span) + assert.True(result) + assert.NotContains(span.Metrics, ext.SpanSamplingMechanism) + assert.NotContains(span.Metrics, ext.SingleSpanSamplingRuleRate) + assert.NotContains(span.Metrics, ext.SingleSpanSamplingMPS) + }) + } + }) + t.Run("default-rate", func(t *testing.T) { ruleSets := [][]SamplingRule{ {}, diff --git a/ddtrace/tracer/tracer.go b/ddtrace/tracer/tracer.go index ee32b00efb..7edeafd3e8 100644 --- a/ddtrace/tracer/tracer.go +++ b/ddtrace/tracer/tracer.go @@ -178,7 +178,7 @@ func newUnstartedTracer(opts ...StartOption) *tracer { c := newConfig(opts...) envRules, err := samplingRulesFromEnv() if err != nil { - log.Warn("DIAGNOSTICS Error(s) parsing DD_TRACE_SAMPLING_RULES: %s", err) + log.Warn("DIAGNOSTICS Error(s) parsing sampling rules: found errors: %s", err) } if envRules != nil { c.samplingRules = envRules