diff --git a/ddtrace/_trace/tracer.py b/ddtrace/_trace/tracer.py index bc4caea46f8..27c2d2ab8f9 100644 --- a/ddtrace/_trace/tracer.py +++ b/ddtrace/_trace/tracer.py @@ -234,9 +234,18 @@ def __init__( self._user_sampler: Optional[BaseSampler] = DatadogSampler() self._asm_enabled = asm_config._asm_enabled self._appsec_standalone_enabled = asm_config._appsec_standalone_enabled - self._sampler: BaseSampler = DatadogSampler() - self._maybe_opt_out() self._dogstatsd_url = agent.get_stats_url() if dogstatsd_url is None else dogstatsd_url + self._apm_opt_out = self._asm_enabled and self._appsec_standalone_enabled + if self._apm_opt_out: + self.enabled = False + # Disable compute stats (neither agent or tracer should compute them) + config._trace_compute_stats = False + # If ASM is enabled but tracing is disabled, + # we need to set the rate limiting to 1 trace per minute + # for the backend to consider the service as alive. + self._sampler: BaseSampler = DatadogSampler(rate_limit=1, rate_limit_window=60e9, rate_limit_always_on=True) + else: + self._sampler: BaseSampler = DatadogSampler() self._compute_stats = config._trace_compute_stats self._agent_url: str = agent.get_trace_url() if url is None else url verify_url(self._agent_url) @@ -295,19 +304,6 @@ def __init__( config._subscribe(["tags"], self._on_global_config_update) config._subscribe(["_tracing_enabled"], self._on_global_config_update) - def _maybe_opt_out(self): - self._apm_opt_out = self._asm_enabled and self._appsec_standalone_enabled - if self._apm_opt_out: - # If ASM is enabled but tracing is disabled, - # we need to set the rate limiting to 1 trace per minute - # for the backend to consider the service as alive. - from ddtrace.internal.rate_limiter import RateLimiter - - self._sampler.limiter = RateLimiter(rate_limit=1, time_window=60e9) # 1 trace per minute - # Disable compute stats (neither agent or tracer should compute them) - config._trace_compute_stats = False - self.enabled = False - def _atexit(self) -> None: key = "ctrl-break" if os.name == "nt" else "ctrl-c" log.debug( @@ -502,12 +498,29 @@ def configure( if appsec_standalone_enabled is not None: self._appsec_standalone_enabled = asm_config._appsec_standalone_enabled = appsec_standalone_enabled + if self._appsec_standalone_enabled and self._asm_enabled: + self._apm_opt_out = True + self.enabled = False + # Disable compute stats (neither agent or tracer should compute them) + config._trace_compute_stats = False + # Update the rate limiter to 1 trace per minute when tracing is disabled + if isinstance(sampler, DatadogSampler): + sampler._rate_limit_always_on = True + sampler.limiter.rate_limit = 1 + sampler.limiter.time_window = 60e9 + else: + if sampler is not None: + log.warning( + "Overriding sampler: %s, a DatadogSampler must be used in ASM Standalone mode", + sampler.__class__, + ) + sampler = DatadogSampler(rate_limit=1, rate_limit_window=60e9, rate_limit_always_on=True) + log.debug("ASM standalone mode is enabled, traces will be rate limited at 1 trace per minute") + if sampler is not None: self._sampler = sampler self._user_sampler = self._sampler - self._maybe_opt_out() - self._dogstatsd_url = dogstatsd_url or self._dogstatsd_url if any(x is not None for x in [hostname, port, uds_path, https]): diff --git a/ddtrace/internal/rate_limiter.py b/ddtrace/internal/rate_limiter.py index 63312911ecc..981701cd51b 100644 --- a/ddtrace/internal/rate_limiter.py +++ b/ddtrace/internal/rate_limiter.py @@ -12,7 +12,6 @@ from ddtrace.vendor.debtcollector import deprecate from ..internal import compat -from ..internal.constants import DEFAULT_SAMPLING_RATE_LIMIT class RateLimiter(object): @@ -59,10 +58,6 @@ def __init__(self, rate_limit: int, time_window: float = 1e9): self._lock = threading.Lock() - @property - def _has_been_configured(self): - return self.rate_limit != DEFAULT_SAMPLING_RATE_LIMIT - def is_allowed(self, timestamp_ns: Optional[int] = None) -> bool: """ Check whether the current request is allowed or not diff --git a/ddtrace/sampler.py b/ddtrace/sampler.py index c5fd896acfe..9c78905a70b 100644 --- a/ddtrace/sampler.py +++ b/ddtrace/sampler.py @@ -203,7 +203,7 @@ class DatadogSampler(RateByServiceSampler): per second. """ - __slots__ = ("limiter", "rules", "default_sample_rate") + __slots__ = ("limiter", "rules", "default_sample_rate", "_rate_limit_always_on") NO_RATE_LIMIT = -1 # deprecate and remove the DEFAULT_RATE_LIMIT field from DatadogSampler @@ -214,6 +214,8 @@ def __init__( rules=None, # type: Optional[List[SamplingRule]] default_sample_rate=None, # type: Optional[float] rate_limit=None, # type: Optional[int] + rate_limit_window=1e9, # type: float + rate_limit_always_on=False, # type: bool ): # type: (...) -> None """ @@ -228,13 +230,16 @@ def __init__( # Use default sample rate of 1.0 super(DatadogSampler, self).__init__() self.default_sample_rate = default_sample_rate + effective_sample_rate = default_sample_rate if default_sample_rate is None: if ddconfig._get_source("_trace_sample_rate") != "default": - default_sample_rate = float(ddconfig._trace_sample_rate) + effective_sample_rate = float(ddconfig._trace_sample_rate) if rate_limit is None: rate_limit = int(ddconfig._trace_rate_limit) + self._rate_limit_always_on = rate_limit_always_on + if rules is None: env_sampling_rules = ddconfig._trace_sampling_rules if env_sampling_rules: @@ -251,12 +256,12 @@ def __init__( elif config._raise: raise TypeError("Rule {!r} must be a sub-class of type ddtrace.sampler.SamplingRules".format(rule)) - # DEV: Default sampling rule must come last - if default_sample_rate is not None: - self.rules.append(SamplingRule(sample_rate=default_sample_rate)) + # DEV: sampling rule must come last + if effective_sample_rate is not None: + self.rules.append(SamplingRule(sample_rate=effective_sample_rate)) # Configure rate limiter - self.limiter = RateLimiter(rate_limit) + self.limiter = RateLimiter(rate_limit, rate_limit_window) log.debug("initialized %r", self) @@ -315,18 +320,22 @@ def sample(self, span): sampler = self._default_sampler # type: BaseSampler sample_rate = self.sample_rate if matched_rule: + # Client based sampling sampled = matched_rule.sample(span) sample_rate = matched_rule.sample_rate else: + # Agent based sampling sampled, sampler = super(DatadogSampler, self)._make_sampling_decision(span) if isinstance(sampler, RateSampler): sample_rate = sampler.sample_rate - # Apply rate limit - if sampled: - sampled = self.limiter.is_allowed() - if self.limiter._has_been_configured: - span.set_metric(SAMPLING_LIMIT_DECISION, self.limiter.effective_rate) + if matched_rule or self._rate_limit_always_on: + # Avoid rate limiting when trace sample rules and/or sample rates are NOT provided + # by users. In this scenario tracing should default to agent based sampling. ASM + # uses DatadogSampler._rate_limit_always_on to override this functionality. + if sampled: + sampled = self.limiter.is_allowed() + span.set_metric(SAMPLING_LIMIT_DECISION, self.limiter.effective_rate) _set_sampling_tags( span, sampled, @@ -345,10 +354,8 @@ def _choose_priority_category_with_rule(self, rule, sampler): if provenance == "dynamic": return _PRIORITY_CATEGORY.RULE_DYNAMIC return _PRIORITY_CATEGORY.RULE_DEF - - if self.limiter._has_been_configured: - # If the default rate limiter is NOT used to sample traces - # the sampling priority must be set to manual keep/drop. - # This will disable agent based sample rates. + elif self._rate_limit_always_on: + # backwards compaitbiility for ASM, when the rate limit is always on (ASM standalone mode) + # we want spans to be set to a MANUAL priority to avoid agent based sampling return _PRIORITY_CATEGORY.USER return super(DatadogSampler, self)._choose_priority_category(sampler) diff --git a/ddtrace/settings/config.py b/ddtrace/settings/config.py index 086b09f06c9..a35111481e7 100644 --- a/ddtrace/settings/config.py +++ b/ddtrace/settings/config.py @@ -376,7 +376,18 @@ def __init__(self): self._debug_mode = asbool(os.getenv("DD_TRACE_DEBUG", default=False)) self._startup_logs_enabled = asbool(os.getenv("DD_TRACE_STARTUP_LOGS", False)) - self._trace_rate_limit = int(os.getenv("DD_TRACE_RATE_LIMIT", default=DEFAULT_SAMPLING_RATE_LIMIT)) + rate_limit = os.getenv("DD_TRACE_RATE_LIMIT") + if rate_limit is not None and self._trace_sampling_rules in ("", "[]"): + # This warning will be logged when DD_TRACE_SAMPLE_RATE is set. This is intentional. + # Even though DD_TRACE_SAMPLE_RATE is treated as a global trace sampling rule, this configuration + # is deprecated. We should always encourage users to set DD_TRACE_SAMPLING_RULES instead. + log.warning( + "DD_TRACE_RATE_LIMIT is set to %s and DD_TRACE_SAMPLING_RULES is not set. " + "Tracer rate limitting is only applied to spans that match tracer sampling rules. " + "All other spans will be rate limited by the Datadog Agent via DD_APM_MAX_TPS.", + rate_limit, + ) + self._trace_rate_limit = int(rate_limit or DEFAULT_SAMPLING_RATE_LIMIT) self._partial_flush_enabled = asbool(os.getenv("DD_TRACE_PARTIAL_FLUSH_ENABLED", default=True)) self._partial_flush_min_spans = int(os.getenv("DD_TRACE_PARTIAL_FLUSH_MIN_SPANS", default=300)) diff --git a/docs/configuration.rst b/docs/configuration.rst index 944308ee2e8..58eaa2e0e1b 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -240,10 +240,11 @@ The following environment variables for the tracer are supported: type: int default: 100 description: | - Maximum number of traces per second to sample. Set a rate limit to avoid the ingestion volume overages in the case of traffic spikes. - + Maximum number of traces per second to sample. Set a rate limit to avoid the ingestion volume overages in the case of traffic spikes. This configuration + is only applied when client based sampling is configured, otherwise agent based rate limits are used. version_added: v0.33.0: + v2.15.0: Only applied when DD_TRACE_SAMPLE_RATE, DD_TRACE_SAMPLING_RULES, or DD_SPAN_SAMPLING_RULE are set. DD_TRACE_SAMPLING_RULES: type: JSON array diff --git a/releasenotes/notes/only-rate-limit-on-sampling-rules-6f807defa7d0f5f1.yaml b/releasenotes/notes/only-rate-limit-on-sampling-rules-6f807defa7d0f5f1.yaml new file mode 100644 index 00000000000..b469abb5809 --- /dev/null +++ b/releasenotes/notes/only-rate-limit-on-sampling-rules-6f807defa7d0f5f1.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + tracing: Ensures ``DD_TRACE_RATE_LIMIT`` environment variable is only applied to spans for which tracer sampling is configured. For spans not matching sampling rules default rate limits should be applied by the Datadog Agent. diff --git a/tests/integration/test_sampling.py b/tests/integration/test_sampling.py index 09de91d6ef5..04749e080ba 100644 --- a/tests/integration/test_sampling.py +++ b/tests/integration/test_sampling.py @@ -302,7 +302,8 @@ def test_rate_limiter_on_spans(tracer): """ Ensure that the rate limiter is applied to spans """ - tracer.configure(sampler=DatadogSampler(rate_limit=10)) + # Rate limit is only applied if a sample rate or trace sample rule is set + tracer.configure(sampler=DatadogSampler(default_sample_rate=1, rate_limit=10)) spans = [] # Generate 10 spans with the start and finish time in same second for x in range(10): diff --git a/tests/snapshots/test_extended_sampling_glob_multi_rule.json b/tests/snapshots/test_extended_sampling_glob_multi_rule.json index 860a058fcbf..0535ab93778 100644 --- a/tests/snapshots/test_extended_sampling_glob_multi_rule.json +++ b/tests/snapshots/test_extended_sampling_glob_multi_rule.json @@ -17,6 +17,7 @@ "test": "tag" }, "metrics": { + "_dd.limit_psr": 1.0, "_dd.rule_psr": 1.0, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, diff --git a/tests/snapshots/test_sampling_with_default_sample_rate_1.json b/tests/snapshots/test_sampling_with_default_sample_rate_1.json index a225f518fea..e9ec59dbab2 100644 --- a/tests/snapshots/test_sampling_with_default_sample_rate_1.json +++ b/tests/snapshots/test_sampling_with_default_sample_rate_1.json @@ -14,6 +14,7 @@ "runtime-id": "10b45fb36ca3465fa2b6dbd7b10bdccf" }, "metrics": { + "_dd.limit_psr": 1.0, "_dd.rule_psr": 1.0, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, diff --git a/tests/snapshots/test_sampling_with_default_sample_rate_1_and_rule_1.json b/tests/snapshots/test_sampling_with_default_sample_rate_1_and_rule_1.json index a51fd5a0251..6674e845323 100644 --- a/tests/snapshots/test_sampling_with_default_sample_rate_1_and_rule_1.json +++ b/tests/snapshots/test_sampling_with_default_sample_rate_1_and_rule_1.json @@ -14,6 +14,7 @@ "runtime-id": "10b45fb36ca3465fa2b6dbd7b10bdccf" }, "metrics": { + "_dd.limit_psr": 1.0, "_dd.rule_psr": 1.0, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, diff --git a/tests/snapshots/test_sampling_with_rate_limit_3.json b/tests/snapshots/test_sampling_with_rate_limit_3.json index e097d4eace9..267c8e68033 100644 --- a/tests/snapshots/test_sampling_with_rate_limit_3.json +++ b/tests/snapshots/test_sampling_with_rate_limit_3.json @@ -9,15 +9,14 @@ "type": "", "error": 0, "meta": { - "_dd.p.dm": "-3", + "_dd.p.dm": "-0", "language": "python", "runtime-id": "9d1cfdf7026e45dea46fc4c83768cc7b" }, "metrics": { - "_dd.limit_psr": 1.0, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, - "_sampling_priority_v1": 2, + "_sampling_priority_v1": 1, "process_id": 9723 }, "duration": 71105, diff --git a/tests/snapshots/test_sampling_with_sample_rate_1_and_rate_limit_3_and_rule_0.json b/tests/snapshots/test_sampling_with_sample_rate_1_and_rate_limit_3_and_rule_0.json index f4c5c03df9f..d8986904cf1 100644 --- a/tests/snapshots/test_sampling_with_sample_rate_1_and_rate_limit_3_and_rule_0.json +++ b/tests/snapshots/test_sampling_with_sample_rate_1_and_rate_limit_3_and_rule_0.json @@ -14,7 +14,6 @@ "runtime-id": "9d1cfdf7026e45dea46fc4c83768cc7b" }, "metrics": { - "_dd.limit_psr": 1, "_dd.rule_psr": 0, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, diff --git a/tests/snapshots/tests.integration.test_trace_stats.test_single_span_sampling[sampling_rule1].json b/tests/snapshots/tests.integration.test_trace_stats.test_single_span_sampling[sampling_rule1].json index f7a623c7439..bdc1f15cc14 100644 --- a/tests/snapshots/tests.integration.test_trace_stats.test_single_span_sampling[sampling_rule1].json +++ b/tests/snapshots/tests.integration.test_trace_stats.test_single_span_sampling[sampling_rule1].json @@ -16,6 +16,7 @@ "runtime-id": "423178dbb29d4ca784743ac73e5678c8" }, "metrics": { + "_dd.limit_psr": 1.0, "_dd.rule_psr": 1.0, "_dd.top_level": 1, "_dd.tracer_kr": 1.0, diff --git a/tests/tracer/test_sampler.py b/tests/tracer/test_sampler.py index 52fc98a56a4..bdbd8f11537 100644 --- a/tests/tracer/test_sampler.py +++ b/tests/tracer/test_sampler.py @@ -659,6 +659,18 @@ def test_sampling_rule_sample_rate_0(): ), "SamplingRule with rate=0 should never keep samples" +@pytest.mark.subprocess( + env={"DD_TRACE_RATE_LIMIT": "2", "DD_TRACE_SAMPLING_RULES": ""}, + err=b"DD_TRACE_RATE_LIMIT is set to 2 and DD_TRACE_SAMPLING_RULES is not set. " + b"Tracer rate limitting is only applied to spans that match tracer sampling rules. " + b"All other spans will be rate limited by the Datadog Agent via DD_APM_MAX_TPS.\n", +) +def test_rate_limit_without_sampling_rules_warning(): + from ddtrace import config + + assert config._trace_rate_limit == 2 + + def test_datadog_sampler_init(): sampler = DatadogSampler() assert sampler.rules == [], "DatadogSampler initialized with no arguments should hold no rules"