From 275c4b3433342f10c8b9e076c840230a06c1ab81 Mon Sep 17 00:00:00 2001 From: Marius Constantin Date: Fri, 14 Jun 2024 16:00:27 +0200 Subject: [PATCH] OtelTraceProvider.Builder introduce the trace rate limit property --- .../dd-sdk-android-trace-otel/api/apiSurface | 1 + .../api/dd-sdk-android-trace-otel.api | 1 + .../trace/opentelemetry/OtelTracerProvider.kt | 17 ++ .../OtelTracerBuilderProviderTest.kt | 33 ++++ .../otel/OtelTracerProviderTest.kt | 187 +++++++++++++++++- 5 files changed, 234 insertions(+), 5 deletions(-) diff --git a/features/dd-sdk-android-trace-otel/api/apiSurface b/features/dd-sdk-android-trace-otel/api/apiSurface index 644c1d2c9d..4c1ff05373 100644 --- a/features/dd-sdk-android-trace-otel/api/apiSurface +++ b/features/dd-sdk-android-trace-otel/api/apiSurface @@ -10,5 +10,6 @@ class com.datadog.android.trace.opentelemetry.OtelTracerProvider : io.openteleme fun setPartialFlushThreshold(Int): Builder fun addTag(String, String): Builder fun setSampleRate(Double): Builder + fun setTraceRateLimit(Int): Builder fun setBundleWithRumEnabled(Boolean): Builder override fun toString(): String diff --git a/features/dd-sdk-android-trace-otel/api/dd-sdk-android-trace-otel.api b/features/dd-sdk-android-trace-otel/api/dd-sdk-android-trace-otel.api index 7d7753babc..8cbf273e71 100644 --- a/features/dd-sdk-android-trace-otel/api/dd-sdk-android-trace-otel.api +++ b/features/dd-sdk-android-trace-otel/api/dd-sdk-android-trace-otel.api @@ -23,6 +23,7 @@ public final class com/datadog/android/trace/opentelemetry/OtelTracerProvider$Bu public final fun setPartialFlushThreshold (I)Lcom/datadog/android/trace/opentelemetry/OtelTracerProvider$Builder; public final fun setSampleRate (D)Lcom/datadog/android/trace/opentelemetry/OtelTracerProvider$Builder; public final fun setService (Ljava/lang/String;)Lcom/datadog/android/trace/opentelemetry/OtelTracerProvider$Builder; + public final fun setTraceRateLimit (I)Lcom/datadog/android/trace/opentelemetry/OtelTracerProvider$Builder; 
public final fun setTracingHeaderTypes (Ljava/util/Set;)Lcom/datadog/android/trace/opentelemetry/OtelTracerProvider$Builder; } diff --git a/features/dd-sdk-android-trace-otel/src/main/kotlin/com/datadog/android/trace/opentelemetry/OtelTracerProvider.kt b/features/dd-sdk-android-trace-otel/src/main/kotlin/com/datadog/android/trace/opentelemetry/OtelTracerProvider.kt index bab67859ef..1208b00586 100644 --- a/features/dd-sdk-android-trace-otel/src/main/kotlin/com/datadog/android/trace/opentelemetry/OtelTracerProvider.kt +++ b/features/dd-sdk-android-trace-otel/src/main/kotlin/com/datadog/android/trace/opentelemetry/OtelTracerProvider.kt @@ -7,6 +7,7 @@ package com.datadog.android.trace.opentelemetry import androidx.annotation.FloatRange +import androidx.annotation.IntRange import com.datadog.android.Datadog import com.datadog.android.api.InternalLogger import com.datadog.android.api.SdkCore @@ -105,6 +106,8 @@ class OtelTracerProvider internal constructor( private var tracingHeaderTypes: Set = setOf(TracingHeaderType.DATADOG, TracingHeaderType.TRACECONTEXT) private var sampleRate: Double? = null + private var traceRateLimit = Int.MAX_VALUE + private var serviceName: String = "" get() { return field.ifEmpty { @@ -236,6 +239,19 @@ class OtelTracerProvider internal constructor( return this } + /** + * Sets the trace rate limit. This is the maximum number of traces per second that will be + * accepted. Please note that this property is used in conjunction with the sample rate. If no sample rate + * is provided this property and its related logic will be ignored. + * @param traceRateLimit the trace rate limit as a value between 1 and Int.MAX_VALUE (default is Int.MAX_VALUE) + */ + fun setTraceRateLimit( + @IntRange(from = 1, to = Int.MAX_VALUE.toLong()) traceRateLimit: Int + ): Builder { + this.traceRateLimit = traceRateLimit + return this + } + /** * Enables the trace bundling with the current active View.
If this feature is enabled all * the spans from this moment on will be bundled with the current view information and you @@ -253,6 +269,7 @@ class OtelTracerProvider internal constructor( TracerConfig.SPAN_TAGS, globalTags.map { "${it.key}:${it.value}" }.joinToString(",") ) + properties.setProperty(TracerConfig.TRACE_RATE_LIMIT, traceRateLimit.toString()) // In case the sample rate is not set we should not specify it. The agent code under the hood // will provide different sampler based on this property and also different sampling priorities used diff --git a/features/dd-sdk-android-trace-otel/src/test/kotlin/com/datadog/android/trace/opentelemetry/OtelTracerBuilderProviderTest.kt b/features/dd-sdk-android-trace-otel/src/test/kotlin/com/datadog/android/trace/opentelemetry/OtelTracerBuilderProviderTest.kt index 44a3d2e274..e6a24bbdd9 100644 --- a/features/dd-sdk-android-trace-otel/src/test/kotlin/com/datadog/android/trace/opentelemetry/OtelTracerBuilderProviderTest.kt +++ b/features/dd-sdk-android-trace-otel/src/test/kotlin/com/datadog/android/trace/opentelemetry/OtelTracerBuilderProviderTest.kt @@ -631,6 +631,39 @@ internal class OtelTracerBuilderProviderTest { // endregion + // region trace rate limit + + @Test + fun `M use the trace rate limit W setTraceRateLimit`( + @IntForgery(min = 1, max = Int.MAX_VALUE) traceRateLimit: Int + ) { + // Given + val tracer = testedOtelTracerProviderBuilder.setTraceRateLimit(traceRateLimit).build() + .tracerBuilder(fakeInstrumentationName).build() + + // When + val coreTracer: CoreTracer = tracer.getFieldValue("tracer") + + // Then + val config: Config = coreTracer.getFieldValue("initialConfig") + assertThat(config.traceRateLimit).isEqualTo(traceRateLimit) + } + + @Test + fun `M use the default rate limit W build { if not provided }`() { + // Given + val tracer = testedOtelTracerProviderBuilder.build().tracerBuilder(fakeInstrumentationName).build() + + // When + val coreTracer: CoreTracer = tracer.getFieldValue("tracer") + + // 
Then + val config: Config = coreTracer.getFieldValue("initialConfig") + assertThat(config.traceRateLimit).isEqualTo(Int.MAX_VALUE) + } + + // endregion + // region bundle with RUM @Test diff --git a/reliability/single-fit/trace/src/test/kotlin/com/datadog/android/trace/integration/otel/OtelTracerProviderTest.kt b/reliability/single-fit/trace/src/test/kotlin/com/datadog/android/trace/integration/otel/OtelTracerProviderTest.kt index e111b66fcc..238f4d00ec 100644 --- a/reliability/single-fit/trace/src/test/kotlin/com/datadog/android/trace/integration/otel/OtelTracerProviderTest.kt +++ b/reliability/single-fit/trace/src/test/kotlin/com/datadog/android/trace/integration/otel/OtelTracerProviderTest.kt @@ -23,6 +23,7 @@ import com.google.gson.JsonObject import com.google.gson.JsonParser import fr.xgouchet.elmyr.Forge import fr.xgouchet.elmyr.annotation.DoubleForgery +import fr.xgouchet.elmyr.annotation.IntForgery import fr.xgouchet.elmyr.annotation.StringForgery import fr.xgouchet.elmyr.junit5.ForgeConfiguration import fr.xgouchet.elmyr.junit5.ForgeExtension @@ -786,11 +787,9 @@ internal class OtelTracerProviderTest { // When repeat(numberOfSpans) { - val span = tracer.spanBuilder(forge.anAlphabeticalString()).startSpan() - // there is a throttle on the sampler which drops all the spans over the 100 limit in 1 second - // so we need to sleep a bit to make sure the spans are not dropped because of throttling - Thread.sleep(10) - span.end() + tracer.spanBuilder(forge.anAlphabeticalString()) + .startSpan() + .end() } // Then @@ -818,6 +817,183 @@ internal class OtelTracerProviderTest { // endregion + // region trace rate limit + + @Test + fun `M drop the spans W buildSpan { trace rate limit reached in 1 second, sample rate specified }`( + @StringForgery fakeInstrumentationName: String, + @IntForgery(min = 1, max = 3) traceLimit: Int, + forge: Forge + ) { + // Given + val testedProvider = OtelTracerProvider.Builder(stubSdkCore) + .setTraceRateLimit(traceLimit) + 
.setSampleRate(100.0) + .build() + val tracer = testedProvider.tracerBuilder(fakeInstrumentationName).build() + val blockingWriterWrapper = tracer.useBlockingWriter() + + // When + val startNanos = System.nanoTime() + var spansCounter = 0 + while ((System.nanoTime() - startNanos) < ONE_SECOND_AS_NANOS && (spansCounter < 200)) { + tracer.spanBuilder(forge.anAlphabeticalString()).startSpan().end() + spansCounter++ + } + + // Then + blockingWriterWrapper.waitForTracesMax(spansCounter) + val spansWritten = stubSdkCore.eventsWritten(Feature.TRACING_FEATURE_NAME) + .map { + (JsonParser.parseString(it.eventData) as JsonObject) + .getAsJsonArray("spans") + .get(0) + .asJsonObject + } + val userKeptSpans = spansWritten.filter { + it.getInt(SAMPLING_PRIORITY_KEY) == PrioritySampling.USER_KEEP.toInt() + } + val samplerKeptSpans = spansWritten.filter { + it.getInt(SAMPLING_PRIORITY_KEY) == PrioritySampling.SAMPLER_KEEP.toInt() + } + assertThat(samplerKeptSpans.size).isEqualTo(0) + assertThat(userKeptSpans.size).isLessThanOrEqualTo(traceLimit) + } + + @Test + fun `M ignore trace rate limit W buildSpan { trace rate limit reached in 1 second, sample rate not specified }`( + @StringForgery fakeInstrumentationName: String, + @IntForgery(min = 1, max = 3) traceLimit: Int, + forge: Forge + ) { + // Given + val testedProvider = OtelTracerProvider.Builder(stubSdkCore) + .setTraceRateLimit(traceLimit) + .build() + val tracer = testedProvider.tracerBuilder(fakeInstrumentationName).build() + val blockingWriterWrapper = tracer.useBlockingWriter() + + // When + val startNanos = System.nanoTime() + var spansCounter = 0 + while ((System.nanoTime() - startNanos) < ONE_SECOND_AS_NANOS && (spansCounter < 200)) { + tracer.spanBuilder(forge.anAlphabeticalString()).startSpan().end() + spansCounter++ + } + + // Then + blockingWriterWrapper.waitForTracesMax(spansCounter) + val spansWritten = stubSdkCore.eventsWritten(Feature.TRACING_FEATURE_NAME) + .map { + (JsonParser.parseString(it.eventData) as 
JsonObject) + .getAsJsonArray("spans") + .get(0) + .asJsonObject + } + val userKeptSpans = spansWritten.filter { + it.getInt(SAMPLING_PRIORITY_KEY) == PrioritySampling.USER_KEEP.toInt() + } + val samplerKeptSpans = spansWritten.filter { + it.getInt(SAMPLING_PRIORITY_KEY) == PrioritySampling.SAMPLER_KEEP.toInt() + } + assertThat(samplerKeptSpans.size).isEqualTo(spansCounter) + assertThat(userKeptSpans.size).isEqualTo(0) + } + + @Test + fun `M ignore trace limit W buildSpan { trace rate limit is 1 but sample rate is not specified }`( + @StringForgery fakeInstrumentationName: String, + forge: Forge + ) { + // Given + val testedProvider = OtelTracerProvider.Builder(stubSdkCore).setTraceRateLimit(1).build() + val tracer = testedProvider.tracerBuilder(fakeInstrumentationName).build() + val blockingWriterWrapper = tracer.useBlockingWriter() + + // When + val startNanos = System.nanoTime() + var spansCounter = 0 + while (((System.nanoTime() - startNanos) < (ONE_SECOND_AS_NANOS * 2)) && (spansCounter < 200)) { + tracer.spanBuilder(forge.anAlphabeticalString()).startSpan().end() + spansCounter++ + } + + // Then + blockingWriterWrapper.waitForTracesMax(spansCounter) + val spansWritten = stubSdkCore.eventsWritten(Feature.TRACING_FEATURE_NAME) + .map { + (JsonParser.parseString(it.eventData) as JsonObject) + .getAsJsonArray("spans") + .get(0) + .asJsonObject + } + val userDroppedSpans = spansWritten.filter { + it.getInt(SAMPLING_PRIORITY_KEY) == PrioritySampling.USER_DROP.toInt() + } + val samplerDroppedSpans = spansWritten.filter { + it.getInt(SAMPLING_PRIORITY_KEY) == PrioritySampling.SAMPLER_DROP.toInt() + } + val userKeptSpans = spansWritten.filter { + it.getInt(SAMPLING_PRIORITY_KEY) == PrioritySampling.USER_KEEP.toInt() + } + val samplerKeptSpans = spansWritten.filter { + it.getInt(SAMPLING_PRIORITY_KEY) == PrioritySampling.SAMPLER_KEEP.toInt() + } + assertThat(userDroppedSpans.size).isEqualTo(0) + assertThat(samplerDroppedSpans.size).isEqualTo(0) + 
assertThat(userKeptSpans.size).isEqualTo(0) + assertThat(samplerKeptSpans.size).isEqualTo(spansCounter) + } + + @Test + fun `M only keep 1 span W buildSpan { trace rate limit is 1 and sample rate is specified }`( + @StringForgery fakeInstrumentationName: String, + @IntForgery(min = 2, max = 10) numberOfSpans: Int, + forge: Forge + ) { + // Given + val testedProvider = OtelTracerProvider.Builder(stubSdkCore) + .setTraceRateLimit(1) + .setSampleRate(100.0) + .build() + val tracer = testedProvider.tracerBuilder(fakeInstrumentationName).build() + val blockingWriterWrapper = tracer.useBlockingWriter() + + // When + repeat(numberOfSpans) { + tracer.spanBuilder(forge.anAlphabeticalString()).startSpan().end() + } + + // Then + blockingWriterWrapper.waitForTracesMax(numberOfSpans) + val spansWritten = stubSdkCore.eventsWritten(Feature.TRACING_FEATURE_NAME) + .map { + (JsonParser.parseString(it.eventData) as JsonObject) + .getAsJsonArray("spans") + .get(0) + .asJsonObject + } + val userDroppedSpans = spansWritten.filter { + it.getInt(SAMPLING_PRIORITY_KEY) == PrioritySampling.USER_DROP.toInt() + } + val samplerDroppedSpans = spansWritten.filter { + it.getInt(SAMPLING_PRIORITY_KEY) == PrioritySampling.SAMPLER_DROP.toInt() + } + val userKeptSpans = spansWritten.filter { + it.getInt(SAMPLING_PRIORITY_KEY) == PrioritySampling.USER_KEEP.toInt() + } + val samplerKeptSpans = spansWritten.filter { + it.getInt(SAMPLING_PRIORITY_KEY) == PrioritySampling.SAMPLER_KEEP.toInt() + } + assertThat(userDroppedSpans.size + userKeptSpans.size).isEqualTo(numberOfSpans) + assertThat(samplerDroppedSpans.size).isEqualTo(0) + assertThat(userKeptSpans.size).isEqualTo(1) + assertThat(samplerKeptSpans.size).isEqualTo(0) + assertThat(userDroppedSpans.size).isEqualTo(numberOfSpans - 1) + } + + // endregion + // region Bundle with RUM @RepeatedTest(10) @@ -972,6 +1148,7 @@ internal class OtelTracerProviderTest { // endregion companion object { + private val ONE_SECOND_AS_NANOS = 
TimeUnit.SECONDS.toNanos(1) private const val DEFAULT_SPAN_NAME = "internal" private const val JOIN_TIMEOUT_MS = 5000L private const val SAMPLING_PRIORITY_KEY = "metrics._sampling_priority_v1"