From 0224918a23efd8e93f1ae329709f15691fea3b3d Mon Sep 17 00:00:00 2001 From: "Xavier F. Gouchet" Date: Wed, 13 Nov 2024 10:10:35 +0100 Subject: [PATCH] RUM-6866 create the KnuthStableSampler --- dd-sdk-android-core/api/apiSurface | 6 + .../api/dd-sdk-android-core.api | 9 + .../core/sampling/DeterministicSampler.kt | 91 ++++++++ .../sampling/JavaDeterministicSampler.java | 50 +++++ .../core/sampling/DeterministicSamplerTest.kt | 200 ++++++++++++++++++ 5 files changed, 356 insertions(+) create mode 100644 dd-sdk-android-core/src/main/kotlin/com/datadog/android/core/sampling/DeterministicSampler.kt create mode 100644 dd-sdk-android-core/src/test/java/com/datadog/trace/sampling/JavaDeterministicSampler.java create mode 100644 dd-sdk-android-core/src/test/kotlin/com/datadog/android/core/sampling/DeterministicSamplerTest.kt diff --git a/dd-sdk-android-core/api/apiSurface b/dd-sdk-android-core/api/apiSurface index 495016aa86..baba7394e6 100644 --- a/dd-sdk-android-core/api/apiSurface +++ b/dd-sdk-android-core/api/apiSurface @@ -323,6 +323,12 @@ interface com.datadog.android.core.persistence.Serializer fun Serializer.serializeToByteArray(T, com.datadog.android.api.InternalLogger): ByteArray? data class com.datadog.android.core.persistence.datastore.DataStoreContent constructor(Int, T?) 
+open class com.datadog.android.core.sampling.DeterministicSampler : Sampler + constructor((T) -> ULong, () -> Float) + constructor((T) -> ULong, Float) + constructor((T) -> ULong, Double) + override fun sample(T): Boolean + override fun getSampleRate(): Float open class com.datadog.android.core.sampling.RateBasedSampler : Sampler constructor(() -> Float) constructor(Float) diff --git a/dd-sdk-android-core/api/dd-sdk-android-core.api b/dd-sdk-android-core/api/dd-sdk-android-core.api index 34ac9c2b3d..4f0044a25c 100644 --- a/dd-sdk-android-core/api/dd-sdk-android-core.api +++ b/dd-sdk-android-core/api/dd-sdk-android-core.api @@ -867,6 +867,15 @@ public final class com/datadog/android/core/persistence/datastore/DataStoreConte public fun toString ()Ljava/lang/String; } +public class com/datadog/android/core/sampling/DeterministicSampler : com/datadog/android/core/sampling/Sampler { + public static final field SAMPLE_ALL_RATE F + public fun (Lkotlin/jvm/functions/Function1;D)V + public fun (Lkotlin/jvm/functions/Function1;F)V + public fun (Lkotlin/jvm/functions/Function1;Lkotlin/jvm/functions/Function0;)V + public fun getSampleRate ()Ljava/lang/Float; + public fun sample (Ljava/lang/Object;)Z +} + public class com/datadog/android/core/sampling/RateBasedSampler : com/datadog/android/core/sampling/Sampler { public static final field SAMPLE_ALL_RATE F public fun (D)V diff --git a/dd-sdk-android-core/src/main/kotlin/com/datadog/android/core/sampling/DeterministicSampler.kt b/dd-sdk-android-core/src/main/kotlin/com/datadog/android/core/sampling/DeterministicSampler.kt new file mode 100644 index 0000000000..f6e67f88ab --- /dev/null +++ b/dd-sdk-android-core/src/main/kotlin/com/datadog/android/core/sampling/DeterministicSampler.kt @@ -0,0 +1,91 @@ +/* + * Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0. + * This product includes software developed at Datadog (https://www.datadoghq.com/). 
+ * Copyright 2016-Present Datadog, Inc. + */ + +package com.datadog.android.core.sampling + +import androidx.annotation.FloatRange +import com.datadog.android.api.InternalLogger + +/** + * [Sampler] with the given sample rate using a deterministic algorithm for a stable + * sampling decision across sources. + * + * @param T the type of items to sample. + * @param idConverter a lambda converting the input item into a stable numerical identifier + * @param sampleRateProvider Provider for the sample rate value which will be called each time + * the sampling decision needs to be made. All the values should be on the scale [0;100]. + */ +open class DeterministicSampler( + private val idConverter: (T) -> ULong, + private val sampleRateProvider: () -> Float +) : Sampler { + + /** + * Creates a new instance of [DeterministicSampler] with the given sample rate. + * + * @param sampleRate Sample rate to use. + */ + constructor( + idConverter: (T) -> ULong, + @FloatRange(from = 0.0, to = 100.0) sampleRate: Float + ) : this(idConverter, { sampleRate }) + + /** + * Creates a new instance of [DeterministicSampler] with the given sample rate. + * + * @param sampleRate Sample rate to use. 
+ */ + constructor( + idConverter: (T) -> ULong, + @FloatRange(from = 0.0, to = 100.0) sampleRate: Double + ) : this(idConverter, sampleRate.toFloat()) + + /** @inheritDoc */ + override fun sample(item: T): Boolean { + val sampleRate = getSampleRate() + if (sampleRate >= SAMPLE_ALL_RATE) { + return true + } else if (sampleRate <= 0f) { + return false + } else { + val stableId = idConverter(item) + val hash = stableId * SAMPLER_HASHER + val threshold = (MAX_ID.toDouble() * sampleRate / SAMPLE_ALL_RATE).toULong() + return hash < threshold + } + } + + /** @inheritDoc */ + override fun getSampleRate(): Float { + val rawSampleRate = sampleRateProvider() + return if (rawSampleRate < 0f) { + InternalLogger.UNBOUND.log( + InternalLogger.Level.WARN, + InternalLogger.Target.USER, + { "Sample rate value provided $rawSampleRate is below 0, setting it to 0." } + ) + 0f + } else if (rawSampleRate > SAMPLE_ALL_RATE) { + InternalLogger.UNBOUND.log( + InternalLogger.Level.WARN, + InternalLogger.Target.USER, + { "Sample rate value provided $rawSampleRate is above 100, setting it to 100." } + ) + SAMPLE_ALL_RATE + } else { + rawSampleRate + } + } + + private companion object { + const val SAMPLE_ALL_RATE = 100f + + // Good number for Knuth hashing (large, prime, fits in int64 for languages without uint64) + private val SAMPLER_HASHER: ULong = 1111111111111111111u + + private val MAX_ID: ULong = 0xFFFFFFFFFFFFFFFFUL + } +} diff --git a/dd-sdk-android-core/src/test/java/com/datadog/trace/sampling/JavaDeterministicSampler.java b/dd-sdk-android-core/src/test/java/com/datadog/trace/sampling/JavaDeterministicSampler.java new file mode 100644 index 0000000000..cb9917c536 --- /dev/null +++ b/dd-sdk-android-core/src/test/java/com/datadog/trace/sampling/JavaDeterministicSampler.java @@ -0,0 +1,50 @@ +/* + * Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0. 
+ * This product includes software developed at Datadog (https://www.datadoghq.com/). + * Copyright 2016-Present Datadog, Inc. + */ + +package com.datadog.trace.sampling; + +import androidx.annotation.NonNull; +import androidx.annotation.Nullable; + +import com.datadog.android.core.sampling.Sampler; + +/** + * This is a pseudo-duplicate of the Java implementation for testing purposes only to ensure + * compatibility between our generic implementation and the one in our backend agent. + */ +public class JavaDeterministicSampler implements Sampler { + + private static final long KNUTH_FACTOR = 1111111111111111111L; + + private static final double MAX = Math.pow(2, 64) - 1; + + private final float rate; + + public JavaDeterministicSampler(float rate) { + this.rate = rate; + } + + @Override + public boolean sample(@NonNull Long item) { + return item * KNUTH_FACTOR + Long.MIN_VALUE < cutoff(rate); + } + + @Nullable + @Override + public Float getSampleRate() { + return rate; + } + + private long cutoff(double rate) { + if (rate < 0.5) { + return (long) (rate * MAX) + Long.MIN_VALUE; + } + if (rate < 1.0) { + return (long) ((rate * MAX) + Long.MIN_VALUE); + } + return Long.MAX_VALUE; + } +} diff --git a/dd-sdk-android-core/src/test/kotlin/com/datadog/android/core/sampling/DeterministicSamplerTest.kt b/dd-sdk-android-core/src/test/kotlin/com/datadog/android/core/sampling/DeterministicSamplerTest.kt new file mode 100644 index 0000000000..1323fb8503 --- /dev/null +++ b/dd-sdk-android-core/src/test/kotlin/com/datadog/android/core/sampling/DeterministicSamplerTest.kt @@ -0,0 +1,200 @@ +package com.datadog.android.core.sampling + +import com.datadog.android.utils.forge.Configurator +import com.datadog.trace.sampling.JavaDeterministicSampler +import fr.xgouchet.elmyr.annotation.FloatForgery +import fr.xgouchet.elmyr.annotation.LongForgery +import fr.xgouchet.elmyr.junit5.ForgeConfiguration +import fr.xgouchet.elmyr.junit5.ForgeExtension +import 
org.assertj.core.api.Assertions.assertThat +import org.assertj.core.data.Offset +import org.junit.jupiter.api.BeforeEach +import org.junit.jupiter.api.RepeatedTest +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.extension.ExtendWith +import org.junit.jupiter.api.extension.Extensions +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.Arguments +import org.junit.jupiter.params.provider.MethodSource +import org.mockito.Mock +import org.mockito.junit.jupiter.MockitoExtension +import org.mockito.junit.jupiter.MockitoSettings +import org.mockito.kotlin.doReturn +import org.mockito.kotlin.whenever +import org.mockito.quality.Strictness +import java.util.stream.Stream + +@Extensions( + ExtendWith(MockitoExtension::class), + ExtendWith(ForgeExtension::class) +) +@MockitoSettings(strictness = Strictness.LENIENT) +@ForgeConfiguration(Configurator::class) +internal class DeterministicSamplerTest { + + private lateinit var testedSampler: Sampler + + private var stubIdConverter: (ULong) -> ULong = { it } + + @Mock + lateinit var mockSampleRateProvider: () -> Float + + @BeforeEach + fun `set up`() { + testedSampler = DeterministicSampler( + stubIdConverter, + mockSampleRateProvider + ) + } + + @ParameterizedTest + @MethodSource("hardcodedFixtures") + fun `M return consistent results W sample() {hardcodedFixtures}`( + input: Fixture, + expectedDecision: Boolean + ) { + // Given + whenever(mockSampleRateProvider.invoke()) doReturn input.samplingRate + + // When + val sampled = testedSampler.sample(input.traceId) + + // Then + assertThat(sampled).isEqualTo(expectedDecision) + } + + @RepeatedTest(128) + fun `M return consistent results W sample() {java implementation}`( + @LongForgery traceIds: List, + @FloatForgery(min = 0f, max = 100f) fakeSampleRate: Float + ) { + // Given + whenever(mockSampleRateProvider.invoke()) doReturn fakeSampleRate + val javaSampler = JavaDeterministicSampler(fakeSampleRate / 100f) + + // When + 
traceIds.forEach { + val result = testedSampler.sample(it.toULong()) + val expectedResult = javaSampler.sample(it) + + assertThat(result).isEqualTo(expectedResult) + } + } + + @RepeatedTest(128) + fun `the sampler will sample the values based on the fixed sample rate`( + @LongForgery traceIds: List, + @FloatForgery(min = 0f, max = 100f) fakeSampleRate: Float + ) { + // Given + whenever(mockSampleRateProvider.invoke()) doReturn fakeSampleRate + var sampledIn = 0 + + // When + traceIds.forEach { + if (testedSampler.sample(it.toULong())) { + sampledIn++ + } + } + + // Then + assertThat(sampledIn.toFloat()).isCloseTo(traceIds.size * fakeSampleRate / 100f, Offset.offset(7.5f)) + } + + @Test + fun `when sample rate is 0 all values will be dropped`( + @LongForgery traceIds: List + ) { + // Given + whenever(mockSampleRateProvider.invoke()) doReturn 0f + var sampledIn = 0 + + // When + traceIds.forEach { + if (testedSampler.sample(it.toULong())) { + sampledIn++ + } + } + + // Then + assertThat(sampledIn).isEqualTo(0) + } + + @Test + fun `when sample rate is 100 all values will pass`( + @LongForgery traceIds: List + ) { + // Given + whenever(mockSampleRateProvider.invoke()) doReturn 100f + var sampledIn = 0 + + // When + traceIds.forEach { + if (testedSampler.sample(it.toULong())) { + sampledIn++ + } + } + + // Then + assertThat(sampledIn).isEqualTo(traceIds.size) + } + + @Test + fun `when sample rate is below 0 it is normalized to 0`( + @FloatForgery(max = 0f) fakeSampleRate: Float + ) { + // Given + whenever(mockSampleRateProvider.invoke()) doReturn fakeSampleRate + + // When + val effectiveSampleRate = testedSampler.getSampleRate() + + // Then + assertThat(effectiveSampleRate).isZero + } + + @Test + fun `when sample rate is above 100 it is normalized to 100`( + @FloatForgery(min = 100.01f) fakeSampleRate: Float + ) { + // Given + whenever(mockSampleRateProvider.invoke()) doReturn fakeSampleRate + + // When + val effectiveSampleRate = testedSampler.getSampleRate() + + // 
Then + assertThat(effectiveSampleRate).isEqualTo(100f) + } + + /** + * A data class is necessary to wrap the ULong, otherwise the JVM runner + * converts it to Long at some point. + */ + data class Fixture( + val traceId: ULong, + val samplingRate: Float + ) + + companion object { + + // These hardcoded values ensure we are consistent with the decisions of our + // backend implementation of the Knuth sampling method + @Suppress("unused") + @JvmStatic + fun hardcodedFixtures(): Stream { + return listOf( + Arguments.of(Fixture(4815162342u, 55.9f), false), + Arguments.of(Fixture(4815162342u, 56.0f), true), + Arguments.of(Fixture(1415926535897932384u, 90.5f), false), + Arguments.of(Fixture(1415926535897932384u, 90.6f), true), + Arguments.of(Fixture(718281828459045235u, 7.4f), false), + Arguments.of(Fixture(718281828459045235u, 7.5f), true), + Arguments.of(Fixture(41421356237309504u, 32.1f), false), + Arguments.of(Fixture(41421356237309504u, 32.2f), true), + Arguments.of(Fixture(6180339887498948482u, 68.2f), false), + Arguments.of(Fixture(6180339887498948482u, 68.3f), true) + ).stream() + } + } +}