From 566a8961ecff9022249da9ce94ba07dccfae0096 Mon Sep 17 00:00:00 2001 From: "Xavier F. Gouchet" Date: Wed, 13 Nov 2024 10:10:35 +0100 Subject: [PATCH] RUM-6866 create the KnuthStableSampler --- dd-sdk-android-core/api/apiSurface | 6 + .../api/dd-sdk-android-core.api | 9 + .../core/sampling/DeterministicSampler.kt | 93 ++++++++ .../sampling/JavaDeterministicSampler.java | 50 +++++ .../core/sampling/DeterministicSamplerTest.kt | 200 ++++++++++++++++++ detekt_custom.yml | 6 +- 6 files changed, 362 insertions(+), 2 deletions(-) create mode 100644 dd-sdk-android-core/src/main/kotlin/com/datadog/android/core/sampling/DeterministicSampler.kt create mode 100644 dd-sdk-android-core/src/test/java/com/datadog/trace/sampling/JavaDeterministicSampler.java create mode 100644 dd-sdk-android-core/src/test/kotlin/com/datadog/android/core/sampling/DeterministicSamplerTest.kt diff --git a/dd-sdk-android-core/api/apiSurface b/dd-sdk-android-core/api/apiSurface index 495016aa86..baba7394e6 100644 --- a/dd-sdk-android-core/api/apiSurface +++ b/dd-sdk-android-core/api/apiSurface @@ -323,6 +323,12 @@ interface com.datadog.android.core.persistence.Serializer fun Serializer.serializeToByteArray(T, com.datadog.android.api.InternalLogger): ByteArray? data class com.datadog.android.core.persistence.datastore.DataStoreContent constructor(Int, T?) 
+open class com.datadog.android.core.sampling.DeterministicSampler : Sampler + constructor((T) -> ULong, () -> Float) + constructor((T) -> ULong, Float) + constructor((T) -> ULong, Double) + override fun sample(T): Boolean + override fun getSampleRate(): Float open class com.datadog.android.core.sampling.RateBasedSampler : Sampler constructor(() -> Float) constructor(Float) diff --git a/dd-sdk-android-core/api/dd-sdk-android-core.api b/dd-sdk-android-core/api/dd-sdk-android-core.api index 34ac9c2b3d..4f0044a25c 100644 --- a/dd-sdk-android-core/api/dd-sdk-android-core.api +++ b/dd-sdk-android-core/api/dd-sdk-android-core.api @@ -867,6 +867,15 @@ public final class com/datadog/android/core/persistence/datastore/DataStoreConte public fun toString ()Ljava/lang/String; } +public class com/datadog/android/core/sampling/DeterministicSampler : com/datadog/android/core/sampling/Sampler { + public static final field SAMPLE_ALL_RATE F + public fun <init> (Lkotlin/jvm/functions/Function1;D)V + public fun <init> (Lkotlin/jvm/functions/Function1;F)V + public fun <init> (Lkotlin/jvm/functions/Function1;Lkotlin/jvm/functions/Function0;)V + public fun getSampleRate ()Ljava/lang/Float; + public fun sample (Ljava/lang/Object;)Z +} + public class com/datadog/android/core/sampling/RateBasedSampler : com/datadog/android/core/sampling/Sampler { public static final field SAMPLE_ALL_RATE F public fun <init> (D)V diff --git a/dd-sdk-android-core/src/main/kotlin/com/datadog/android/core/sampling/DeterministicSampler.kt b/dd-sdk-android-core/src/main/kotlin/com/datadog/android/core/sampling/DeterministicSampler.kt new file mode 100644 index 0000000000..14dedfa63b --- /dev/null +++ b/dd-sdk-android-core/src/main/kotlin/com/datadog/android/core/sampling/DeterministicSampler.kt @@ -0,0 +1,93 @@ +/* + * Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0. + * This product includes software developed at Datadog (https://www.datadoghq.com/).
+ * Copyright 2016-Present Datadog, Inc. + */ + +package com.datadog.android.core.sampling + +import androidx.annotation.FloatRange +import com.datadog.android.api.InternalLogger + +/** + * [Sampler] with the given sample rate using a deterministic algorithm for a stable + * sampling decision across sources. + * + * @param T the type of items to sample. + * @param idConverter a lambda converting the input item into a stable numerical identifier + * @param sampleRateProvider Provider for the sample rate value which will be called each time + * the sampling decision needs to be made. All the values should be on the scale [0;100]. + */ +open class DeterministicSampler( + private val idConverter: (T) -> ULong, + private val sampleRateProvider: () -> Float +) : Sampler { + + /** + * Creates a new instance of [DeterministicSampler] with the given sample rate. + * + * @param idConverter a lambda converting the input item into a stable numerical identifier + * @param sampleRate Sample rate to use.
+ */ + constructor( + idConverter: (T) -> ULong, + @FloatRange(from = 0.0, to = 100.0) sampleRate: Double + ) : this(idConverter, sampleRate.toFloat()) + + /** @inheritDoc */ + override fun sample(item: T): Boolean { + val sampleRate = getSampleRate() + + return when { + sampleRate >= SAMPLE_ALL_RATE -> true + sampleRate <= 0f -> false + else -> { + val hash = idConverter(item) * SAMPLER_HASHER + val threshold = (MAX_ID.toDouble() * sampleRate / SAMPLE_ALL_RATE).toULong() + hash < threshold + } + } + } + + /** @inheritDoc */ + override fun getSampleRate(): Float { + val rawSampleRate = sampleRateProvider() + return if (rawSampleRate < 0f) { + InternalLogger.UNBOUND.log( + InternalLogger.Level.WARN, + InternalLogger.Target.USER, + { "Sample rate value provided $rawSampleRate is below 0, setting it to 0." } + ) + 0f + } else if (rawSampleRate > SAMPLE_ALL_RATE) { + InternalLogger.UNBOUND.log( + InternalLogger.Level.WARN, + InternalLogger.Target.USER, + { "Sample rate value provided $rawSampleRate is above 100, setting it to 100." } + ) + SAMPLE_ALL_RATE + } else { + rawSampleRate + } + } + + private companion object { + const val SAMPLE_ALL_RATE = 100f + + // Good number for Knuth hashing (large, prime, fit in int64 for languages without uint64) + private const val SAMPLER_HASHER: ULong = 1111111111111111111u + + private const val MAX_ID: ULong = 0xFFFFFFFFFFFFFFFFUL + } +} diff --git a/dd-sdk-android-core/src/test/java/com/datadog/trace/sampling/JavaDeterministicSampler.java b/dd-sdk-android-core/src/test/java/com/datadog/trace/sampling/JavaDeterministicSampler.java new file mode 100644 index 0000000000..cb9917c536 --- /dev/null +++ b/dd-sdk-android-core/src/test/java/com/datadog/trace/sampling/JavaDeterministicSampler.java @@ -0,0 +1,50 @@ +/* + * Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0. + * This product includes software developed at Datadog (https://www.datadoghq.com/). 
+ * Copyright 2016-Present Datadog, Inc. + */ + +package com.datadog.trace.sampling; + +import androidx.annotation.NonNull; +import androidx.annotation.Nullable; + +import com.datadog.android.core.sampling.Sampler; + +/** + * This is a pseudo-duplicate of the java implementation for testing purposes only to ensure + * compatibility between our generic implementation and the one in our backend agent. + */ +public class JavaDeterministicSampler implements Sampler { + + private static final long KNUTH_FACTOR = 1111111111111111111L; + + private static final double MAX = Math.pow(2, 64) - 1; + + private final float rate; + + public JavaDeterministicSampler(float rate) { + this.rate = rate; + } + + @Override + public boolean sample(@NonNull Long item) { + return item * KNUTH_FACTOR + Long.MIN_VALUE < cutoff(rate); + } + + @Nullable + @Override + public Float getSampleRate() { + return rate; + } + + private long cutoff(double rate) { + if (rate < 0.5) { + return (long) (rate * MAX) + Long.MIN_VALUE; + } + if (rate < 1.0) { + return (long) ((rate * MAX) + Long.MIN_VALUE); + } + return Long.MAX_VALUE; + } +} diff --git a/dd-sdk-android-core/src/test/kotlin/com/datadog/android/core/sampling/DeterministicSamplerTest.kt b/dd-sdk-android-core/src/test/kotlin/com/datadog/android/core/sampling/DeterministicSamplerTest.kt new file mode 100644 index 0000000000..1323fb8503 --- /dev/null +++ b/dd-sdk-android-core/src/test/kotlin/com/datadog/android/core/sampling/DeterministicSamplerTest.kt @@ -0,0 +1,200 @@ +package com.datadog.android.core.sampling + +import com.datadog.android.utils.forge.Configurator +import com.datadog.trace.sampling.JavaDeterministicSampler +import fr.xgouchet.elmyr.annotation.FloatForgery +import fr.xgouchet.elmyr.annotation.LongForgery +import fr.xgouchet.elmyr.junit5.ForgeConfiguration +import fr.xgouchet.elmyr.junit5.ForgeExtension +import org.assertj.core.api.Assertions.assertThat +import org.assertj.core.data.Offset +import org.junit.jupiter.api.BeforeEach 
+import org.junit.jupiter.api.RepeatedTest +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.extension.ExtendWith +import org.junit.jupiter.api.extension.Extensions +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.Arguments +import org.junit.jupiter.params.provider.MethodSource +import org.mockito.Mock +import org.mockito.junit.jupiter.MockitoExtension +import org.mockito.junit.jupiter.MockitoSettings +import org.mockito.kotlin.doReturn +import org.mockito.kotlin.whenever +import org.mockito.quality.Strictness +import java.util.stream.Stream + +@Extensions( + ExtendWith(MockitoExtension::class), + ExtendWith(ForgeExtension::class) +) +@MockitoSettings(strictness = Strictness.LENIENT) +@ForgeConfiguration(Configurator::class) +internal class DeterministicSamplerTest { + + private lateinit var testedSampler: Sampler + + private var stubIdConverter: (ULong) -> ULong = { it } + + @Mock + lateinit var mockSampleRateProvider: () -> Float + + @BeforeEach + fun `set up`() { + testedSampler = DeterministicSampler( + stubIdConverter, + mockSampleRateProvider + ) + } + + @ParameterizedTest + @MethodSource("hardcodedFixtures") + fun `M return consistent results W sample() {hardcodedFixtures}`( + input: Fixture, + expectedDecision: Boolean + ) { + // Given + whenever(mockSampleRateProvider.invoke()) doReturn input.samplingRate + + // When + val sampled = testedSampler.sample(input.traceId) + + // Then + assertThat(sampled).isEqualTo(expectedDecision) + } + + @RepeatedTest(128) + fun `M return consistent results W sample() {java implementation}`( + @LongForgery traceIds: List, + @FloatForgery(min = 0f, max = 100f) fakeSampleRate: Float + ) { + // Given + whenever(mockSampleRateProvider.invoke()) doReturn fakeSampleRate + val javaSampler = JavaDeterministicSampler(fakeSampleRate / 100f) + + // When + traceIds.forEach { + val result = testedSampler.sample(it.toULong()) + val expectedResult = javaSampler.sample(it) + +
assertThat(result).isEqualTo(expectedResult) + } + } + + @RepeatedTest(128) + fun `the sampler will sample the values based on the fixed sample rate`( + @LongForgery traceIds: List, + @FloatForgery(min = 0f, max = 100f) fakeSampleRate: Float + ) { + // Given + whenever(mockSampleRateProvider.invoke()) doReturn fakeSampleRate + var sampledIn = 0 + + // When + traceIds.forEach { + if (testedSampler.sample(it.toULong())) { + sampledIn++ + } + } + + // Then + assertThat(sampledIn.toFloat()).isCloseTo(traceIds.size * fakeSampleRate / 100f, Offset.offset(7.5f)) + } + + @Test + fun `when sample rate is 0 all values will be dropped`( + @LongForgery traceIds: List + ) { + // Given + whenever(mockSampleRateProvider.invoke()) doReturn 0f + var sampledIn = 0 + + // When + traceIds.forEach { + if (testedSampler.sample(it.toULong())) { + sampledIn++ + } + } + + // Then + assertThat(sampledIn).isEqualTo(0) + } + + @Test + fun `when sample rate is 100 all values will pass`( + @LongForgery traceIds: List + ) { + // Given + whenever(mockSampleRateProvider.invoke()) doReturn 100f + var sampledIn = 0 + + // When + traceIds.forEach { + if (testedSampler.sample(it.toULong())) { + sampledIn++ + } + } + + // Then + assertThat(sampledIn).isEqualTo(traceIds.size) + } + + @Test + fun `when sample rate is below 0 it is normalized to 0`( + @FloatForgery(max = 0f) fakeSampleRate: Float + ) { + // Given + whenever(mockSampleRateProvider.invoke()) doReturn fakeSampleRate + + // When + val effectiveSampleRate = testedSampler.getSampleRate() + + // Then + assertThat(effectiveSampleRate).isZero + } + + @Test + fun `when sample rate is above 100 it is normalized to 100`( + @FloatForgery(min = 100.01f) fakeSampleRate: Float + ) { + // Given + whenever(mockSampleRateProvider.invoke()) doReturn fakeSampleRate + + // When + val effectiveSampleRate = testedSampler.getSampleRate() + + // Then + assertThat(effectiveSampleRate).isEqualTo(100f) + } + + /** + * A data class is necessary to wrap the ULong, 
otherwise the jvm runner + * converts it to Long at some point. + */ + data class Fixture( + val traceId: ULong, + val samplingRate: Float + ) + + companion object { + + // These hardcoded values ensure we are consistent with the decisions of our + // Backend implementation of the Knuth sampling method + @Suppress("unused") + @JvmStatic + fun hardcodedFixtures(): Stream { + return listOf( + Arguments.of(Fixture(4815162342u, 55.9f), false), + Arguments.of(Fixture(4815162342u, 56.0f), true), + Arguments.of(Fixture(1415926535897932384u, 90.5f), false), + Arguments.of(Fixture(1415926535897932384u, 90.6f), true), + Arguments.of(Fixture(718281828459045235u, 7.4f), false), + Arguments.of(Fixture(718281828459045235u, 7.5f), true), + Arguments.of(Fixture(41421356237309504u, 32.1f), false), + Arguments.of(Fixture(41421356237309504u, 32.2f), true), + Arguments.of(Fixture(6180339887498948482u, 68.2f), false), + Arguments.of(Fixture(6180339887498948482u, 68.3f), true) + ).stream() + } + } +} diff --git a/detekt_custom.yml b/detekt_custom.yml index 997bac16a0..8a71871d6d 100644 --- a/detekt_custom.yml +++ b/detekt_custom.yml @@ -1060,17 +1060,18 @@ datadog: - "kotlin.Double.toFloat()" - "kotlin.Double.toInt()" - "kotlin.Double.toLong()" + - "kotlin.Double.toULong()" - "kotlin.Float.toDouble()" - "kotlin.Float.toFloat()" - "kotlin.Float.toInt()" - "kotlin.Float.toLong()" - "kotlin.Int.and(kotlin.Int)" + - "kotlin.Int.coerceAtMost(kotlin.Int)" - "kotlin.Int.inv()" - "kotlin.Int.toChar()" - "kotlin.Int.toDouble()" - "kotlin.Int.toFloat()" - "kotlin.Int.toLong()" - - "kotlin.Int.coerceAtMost(kotlin.Int)" - "kotlin.IntArray.constructor(kotlin.Int)" - "kotlin.IntArray.joinToString(kotlin.CharSequence, kotlin.CharSequence, kotlin.CharSequence, kotlin.Int, kotlin.CharSequence, kotlin.Function1?)" - "kotlin.Long.coerceIn(kotlin.Long, kotlin.Long)" @@ -1083,6 +1084,8 @@ datadog: - "kotlin.Number.toFloat()" - "kotlin.Number.toLong()" - "kotlin.Short.toUShort()" + -
"kotlin.String.trim(kotlin.Function1)" + - "kotlin.ULong.toDouble()" - "kotlin.UShort.toShort()" - "kotlin.math.abs(kotlin.Float)" - "kotlin.math.max(kotlin.Double, kotlin.Double)" @@ -1091,7 +1094,6 @@ datadog: - "kotlin.math.min(kotlin.Double, kotlin.Double)" - "kotlin.math.min(kotlin.Long, kotlin.Long)" - "kotlin.math.sqrt(kotlin.Double)" - - "kotlin.String.trim(kotlin.Function1)" # endregion # region Kotlin Tuples - "kotlin.Pair.constructor(kotlin.String, kotlin.Int)"