Skip to content

Commit

Permalink
RUM-6866 create the KnuthStableSampler
Browse files Browse the repository at this point in the history
  • Loading branch information
xgouchet committed Nov 13, 2024
1 parent 808f3d7 commit 0224918
Show file tree
Hide file tree
Showing 5 changed files with 356 additions and 0 deletions.
6 changes: 6 additions & 0 deletions dd-sdk-android-core/api/apiSurface
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,12 @@ interface com.datadog.android.core.persistence.Serializer<T: Any>
fun <T: Any> Serializer<T>.serializeToByteArray(T, com.datadog.android.api.InternalLogger): ByteArray?
data class com.datadog.android.core.persistence.datastore.DataStoreContent<T: Any>
constructor(Int, T?)
open class com.datadog.android.core.sampling.DeterministicSampler<T: Any> : Sampler<T>
constructor((T) -> ULong, () -> Float)
constructor((T) -> ULong, Float)
constructor((T) -> ULong, Double)
override fun sample(T): Boolean
override fun getSampleRate(): Float
open class com.datadog.android.core.sampling.RateBasedSampler<T: Any> : Sampler<T>
constructor(() -> Float)
constructor(Float)
Expand Down
9 changes: 9 additions & 0 deletions dd-sdk-android-core/api/dd-sdk-android-core.api
Original file line number Diff line number Diff line change
Expand Up @@ -867,6 +867,15 @@ public final class com/datadog/android/core/persistence/datastore/DataStoreConte
public fun toString ()Ljava/lang/String;
}

public class com/datadog/android/core/sampling/DeterministicSampler : com/datadog/android/core/sampling/Sampler {
public static final field SAMPLE_ALL_RATE F
public fun <init> (Lkotlin/jvm/functions/Function1;D)V
public fun <init> (Lkotlin/jvm/functions/Function1;F)V
public fun <init> (Lkotlin/jvm/functions/Function1;Lkotlin/jvm/functions/Function0;)V
public fun getSampleRate ()Ljava/lang/Float;
public fun sample (Ljava/lang/Object;)Z
}

public class com/datadog/android/core/sampling/RateBasedSampler : com/datadog/android/core/sampling/Sampler {
public static final field SAMPLE_ALL_RATE F
public fun <init> (D)V
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0.
* This product includes software developed at Datadog (https://www.datadoghq.com/).
* Copyright 2016-Present Datadog, Inc.
*/

package com.datadog.android.core.sampling

import androidx.annotation.FloatRange
import com.datadog.android.api.InternalLogger

/**
 * A [Sampler] whose decision is derived from a stable identifier of the sampled item
 * rather than from a random draw, so that the decision stays stable across sources
 * sharing the same identifier and sample rate.
 *
 * @param T the type of items to sample.
 * @param idConverter a lambda converting the input item into a stable numerical identifier
 * @param sampleRateProvider Provider for the sample rate value which will be called each time
 * the sampling decision needs to be made. All the values should be on the scale [0;100].
 */
open class DeterministicSampler<T : Any>(
    private val idConverter: (T) -> ULong,
    private val sampleRateProvider: () -> Float
) : Sampler<T> {

    /**
     * Creates a new instance of [DeterministicSampler] with a fixed sample rate.
     *
     * @param idConverter a lambda converting the input item into a stable numerical identifier
     * @param sampleRate Sample rate to use, on the scale [0;100].
     */
    constructor(
        idConverter: (T) -> ULong,
        @FloatRange(from = 0.0, to = 100.0) sampleRate: Float
    ) : this(idConverter, { sampleRate })

    /**
     * Creates a new instance of [DeterministicSampler] with a fixed sample rate
     * expressed as a [Double]; the value is narrowed to a [Float] before use.
     *
     * @param idConverter a lambda converting the input item into a stable numerical identifier
     * @param sampleRate Sample rate to use, on the scale [0;100].
     */
    constructor(
        idConverter: (T) -> ULong,
        @FloatRange(from = 0.0, to = 100.0) sampleRate: Double
    ) : this(idConverter, sampleRate.toFloat())

    /** @inheritDoc */
    override fun sample(item: T): Boolean {
        val effectiveRate = getSampleRate()
        return when {
            // Boundary rates short-circuit: the identifier is not even computed.
            effectiveRate >= SAMPLE_ALL_RATE -> true
            effectiveRate <= 0f -> false
            else -> {
                // ULong multiplication wraps around on overflow, which is what
                // turns the product into a hash spread over the whole ULong range.
                val knuthHash = idConverter(item) * SAMPLER_HASHER
                // Portion of the ULong range matching the requested percentage.
                val keepBelow = (MAX_ID.toDouble() * effectiveRate / SAMPLE_ALL_RATE).toULong()
                knuthHash < keepBelow
            }
        }
    }

    /** @inheritDoc */
    override fun getSampleRate(): Float {
        val rawSampleRate = sampleRateProvider()
        // Out-of-range values are reported to the user and clamped to [0;100].
        return when {
            rawSampleRate < 0f -> {
                InternalLogger.UNBOUND.log(
                    InternalLogger.Level.WARN,
                    InternalLogger.Target.USER,
                    { "Sample rate value provided $rawSampleRate is below 0, setting it to 0." }
                )
                0f
            }

            rawSampleRate > SAMPLE_ALL_RATE -> {
                InternalLogger.UNBOUND.log(
                    InternalLogger.Level.WARN,
                    InternalLogger.Target.USER,
                    { "Sample rate value provided $rawSampleRate is above 100, setting it to 100." }
                )
                SAMPLE_ALL_RATE
            }

            else -> rawSampleRate
        }
    }

    private companion object {
        // Sample rate at (or above) which every item is kept.
        const val SAMPLE_ALL_RATE = 100f

        // Good number for Knuth hashing (large, prime, fit in int64 for languages without uint64)
        private val SAMPLER_HASHER: ULong = 1111111111111111111u

        // Largest value a ULong can hold (2^64 - 1), i.e. the upper bound of the hash range.
        private val MAX_ID: ULong = 0xFFFFFFFFFFFFFFFFUL
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0.
* This product includes software developed at Datadog (https://www.datadoghq.com/).
* Copyright 2016-Present Datadog, Inc.
*/

package com.datadog.trace.sampling;

import androidx.annotation.NonNull;
import androidx.annotation.Nullable;

import com.datadog.android.core.sampling.Sampler;

/**
* This is a pseudo-duplicate of the java implementation for testing purposes only to ensure
* compatibility between our generic implementation and the one in our backend agent.
*
* The sampling decision multiplies the item by {@link #KNUTH_FACTOR} and compares the result
* with a cutoff derived from the rate. Both sides of the comparison are offset by
* {@code Long.MIN_VALUE}, which lets the signed {@code <} operator order the values as if
* they were unsigned 64 bits integers.
*
* NOTE(review): the arithmetic is kept as-is on purpose; the position of the casts in
* {@link #cutoff(double)} matters, do not "simplify" it.
*/
public class JavaDeterministicSampler implements Sampler<Long> {

// Multiplier applied to the item before comparing it with the cutoff.
private static final long KNUTH_FACTOR = 1111111111111111111L;

// 2^64 - 1 computed in double precision (a double cannot represent it exactly,
// so the stored value is rounded).
private static final double MAX = Math.pow(2, 64) - 1;

// Sample rate as provided to the constructor; cutoff() treats any value >= 1.0 as
// "sample everything", so it is expected on a [0;1] scale rather than [0;100].
private final float rate;

/**
* @param rate the sample rate, stored without any validation or clamping
*/
public JavaDeterministicSampler(float rate) {
this.rate = rate;
}

/**
* @param item the identifier to sample (unboxed, the multiplication wraps on overflow)
* @return true when the offset hash of the item is strictly below the cutoff for the rate
*/
@Override
public boolean sample(@NonNull Long item) {
// Adding Long.MIN_VALUE flips the sign bit: combined with the same offset applied
// in cutoff(), the signed comparison behaves like an unsigned one.
return item * KNUTH_FACTOR + Long.MIN_VALUE < cutoff(rate);
}

/**
* @return the rate given to the constructor, boxed and unmodified
*/
@Nullable
@Override
public Float getSampleRate() {
return rate;
}

/**
* Converts a rate into the (offset) upper bound used by {@link #sample(Long)}.
*
* @param rate the sample rate (shadows the field; the float field is widened to double
* at the call site)
* @return the cutoff, already shifted by {@code Long.MIN_VALUE}
*/
private long cutoff(double rate) {
if (rate < 0.5) {
// For rates in [0;0.5) the product stays below 2^63: it is truncated to a long
// first, then the offset is applied with long arithmetic.
return (long) (rate * MAX) + Long.MIN_VALUE;
}
if (rate < 1.0) {
// From 0.5 upward the product reaches 2^63 and would saturate if cast directly,
// so the offset is applied in double arithmetic before the cast.
return (long) ((rate * MAX) + Long.MIN_VALUE);
}
// Any other value (1.0 and above, or NaN since both comparisons fail) yields the
// highest possible cutoff.
return Long.MAX_VALUE;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
package com.datadog.android.core.sampling

import com.datadog.android.utils.forge.Configurator
import com.datadog.trace.sampling.JavaDeterministicSampler
import fr.xgouchet.elmyr.annotation.FloatForgery
import fr.xgouchet.elmyr.annotation.LongForgery
import fr.xgouchet.elmyr.junit5.ForgeConfiguration
import fr.xgouchet.elmyr.junit5.ForgeExtension
import org.assertj.core.api.Assertions.assertThat
import org.assertj.core.data.Offset
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.RepeatedTest
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.extension.ExtendWith
import org.junit.jupiter.api.extension.Extensions
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.Arguments
import org.junit.jupiter.params.provider.MethodSource
import org.mockito.Mock
import org.mockito.junit.jupiter.MockitoExtension
import org.mockito.junit.jupiter.MockitoSettings
import org.mockito.kotlin.doReturn
import org.mockito.kotlin.whenever
import org.mockito.quality.Strictness
import java.util.stream.Stream

@Extensions(
    ExtendWith(MockitoExtension::class),
    ExtendWith(ForgeExtension::class)
)
@MockitoSettings(strictness = Strictness.LENIENT)
@ForgeConfiguration(Configurator::class)
internal class DeterministicSamplerTest {

    private lateinit var testedSampler: Sampler<ULong>

    // The sampled items are already numerical identifiers: convert them as-is.
    private val stubIdConverter: (ULong) -> ULong = { id -> id }

    @Mock
    lateinit var mockSampleRateProvider: () -> Float

    @BeforeEach
    fun `set up`() {
        testedSampler = DeterministicSampler(
            idConverter = stubIdConverter,
            sampleRateProvider = mockSampleRateProvider
        )
    }

    @ParameterizedTest
    @MethodSource("hardcodedFixtures")
    fun `M return consistent results W sample() {hardcodedFixtures}`(
        input: Fixture,
        expectedDecision: Boolean
    ) {
        // Given
        stubSampleRate(input.samplingRate)

        // When
        val decision = testedSampler.sample(input.traceId)

        // Then
        assertThat(decision).isEqualTo(expectedDecision)
    }

    @RepeatedTest(128)
    fun `M return consistent results W sample() {java implementation}`(
        @LongForgery traceIds: List<Long>,
        @FloatForgery(min = 0f, max = 100f) fakeSampleRate: Float
    ) {
        // Given
        stubSampleRate(fakeSampleRate)
        // The Java sampler expects a [0;1] rate where the tested one expects [0;100]
        val javaSampler = JavaDeterministicSampler(fakeSampleRate / 100f)

        // When + Then: both implementations must agree on every single id
        for (traceId in traceIds) {
            val actual = testedSampler.sample(traceId.toULong())
            val expected = javaSampler.sample(traceId)

            assertThat(actual).isEqualTo(expected)
        }
    }

    @RepeatedTest(128)
    fun `the sampler will sample the values based on the fixed sample rate`(
        @LongForgery traceIds: List<Long>,
        @FloatForgery(min = 0f, max = 100f) fakeSampleRate: Float
    ) {
        // Given
        stubSampleRate(fakeSampleRate)

        // When
        val sampledIn = countSampledIn(traceIds)

        // Then
        val expectedCount = traceIds.size * fakeSampleRate / 100f
        assertThat(sampledIn.toFloat()).isCloseTo(expectedCount, Offset.offset(7.5f))
    }

    @Test
    fun `when sample rate is 0 all values will be dropped`(
        @LongForgery traceIds: List<Long>
    ) {
        // Given
        stubSampleRate(0f)

        // When
        val sampledIn = countSampledIn(traceIds)

        // Then
        assertThat(sampledIn).isEqualTo(0)
    }

    @Test
    fun `when sample rate is 100 all values will pass`(
        @LongForgery traceIds: List<Long>
    ) {
        // Given
        stubSampleRate(100f)

        // When
        val sampledIn = countSampledIn(traceIds)

        // Then
        assertThat(sampledIn).isEqualTo(traceIds.size)
    }

    @Test
    fun `when sample rate is below 0 it is normalized to 0`(
        @FloatForgery(max = 0f) fakeSampleRate: Float
    ) {
        // Given
        stubSampleRate(fakeSampleRate)

        // When
        val effectiveSampleRate = testedSampler.getSampleRate()

        // Then
        assertThat(effectiveSampleRate).isZero
    }

    @Test
    fun `when sample rate is above 100 it is normalized to 100`(
        @FloatForgery(min = 100.01f) fakeSampleRate: Float
    ) {
        // Given
        stubSampleRate(fakeSampleRate)

        // When
        val effectiveSampleRate = testedSampler.getSampleRate()

        // Then
        assertThat(effectiveSampleRate).isEqualTo(100f)
    }

    /** Makes the mocked provider answer with the given sample rate. */
    private fun stubSampleRate(sampleRate: Float) {
        whenever(mockSampleRateProvider.invoke()) doReturn sampleRate
    }

    /** Runs every id through the tested sampler and counts how many were kept. */
    private fun countSampledIn(traceIds: List<Long>): Int =
        traceIds.count { testedSampler.sample(it.toULong()) }

    /**
     * A data class is necessary to wrap the ULong, otherwise the jvm runner
     * converts it to Long at some point.
     */
    data class Fixture(
        val traceId: ULong,
        val samplingRate: Float
    )

    companion object {

        // These hardcoded values ensure we are consistent with the decisions of our
        // Backend implementation of the knuth sampling method.
        // Each id comes as a pair: the rate just below and just above its flipping point.
        @Suppress("unused")
        @JvmStatic
        fun hardcodedFixtures(): Stream<Arguments> = Stream.of(
            Arguments.of(Fixture(4815162342u, 55.9f), false),
            Arguments.of(Fixture(4815162342u, 56.0f), true),
            Arguments.of(Fixture(1415926535897932384u, 90.5f), false),
            Arguments.of(Fixture(1415926535897932384u, 90.6f), true),
            Arguments.of(Fixture(718281828459045235u, 7.4f), false),
            Arguments.of(Fixture(718281828459045235u, 7.5f), true),
            Arguments.of(Fixture(41421356237309504u, 32.1f), false),
            Arguments.of(Fixture(41421356237309504u, 32.2f), true),
            Arguments.of(Fixture(6180339887498948482u, 68.2f), false),
            Arguments.of(Fixture(6180339887498948482u, 68.3f), true)
        )
    }
}

0 comments on commit 0224918

Please sign in to comment.