Skip to content

Commit

Permalink
Update ratio based sampler threshold algorithm
Browse files Browse the repository at this point in the history
See OTEP 235, Threshold = (1 - Probability) * 2**56. W3C Trace Context Level 2 specifies that traceId should contain at least 7 bytes of randomness.
  • Loading branch information
Nevay committed Mar 29, 2024
1 parent a1489d3 commit c0bd2c6
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 23 deletions.
87 changes: 70 additions & 17 deletions src/SDK/Trace/Sampler/TraceIdRatioBasedSampler.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,22 @@

namespace OpenTelemetry\SDK\Trace\Sampler;

use function assert;
use function bin2hex;
use InvalidArgumentException;
use function max;
use function min;
use OpenTelemetry\Context\ContextInterface;
use OpenTelemetry\SDK\Common\Attribute\AttributesInterface;
use OpenTelemetry\SDK\Trace\SamplerInterface;
use OpenTelemetry\SDK\Trace\SamplingResult;
use OpenTelemetry\SDK\Trace\Span;
use function pack;
use function rtrim;
use function sprintf;
use function substr;
use function substr_compare;
use function unpack;

/**
* This implementation of the SamplerInterface records with given probability.
Expand All @@ -22,22 +32,21 @@
class TraceIdRatioBasedSampler implements SamplerInterface
{
private readonly float $probability;
private readonly string $tv;

/**
 * @param float $probability Probability float value between 0.0 and 1.0.
 *
 * @throws InvalidArgumentException if $probability is NaN or lies outside of [0.0, 1.0]
 */
public function __construct(float $probability)
{
    // Negated range check on purpose: every comparison with NaN is false,
    // so NaN fails the inner condition and is rejected as well.
    if (!($probability >= 0 && $probability <= 1)) {
        throw new InvalidArgumentException('probability should be between 0.0 and 1.0.');
    }

    $this->probability = $probability;

    // Pre-compute the rejection threshold once, requesting a minimum precision
    // of 14 hexadecimal digits. The leading byte of the packed 64-bit value is
    // dropped, the remaining 7 bytes are hex-encoded and trailing zeros are
    // trimmed; an all-zero threshold is stored as '0'.
    $this->tv = rtrim(bin2hex(substr(pack('J', self::computeTValue($probability, 14)), 1)), '0') ?: '0';
}

/**
* Returns `SamplingResult` based on probability. Respects the parent `SampleFlag`
* {@inheritdoc}
*/
public function shouldSample(
ContextInterface $parentContext,
string $traceId,
Expand All @@ -46,20 +55,64 @@ public function shouldSample(
AttributesInterface $attributes,
array $links,
): SamplingResult {
// TODO: Add config to adjust which spans get sampled (only default from specification is implemented)
$parentSpan = Span::fromContext($parentContext);
$parentSpanContext = $parentSpan->getContext();
$traceState = $parentSpanContext->getTraceState();
$traceState = Span::fromContext($parentContext)->getContext()->getTraceState();

$samplingDecision = $this->probability >= 2 ** -56 && substr_compare($traceId, $this->tv, -14) >= 0
? SamplingResult::RECORD_AND_SAMPLE
: SamplingResult::DROP;

return new SamplingResult($samplingDecision, [], $traceState);
}

/**
 * Computes the 56-bit rejection threshold (T-value) for a given probability.
 *
 * The T-value is computed as `2**56*(1-$probability)` with a precision of
 * `2**-(4*Max(⌈(-log2($probability)+$bitPrecision)/4⌉,$minPrecision))`.
 *
 * Probabilities below `2**-56` yield `2**56`, which is `0` once reduced to
 * 56 bits (as done by the hexadecimal conversion shown below).
 *
 * ```
 * 1/3 w/ $minPrecision=3
 * => 1 - 1/3
 * => 2/3
 * => 2730.666../4096
 * => 2731/4096
 * => 0xaab
 * ```
 *
 * Converting the result into `th` hexadecimal value:
 * ```
 * $th = rtrim(bin2hex(substr(pack('J', $t), 1)), '0') ?: '0';
 * ```
 *
 * @param float $probability sampling probability, must be between 0 and 1
 * @param int $minPrecision minimum precision in hexadecimal digits
 * @param int $bitPrecision precision increase in bits
 * @return int T-value scaled to 56 bits
 *
 * @internal
 */
public static function computeTValue(float $probability, int $minPrecision = 0, int $bitPrecision = 0): int
{
    assert($probability >= 0 && $probability <= 1);
    assert($minPrecision >= 0);
    assert($bitPrecision >= 0);

    // Reinterpret the big-endian IEEE 754 double as a 64-bit integer so that
    // the exponent and fraction fields can be read with integer arithmetic.
    $bits = unpack('J', pack('E', $probability))[1];
    $exponent = ($bits >> 52) & ((1 << 11) - 1);
    // A non-zero exponent field carries an implicit leading 1 bit.
    $fraction = ($bits & ((1 << 52) - 1)) | ($exponent ? 1 << 52 : 0);

    // Scale the probability to 57 bits: 56 result bits + 1 bit for rounding.
    $shift = $exponent - 1023 - 52 + 57;
    $threshold = (1 << 57) - ($shift < 0 ? $fraction >> -$shift : $fraction << $shift);

    // Number of hexadecimal digits to keep, clamped to [$minPrecision, 14].
    $digits = (-($exponent - 1023) + 3 + $bitPrecision) >> 2;
    $digits = min(14, max($minPrecision, $digits));
    // Value of the least significant kept digit at 56-bit scale.
    $unit = 1 << ((14 - $digits) << 2);

    // Adding $unit at 57-bit scale equals half a unit at 56-bit scale, so the
    // shift plus mask rounds half up to a multiple of $unit.
    return (($threshold + $unit) >> 1) & -$unit;
}

public function getDescription(): string
Expand Down
4 changes: 2 additions & 2 deletions tests/Integration/SDK/TraceIdRatioBasedSamplerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public function test_failing_trace_id_ratio_based_sampler_decision(): void
$sampler = new TraceIdRatioBasedSampler(0.99);
$decision = $sampler->shouldSample(
Context::getRoot(),
'4bf92f3577b34da6afffffffffffffff',
'4bf92f3577b34da6a000000000000000',
'test.opentelemetry.io',
API\SpanKind::KIND_INTERNAL,
Attributes::create([]),
Expand All @@ -66,7 +66,7 @@ public function test_passing_trace_id_ratio_based_sampler_decision(): void
$sampler = new TraceIdRatioBasedSampler(0.01);
$decision = $sampler->shouldSample(
Context::getRoot(),
'4bf92f3577b34da6a000000000000000',
'4bf92f3577b34da6afffffffffffffff',
'test.opentelemetry.io',
API\SpanKind::KIND_INTERNAL,
Attributes::create([]),
Expand Down
79 changes: 75 additions & 4 deletions tests/Unit/SDK/Trace/Sampler/TraceIdRatioBasedSamplerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@

namespace OpenTelemetry\Tests\SDK\Unit\Trace\Sampler;

use function bin2hex;
use InvalidArgumentException;
use OpenTelemetry\API\Trace as API;
use OpenTelemetry\Context\Context;
use OpenTelemetry\SDK\Common\Attribute\Attributes;
use OpenTelemetry\SDK\Trace\Sampler\TraceIdRatioBasedSampler;
use OpenTelemetry\SDK\Trace\SamplingResult;
use function pack;
use PHPUnit\Framework\TestCase;
use function rtrim;
use function substr;

/**
* @coversDefaultClass OpenTelemetry\SDK\Trace\Sampler\TraceIdRatioBasedSampler
Expand All @@ -20,19 +24,85 @@ class TraceIdRatioBasedSamplerTest extends TestCase
/**
 * Checks the sampling decision for each trace id / probability pair
 * supplied by the data provider.
 *
 * @covers ::__construct
 * @covers ::shouldSample
 * @dataProvider shouldSampleProvider
 */
public function test_should_sample(string $traceId, float $probability, int $result): void
{
    $sampler = new TraceIdRatioBasedSampler($probability);
    $decision = $sampler->shouldSample(
        Context::getRoot(),
        $traceId,
        'test.opentelemetry.io',
        API\SpanKind::KIND_INTERNAL,
        Attributes::create([]),
        [],
    );
    $this->assertEquals($result, $decision->getDecision());
}

/**
 * Data sets for test_should_sample, each shaped as
 * [trace id, sampling probability, expected sampling decision].
 */
public static function shouldSampleProvider(): iterable
{
    $sampled = SamplingResult::RECORD_AND_SAMPLE;
    $dropped = SamplingResult::DROP;

    yield from [
        'otep-0235' => ['123456789123456789d29d6a7215ced0', 0.25, $sampled],

        // Same trace id evaluated against different probabilities.
        'tv=0' => ['4bf92f3577b34da6a3ce929d0e0e4736', 1.0, $sampled],
        'tv=8' => ['4bf92f3577b34da6a3ce929d0e0e4736', 0.5, $sampled],
        'tv=cccd' => ['4bf92f3577b34da6a3ce929d0e0e4736', 1 / 5, $sampled],
        'tv=d' => ['4bf92f3577b34da6a3ce929d0e0e4736', 3 / 16, $dropped],

        // Unnamed boundary cases: neighbouring trace ids per probability
        // where the expected decision flips.
        ['4bf92f3577b34da6a380000000000000', 0.5, $sampled],
        ['4bf92f3577b34da6a37fffffffffffff', 0.5, $dropped],
        ['4bf92f3577b34da6a3f5560000000000', 1 / 24, $sampled],
        ['4bf92f3577b34da6a3f554ffffffffff', 1 / 24, $dropped],
        ['4bf92f3577b34da6a3fffffffffffff0', 2 ** -52, $sampled],
        ['4bf92f3577b34da6a3ffffffffffffef', 2 ** -52, $dropped],
        ['4bf92f3577b34da6a3ffffffffffffff', 2 ** -56, $sampled],
        ['4bf92f3577b34da6a3fffffffffffffe', 2 ** -56, $dropped],
        ['4bf92f3577b34da6a3ffffffffffffff', 2 ** -57, $dropped],
    ];
}

/**
 * Compares the computed T-value, rendered as trimmed hexadecimal digits,
 * with the expected threshold string from the data provider.
 *
 * @dataProvider computeTValueProvider
 * @covers \OpenTelemetry\SDK\Trace\Sampler\TraceIdRatioBasedSampler::computeTValue
 */
public function test_compute_t_value(string $expected, float $probability, int $precision = 0): void
{
    $threshold = TraceIdRatioBasedSampler::computeTValue($probability, $precision);

    // Drop the leading byte of the packed 64-bit value, hex-encode the
    // remaining 7 bytes and strip trailing zero digits.
    $lowerBytes = substr(pack('J', $threshold), 1);
    $actual = rtrim(bin2hex($lowerBytes), '0');
    if ($actual === '') {
        // Everything was trimmed away: the threshold is zero.
        $actual = '0';
    }

    $this->assertSame($expected, $actual);
}

/**
 * Data sets for test_compute_t_value, each shaped as
 * [expected hexadecimal threshold, probability, optional minimum precision].
 */
public static function computeTValueProvider(): iterable
{
    // see https://github.com/open-telemetry/opentelemetry-specification/pull/3910
    yield from [
        ['0', 1],
        ['4', 3/4],
        ['555', 2/3, 3],
        ['5555', 2/3, 4],
        ['8', 1/2],
        ['aab', 1/3, 3],
        ['aaab', 1/3, 4],
        ['c', 1/4],
        ['ccd', 1/5, 3],
        ['cccd', 1/5, 4],
        ['d', 3/16],
        ['e', 1/8],
        ['f', 1/16],
        ['f0f', 1/17, 3],
        ['f555', 1/24, 4],
        ['f555555', 1/24, 7],
        ['f8', 1/32],
        ['faab', 1/48, 4],
        ['faaaaab', 1/48, 7],
        ['fc', 1/64],
        ['fd27d', 1/90, 5],
        ['fd27d27d', 1/90, 8],
        ['fe', 1/128],
        ['ff', 1/256],
        ['ffff', 2**-16],
        ['ffffff', 2**-24],
        ['ffffffff', 2**-32],
        ['ffffffffff', 2**-40],
        ['ffffffffffff', 2**-48],
        ['ffffffffffffff', 2**-56],
    ];
}

/**
Expand All @@ -50,6 +120,7 @@ public static function invalidProbabilityProvider(): array
return [
'negative' => [-0.05],
'greater than one' => [1.5],
'NaN' => [NAN],
];
}

Expand Down

0 comments on commit c0bd2c6

Please sign in to comment.