open-telemetry · c24t · Oct 24, 2019 · Oct 16, 2019 · Oct 16, 2019 · Oct 16, 2019
diff --git a/opentelemetry-api/src/opentelemetry/trace/__init__.py b/opentelemetry-api/src/opentelemetry/trace/__init__.py
@@ -267,6 +267,10 @@ class TraceOptions(int):
     def get_default(cls) -> "TraceOptions":
         return cls(cls.DEFAULT)
 
+    @property
+    def recorded(self) -> bool:
+        """True if ``self & TraceOptions.RECORDED`` is non-zero."""
+        return bool(self & TraceOptions.RECORDED)
+
 
 DEFAULT_TRACE_OPTIONS = TraceOptions.get_default()
 
@@ -307,8 +311,8 @@ class SpanContext:
     Args:
         trace_id: The ID of the trace that this span belongs to.
         span_id: This span's ID.
-        options: Trace options to propagate.
-        state: Tracing-system-specific info to propagate.
+        trace_options: Trace options to propagate.
+        trace_state: Tracing-system-specific info to propagate.
     """
 
     def __init__(
@@ -361,6 +365,9 @@ def __init__(self, context: "SpanContext") -> None:
     def get_context(self) -> "SpanContext":
         return self._context
 
+    def is_recording_events(self) -> bool:
+        """Return ``False`` unconditionally."""
+        # NOTE(review): presumably this span type keeps no event data, so
+        # there is never anything being recorded -- confirm against the class.
+        return False
+
 
 INVALID_SPAN_ID = 0x0000000000000000
 INVALID_TRACE_ID = 0x00000000000000000000000000000000

diff --git a/opentelemetry-api/src/opentelemetry/trace/sampling.py b/opentelemetry-api/src/opentelemetry/trace/sampling.py
@@ -0,0 +1,128 @@
+# Copyright 2019, OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import abc
+from typing import Dict, Mapping, Optional, Sequence
+
+# pylint: disable=unused-import
+from opentelemetry.trace import Link, SpanContext
+from opentelemetry.util.types import AttributeValue
+
+
+class Decision:
+    """A sampling decision as applied to a newly-created Span.
+
+    Args:
+        sampled: Whether the `Span` should be sampled.
+        attributes: Attributes to add to the `Span`. The mapping is copied
+            into a new dict, so later changes to the caller's mapping do not
+            affect this decision. ``None`` (the default) means no attributes.
+    """
+
+    def __init__(
+        self,
+        sampled: bool = False,
+        attributes: Optional[Mapping[str, "AttributeValue"]] = None,
+    ) -> None:
+        self.sampled: bool = sampled
+        # Always hold our own dict so decisions never share mutable state
+        # with the caller (or with each other).
+        self.attributes: Dict[str, "AttributeValue"] = (
+            {} if attributes is None else dict(attributes)
+        )
+
+    def __repr__(self) -> str:
+        return "{}({}, attributes={})".format(
+            type(self).__name__, str(self.sampled), str(self.attributes)
+        )
+
+
+class Sampler(abc.ABC):
+    """Abstract interface for objects that make span sampling decisions."""
+
+    @abc.abstractmethod
+    def should_sample(
+        self,
+        parent_context: Optional["SpanContext"],
+        trace_id: int,
+        span_id: int,
+        name: str,
+        links: Optional[Sequence["Link"]] = None,
+    ) -> "Decision":
+        """Return the sampling `Decision` for a span.
+
+        Subclasses must override this method; the base implementation has no
+        body.
+
+        Args:
+            parent_context: `SpanContext` of the parent span, or ``None``.
+            trace_id: Integer trace ID.
+            span_id: Integer span ID.
+            name: Span name.
+            links: Optional sequence of `Link` objects.
+
+        Returns:
+            A `Decision`.
+        """
+        pass
+
+
+class StaticSampler(Sampler):
+    """Sampler that always returns the same decision."""
+
+    def __init__(self, decision: "Decision") -> None:
+        # Returned as-is by every `should_sample` call.
+        self.decision = decision
+
+    def should_sample(
+        self,
+        parent_context: Optional["SpanContext"],
+        trace_id: int,
+        span_id: int,
+        name: str,
+        links: Optional[Sequence["Link"]] = None,
+    ) -> "Decision":
+        """Return the stored decision; every argument is ignored."""
+        return self.decision
+
+
+class ProbabilitySampler(Sampler):
+    """Sampler that samples a fixed fraction of traces, keyed on the trace ID.
+
+    When a parent context is given, its `recorded` trace option is used as the
+    decision. Otherwise the trace is sampled if the last 8 bytes of its trace
+    ID, read as an integer, are below `bound`.
+
+    Args:
+        rate: Fraction of parentless traces to sample, in ``[0.0, 1.0]``.
+
+    Raises:
+        ValueError: If ``rate`` is outside ``[0.0, 1.0]`` or is NaN.
+    """
+
+    # The sampler checks the last 8 bytes of the trace ID to decide whether to
+    # sample a given trace.
+    CHECK_BYTES = 0xFFFFFFFFFFFFFFFF
+
+    def __init__(self, rate: float):
+        self._rate = self._check_rate(rate)
+        self._bound = self.get_bound_for_rate(self._rate)
+
+    @staticmethod
+    def _check_rate(rate: float) -> float:
+        """Return ``rate`` unchanged, or raise if it is not a probability."""
+        # Negated chained comparison so that NaN is rejected as well.
+        if not 0.0 <= rate <= 1.0:
+            raise ValueError(
+                "Sampling rate must be in the range [0.0, 1.0], "
+                "got {!r}".format(rate)
+            )
+        return rate
+
+    @classmethod
+    def get_bound_for_rate(cls, rate: float) -> int:
+        """Return the exclusive upper bound on sampled trace-ID values.
+
+        The bound is ``rate`` scaled to the number of distinct values the
+        checked bytes can take (``CHECK_BYTES + 1``), rounded to an integer.
+        """
+        return round(rate * (cls.CHECK_BYTES + 1))
+
+    @property
+    def rate(self) -> float:
+        """The configured sampling rate."""
+        return self._rate
+
+    @rate.setter
+    def rate(self, new_rate: float) -> None:
+        # Validate before touching any state so a rejected value leaves the
+        # sampler's rate and bound consistent with each other.
+        self._rate = self._check_rate(new_rate)
+        self._bound = self.get_bound_for_rate(self._rate)
+
+    @property
+    def bound(self) -> int:
+        """Trace-ID values strictly below this bound are sampled."""
+        return self._bound
+
+    def should_sample(
+        self,
+        parent_context: Optional["SpanContext"],
+        trace_id: int,
+        span_id: int,
+        name: str,
+        links: Optional[Sequence["Link"]] = None,
+    ) -> "Decision":
+        """Follow the parent's decision if there is one, else use the rate.
+
+        ``span_id``, ``name`` and ``links`` are not used by this sampler.
+        """
+        if parent_context is not None:
+            return Decision(parent_context.trace_options.recorded)
+
+        # Mask first, then compare: only the checked bytes take part.
+        return Decision((trace_id & self.CHECK_BYTES) < self.bound)
+
+
+# Samplers that ignore the parent sampling decision and never/always sample.
+# Each wraps a single fixed `Decision` in a `StaticSampler`.
+ALWAYS_OFF = StaticSampler(Decision(False))
+ALWAYS_ON = StaticSampler(Decision(True))
+
+# Samplers that respect the parent sampling decision, but otherwise
+# never/always sample. These are `ProbabilitySampler`s with rates 0.0 and 1.0.
+DEFAULT_OFF = ProbabilitySampler(0.0)
+DEFAULT_ON = ProbabilitySampler(1.0)
diff --git a/opentelemetry-api/tests/trace/test_sampling.py b/opentelemetry-api/tests/trace/test_sampling.py
@@ -0,0 +1,223 @@
+# Copyright 2019, OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+from opentelemetry import trace
+from opentelemetry.trace import sampling
+
+# Trace options built from the DEFAULT and RECORDED values; the tests below
+# pass them as the parent context's `trace_options`.
+TO_DEFAULT = trace.TraceOptions(trace.TraceOptions.DEFAULT)
+TO_RECORDED = trace.TraceOptions(trace.TraceOptions.RECORDED)
+
+
+class TestSampler(unittest.TestCase):
+    # ALWAYS_ON samples regardless of the parent's recorded flag.
+    def test_always_on(self):
+        no_record_always_on = sampling.ALWAYS_ON.should_sample(
+            trace.SpanContext(
+                0xDEADBEEF, 0xDEADBEF0, trace_options=TO_DEFAULT
+            ),
+            0xDEADBEF1,
+            0xDEADBEF2,
+            "unrecorded parent, sampling on",
+        )
+        self.assertTrue(no_record_always_on.sampled)
+        self.assertEqual(no_record_always_on.attributes, {})
+
+        recorded_always_on = sampling.ALWAYS_ON.should_sample(
+            trace.SpanContext(
+                0xDEADBEEF, 0xDEADBEF0, trace_options=TO_RECORDED
+            ),
+            0xDEADBEF1,
+            0xDEADBEF2,
+            "recorded parent, sampling on",
+        )
+        self.assertTrue(recorded_always_on.sampled)
+        self.assertEqual(recorded_always_on.attributes, {})
+
+    # ALWAYS_OFF never samples, regardless of the parent's recorded flag.
+    def test_always_off(self):
+        no_record_always_off = sampling.ALWAYS_OFF.should_sample(
+            trace.SpanContext(
+                0xDEADBEEF, 0xDEADBEF0, trace_options=TO_DEFAULT
+            ),
+            0xDEADBEF1,
+            0xDEADBEF2,
+            "unrecorded parent, sampling off",
+        )
+        self.assertFalse(no_record_always_off.sampled)
+        self.assertEqual(no_record_always_off.attributes, {})
+
+        recorded_always_off = sampling.ALWAYS_OFF.should_sample(
+            trace.SpanContext(
+                0xDEADBEEF, 0xDEADBEF0, trace_options=TO_RECORDED
+            ),
+            0xDEADBEF1,
+            0xDEADBEF2,
+            "recorded parent, sampling off",
+        )
+        self.assertFalse(recorded_always_off.sampled)
+        self.assertEqual(recorded_always_off.attributes, {})
+
+    # DEFAULT_ON follows the parent's recorded flag when a parent is given.
+    def test_default_on(self):
+        no_record_default_on = sampling.DEFAULT_ON.should_sample(
+            trace.SpanContext(
+                0xDEADBEEF, 0xDEADBEF0, trace_options=TO_DEFAULT
+            ),
+            0xDEADBEF1,
+            0xDEADBEF2,
+            "unrecorded parent, sampling on",
+        )
+        self.assertFalse(no_record_default_on.sampled)
+        self.assertEqual(no_record_default_on.attributes, {})
+
+        recorded_default_on = sampling.DEFAULT_ON.should_sample(
+            trace.SpanContext(
+                0xDEADBEEF, 0xDEADBEF0, trace_options=TO_RECORDED
+            ),
+            0xDEADBEF1,
+            0xDEADBEF2,
+            "recorded parent, sampling on",
+        )
+        self.assertTrue(recorded_default_on.sampled)
+        self.assertEqual(recorded_default_on.attributes, {})
+
+    # DEFAULT_OFF also follows the parent's recorded flag when one is given.
+    def test_default_off(self):
+        no_record_default_off = sampling.DEFAULT_OFF.should_sample(
+            trace.SpanContext(
+                0xDEADBEEF, 0xDEADBEF0, trace_options=TO_DEFAULT
+            ),
+            0xDEADBEF1,
+            0xDEADBEF2,
+            "unrecorded parent, sampling off",
+        )
+        self.assertFalse(no_record_default_off.sampled)
+        self.assertEqual(no_record_default_off.attributes, {})
+
+        recorded_default_off = sampling.DEFAULT_OFF.should_sample(
+            trace.SpanContext(
+                0xDEADBEEF, 0xDEADBEF0, trace_options=TO_RECORDED
+            ),
+            0xDEADBEF1,
+            0xDEADBEF2,
+            "recorded parent, sampling off",
+        )
+        self.assertTrue(recorded_default_off.sampled)
+        self.assertEqual(recorded_default_off.attributes, {})
+
+    def test_probability_sampler(self):
+        sampler = sampling.ProbabilitySampler(0.5)
+
+        # Check that we sample based on the trace ID if the parent context is
+        # null
+        self.assertTrue(
+            sampler.should_sample(
+                None, 0x7FFFFFFFFFFFFFFF, 0xDEADBEEF, "span name"
+            ).sampled
+        )
+        self.assertFalse(
+            sampler.should_sample(
+                None, 0x8000000000000000, 0xDEADBEEF, "span name"
+            ).sampled
+        )
+
+        # Check that the sampling decision matches the parent context if given,
+        # and that the sampler ignores the trace ID
+        self.assertFalse(
+            sampler.should_sample(
+                trace.SpanContext(
+                    0xDEADBEF0, 0xDEADBEF1, trace_options=TO_DEFAULT
+                ),
+                0x8000000000000000,
+                0xDEADBEEF,
+                "span name",
+            ).sampled
+        )
+        self.assertTrue(
+            sampler.should_sample(
+                trace.SpanContext(
+                    0xDEADBEF0, 0xDEADBEF1, trace_options=TO_RECORDED
+                ),
+                0x8000000000000001,
+                0xDEADBEEF,
+                "span name",
+            ).sampled
+        )
+
+    # Rate 0.0 must not sample even the smallest trace ID.
+    def test_probability_sampler_zero(self):
+        default_off = sampling.ProbabilitySampler(0.0)
+        self.assertFalse(
+            default_off.should_sample(
+                None, 0x0, 0xDEADBEEF, "span name"
+            ).sampled
+        )
+
+    # Rate 1.0 must sample even the largest 8-byte trace ID value.
+    def test_probability_sampler_one(self):
+        default_on = sampling.ProbabilitySampler(1.0)
+        self.assertTrue(
+            default_on.should_sample(
+                None, 0xFFFFFFFFFFFFFFFF, 0xDEADBEEF, "span name"
+            ).sampled
+        )
+
+    def test_probability_sampler_limits(self):
+
+        # Sample one of every 2^64 (= 5e-20) traces. This is the lowest
+        # possible meaningful sampling rate, only traces with trace ID 0x0
+        # should get sampled.
+        almost_always_off = sampling.ProbabilitySampler(1 / 2 ** 64)
+        self.assertTrue(
+            almost_always_off.should_sample(
+                None, 0x0, 0xDEADBEEF, "span name"
+            ).sampled
+        )
+        self.assertFalse(
+            almost_always_off.should_sample(
+                None, 0x1, 0xDEADBEEF, "span name"
+            ).sampled
+        )
+        self.assertEqual(
+            sampling.ProbabilitySampler.get_bound_for_rate(1 / 2 ** 64), 0x1
+        )
+
+        # Sample every trace with (last 8 bytes of) trace ID less than
+        # 0xffffffffffffffff. In principle this is the highest possible
+        # sampling rate less than 1, but we can't actually express this rate as
+        # a float!
+        #
+        # In practice, the highest sampling rate less than 1 that a float can
+        # express is:
+        #
+        #     1 - sys.float_info.epsilon / 2
+        #
+        # so `1 - (1 / 2 ** 64)` below evaluates to exactly 1.0.
+
+        almost_always_on = sampling.ProbabilitySampler(1 - (1 / 2 ** 64))
+        self.assertTrue(
+            almost_always_on.should_sample(
+                None, 0xFFFFFFFFFFFFFFFE, 0xDEADBEEF, "span name"
+            ).sampled
+        )
+
+        # These tests are logically consistent, but fail because of the float
+        # precision issue above. Changing the sampler to check fewer bytes of
+        # the trace ID will cause these to pass.
+
+        # self.assertFalse(
+        #     almost_always_on.should_sample(
+        #         None,
+        #         0xffffffffffffffff,
+        #         0xdeadbeef,
+        #         "span name",
+        #     ).sampled
+        # )
+        # self.assertEqual(
+        #     sampling.ProbabilitySampler.get_bound_for_rate(1 - (1 / 2 ** 64)),
+        #     0xffffffffffffffff,
+        # )