-
Notifications
You must be signed in to change notification settings - Fork 411
/
span.py
572 lines (483 loc) · 19 KB
/
span.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
import math
import pprint
import sys
import traceback
from typing import Any
from typing import Callable
from typing import Dict
from typing import List
from typing import Optional
from typing import Text
from typing import Union
import six
from . import config
from .constants import ANALYTICS_SAMPLE_RATE_KEY
from .constants import ERROR_MSG
from .constants import ERROR_STACK
from .constants import ERROR_TYPE
from .constants import MANUAL_DROP_KEY
from .constants import MANUAL_KEEP_KEY
from .constants import SAMPLING_AGENT_DECISION
from .constants import SAMPLING_LIMIT_DECISION
from .constants import SAMPLING_RULE_DECISION
from .constants import SERVICE_KEY
from .constants import SERVICE_VERSION_KEY
from .constants import SPAN_MEASURED_KEY
from .constants import USER_KEEP
from .constants import USER_REJECT
from .constants import VERSION_KEY
from .context import Context
from .ext import http
from .ext import net
from .internal._rand import rand64bits as _rand64bits
from .internal._rand import rand128bits as _rand128bits
from .internal.compat import NumericType
from .internal.compat import StringIO
from .internal.compat import ensure_text
from .internal.compat import is_integer
from .internal.compat import iteritems
from .internal.compat import numeric_types
from .internal.compat import stringify
from .internal.compat import time_ns
from .internal.constants import MAX_UINT_64BITS as _MAX_UINT_64BITS
from .internal.constants import SPAN_API_DATADOG
from .internal.logger import get_logger
from .internal.sampling import SamplingMechanism
from .internal.sampling import set_sampling_decision_maker
# Tag keys that ``Span.set_tag`` always routes to ``Span.set_metric`` (i.e.
# stores as numeric metrics) rather than stringifying into the meta dict.
_NUMERIC_TAGS = (ANALYTICS_SAMPLE_RATE_KEY,)

# Type aliases used in the ``# type:`` comments below.
# A tag/metric name may be text or bytes.
_TagNameType = Union[Text, bytes]
# Shape of ``Span._meta``: tag name -> text value.
_MetaDictType = Dict[_TagNameType, Text]
# Shape of ``Span._metrics``: tag name -> numeric value.
_MetricDictType = Dict[_TagNameType, NumericType]

# Module-level logger, named after this module.
log = get_logger(__name__)
def _get_64_lowest_order_bits_as_int(large_int):
    # type: (int) -> int
    """Mask ``large_int`` with ``_MAX_UINT_64BITS`` and return the result.

    Used to reduce a 128-bit integer to its 64 least-significant bits.
    """
    low_bits = large_int & _MAX_UINT_64BITS
    return low_bits
def _get_64_highest_order_bits_as_hex(large_int):
    # type: (int) -> str
    """Return the upper 64 bits of a 128-bit integer as 16 hex characters.

    The value is rendered as lowercase hex, zero-padded to 32 characters
    (128 bits), and the leading 16 characters are returned. The input is
    expected to fit in 128 bits.
    """
    padded_hex = format(large_int, "032x")
    return padded_hex[:16]
class Span(object):
    """A single timed operation within a trace.

    A span carries identifying ids (``trace_id``, ``span_id``, ``parent_id``),
    descriptive fields (``name``, ``service``, ``resource``, ``span_type``),
    string tags (``_meta``), numeric tags (``_metrics``), an ``error`` flag and
    nanosecond-resolution timing (``start_ns``, ``duration_ns``).

    It can be used as a context manager: leaving the ``with`` block records any
    in-flight exception on the span and then finishes it.
    """

    __slots__ = [
        # Public span attributes
        "service",
        "name",
        "_resource",
        "_span_api",
        "span_id",
        "trace_id",
        "parent_id",
        "_meta",
        "error",
        "_metrics",
        "_store",
        "span_type",
        "start_ns",
        "duration_ns",
        # Sampler attributes
        "sampled",
        # Internal attributes
        "_context",
        "_local_root",
        "_parent",
        "_ignored_exceptions",
        "_on_finish_callbacks",
        "__weakref__",
    ]

    def __init__(
        self,
        name,  # type: str
        service=None,  # type: Optional[str]
        resource=None,  # type: Optional[str]
        span_type=None,  # type: Optional[str]
        trace_id=None,  # type: Optional[int]
        span_id=None,  # type: Optional[int]
        parent_id=None,  # type: Optional[int]
        start=None,  # type: Optional[int]
        context=None,  # type: Optional[Context]
        on_finish=None,  # type: Optional[List[Callable[[Span], None]]]
        span_api=SPAN_API_DATADOG,  # type: str
    ):
        # type: (...) -> None
        """
        Create a new span. Call `finish` once the traced operation is over.

        **Note:** A ``Span`` should only be accessed or modified in the process
        that it was created in. Using a ``Span`` from within a child process
        could result in a deadlock or unexpected behavior.

        :param str name: the name of the traced operation.
        :param str service: the service name
        :param str resource: the resource name; falls back to ``name`` when
            not given (or falsy).
        :param str span_type: the span type
        :param int trace_id: the id of this trace's root span. When omitted a
            random 128-bit or 64-bit id is generated depending on
            ``config._128_bit_trace_id_enabled``.
        :param int parent_id: the id of this span's direct parent span.
        :param int span_id: the id of this span. A random 64-bit id is
            generated when omitted (or falsy, e.g. ``0``).
        :param int start: the start time of request as a unix epoch in seconds.
            Defaults to the current time.
        :param object context: the Context of the span.
        :param on_finish: list of functions called when the span finishes.
            The list is stored as given, not copied.
        :param str span_api: stored on the span as ``_span_api``.
        :raises TypeError: if ``span_id``, ``trace_id`` or ``parent_id`` is
            neither ``None`` nor an integer.
        """
        # pre-conditions
        if not (span_id is None or isinstance(span_id, six.integer_types)):
            raise TypeError("span_id must be an integer")
        if not (trace_id is None or isinstance(trace_id, six.integer_types)):
            raise TypeError("trace_id must be an integer")
        if not (parent_id is None or isinstance(parent_id, six.integer_types)):
            raise TypeError("parent_id must be an integer")

        # required span info
        self.name = name
        self.service = service
        # The resource lives in a one-element list; the ``resource`` property
        # reads and writes index 0.
        self._resource = [resource or name]
        self.span_type = span_type
        self._span_api = span_api

        # tags / metadata
        self._meta = {}  # type: _MetaDictType
        self.error = 0
        self._metrics = {}  # type: _MetricDictType

        # timing
        self.start_ns = time_ns() if start is None else int(start * 1e9)  # type: int
        self.duration_ns = None  # type: Optional[int]

        # tracing
        if trace_id is not None:
            self.trace_id = trace_id  # type: int
        elif config._128_bit_trace_id_enabled:
            self.trace_id = _rand128bits()
        else:
            self.trace_id = _rand64bits()
        self.span_id = span_id or _rand64bits()  # type: int
        self.parent_id = parent_id  # type: Optional[int]
        self._on_finish_callbacks = [] if on_finish is None else on_finish

        # sampling
        self.sampled = True  # type: bool

        self._context = context._with_span(self) if context else None  # type: Optional[Context]
        self._parent = None  # type: Optional[Span]
        self._ignored_exceptions = None  # type: Optional[List[Exception]]
        self._local_root = None  # type: Optional[Span]
        self._store = None  # type: Optional[Dict[str, Any]]

    def _ignore_exception(self, exc):
        # type: (Exception) -> None
        """Register an exception class that ``set_exc_info`` should skip."""
        if self._ignored_exceptions is None:
            self._ignored_exceptions = [exc]
        else:
            self._ignored_exceptions.append(exc)

    def _set_ctx_item(self, key, val):
        # type: (str, Any) -> None
        """Store ``val`` under ``key`` in the span-local store (created lazily)."""
        if not self._store:
            self._store = {}
        self._store[key] = val

    def _set_ctx_items(self, items):
        # type: (Dict[str, Any]) -> None
        """Merge ``items`` into the span-local store (created lazily)."""
        if not self._store:
            self._store = {}
        self._store.update(items)

    def _get_ctx_item(self, key):
        # type: (str) -> Optional[Any]
        """Return the stored value for ``key``, or ``None`` if absent."""
        if not self._store:
            return None
        return self._store.get(key)

    @property
    def _trace_id_64bits(self):
        """The 64 lowest-order bits of ``trace_id`` as an integer."""
        return _get_64_lowest_order_bits_as_int(self.trace_id)

    @property
    def start(self):
        # type: () -> float
        """The start timestamp in Unix epoch seconds."""
        return self.start_ns / 1e9

    @start.setter
    def start(self, value):
        # type: (Union[int, float]) -> None
        self.start_ns = int(value * 1e9)

    @property
    def resource(self):
        """The resource name of the span."""
        return self._resource[0]

    @resource.setter
    def resource(self, value):
        self._resource[0] = value

    @property
    def finished(self):
        # type: () -> bool
        """Whether the span has a recorded duration."""
        return self.duration_ns is not None

    @finished.setter
    def finished(self, value):
        # type: (bool) -> None
        """Finishes the span if set to a truthy value.

        If the span is already finished and a truthy value is provided
        no action will occur. A falsy value clears the recorded duration.

        This setter only updates ``duration_ns``; it does not run the
        on-finish callbacks (use ``finish`` for that).
        """
        if value:
            if not self.finished:
                self.duration_ns = time_ns() - self.start_ns
        else:
            self.duration_ns = None

    @property
    def duration(self):
        # type: () -> Optional[float]
        """The span duration in seconds, or ``None`` if not finished."""
        if self.duration_ns is not None:
            return self.duration_ns / 1e9
        return None

    @duration.setter
    def duration(self, value):
        # type: (float) -> None
        self.duration_ns = int(value * 1e9)

    def finish(self, finish_time=None):
        # type: (Optional[float]) -> None
        """Mark the end time of the span and run its on-finish callbacks.

        If the span has already been finished don't do anything.

        :param finish_time: The end time of the span, in seconds. Defaults to ``now``.
        """
        if finish_time is None:
            self._finish_ns(time_ns())
        else:
            self._finish_ns(int(finish_time * 1e9))

    def _finish_ns(self, finish_time_ns):
        # type: (int) -> None
        """Record the duration from an end time in nanoseconds and fire callbacks.

        Does nothing when a duration has already been recorded.
        """
        if self.duration_ns is not None:
            return

        # be defensive so we don't die if start isn't set
        self.duration_ns = finish_time_ns - (self.start_ns or finish_time_ns)

        for cb in self._on_finish_callbacks:
            cb(self)

    def _override_sampling_decision(self, decision):
        """Force the sampling priority of this span's context to ``decision``.

        The decision maker is marked as manual, and any sampling decision
        metrics previously recorded on the local root span are removed.
        """
        self.context.sampling_priority = decision
        set_sampling_decision_maker(self.context, SamplingMechanism.MANUAL)

        # ``_local_root`` starts out as None (see ``__init__``); without this
        # guard a span that has no local root assigned would raise
        # AttributeError here.
        local_root = self._local_root
        if local_root is None:
            return
        for key in (SAMPLING_RULE_DECISION, SAMPLING_AGENT_DECISION, SAMPLING_LIMIT_DECISION):
            local_root._metrics.pop(key, None)

    def set_tag(self, key, value=None):
        # type: (_TagNameType, Any) -> None
        """Set a tag key/value pair on the span.

        Keys must be strings, values must be ``stringify``-able.

        Integers with magnitude up to 2^53 and all floats are stored as
        metrics instead of string tags. A handful of keys get special
        handling (manual keep/drop, service, service version, measured).

        :param key: Key to use for the tag
        :type key: str
        :param value: Value to assign for the tag
        :type value: ``stringify``-able value
        """
        if not isinstance(key, six.string_types):
            log.warning("Ignoring tag pair %s:%s. Key must be a string.", key, value)
            return

        # Special case, force `http.status_code` as a string
        # DEV: `http.status_code` *has* to be in `meta` for metrics
        #   calculated in the trace agent
        if key == http.STATUS_CODE:
            value = str(value)

        # Determine once up front
        val_is_an_int = is_integer(value)

        # Explicitly try to convert expected integers to `int`
        # DEV: Some integrations parse these values from strings, but don't call `int(value)` themselves
        INT_TYPES = (net.TARGET_PORT,)
        if key in INT_TYPES and not val_is_an_int:
            try:
                value = int(value)
                val_is_an_int = True
            except (ValueError, TypeError):
                pass

        # Set integers that are less than equal to 2^53 as metrics
        if value is not None and val_is_an_int and abs(value) <= 2 ** 53:
            self.set_metric(key, value)
            return

        # All floats should be set as a metric
        elif isinstance(value, float):
            self.set_metric(key, value)
            return

        # Key should explicitly be converted to a float if needed
        elif key in _NUMERIC_TAGS:
            if value is None:
                log.debug("ignoring not number metric %s:%s", key, value)
                return

            try:
                # DEV: `set_metric` will try to cast to `float()` for us
                self.set_metric(key, value)
            except (TypeError, ValueError):
                log.warning("error setting numeric metric %s:%s", key, value)

            return

        elif key == MANUAL_KEEP_KEY:
            self._override_sampling_decision(USER_KEEP)
            return
        elif key == MANUAL_DROP_KEY:
            self._override_sampling_decision(USER_REJECT)
            return
        elif key == SERVICE_KEY:
            # DEV: no return, the tag itself is also stored below
            self.service = value
        elif key == SERVICE_VERSION_KEY:
            # Also set the `version` tag to the same value
            # DEV: Note that we do not return, we want to set both
            self.set_tag(VERSION_KEY, value)
        elif key == SPAN_MEASURED_KEY:
            # Set `_dd.measured` tag as a metric
            # DEV: `set_metric` will ensure it is an integer 0 or 1
            if value is None:
                value = 1
            self.set_metric(key, value)
            return

        try:
            self._meta[key] = stringify(value)
            # A key lives in either meta or metrics, never both
            self._metrics.pop(key, None)
        except Exception:
            log.warning("error setting tag %s, ignoring it", key, exc_info=True)

    def set_tag_str(self, key, value):
        # type: (_TagNameType, Text) -> None
        """Set a value for a tag. Values are coerced to unicode in Python 2 and
        str in Python 3, with decoding errors in conversion being replaced with
        U+FFFD.

        Failures are logged as warnings, or re-raised when ``config._raise``
        is set.
        """
        try:
            self._meta[key] = ensure_text(value, errors="replace")
        except Exception:
            if config._raise:
                # bare raise keeps the original traceback intact
                raise
            log.warning("Failed to set text tag '%s'", key, exc_info=True)

    def _remove_tag(self, key):
        # type: (_TagNameType) -> None
        """Delete the string tag ``key`` if it is set."""
        self._meta.pop(key, None)

    def get_tag(self, key):
        # type: (_TagNameType) -> Optional[Text]
        """Return the given tag or None if it doesn't exist."""
        return self._meta.get(key, None)

    def get_tags(self):
        # type: () -> _MetaDictType
        """Return all tags (as a shallow copy)."""
        return self._meta.copy()

    def set_tags(self, tags):
        # type: (_MetaDictType) -> None
        """Set a dictionary of tags on the given span. Keys and values
        must be strings (or stringable)
        """
        if tags:
            for k, v in tags.items():
                self.set_tag(k, v)

    def set_metric(self, key, value):
        # type: (_TagNameType, NumericType) -> None
        """Set a numeric tag value for the given key.

        Values whose type is not one of ``numeric_types`` are converted with
        ``float()``; values that cannot be converted, and NaN/infinite values,
        are ignored (logged at debug level).
        """
        # Enforce a specific constant for `_dd.measured`
        if key == SPAN_MEASURED_KEY:
            try:
                value = int(bool(value))
            except (ValueError, TypeError):
                log.warning("failed to convert %r tag to an integer from %r", key, value)
                return

        # FIXME[matt] we could push this check to serialization time as well.
        # only permit types that are commonly serializable (don't use
        # isinstance so that we convert unserializable types like numpy
        # numbers)
        if type(value) not in numeric_types:
            try:
                value = float(value)
            except (ValueError, TypeError):
                log.debug("ignoring not number metric %s:%s", key, value)
                return

        # don't allow nan or inf
        if math.isnan(value) or math.isinf(value):
            log.debug("ignoring not real metric %s:%s", key, value)
            return

        # A key lives in either meta or metrics, never both
        self._meta.pop(key, None)
        self._metrics[key] = value

    def set_metrics(self, metrics):
        # type: (_MetricDictType) -> None
        """Set every key/value pair of ``metrics`` via ``set_metric``."""
        if metrics:
            for k, v in iteritems(metrics):
                self.set_metric(k, v)

    def get_metric(self, key):
        # type: (_TagNameType) -> Optional[NumericType]
        """Return the given metric or None if it doesn't exist."""
        return self._metrics.get(key)

    def get_metrics(self):
        # type: () -> _MetricDictType
        """Return all metrics (as a shallow copy)."""
        return self._metrics.copy()

    def set_traceback(self, limit=30):
        # type: (int) -> None
        """If the current stack has an exception, tag the span with the
        relevant error info. If not, set the span to the current python stack.

        :param limit: maximum number of stack frames recorded when there is
            no active exception. It is not applied on the exception path,
            which goes through ``set_exc_info``.
        """
        (exc_type, exc_val, exc_tb) = sys.exc_info()

        if exc_type and exc_val and exc_tb:
            self.set_exc_info(exc_type, exc_val, exc_tb)
        else:
            # limit + 1 then [:-1]: drop the frame of this method itself
            tb = "".join(traceback.format_stack(limit=limit + 1)[:-1])
            self._meta[ERROR_STACK] = tb

    def set_exc_info(self, exc_type, exc_val, exc_tb):
        # type: (Any, Any, Any) -> None
        """Tag the span with an error tuple as from `sys.exc_info()`.

        Nothing is recorded when any element of the tuple is falsy or when
        ``exc_type`` is a subclass of an exception registered through
        ``_ignore_exception``.
        """
        if not (exc_type and exc_val and exc_tb):
            return  # nothing to do

        ignored = self._ignored_exceptions
        if ignored and any(issubclass(exc_type, e) for e in ignored):  # type: ignore[arg-type]
            return

        self.error = 1
        self._set_exc_tags(exc_type, exc_val, exc_tb)

    def _set_exc_tags(self, exc_type, exc_val, exc_tb):
        """Write the error message, type and stack tags for an exception."""
        # get the traceback
        buff = StringIO()
        traceback.print_exception(exc_type, exc_val, exc_tb, file=buff, limit=30)
        tb = buff.getvalue()

        # readable version of type (e.g. exceptions.ZeroDivisionError)
        exc_type_str = "%s.%s" % (exc_type.__module__, exc_type.__name__)

        self._meta[ERROR_MSG] = stringify(exc_val)
        self._meta[ERROR_TYPE] = exc_type_str
        self._meta[ERROR_STACK] = tb

    def _pprint(self):
        # type: () -> str
        """Return a human readable version of the span."""
        duration = self.duration
        # Compare against None so a finished span with a zero duration still
        # reports an end time.
        end = None if duration is None else self.start + duration
        data = [
            ("name", self.name),
            ("id", self.span_id),
            ("trace_id", self.trace_id),
            ("parent_id", self.parent_id),
            ("service", self.service),
            ("resource", self.resource),
            ("type", self.span_type),
            ("start", self.start),
            ("end", end),
            ("duration", duration),
            ("error", self.error),
            ("tags", dict(sorted(self._meta.items()))),
            ("metrics", dict(sorted(self._metrics.items()))),
        ]
        return " ".join(
            # use a large column width to keep pprint output on one line
            "%s=%s" % (k, pprint.pformat(v, width=1024 ** 2).strip())
            for (k, v) in data
        )

    @property
    def context(self):
        # type: () -> Context
        """Return the trace context for this span.

        A new ``Context`` is created from this span's ids on first access if
        none was supplied at construction time.
        """
        if self._context is None:
            self._context = Context(trace_id=self.trace_id, span_id=self.span_id)
        return self._context

    def finish_with_ancestors(self):
        # type: () -> None
        """Finish this span along with all (accessible) ancestors of this span.

        This method is useful if a sudden program shutdown is required and finishing
        the trace is desired.
        """
        span = self  # type: Optional[Span]
        while span is not None:
            span.finish()
            span = span._parent

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Record the exception (if any) and finish. Errors raised while doing
        # so are logged, never propagated. The exception from the ``with``
        # body is not suppressed (this method returns None).
        try:
            if exc_type:
                self.set_exc_info(exc_type, exc_val, exc_tb)
            self.finish()
        except Exception:
            log.exception("error closing trace")

    def __repr__(self):
        return "<Span(id=%s,trace_id=%s,parent_id=%s,name=%s)>" % (
            self.span_id,
            self.trace_id,
            self.parent_id,
            self.name,
        )
def _is_top_level(span):
    # type: (Span) -> bool
    """Tell whether ``span`` counts as a "top level" span.

    A span is top level when it is its own local root, or when it has a
    parent whose service differs from the span's own (non-``None``) service.
    """
    if span._local_root is span:
        return True

    parent = span._parent
    if parent is None:
        return False

    return parent.service != span.service and span.service is not None