-
Notifications
You must be signed in to change notification settings - Fork 666
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Update WSGI & Flask integrations to follow new semantic conventions #299
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,9 @@ | |
|
||
from opentelemetry import propagators, trace | ||
from opentelemetry.ext.wsgi.version import __version__ # noqa | ||
from opentelemetry.trace.status import Status, StatusCanonicalCode | ||
|
||
_HTTP_VERSION_PREFIX = "HTTP/" | ||
|
||
|
||
def get_header_from_environ( | ||
|
@@ -41,44 +44,80 @@ def get_header_from_environ( | |
return [] | ||
|
||
|
||
def add_request_attributes(span, environ): | ||
"""Adds HTTP request attributes from the PEP3333-conforming WSGI environ to span.""" | ||
|
||
span.set_attribute("component", "http") | ||
span.set_attribute("http.method", environ["REQUEST_METHOD"]) | ||
|
||
host = environ.get("HTTP_HOST") | ||
if not host: | ||
host = environ["SERVER_NAME"] | ||
port = environ["SERVER_PORT"] | ||
scheme = environ["wsgi.url_scheme"] | ||
if ( | ||
scheme == "http" | ||
and port != "80" | ||
or scheme == "https" | ||
and port != "443" | ||
): | ||
host += ":" + port | ||
|
||
# NOTE: Nonstandard (but see | ||
# https://github.com/open-telemetry/opentelemetry-specification/pull/263) | ||
span.set_attribute("http.host", host) | ||
|
||
url = environ.get("REQUEST_URI") or environ.get("RAW_URI") | ||
|
||
if url: | ||
if url[0] == "/": | ||
# We assume that no scheme-relative URLs will be in url here. | ||
# After all, if a request is made to http://myserver//foo, we may get | ||
# //foo which looks like scheme-relative but isn't. | ||
url = environ["wsgi.url_scheme"] + "://" + host + url | ||
elif not url.startswith(environ["wsgi.url_scheme"] + ":"): | ||
# Something fishy is in RAW_URL. Let's fall back to request_uri() | ||
url = wsgiref_util.request_uri(environ) | ||
def setifnotnone(dic, key, value): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should follow the underscore naming convention, even for this small method name. |
||
if value is not None: | ||
dic[key] = value | ||
|
||
|
||
def http_status_to_canonical_code(code: int, allow_redirect: bool = True): | ||
# pylint:disable=too-many-branches,too-many-return-statements | ||
if code < 100: | ||
return StatusCanonicalCode.UNKNOWN | ||
if code <= 299: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure if it would be worth adding a check for 200 at the top since i suspect that's going to be by large the status code we hit. Possibly a premature optimization though There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wouldn't want to guess. 204 No Content is probably also a popular response code to POST requests and the like. I think before that I'd rather do things like the classical There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It feels to me that, rather than this range, just creating a dict with hard-coded values would be more performant. Might be worth a benchmark but would make behavior consistent regardless of what codes are common. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You mean a dict with 500 entries? |
||
return StatusCanonicalCode.OK | ||
if code <= 399: | ||
if allow_redirect: | ||
return StatusCanonicalCode.OK | ||
return StatusCanonicalCode.DEADLINE_EXCEEDED | ||
if code <= 499: | ||
if code == 401: # HTTPStatus.UNAUTHORIZED: | ||
return StatusCanonicalCode.UNAUTHENTICATED | ||
if code == 403: # HTTPStatus.FORBIDDEN: | ||
return StatusCanonicalCode.PERMISSION_DENIED | ||
if code == 404: # HTTPStatus.NOT_FOUND: | ||
return StatusCanonicalCode.NOT_FOUND | ||
if code == 429: # HTTPStatus.TOO_MANY_REQUESTS: | ||
return StatusCanonicalCode.RESOURCE_EXHAUSTED | ||
return StatusCanonicalCode.INVALID_ARGUMENT | ||
if code <= 599: | ||
if code == 501: # HTTPStatus.NOT_IMPLEMENTED: | ||
return StatusCanonicalCode.UNIMPLEMENTED | ||
if code == 503: # HTTPStatus.SERVICE_UNAVAILABLE: | ||
return StatusCanonicalCode.UNAVAILABLE | ||
if code == 504: # HTTPStatus.GATEWAY_TIMEOUT: | ||
return StatusCanonicalCode.DEADLINE_EXCEEDED | ||
return StatusCanonicalCode.INTERNAL | ||
return StatusCanonicalCode.UNKNOWN | ||
|
||
|
||
def collect_request_attributes(environ): | ||
"""Collects HTTP request attributes from the PEP3333-conforming | ||
WSGI environ and returns a dictionary to be used as span creation attributes.""" | ||
|
||
result = { | ||
"component": "http", | ||
"http.method": environ["REQUEST_METHOD"], | ||
"http.server_name": environ["SERVER_NAME"], | ||
"http.scheme": environ["wsgi.url_scheme"], | ||
"host.port": int(environ["SERVER_PORT"]), | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there ever a case where any of these environment variables wouldn't be present? If so using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. According to PEP3333, these are required: https://www.python.org/dev/peps/pep-3333/#environ-variables. I guess there might be nonconforming implementations out there. I have no strong opinion here. Maybe it would be better to come up with a convention of using |
||
|
||
setifnotnone(result, "http.host", environ.get("HTTP_HOST")) | ||
target = environ.get("RAW_URI") | ||
if target is None: # Note: `"" or None is None` | ||
target = environ.get("REQUEST_URI") | ||
if target is not None: | ||
result["http.target"] = target | ||
else: | ||
url = wsgiref_util.request_uri(environ) | ||
result["http.url"] = wsgiref_util.request_uri(environ) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it feels to me like http.url could be decomposed into http.target and http.host or something along those lines: otherwise if I wanted the target, I would have to know and check for url in the case that target did not exist, and perform a split to extract that myself. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, we could decompose, but I see no benefit in it. Who do you mean with "myself" above? I think the back-end is a better place for such logic, the integrations should send information to the back-end as "raw" as they want as more raw usually means more info. |
||
|
||
remote_addr = environ.get("REMOTE_ADDR") | ||
if remote_addr: | ||
result[ | ||
"peer.ipv6" if ":" in remote_addr else "peer.ipv4" | ||
] = remote_addr | ||
remote_host = environ.get("REMOTE_HOST") | ||
if remote_host and remote_host != remote_addr: | ||
result["peer.hostname"] = remote_host | ||
|
||
span.set_attribute("http.url", url) | ||
setifnotnone(result, "peer.port", environ.get("REMOTE_PORT")) | ||
flavor = environ.get("SERVER_PROTOCOL", "") | ||
if flavor.upper().startswith(_HTTP_VERSION_PREFIX): | ||
flavor = flavor[len(_HTTP_VERSION_PREFIX) :] | ||
if flavor: | ||
result["http.flavor"] = flavor | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unfortunately not, since I use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah right! |
||
|
||
return result | ||
|
||
|
||
def add_response_attributes( | ||
|
@@ -93,9 +132,15 @@ def add_response_attributes( | |
try: | ||
status_code = int(status_code) | ||
except ValueError: | ||
pass | ||
span.set_status( | ||
Status( | ||
StatusCanonicalCode.UNKNOWN, | ||
"Non-integer HTTP status: " + repr(status_code), | ||
) | ||
) | ||
else: | ||
span.set_attribute("http.status_code", status_code) | ||
span.set_status(Status(http_status_to_canonical_code(status_code))) | ||
|
||
|
||
def get_default_span_name(environ): | ||
|
@@ -142,19 +187,22 @@ def __call__(self, environ, start_response): | |
span_name = get_default_span_name(environ) | ||
|
||
span = tracer.start_span( | ||
span_name, parent_span, kind=trace.SpanKind.SERVER | ||
span_name, | ||
parent_span, | ||
kind=trace.SpanKind.SERVER, | ||
attributes=collect_request_attributes(environ), | ||
) | ||
|
||
try: | ||
with tracer.use_span(span): | ||
add_request_attributes(span, environ) | ||
start_response = self._create_start_response( | ||
span, start_response | ||
) | ||
|
||
iterable = self.wsgi(environ, start_response) | ||
return _end_span_after_iterating(iterable, span, tracer) | ||
except: # noqa | ||
# TODO Set span status (cf. https://github.com/open-telemetry/opentelemetry-python/issues/292) | ||
span.end() | ||
raise | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
random and more general question: is it needed to have "status_text"? feels to me like it adds a lot to the payload and is often identical.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, the status text is optional. But note that the integration does not insert static text here but actual "output" from the WSGI application. EDIT: See spec: https://github.com/open-telemetry/opentelemetry-specification/blob/master/specification/data-http.md#common-attributes