Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update WSGI & Flask integrations to follow new semantic conventions #299

Merged
merged 3 commits into from
Dec 9, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ ignore =
E501 # line too long, defer to black
F401 # unused import, defer to pylint
W503 # allow line breaks after binary ops, not after
E203 # allow whitespace before ':' (https://github.com/psf/black#slices)
exclude =
.bzr
.git
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,20 +62,21 @@ def _before_flask_request():

tracer = trace.tracer()

attributes = otel_wsgi.collect_request_attributes(environ)
if flask_request.url_rule:
# For 404 that result from no route found, etc, we don't have a url_rule.
attributes["http.route"] = flask_request.url_rule.rule
span = tracer.start_span(
span_name,
parent_span,
kind=trace.SpanKind.SERVER,
attributes=attributes,
start_time=environ.get(_ENVIRON_STARTTIME_KEY),
)
activation = tracer.use_span(span, end_on_exit=True)
activation.__enter__()
environ[_ENVIRON_ACTIVATION_KEY] = activation
environ[_ENVIRON_SPAN_KEY] = span
otel_wsgi.add_request_attributes(span, environ)
if flask_request.url_rule:
# For 404 that result from no route found, etc, we don't have a url_rule.
span.set_attribute("http.route", flask_request.url_rule.rule)


def _teardown_flask_request(exc):
Expand Down
56 changes: 34 additions & 22 deletions ext/opentelemetry-ext-flask/tests/test_flask_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,22 +57,25 @@ def test_simple(self):
"hello_endpoint",
trace_api.INVALID_SPAN_CONTEXT,
kind=trace_api.SpanKind.SERVER,
attributes={
"component": "http",
"http.method": "GET",
"http.server_name": "localhost",
"http.scheme": "http",
"host.port": 80,
"http.host": "localhost",
"http.target": "/hello/123",
"http.flavor": "1.1",
"http.route": "/hello/<int:helloid>",
},
start_time=mock.ANY,
)

# TODO: Change this test to use the SDK, as mocking becomes painful

self.assertEqual(
self.span_attrs,
{
"component": "http",
"http.method": "GET",
"http.host": "localhost",
"http.url": "http://localhost/hello/123",
"http.route": "/hello/<int:helloid>",
"http.status_code": 200,
"http.status_text": "OK",
},
{"http.status_code": 200, "http.status_text": "OK"},
)

def test_404(self):
Expand All @@ -84,6 +87,16 @@ def test_404(self):
"/bye",
trace_api.INVALID_SPAN_CONTEXT,
kind=trace_api.SpanKind.SERVER,
attributes={
"component": "http",
"http.method": "POST",
"http.server_name": "localhost",
"http.scheme": "http",
"host.port": 80,
"http.host": "localhost",
"http.target": "/bye",
"http.flavor": "1.1",
},
start_time=mock.ANY,
)

Expand All @@ -93,14 +106,7 @@ def test_404(self):

self.assertEqual(
self.span_attrs,
{
"component": "http",
"http.method": "POST",
"http.host": "localhost",
"http.url": "http://localhost/bye",
"http.status_code": 404,
"http.status_text": "NOT FOUND",
},
{"http.status_code": 404, "http.status_text": "NOT FOUND"},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

random and more general question: is it needed to have "status_text"? feels to me like it adds a lot to the payload and is often identical.

Copy link
Member Author

@Oberon00 Oberon00 Dec 9, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the status text is optional. But note that the integration does not insert static text here but actual "output" from the WSGI application. EDIT: See spec: https://github.com/open-telemetry/opentelemetry-specification/blob/master/specification/data-http.md#common-attributes

)

def test_internal_error(self):
Expand All @@ -112,6 +118,17 @@ def test_internal_error(self):
"hello_endpoint",
trace_api.INVALID_SPAN_CONTEXT,
kind=trace_api.SpanKind.SERVER,
attributes={
"component": "http",
"http.method": "GET",
"http.server_name": "localhost",
"http.scheme": "http",
"host.port": 80,
"http.host": "localhost",
"http.target": "/hello/500",
"http.flavor": "1.1",
"http.route": "/hello/<int:helloid>",
},
start_time=mock.ANY,
)

Expand All @@ -122,11 +139,6 @@ def test_internal_error(self):
self.assertEqual(
self.span_attrs,
{
"component": "http",
"http.method": "GET",
"http.host": "localhost",
"http.url": "http://localhost/hello/500",
"http.route": "/hello/<int:helloid>",
"http.status_code": 500,
"http.status_text": "INTERNAL SERVER ERROR",
},
Expand Down
126 changes: 87 additions & 39 deletions ext/opentelemetry-ext-wsgi/src/opentelemetry/ext/wsgi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@

from opentelemetry import propagators, trace
from opentelemetry.ext.wsgi.version import __version__ # noqa
from opentelemetry.trace.status import Status, StatusCanonicalCode

_HTTP_VERSION_PREFIX = "HTTP/"


def get_header_from_environ(
Expand All @@ -41,44 +44,80 @@ def get_header_from_environ(
return []


def add_request_attributes(span, environ):
"""Adds HTTP request attributes from the PEP3333-conforming WSGI environ to span."""

span.set_attribute("component", "http")
span.set_attribute("http.method", environ["REQUEST_METHOD"])

host = environ.get("HTTP_HOST")
if not host:
host = environ["SERVER_NAME"]
port = environ["SERVER_PORT"]
scheme = environ["wsgi.url_scheme"]
if (
scheme == "http"
and port != "80"
or scheme == "https"
and port != "443"
):
host += ":" + port

# NOTE: Nonstandard (but see
# https://github.com/open-telemetry/opentelemetry-specification/pull/263)
span.set_attribute("http.host", host)

url = environ.get("REQUEST_URI") or environ.get("RAW_URI")

if url:
if url[0] == "/":
# We assume that no scheme-relative URLs will be in url here.
# After all, if a request is made to http://myserver//foo, we may get
# //foo which looks like scheme-relative but isn't.
url = environ["wsgi.url_scheme"] + "://" + host + url
elif not url.startswith(environ["wsgi.url_scheme"] + ":"):
# Something fishy is in RAW_URL. Let's fall back to request_uri()
url = wsgiref_util.request_uri(environ)
def setifnotnone(dic, key, value):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should follow the underscore naming convention, even for this small method name.

if value is not None:
dic[key] = value


def http_status_to_canonical_code(code: int, allow_redirect: bool = True):
# pylint:disable=too-many-branches,too-many-return-statements
if code < 100:
return StatusCanonicalCode.UNKNOWN
if code <= 299:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if it would be worth adding a check for 200 at the top since i suspect that's going to be by large the status code we hit. Possibly a premature optimization though

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wouldn't want to guess. 204 No Content is probably also a popular response code to POST requests and the like. I think before that I'd rather do things like the classical _StatusCanonicalCode=StatusCanonicalCode default parameter to avoid accessing globals.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It feels to me that, rather than this range, just creating a dict with hard-coded values would be more performant. Might be worth a benchmark but would make behavior consistent regardless of what codes are common.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean a dict with 500 entries?

return StatusCanonicalCode.OK
if code <= 399:
if allow_redirect:
return StatusCanonicalCode.OK
return StatusCanonicalCode.DEADLINE_EXCEEDED
if code <= 499:
if code == 401: # HTTPStatus.UNAUTHORIZED:
return StatusCanonicalCode.UNAUTHENTICATED
if code == 403: # HTTPStatus.FORBIDDEN:
return StatusCanonicalCode.PERMISSION_DENIED
if code == 404: # HTTPStatus.NOT_FOUND:
return StatusCanonicalCode.NOT_FOUND
if code == 429: # HTTPStatus.TOO_MANY_REQUESTS:
return StatusCanonicalCode.RESOURCE_EXHAUSTED
return StatusCanonicalCode.INVALID_ARGUMENT
if code <= 599:
if code == 501: # HTTPStatus.NOT_IMPLEMENTED:
return StatusCanonicalCode.UNIMPLEMENTED
if code == 503: # HTTPStatus.SERVICE_UNAVAILABLE:
return StatusCanonicalCode.UNAVAILABLE
if code == 504: # HTTPStatus.GATEWAY_TIMEOUT:
return StatusCanonicalCode.DEADLINE_EXCEEDED
return StatusCanonicalCode.INTERNAL
return StatusCanonicalCode.UNKNOWN


def collect_request_attributes(environ):
"""Collects HTTP request attributes from the PEP3333-conforming
WSGI environ and returns a dictionary to be used as span creation attributes."""

result = {
"component": "http",
"http.method": environ["REQUEST_METHOD"],
"http.server_name": environ["SERVER_NAME"],
"http.scheme": environ["wsgi.url_scheme"],
"host.port": int(environ["SERVER_PORT"]),
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there ever a case where any of these environment variables wouldn't be present? If so using environ.get may be better, unless raising an exception is preferable

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to PEP3333, these are required: https://www.python.org/dev/peps/pep-3333/#environ-variables. I guess there might be nonconforming implementations out there. I have no strong opinion here. Maybe it would be better to come up with a convention of using try:/except: to make sure the integrations don't break applications.


setifnotnone(result, "http.host", environ.get("HTTP_HOST"))
target = environ.get("RAW_URI")
if target is None: # Note: `"" or None is None`
target = environ.get("REQUEST_URI")
if target is not None:
result["http.target"] = target
else:
url = wsgiref_util.request_uri(environ)
result["http.url"] = wsgiref_util.request_uri(environ)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it feels to me like http.url could be decomposed into http.target and http.host or something along those lines: otherwise if I wanted the target, I would have to know and check for url in the case that target did not exist, and perform a split to extract that myself.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, we could decompose, but I see no benefit in it. Who do you mean with "myself" above? I think the back-end is a better place for such logic, the integrations should send information to the back-end as "raw" as they want as more raw usually means more info.


remote_addr = environ.get("REMOTE_ADDR")
if remote_addr:
result[
"peer.ipv6" if ":" in remote_addr else "peer.ipv4"
] = remote_addr
remote_host = environ.get("REMOTE_HOST")
if remote_host and remote_host != remote_addr:
result["peer.hostname"] = remote_host

span.set_attribute("http.url", url)
setifnotnone(result, "peer.port", environ.get("REMOTE_PORT"))
flavor = environ.get("SERVER_PROTOCOL", "")
if flavor.upper().startswith(_HTTP_VERSION_PREFIX):
flavor = flavor[len(_HTTP_VERSION_PREFIX) :]
if flavor:
result["http.flavor"] = flavor
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could use setifnotnone here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately not, since I use "" as second argument to get above, in order to unconditionally use upper and startswith. I think there is really no way to save a conditional here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah right!


return result


def add_response_attributes(
Expand All @@ -93,9 +132,15 @@ def add_response_attributes(
try:
status_code = int(status_code)
except ValueError:
pass
span.set_status(
Status(
StatusCanonicalCode.UNKNOWN,
"Non-integer HTTP status: " + repr(status_code),
)
)
else:
span.set_attribute("http.status_code", status_code)
span.set_status(Status(http_status_to_canonical_code(status_code)))


def get_default_span_name(environ):
Expand Down Expand Up @@ -142,19 +187,22 @@ def __call__(self, environ, start_response):
span_name = get_default_span_name(environ)

span = tracer.start_span(
span_name, parent_span, kind=trace.SpanKind.SERVER
span_name,
parent_span,
kind=trace.SpanKind.SERVER,
attributes=collect_request_attributes(environ),
)

try:
with tracer.use_span(span):
add_request_attributes(span, environ)
start_response = self._create_start_response(
span, start_response
)

iterable = self.wsgi(environ, start_response)
return _end_span_after_iterating(iterable, span, tracer)
except: # noqa
# TODO Set span status (cf. https://github.com/open-telemetry/opentelemetry-python/issues/292)
span.end()
raise

Expand Down
Loading