From 34e50c456fed467a26ab0a90c0d1f3f4e4ed1996 Mon Sep 17 00:00:00 2001 From: Dan Rogers Date: Mon, 24 Oct 2022 12:09:11 -0400 Subject: [PATCH] Add support for regular expression matching and sanitizing of headers in WSGI. --- CHANGELOG.md | 2 + .../instrumentation/wsgi/__init__.py | 150 ++++++++++++------ .../tests/test_wsgi_middleware.py | 37 ++++- 3 files changed, 140 insertions(+), 49 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dbf86c5fd0..7e40ddf6c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#1333](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1333)) - `opentelemetry-instrumentation-asgi` metrics record target attribute (FastAPI only) ([#1323](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1323)) +- `opentelemetry-instrumentation-wsgi` Add support for regular expression matching of HTTP headers. + ([#1402](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1402)) ### Fixed diff --git a/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py b/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py index 128866f82c..d3f27e160b 100644 --- a/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py @@ -85,8 +85,15 @@ def GET(self): Request/Response hooks ********************** -Utilize request/response hooks to execute custom logic to be performed before/after performing a request. Environ is an instance of WSGIEnvironment. -Response_headers is a list of key-value (tuples) representing the response headers returned from the response. +This instrumentation supports request and response hooks. 
These are functions that get called +right after a span is created for a request and right before the span is finished for the response. + +- The client request hook is called with the internal span and an instance of WSGIEnvironment when the method + ``receive`` is called. +- The client response hook is called with the internal span, the status of the response and a list of key-value (tuples) + representing the response headers returned from the response when the method ``send`` is called. + +For example, .. code-block:: python @@ -102,54 +109,93 @@ def response_hook(span: Span, environ: WSGIEnvironment, status: str, response_he Capture HTTP request and response headers ***************************************** -You can configure the agent to capture predefined HTTP headers as span attributes, according to the `semantic convention <https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/http.md#http-request-and-response-headers>`_. +You can configure the agent to capture specified HTTP headers as span attributes, according to the +`semantic convention <https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/http.md#http-request-and-response-headers>`_. Request headers *************** -To capture predefined HTTP request headers as span attributes, set the environment variable ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST`` -to a comma-separated list of HTTP header names. +To capture HTTP request headers as span attributes, set the environment variable +``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST`` to a comma delimited list of HTTP header names. For example, - :: export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST="content-type,custom_request_header" -will extract ``content-type`` and ``custom_request_header`` from request headers and add them as span attributes. +will extract ``content-type`` and ``custom_request_header`` from the request headers and add them as span attributes. + +Request header names in WSGI are case-insensitive and ``-`` characters are replaced by ``_``. So, giving the header +name as ``CUStom_Header`` in the environment variable will capture the header named ``custom-header``. 
+ +Regular expressions may also be used to match multiple headers that correspond to the given pattern. For example: +:: + + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST="Accept.*,X-.*" + +Would match all request headers that start with ``Accept`` and ``X-``. -It is recommended that you should give the correct names of the headers to be captured in the environment variable. -Request header names in wsgi are case insensitive and - characters are replaced by _. So, giving header name as ``CUStom_Header`` in environment variable will be able capture header with name ``custom-header``. +To capture all request headers, set ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST`` to ``".*"``. +:: + + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST=".*" -The name of the added span attribute will follow the format ``http.request.header.<header_name>`` where ``<header_name>`` being the normalized HTTP header name (lowercase, with - characters replaced by _ ). -The value of the attribute will be single item list containing all the header values. +The name of the added span attribute will follow the format ``http.request.header.<header_name>`` where ``<header_name>`` +is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a +single item list containing all the header values. -Example of the added span attribute, +For example: ``http.request.header.custom_request_header = ["<value1>,<value2>"]`` Response headers **************** -To capture predefined HTTP response headers as span attributes, set the environment variable ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE`` -to a comma-separated list of HTTP header names. +To capture HTTP response headers as span attributes, set the environment variable +``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE`` to a comma delimited list of HTTP header names. 
For example, - :: export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE="content-type,custom_response_header" -will extract ``content-type`` and ``custom_response_header`` from response headers and add them as span attributes. +will extract ``content-type`` and ``custom_response_header`` from the response headers and add them as span attributes. + +Response header names in WSGI are case-insensitive. So, giving the header name as ``CUStom-Header`` in the environment +variable will capture the header named ``custom-header``. + +Regular expressions may also be used to match multiple headers that correspond to the given pattern. For example: +:: + + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE="Content.*,X-.*" + +Would match all response headers that start with ``Content`` and ``X-``. + +To capture all response headers, set ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE`` to ``".*"``. +:: -It is recommended that you should give the correct names of the headers to be captured in the environment variable. -Response header names captured in wsgi are case insensitive. So, giving header name as ``CUStomHeader`` in environment variable will be able capture header with name ``customheader``. + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE=".*" -The name of the added span attribute will follow the format ``http.response.header.<header_name>`` where ``<header_name>`` being the normalized HTTP header name (lowercase, with - characters replaced by _ ). -The value of the attribute will be single item list containing all the header values. +The name of the added span attribute will follow the format ``http.response.header.<header_name>`` where ``<header_name>`` +is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a +single item list containing all the header values. 
-Example of the added span attribute, +For example: ``http.response.header.custom_response_header = ["<value1>,<value2>"]`` +Sanitizing headers +****************** +In order to prevent storing sensitive data such as personally identifiable information (PII), session keys, passwords, +etc, set the environment variable ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS`` +to a comma delimited list of HTTP header names to be sanitized. Regexes may be used, and all header names will be +matched in a case-insensitive manner. + +For example, +:: + + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS=".*session.*,set-cookie" + +will replace the value of headers such as ``session-id`` and ``set-cookie`` with ``[REDACTED]`` in the span. + Note: - Environment variable names to capture http headers are still experimental, and thus are subject to change. + The environment variable names used to capture HTTP headers are still experimental, and thus are subject to change. API --- @@ -172,8 +218,10 @@ def response_hook(span: Span, environ: WSGIEnvironment, status: str, response_he from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.trace.status import Status, StatusCode from opentelemetry.util.http import ( + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS, OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST, OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE, + SanitizeValue, get_custom_headers, normalise_request_header_name, normalise_response_header_name, @@ -293,38 +341,48 @@ def collect_custom_request_headers_attributes(environ): """Returns custom HTTP request headers which are configured by the user from the PEP3333-conforming WSGI environ to be used as span creation attributes as described in the specification https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/http.md#http-request-and-response-headers""" - attributes = {} - custom_request_headers_name = 
get_custom_headers( - OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST + + sanitize = SanitizeValue( + get_custom_headers( + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS + ) + ) + + headers = { + key[_CARRIER_KEY_PREFIX_LEN:].replace("_", "-"): val + for key, val in environ.items() + if key.startswith(_CARRIER_KEY_PREFIX) + } + + return sanitize.sanitize_header_values( + headers, + get_custom_headers( + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST + ), + normalise_request_header_name, ) - for header_name in custom_request_headers_name: - wsgi_env_var = header_name.upper().replace("-", "_") - header_values = environ.get(f"HTTP_{wsgi_env_var}") - if header_values: - key = normalise_request_header_name(header_name) - attributes[key] = [header_values] - return attributes def collect_custom_response_headers_attributes(response_headers): """Returns custom HTTP response headers which are configured by the user from the PEP3333-conforming WSGI environ as described in the specification https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/http.md#http-request-and-response-headers""" - attributes = {} - custom_response_headers_name = get_custom_headers( - OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE + + sanitize = SanitizeValue( + get_custom_headers( + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS + ) + ) + + response_headers = dict(response_headers) + + return sanitize.sanitize_header_values( + response_headers, + get_custom_headers( + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE + ), + normalise_response_header_name, ) - response_headers_dict = {} - if response_headers: - for header_name, header_value in response_headers: - response_headers_dict[header_name.lower()] = header_value - - for header_name in custom_response_headers_name: - header_values = response_headers_dict.get(header_name.lower()) - if header_values: - key = 
normalise_response_header_name(header_name) - attributes[key] = [header_values] - return attributes def _parse_status_code(resp_status): diff --git a/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py b/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py index eeaad4c3cb..6073b9daa7 100644 --- a/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py +++ b/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py @@ -30,6 +30,7 @@ from opentelemetry.test.wsgitestutil import WsgiTestBase from opentelemetry.trace import StatusCode from opentelemetry.util.http import ( + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS, OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST, OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE, ) @@ -98,6 +99,15 @@ def wsgi_with_custom_response_headers(environ, start_response): ("content-type", "text/plain; charset=utf-8"), ("content-length", "100"), ("my-custom-header", "my-custom-value-1,my-custom-header-2"), + ( + "my-custom-regex-header-1", + "my-custom-regex-value-1,my-custom-regex-value-2", + ), + ( + "My-Custom-Regex-Header-2", + "my-custom-regex-value-3,my-custom-regex-value-4", + ), + ("My-Secret-Header", "My Secret Value"), ], ) return [b"*"] @@ -521,7 +531,8 @@ def iterate_response(self, response): @mock.patch.dict( "os.environ", { - OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST: "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3", + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS: ".*my-secret.*", + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST: "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3,Regex-Test-Header-.*,Regex-Invalid-Test-Header-.*,.*my-secret.*", }, ) def test_custom_request_headers_non_recording_span(self): @@ -531,6 +542,9 @@ def test_custom_request_headers_non_recording_span(self): { "HTTP_CUSTOM_TEST_HEADER_1": "Test Value 2", 
"HTTP_CUSTOM_TEST_HEADER_2": "TestValue2,TestValue3", + "HTTP_REGEX_TEST_HEADER_1": "Regex Test Value 1", + "HTTP_REGEX_TEST_HEADER_2": "RegexTestValue2,RegexTestValue3", + "HTTP_MY_SECRET_HEADER": "My Secret Value", } ) app = otel_wsgi.OpenTelemetryMiddleware( @@ -544,7 +558,8 @@ def test_custom_request_headers_non_recording_span(self): @mock.patch.dict( "os.environ", { - OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST: "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3" + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS: ".*my-secret.*", + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST: "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3,Regex-Test-Header-.*,Regex-Invalid-Test-Header-.*,.*my-secret.*", }, ) def test_custom_request_headers_added_in_server_span(self): @@ -552,6 +567,9 @@ def test_custom_request_headers_added_in_server_span(self): { "HTTP_CUSTOM_TEST_HEADER_1": "Test Value 1", "HTTP_CUSTOM_TEST_HEADER_2": "TestValue2,TestValue3", + "HTTP_REGEX_TEST_HEADER_1": "Regex Test Value 1", + "HTTP_REGEX_TEST_HEADER_2": "RegexTestValue2,RegexTestValue3", + "HTTP_MY_SECRET_HEADER": "My Secret Value", } ) app = otel_wsgi.OpenTelemetryMiddleware(simple_wsgi) @@ -563,6 +581,11 @@ def test_custom_request_headers_added_in_server_span(self): "http.request.header.custom_test_header_2": ( "TestValue2,TestValue3", ), + "http.request.header.regex_test_header_1": ("Regex Test Value 1",), + "http.request.header.regex_test_header_2": ( + "RegexTestValue2,RegexTestValue3", + ), + "http.request.header.my_secret_header": ("[REDACTED]",), } self.assertSpanHasAttributes(span, expected) @@ -595,7 +618,8 @@ def test_custom_request_headers_not_added_in_internal_span(self): @mock.patch.dict( "os.environ", { - OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE: "content-type,content-length,my-custom-header,invalid-header" + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS: ".*my-secret.*", + 
OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE: "content-type,content-length,my-custom-header,invalid-header,my-custom-regex-header-.*,invalid-regex-header-.*,.*my-secret.*", }, ) def test_custom_response_headers_added_in_server_span(self): @@ -613,6 +637,13 @@ def test_custom_response_headers_added_in_server_span(self): "http.response.header.my_custom_header": ( "my-custom-value-1,my-custom-header-2", ), + "http.response.header.my_custom_regex_header_1": ( + "my-custom-regex-value-1,my-custom-regex-value-2", + ), + "http.response.header.my_custom_regex_header_2": ( + "my-custom-regex-value-3,my-custom-regex-value-4", + ), + "http.response.header.my_secret_header": ("[REDACTED]",), } self.assertSpanHasAttributes(span, expected)