From 9342700e1a2c4df39a356e50b9c3addbfaa38cba Mon Sep 17 00:00:00 2001 From: Diana Huang Date: Thu, 26 Sep 2024 11:45:33 -0400 Subject: [PATCH] feat: Add root span tagging for exceptions. Datadog has issues with tagging the root span with error information. This change adds the functionality to tag the root span on exceptions. This also renames the CachedCustomMonitoringMiddleware into MonitoringSupportMiddleware so that it can be a central place for most monitoring middleware changes instead of adding a new one every time. At some point, CachedCustomMonitoringMiddleware will be removed. https://github.com/edx/edx-arch-experiments/issues/647 --- CHANGELOG.rst | 12 +++++++++ edx_django_utils/__init__.py | 2 +- edx_django_utils/monitoring/__init__.py | 3 ++- .../monitoring/internal/backends.py | 20 +++++++++++++++ .../monitoring/internal/middleware.py | 24 ++++++++++++++++-- .../tests/test_custom_monitoring.py | 25 +++++++++++++++---- 6 files changed, 77 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 1eb48550..75d9447b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -11,6 +11,18 @@ Change Log .. There should always be an "Unreleased" section for changes pending release. +[5.16.0] - 2024-09-27 +--------------------- +Added +~~~~~ +* Added a new method to backends for ``tag_root_span_with_error`` and added Datadog implementation of the functionality. +* Uses the new method to tag the root span when processing exceptions. + +Changed +~~~~~~~ +* Renamed ``CachedCustomMonitoringMiddleware`` to ``MonitoringSupportMiddleware`` and deprecated the old name. It will be removed in a future release. + + [5.15.0] - 2024-07-29 --------------------- Added diff --git a/edx_django_utils/__init__.py b/edx_django_utils/__init__.py index d2b27a88..60e79e78 100644 --- a/edx_django_utils/__init__.py +++ b/edx_django_utils/__init__.py @@ -2,7 +2,7 @@ EdX utilities for Django Application development.. """ -__version__ = "5.15.0" +__version__ = "5.16.0" default_app_config = ( "edx_django_utils.apps.EdxDjangoUtilsConfig" diff --git a/edx_django_utils/monitoring/__init__.py b/edx_django_utils/monitoring/__init__.py index 8b4a34e0..92b4d0d0 100644 --- a/edx_django_utils/monitoring/__init__.py +++ b/edx_django_utils/monitoring/__init__.py @@ -15,7 +15,8 @@ CookieMonitoringMiddleware, DeploymentMonitoringMiddleware, FrontendMonitoringMiddleware, - MonitoringMemoryMiddleware + MonitoringMemoryMiddleware, + MonitoringSupportMiddleware ) from .internal.transactions import get_current_transaction, ignore_transaction, set_monitoring_transaction_name from .internal.utils import ( diff --git a/edx_django_utils/monitoring/internal/backends.py b/edx_django_utils/monitoring/internal/backends.py index e37ba07c..7113a1e0 100644 --- a/edx_django_utils/monitoring/internal/backends.py +++ b/edx_django_utils/monitoring/internal/backends.py @@ -62,6 +62,14 @@ def create_span(self, name): or create a new root span if not currently in a span. """ + @abstractmethod + def tag_root_span_with_error(self, exception): + """ + Tags the root span with the given exception. This is primarily useful for + Datadog as New Relic handles this behavior correctly. Unclear if this is also needs to + be implemented for OTEL. + """ + class NewRelicBackend(TelemetryBackend): """ @@ -96,6 +104,10 @@ def create_span(self, name): nr_transaction = newrelic.agent.current_transaction() return newrelic.agent.FunctionTrace(nr_transaction, name) + def tag_root_span_with_error(self, exception): + # Does not need to be implemented for NewRelic, because it is handled automatically. + pass + class OpenTelemetryBackend(TelemetryBackend): """ @@ -121,6 +133,10 @@ def create_span(self, name): # Currently, this is not implemented. pass + def tag_root_span_with_error(self, exception): + # Currently, this is not implemented for OTel + pass + class DatadogBackend(TelemetryBackend): """ @@ -145,6 +161,10 @@ def record_exception(self): def create_span(self, name): return self.dd_tracer.trace(name) + def tag_root_span_with_error(self, exception): + root_span = self.dd_tracer.current_root_span() + root_span.set_exc_info(type(exception), exception, exception.__traceback__) + # We're using an lru_cache instead of assigning the result to a variable on # module load. With the default settings (pointing to a TelemetryBackend diff --git a/edx_django_utils/monitoring/internal/middleware.py b/edx_django_utils/monitoring/internal/middleware.py index 6c4a358f..981082ce 100644 --- a/edx_django_utils/monitoring/internal/middleware.py +++ b/edx_django_utils/monitoring/internal/middleware.py @@ -70,9 +70,12 @@ def record_python_version(): _set_custom_attribute('python_version', platform.python_version()) -class CachedCustomMonitoringMiddleware(MiddlewareMixin): +class MonitoringSupportMiddleware(MiddlewareMixin): """ - Middleware batch reports cached custom attributes at the end of a request. + Middleware to support monitoring. + + 1. Middleware batch reports cached custom attributes at the end of a request. + 2. Middleware adds error span tags to the root span. Make sure to add below the request cache in MIDDLEWARE. @@ -130,6 +133,13 @@ def _batch_report(cls): for key, value in attributes_cache.data.items(): _set_custom_attribute(key, value) + def _tag_root_span_with_error(self, exception): + """ + Tags the root span with the exception information for all configured backends. + """ + for backend in configured_backends(): + backend.tag_root_span_with_error(exception) + # Whether or not there was an exception, report any custom NR attributes that # may have been collected. @@ -145,6 +155,16 @@ def process_exception(self, request, exception): # pylint: disable=W0613 Django middleware handler to process an exception """ self._batch_report() + self._tag_root_span_with_error(exception) + + +class CachedCustomMonitoringMiddleware(MonitoringSupportMiddleware): + """ + DEPRECATED: Use MonitoringSupportMiddleware instead. + + This is the old name for the MonitoringSupportMiddleware. We are keeping it + around for backwards compatibility until it can be fully removed. + """ def _set_custom_attribute(key, value): diff --git a/edx_django_utils/monitoring/tests/test_custom_monitoring.py b/edx_django_utils/monitoring/tests/test_custom_monitoring.py index 59a4a12c..44594104 100644 --- a/edx_django_utils/monitoring/tests/test_custom_monitoring.py +++ b/edx_django_utils/monitoring/tests/test_custom_monitoring.py @@ -6,10 +6,10 @@ from unittest.mock import Mock, call, patch import ddt -from django.test import TestCase +from django.test import TestCase, override_settings from edx_django_utils.cache import RequestCache -from edx_django_utils.monitoring import CachedCustomMonitoringMiddleware, accumulate, get_current_transaction, increment +from edx_django_utils.monitoring import MonitoringSupportMiddleware, accumulate, get_current_transaction, increment from ..middleware import CachedCustomMonitoringMiddleware as DeprecatedCachedCustomMonitoringMiddleware from ..middleware import MonitoringCustomMetricsMiddleware as DeprecatedMonitoringCustomMetricsMiddleware @@ -31,8 +31,8 @@ def setUp(self): @patch('newrelic.agent') @ddt.data( - (CachedCustomMonitoringMiddleware, False, 'process_response'), - (CachedCustomMonitoringMiddleware, False, 'process_exception'), + (MonitoringSupportMiddleware, False, 'process_response'), + (MonitoringSupportMiddleware, False, 'process_exception'), (DeprecatedCachedCustomMonitoringMiddleware, True, 'process_response'), (DeprecatedMonitoringCustomMetricsMiddleware, True, 'process_response'), ) @@ -76,7 +76,7 @@ def test_accumulate_and_increment( @patch('newrelic.agent') @ddt.data( - (CachedCustomMonitoringMiddleware, False), + (MonitoringSupportMiddleware, False), (DeprecatedCachedCustomMonitoringMiddleware, True), (DeprecatedMonitoringCustomMetricsMiddleware, True), ) @@ -113,6 +113,21 @@ def test_accumulate_with_illegal_value( # Assert call args to newrelic.agent.add_custom_parameter(). mock_newrelic_agent.add_custom_parameter.assert_has_calls(nr_agent_calls_expected, any_order=True) + @patch('ddtrace.Tracer.current_root_span') + def test_error_tagging(self, mock_get_root_span): + # Ensure that we continue to support tagging exceptions in MonitoringSupportMiddleware. + # This is only implemented for DatadogBackend at the moment. + fake_exception = Exception() + mock_root_span = Mock() + mock_get_root_span.return_value = mock_root_span + with override_settings(OPENEDX_TELEMETRY=['edx_django_utils.monitoring.DatadogBackend']): + MonitoringSupportMiddleware(self.mock_response).process_exception( + 'fake request', fake_exception + ) + mock_root_span.set_exc_info.assert_called_with( + type(fake_exception), fake_exception, fake_exception.__traceback__ + ) + @patch('newrelic.agent') def test_get_current_transaction(self, mock_newrelic_agent): mock_newrelic_agent.current_transaction().name = 'fake-transaction'