From 49d7c539d95933c233b874c7dd17f824939773be Mon Sep 17 00:00:00 2001 From: Rytis Bagdziunas Date: Fri, 21 Apr 2023 00:32:03 +0200 Subject: [PATCH 01/12] Use a weak reference to sqlalchemy Engine to avoid memory leak Closes #1761 By using a weak reference to the `Engine` object, we can avoid the memory leak as disposed `Engines` get properly deallocated. Whenever `SQLAlchemy` is uninstrumented, we only trigger a removal for those event listeners which are listening for objects that haven't been garbage-collected yet. --- .../opentelemetry/instrumentation/sqlalchemy/engine.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py b/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py index ca691fc052..5b776ddf89 100644 --- a/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py +++ b/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py @@ -13,6 +13,7 @@ # limitations under the License. import os import re +import weakref from sqlalchemy.event import ( # pylint: disable=no-name-in-module listen, @@ -160,12 +161,15 @@ def _pool_checkout( @classmethod def _register_event_listener(cls, target, identifier, func, *args, **kw): listen(target, identifier, func, *args, **kw) - cls._remove_event_listener_params.append((target, identifier, func)) + cls._remove_event_listener_params.append((weakref.ref(target), identifier, func)) @classmethod def remove_all_event_listeners(cls): for remove_params in cls._remove_event_listener_params: - remove(*remove_params) + # Remove an event listener only if saved weak reference points to an object + # which has not been garbage collected + if remove_params[0]() is not None: + remove(*remove_params) cls._remove_event_listener_params.clear() def _operation_name(self, db_name, statement): From edf8d11f73abc7895aee277b8a15bd29902202a3 Mon Sep 17 00:00:00 2001 From: Rytis Bagdziunas Date: Fri, 21 Apr 2023 00:49:19 +0200 Subject: [PATCH 02/12] Made a mistake in resolving the weak reference --- .../src/opentelemetry/instrumentation/sqlalchemy/engine.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py b/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py index 5b776ddf89..b11ca284dc 100644 --- a/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py +++ b/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py @@ -165,11 +165,11 @@ def _register_event_listener(cls, target, identifier, func, *args, **kw): @classmethod def remove_all_event_listeners(cls): - for remove_params in cls._remove_event_listener_params: + for (weak_ref_target, identifier, func) in cls._remove_event_listener_params: # Remove an event listener only if saved weak reference points to an object # which has not been garbage collected - if remove_params[0]() is not None: - remove(*remove_params) + if weak_ref_target is not None: + remove(weak_ref_target(), identifier, func) cls._remove_event_listener_params.clear() def _operation_name(self, db_name, statement): From ed836534471ea342143c8ef37e382a9494c3c976 Mon Sep 17 00:00:00 2001 From: rbagd Date: Sat, 22 Apr 2023 13:53:27 +0200 Subject: [PATCH 03/12] Fixed formatting issues --- .../opentelemetry/instrumentation/sqlalchemy/engine.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py b/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py index b11ca284dc..03643bddcc 100644 --- a/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py +++ b/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py @@ -161,11 +161,17 @@ def _pool_checkout( @classmethod def _register_event_listener(cls, target, identifier, func, *args, **kw): listen(target, identifier, func, *args, **kw) - cls._remove_event_listener_params.append((weakref.ref(target), identifier, func)) + cls._remove_event_listener_params.append( + (weakref.ref(target), identifier, func) + ) @classmethod def remove_all_event_listeners(cls): - for (weak_ref_target, identifier, func) in cls._remove_event_listener_params: + for ( + weak_ref_target, + identifier, + func, + ) in cls._remove_event_listener_params: # Remove an event listener only if saved weak reference points to an object # which has not been garbage collected if weak_ref_target is not None: From 6b02c5d4e0d5b5d160f9840ebc6e92d3c0f8e35e Mon Sep 17 00:00:00 2001 From: rbagd Date: Sat, 22 Apr 2023 14:37:10 +0200 Subject: [PATCH 04/12] Updated changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a4e787b8bb..065b158b4c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `opentelemetry-instrumentation-system-metrics` Add `process.` prefix to `runtime.memory`, `runtime.cpu.time`, and `runtime.gc_count`. Change `runtime.memory` from count to UpDownCounter. ([#1735](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1735)) - Add request and response hooks for GRPC instrumentation (client only) ([#1706](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1706)) +- Fix memory leak in SQLAlchemy instrumentation where disposed `Engine` does not get garbage collected + ([#1761](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1771) ### Added From 04218472cd69cca4341f7efcc4e971e98c90e1a0 Mon Sep 17 00:00:00 2001 From: rbagd Date: Sat, 22 Apr 2023 21:49:00 +0200 Subject: [PATCH 05/12] Added unit test to check that engine was garbage collected --- .../tests/test_sqlalchemy.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py b/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py index 981da107db..0128913f36 100644 --- a/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py +++ b/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py @@ -307,3 +307,23 @@ def test_no_op_tracer_provider(self): cnx.execute("SELECT 1 + 1;").fetchall() spans = self.memory_exporter.get_finished_spans() self.assertEqual(len(spans), 0) + + def test_no_memory_leakage_if_engine_diposed(self): + SQLAlchemyInstrumentor().instrument() + import gc, weakref # pylint: disable=all + from sqlalchemy import create_engine # pylint: disable=all + + callback = mock.Mock() + def make_shortlived_engine(): + engine = create_engine("sqlite:///:memory:") + # Callback will be called if engine is deallocated during garbage + # collection + weakref.finalize(engine, callback) + with engine.connect() as conn: + conn.execute("SELECT 1 + 1;").fetchall() + + for _ in range(0, 5): + make_shortlived_engine() + + gc.collect() + assert callback.call_count == 5 From be480135e89f48e57eb2cfa4dd228074195b550e Mon Sep 17 00:00:00 2001 From: rbagd Date: Sat, 22 Apr 2023 21:51:14 +0200 Subject: [PATCH 06/12] Do not save engine in EngineTracer to avoid memory leak --- .../instrumentation/sqlalchemy/engine.py | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py b/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py index 03643bddcc..95101aa3e0 100644 --- a/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py +++ b/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py @@ -100,11 +100,11 @@ def __init__( commenter_options=None, ): self.tracer = tracer - self.engine = engine self.connections_usage = connections_usage self.vendor = _normalize_vendor(engine.name) self.enable_commenter = enable_commenter self.commenter_options = commenter_options if commenter_options else {} + self._engine_attrs = _get_attributes_from_engine(engine) self._leading_comment_remover = re.compile(r"^/\*.*?\*/") self._register_event_listener( @@ -119,14 +119,11 @@ def __init__( self._register_event_listener(engine, "checkin", self._pool_checkin) self._register_event_listener(engine, "checkout", self._pool_checkout) - def _get_pool_name(self): - return self.engine.pool.logging_name or "" - def _add_idle_to_connection_usage(self, value): self.connections_usage.add( value, attributes={ - "pool.name": self._get_pool_name(), + **self._engine_attrs, "state": "idle", }, ) @@ -135,7 +132,7 @@ def _add_used_to_connection_usage(self, value): self.connections_usage.add( value, attributes={ - "pool.name": self._get_pool_name(), + **self._engine_attrs, "state": "used", }, ) @@ -174,7 +171,7 @@ def remove_all_event_listeners(cls): ) in cls._remove_event_listener_params: # Remove an event listener only if saved weak reference points to an object # which has not been garbage collected - if weak_ref_target is not None: + if weak_ref_target() is not None: remove(weak_ref_target(), identifier, func) cls._remove_event_listener_params.clear() @@ -301,3 +298,14 @@ def _get_attributes_from_cursor(vendor, cursor, attrs): if info.port: attrs[SpanAttributes.NET_PEER_PORT] = int(info.port) return attrs + + +def _get_attributes_from_engine(engine): + """Set metadata attributes of the database engine""" + attrs = {} + + attrs["pool.name"] = getattr( + getattr(engine, "pool", None), "logging_name", "" + ) + + return attrs From bd32d9db4b4f73fdb8d0e8d50dfc3e3b086bbd6b Mon Sep 17 00:00:00 2001 From: rbagd Date: Sat, 22 Apr 2023 22:08:22 +0200 Subject: [PATCH 07/12] Add an empty line to satisfy black formatter --- .../tests/test_sqlalchemy.py | 1 + 1 file changed, 1 insertion(+) diff --git a/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py b/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py index 0128913f36..9036be898d 100644 --- a/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py +++ b/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py @@ -314,6 +314,7 @@ def test_no_memory_leakage_if_engine_diposed(self): from sqlalchemy import create_engine # pylint: disable=all callback = mock.Mock() + def make_shortlived_engine(): engine = create_engine("sqlite:///:memory:") # Callback will be called if engine is deallocated during garbage From 75d31d7fa111f1ad2f554efa29f2664c3d7616ff Mon Sep 17 00:00:00 2001 From: rbagd Date: Sat, 22 Apr 2023 22:34:44 +0200 Subject: [PATCH 08/12] Fix isort complaints --- .../tests/test_sqlalchemy.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py b/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py index 9036be898d..8f706ca8c8 100644 --- a/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py +++ b/instrumentation/opentelemetry-instrumentation-sqlalchemy/tests/test_sqlalchemy.py @@ -310,8 +310,10 @@ def test_no_op_tracer_provider(self): def test_no_memory_leakage_if_engine_diposed(self): SQLAlchemyInstrumentor().instrument() - import gc, weakref # pylint: disable=all - from sqlalchemy import create_engine # pylint: disable=all + import gc + import weakref + + from sqlalchemy import create_engine callback = mock.Mock() From 441afc79d6bb434684f69476b19bfa96c0ce77ca Mon Sep 17 00:00:00 2001 From: rbagd Date: Tue, 25 Apr 2023 23:48:15 +0200 Subject: [PATCH 09/12] Fixed the issue when pool name is not set and =None --- .../src/opentelemetry/instrumentation/sqlalchemy/engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py b/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py index 95101aa3e0..41784f3879 100644 --- a/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py +++ b/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py @@ -306,6 +306,6 @@ def _get_attributes_from_engine(engine): attrs["pool.name"] = getattr( getattr(engine, "pool", None), "logging_name", "" - ) + ) or "" return attrs From b50a258194de0b72bd80eec2fe5ab1859c3984e7 Mon Sep 17 00:00:00 2001 From: rbagd Date: Wed, 26 Apr 2023 00:11:51 +0200 Subject: [PATCH 10/12] Fix formatting issue --- .../src/opentelemetry/instrumentation/sqlalchemy/engine.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py b/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py index 41784f3879..c5a0c13cec 100644 --- a/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py +++ b/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py @@ -304,8 +304,8 @@ def _get_attributes_from_engine(engine): """Set metadata attributes of the database engine""" attrs = {} - attrs["pool.name"] = getattr( - getattr(engine, "pool", None), "logging_name", "" - ) or "" + attrs["pool.name"] = ( + getattr(getattr(engine, "pool", None), "logging_name", "") or "" + ) return attrs From afcf1212e4d23a341c9d75de53973ddd5ede20d8 Mon Sep 17 00:00:00 2001 From: rbagd Date: Sun, 7 May 2023 15:11:56 +0200 Subject: [PATCH 11/12] Rebased after changes in a recent commit --- .../instrumentation/sqlalchemy/engine.py | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py b/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py index 712067e482..0e18bc9bed 100644 --- a/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py +++ b/instrumentation/opentelemetry-instrumentation-sqlalchemy/src/opentelemetry/instrumentation/sqlalchemy/engine.py @@ -119,18 +119,6 @@ def __init__( self._register_event_listener(engine, "checkin", self._pool_checkin) self._register_event_listener(engine, "checkout", self._pool_checkout) - def _get_connection_string(self): - drivername = self.engine.url.drivername or "" - host = self.engine.url.host or "" - port = self.engine.url.port or "" - database = self.engine.url.database or "" - return f"{drivername}://{host}:{port}/{database}" - - def _get_pool_name(self): - if self.engine.pool.logging_name is not None: - return self.engine.pool.logging_name - return self._get_connection_string() - def _add_idle_to_connection_usage(self, value): self.connections_usage.add( value, @@ -312,12 +300,20 @@ def _get_attributes_from_cursor(vendor, cursor, attrs): return attrs +def _get_connection_string(engine): + drivername = engine.url.drivername or "" + host = engine.url.host or "" + port = engine.url.port or "" + database = engine.url.database or "" + return f"{drivername}://{host}:{port}/{database}" + + def _get_attributes_from_engine(engine): """Set metadata attributes of the database engine""" attrs = {} - attrs["pool.name"] = ( - getattr(getattr(engine, "pool", None), "logging_name", "") or "" - ) + attrs["pool.name"] = getattr( + getattr(engine, "pool", None), "logging_name", None + ) or _get_connection_string(engine) return attrs From 514491be7cb9d22a95d1fe23f372d17955728587 Mon Sep 17 00:00:00 2001 From: rbagd Date: Sun, 4 Jun 2023 21:22:34 +0200 Subject: [PATCH 12/12] Updated PR number in changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b2284df1c8..0bccfc3286 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add request and response hooks for GRPC instrumentation (client only) ([#1706](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1706)) - Fix memory leak in SQLAlchemy instrumentation where disposed `Engine` does not get garbage collected - ([#1761](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1771) + ([#1771](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1771) - `opentelemetry-instrumentation-pymemcache` Update instrumentation to support pymemcache >4 ([#1764](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1764))