From e402a04716ed0436d6bbe635b66fe23545ec2544 Mon Sep 17 00:00:00 2001 From: Kevin Lloyd Bernal Date: Fri, 19 Jan 2024 00:08:46 +0800 Subject: [PATCH] create a weak_cache in Injector --- docs/providers.rst | 8 ++++++++ scrapy_poet/injection.py | 11 +++++++++++ tests/test_injection.py | 10 ++++++++++ 3 files changed, 29 insertions(+) diff --git a/docs/providers.rst b/docs/providers.rst index a7433dcf..2e71cb17 100644 --- a/docs/providers.rst +++ b/docs/providers.rst @@ -127,6 +127,14 @@ which Scrapy has. Although they are quite similar in its intended purpose, could be anything that could stretch beyond Scrapy's ``Responses`` `(e.g. Network Database queries, API Calls, AWS S3 files, etc)`. +.. note:: + + The :class:`scrapy_poet.injection.Injector` maintains a ``.weak_cache`` which + stores the instances created by the providers as long as the corresponding + :class:`scrapy.Request <scrapy.http.Request>` instance exists. This means that + the instances created by earlier providers can be accessed and reused by later + providers. This is turned on by default and the instances are stored in memory. + Configuring providers ===================== diff --git a/scrapy_poet/injection.py b/scrapy_poet/injection.py index d662ed0d..22c04a6f 100644 --- a/scrapy_poet/injection.py +++ b/scrapy_poet/injection.py @@ -5,6 +5,7 @@ import pprint import warnings from typing import Any, Callable, Dict, List, Mapping, Optional, Set, Type, cast +from weakref import WeakKeyDictionary import andi from andi.typeutils import issubclass_safe @@ -95,6 +96,11 @@ def init_cache(self): # noqa: D102 f"Cache enabled. Folder: {cache_path!r}. Caching errors: {self.caching_errors}" ) + # This is different from the cache above as it only stores instances as long + # as the request exists. This is useful for later providers to re-use the + # instances already built by earlier providers. 
+ self.weak_cache: WeakKeyDictionary[Request, Dict] = WeakKeyDictionary() + def available_dependencies_for_providers( self, request: Request, response: Response ): # noqa: D102 @@ -294,6 +300,11 @@ def build_instances_from_providers( ) instances.update(objs_by_type) + if self.weak_cache.get(request): + self.weak_cache[request].update(objs_by_type) + else: + self.weak_cache[request] = objs_by_type + if self.cache and not cache_hit: # Save the results in the cache self.cache[fingerprint] = serialize(objs) diff --git a/tests/test_injection.py b/tests/test_injection.py index 65690173..c8ecb304 100644 --- a/tests/test_injection.py +++ b/tests/test_injection.py @@ -203,6 +203,7 @@ def callback( ClsReqResponse: ClsReqResponse(), ClsNoProviderRequired: ClsNoProviderRequired(), } + assert injector.weak_cache.get(request).keys() == {ClsReqResponse, Cls1, Cls2} instances = yield from injector.build_instances_from_providers( request, response, plan @@ -212,6 +213,7 @@ def callback( Cls2: Cls2(), ClsReqResponse: ClsReqResponse(), } + assert injector.weak_cache.get(request).keys() == {ClsReqResponse, Cls1, Cls2} @inlineCallbacks def test_build_instances_from_providers_unexpected_return(self): @@ -230,6 +232,7 @@ def callback(response: DummyResponse, a: Cls1): yield from injector.build_instances_from_providers( response.request, response, plan ) + assert injector.weak_cache.get(response.request) is None assert "Provider" in str(exinf.value) assert "Cls2" in str(exinf.value) @@ -256,6 +259,7 @@ def callback(response: DummyResponse, arg: str): instances = yield from injector.build_instances_from_providers( response.request, response, plan ) + assert injector.weak_cache.get(response.request).keys() == {str} assert instances[str] == min(str_list) @@ -628,6 +632,7 @@ def callback_factory(): if name.startswith(prefix) } assert set(poet_stats) == expected + assert injector.weak_cache.get(response.request) is None @inlineCallbacks def test_po_provided_via_item(self): @@ -642,6 +647,7 @@ 
def callback(response: DummyResponse, item: TestItem): _ = yield from injector.build_callback_dependencies(response.request, response) key = "poet/injector/tests.test_injection.TestItemPage" assert key in set(injector.crawler.stats.get_stats()) + assert injector.weak_cache.get(response.request) is None class TestInjectorOverrides: @@ -787,6 +793,7 @@ def callback(response: DummyResponse, arg_price: Price, arg_name: Name): response.request, response, plan ) assert cache.exists() + assert injector.weak_cache.get(response.request).keys() == {Price, Name} validate_instances(instances) @@ -799,6 +806,7 @@ def callback(response: DummyResponse, arg_price: Price, arg_name: Name): instances = yield from injector.build_instances_from_providers( response.request, response, plan ) + assert injector.weak_cache.get(response.request) is None # Different providers. They return a different result, but the cache data should prevail. providers = { @@ -812,6 +820,7 @@ def callback(response: DummyResponse, arg_price: Price, arg_name: Name): instances = yield from injector.build_instances_from_providers( response.request, response, plan ) + assert injector.weak_cache.get(response.request).keys() == {Price, Name} validate_instances(instances) @@ -823,3 +832,4 @@ def callback(response: DummyResponse, arg_price: Price, arg_name: Name): instances = yield from injector.build_instances_from_providers( response.request, response, plan ) + assert injector.weak_cache.get(response.request) is None