Skip to content

Commit

Permalink
create a weak_cache in Injector
Browse files Browse the repository at this point in the history
  • Loading branch information
BurnzZ committed Jan 18, 2024
1 parent 3b2a0f7 commit e402a04
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 0 deletions.
8 changes: 8 additions & 0 deletions docs/providers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,14 @@ which Scrapy has. Although they are quite similar in its intended purpose,
could be anything that could stretch beyond Scrapy's ``Responses`` `(e.g. Network
Database queries, API Calls, AWS S3 files, etc)`.

.. note::

The :class:`scrapy_poet.injection.Injector` maintains a ``.weak_cache`` which
stores the instances created by the providers as long as the corresponding
:class:`scrapy.Request <scrapy.http.Request>` instance exists. This means that
the instances created by earlier providers can be accessed and reused by latter
providers. This is turned on by default and the instances are stored in memory.


Configuring providers
=====================
Expand Down
11 changes: 11 additions & 0 deletions scrapy_poet/injection.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pprint
import warnings
from typing import Any, Callable, Dict, List, Mapping, Optional, Set, Type, cast
from weakref import WeakKeyDictionary

import andi
from andi.typeutils import issubclass_safe
Expand Down Expand Up @@ -95,6 +96,11 @@ def init_cache(self): # noqa: D102
f"Cache enabled. Folder: {cache_path!r}. Caching errors: {self.caching_errors}"
)

# This is different from the cache above as it only stores instances as long
# as the request exists. This is useful for latter providers to re-use the
# already built instances by earlier providers.
self.weak_cache: WeakKeyDictionary[Request, Dict] = WeakKeyDictionary()

def available_dependencies_for_providers(
self, request: Request, response: Response
): # noqa: D102
Expand Down Expand Up @@ -294,6 +300,11 @@ def build_instances_from_providers(
)
instances.update(objs_by_type)

if self.weak_cache.get(request):
self.weak_cache[request].update(objs_by_type)
else:
self.weak_cache[request] = objs_by_type

if self.cache and not cache_hit:
# Save the results in the cache
self.cache[fingerprint] = serialize(objs)
Expand Down
10 changes: 10 additions & 0 deletions tests/test_injection.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ def callback(
ClsReqResponse: ClsReqResponse(),
ClsNoProviderRequired: ClsNoProviderRequired(),
}
assert injector.weak_cache.get(request).keys() == {ClsReqResponse, Cls1, Cls2}

instances = yield from injector.build_instances_from_providers(
request, response, plan
Expand All @@ -212,6 +213,7 @@ def callback(
Cls2: Cls2(),
ClsReqResponse: ClsReqResponse(),
}
assert injector.weak_cache.get(request).keys() == {ClsReqResponse, Cls1, Cls2}

@inlineCallbacks
def test_build_instances_from_providers_unexpected_return(self):
Expand All @@ -230,6 +232,7 @@ def callback(response: DummyResponse, a: Cls1):
yield from injector.build_instances_from_providers(
response.request, response, plan
)
assert injector.weak_cache.get(response.request) is None

assert "Provider" in str(exinf.value)
assert "Cls2" in str(exinf.value)
Expand All @@ -256,6 +259,7 @@ def callback(response: DummyResponse, arg: str):
instances = yield from injector.build_instances_from_providers(
response.request, response, plan
)
assert injector.weak_cache.get(response.request).keys() == {str}

assert instances[str] == min(str_list)

Expand Down Expand Up @@ -628,6 +632,7 @@ def callback_factory():
if name.startswith(prefix)
}
assert set(poet_stats) == expected
assert injector.weak_cache.get(response.request) is None

@inlineCallbacks
def test_po_provided_via_item(self):
Expand All @@ -642,6 +647,7 @@ def callback(response: DummyResponse, item: TestItem):
_ = yield from injector.build_callback_dependencies(response.request, response)
key = "poet/injector/tests.test_injection.TestItemPage"
assert key in set(injector.crawler.stats.get_stats())
assert injector.weak_cache.get(response.request) is None


class TestInjectorOverrides:
Expand Down Expand Up @@ -787,6 +793,7 @@ def callback(response: DummyResponse, arg_price: Price, arg_name: Name):
response.request, response, plan
)
assert cache.exists()
assert injector.weak_cache.get(response.request).keys() == {Price, Name}

validate_instances(instances)

Expand All @@ -799,6 +806,7 @@ def callback(response: DummyResponse, arg_price: Price, arg_name: Name):
instances = yield from injector.build_instances_from_providers(
response.request, response, plan
)
assert injector.weak_cache.get(response.request) is None

# Different providers. They return a different result, but the cache data should prevail.
providers = {
Expand All @@ -812,6 +820,7 @@ def callback(response: DummyResponse, arg_price: Price, arg_name: Name):
instances = yield from injector.build_instances_from_providers(
response.request, response, plan
)
assert injector.weak_cache.get(response.request).keys() == {Price, Name}

validate_instances(instances)

Expand All @@ -823,3 +832,4 @@ def callback(response: DummyResponse, arg_price: Price, arg_name: Name):
instances = yield from injector.build_instances_from_providers(
response.request, response, plan
)
assert injector.weak_cache.get(response.request) is None

0 comments on commit e402a04

Please sign in to comment.