Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for annotated deps in savefixture. #190

Merged
merged 7 commits into from
Mar 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions docs/providers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,8 @@ To handle this you need the following changes in your provider:
.. code-block:: python

from andi.typeutils import strip_annotated
from scrapy_poet import AnnotatedResult, PageObjectInputProvider
from scrapy_poet import PageObjectInputProvider
from web_poet.annotated import AnnotatedInstance


class Provider(PageObjectInputProvider):
Expand All @@ -360,10 +361,7 @@ To handle this you need the following changes in your provider:
metadata = getattr(cls, "__metadata__", None)
obj = ... # create the instance using cls and metadata
if metadata:
# wrap the instance into a scrapy_poet.AnnotatedResult object
obj = AnnotatedResult(obj, metadata)
# wrap the instance into a web_poet.annotated.AnnotatedInstance object
obj = AnnotatedInstance(obj, metadata)
result.append(obj)
return result

.. autoclass:: scrapy_poet.AnnotatedResult
:members:
2 changes: 1 addition & 1 deletion scrapy_poet/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .api import AnnotatedResult, DummyResponse, callback_for
from .api import DummyResponse, callback_for
from .downloadermiddlewares import DownloaderStatsMiddleware, InjectionMiddleware
from .page_input_providers import HttpResponseProvider, PageObjectInputProvider
from .spidermiddlewares import RetryMiddleware
Expand Down
28 changes: 1 addition & 27 deletions scrapy_poet/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from dataclasses import dataclass
from inspect import iscoroutinefunction
from typing import Any, Callable, Optional, Tuple, Type
from typing import Callable, Optional, Type

from scrapy.http import Request, Response
from web_poet.pages import ItemPage
Expand Down Expand Up @@ -134,28 +133,3 @@ def parse(*args, item: page_or_item_cls, **kwargs): # type:ignore

setattr(parse, _CALLBACK_FOR_MARKER, True)
return parse


@dataclass
class AnnotatedResult:
    """Pairs a dependency instance with the metadata of its annotation.

    A provider that receives a :data:`typing.Annotated` type as a dependency
    type returns an ``AnnotatedResult`` for it, which lets the caller match
    the produced instance back to its annotation.

    :param result: The wrapped dependency instance.
    :type result: Any

    :param metadata: The copy of the annotation.
    :type metadata: Tuple[Any, ...]
    """

    result: Any
    metadata: Tuple[Any, ...]

    def get_annotated_cls(self):
        """Rebuild the :class:`typing.Annotated` type for the wrapped instance."""
        # Imported inside the method rather than at module level; presumably
        # this keeps the module importable where typing.Annotated is missing.
        from typing import Annotated

        # Annotated[...] takes the base type followed by every metadata item.
        annotated_args = (type(self.result), *self.metadata)
        return Annotated[annotated_args]
7 changes: 6 additions & 1 deletion scrapy_poet/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from scrapy.utils.misc import load_object
from twisted.internet.defer import inlineCallbacks
from web_poet import ItemPage
from web_poet.annotated import AnnotatedInstance
from web_poet.exceptions import PageObjectAction
from web_poet.testing import Fixture
from web_poet.utils import ensure_awaitable
Expand Down Expand Up @@ -43,7 +44,11 @@
request, response, plan
)
if request.meta.get("savefixture", False):
saved_dependencies.extend(instances.values())
for cls, value in instances.items():
metadata = getattr(cls, "__metadata__", None)
if metadata:
value = AnnotatedInstance(value, metadata)
saved_dependencies.append(value)

Check warning on line 51 in scrapy_poet/commands.py

View check run for this annotation

Codecov / codecov/patch

scrapy_poet/commands.py#L47-L51

Added lines #L47 - L51 were not covered by tests
return instances


Expand Down
5 changes: 3 additions & 2 deletions scrapy_poet/injection.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,13 @@
from scrapy.utils.misc import load_object
from twisted.internet.defer import inlineCallbacks
from web_poet import RulesRegistry
from web_poet.annotated import AnnotatedInstance
from web_poet.page_inputs.http import request_fingerprint
from web_poet.pages import ItemPage, is_injectable
from web_poet.serialization.api import deserialize_leaf, load_class, serialize
from web_poet.utils import get_fq_class_name

from scrapy_poet.api import _CALLBACK_FOR_MARKER, AnnotatedResult, DummyResponse
from scrapy_poet.api import _CALLBACK_FOR_MARKER, DummyResponse
from scrapy_poet.cache import SerializedDataCache
from scrapy_poet.injection_errors import (
NonCallableProviderError,
Expand Down Expand Up @@ -300,7 +301,7 @@ def build_instances_from_providers(

objs_by_type: Dict[Callable, Any] = {}
for obj in objs:
if isinstance(obj, AnnotatedResult):
if isinstance(obj, AnnotatedInstance):
cls = obj.get_annotated_cls()
obj = obj.result
else:
Expand Down
4 changes: 3 additions & 1 deletion scrapy_poet/utils/mockserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@ def get_ephemeral_port():


class MockServer:
def __init__(self, resource, port=None):
def __init__(self, resource, port=None, pythonpath=None):
self.resource = "{0}.{1}".format(resource.__module__, resource.__name__)
self.proc = None
host = socket.gethostbyname(socket.gethostname())
self.port = port or get_ephemeral_port()
self.root_url = "http://%s:%d" % (host, self.port)
self.pythonpath = pythonpath or ""

def __enter__(self):
self.proc = Popen(
Expand All @@ -35,6 +36,7 @@ def __enter__(self):
str(self.port),
],
stdout=PIPE,
env={"PYTHONPATH": self.pythonpath},
)
self.proc.stdout.readline()
return self
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@
"parsel >= 1.5.0",
"scrapy >= 2.6.0",
"sqlitedict >= 1.5.0",
"time_machine >= 2.2.0",
"time_machine >= 2.7.1",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new web-poet release requires time_machine >= 2.7.1.

"twisted >= 18.9.0",
"url-matcher >= 0.2.0",
"web-poet >= 0.15.1",
"web-poet >= 0.17.0",
],
classifiers=[
"Development Status :: 3 - Alpha",
Expand Down
134 changes: 120 additions & 14 deletions tests/test_commands.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import datetime
import json
import os
import subprocess
import sys
import tempfile
from pathlib import Path

import pytest
from twisted.web.resource import Resource
from web_poet.testing import Fixture

Expand All @@ -17,6 +19,8 @@
ProductHtml,
)

pytest_plugins = ["pytester"]


def call_scrapy_command(cwd: str, *args: str) -> None:
with tempfile.TemporaryFile() as out:
Expand All @@ -34,18 +38,22 @@ def __init__(self):
self.putChild(b"drop", DropResource())


def test_savefixture(tmp_path) -> None:
def _get_pythonpath() -> str:
    """Return the parent of this file's directory, for use as ``PYTHONPATH``.

    The pytester fixture changes the working directory, so the mock server
    needs this path to be able to find ``CustomResource``.
    """
    this_dir = Path(os.path.dirname(__file__))
    return str(this_dir.parent)


def test_savefixture(pytester) -> None:
project_name = "foo"
cwd = Path(tmp_path)
cwd = Path(pytester.path)
call_scrapy_command(str(cwd), "startproject", project_name)
cwd /= project_name
type_name = "foo.po.BTSBookPage"
(cwd / project_name / "po.py").write_text(
"""
import attrs
from web_poet import HttpClient
from web_poet import HttpClient, WebPage
from web_poet.exceptions import HttpRequestError, HttpResponseError
from web_poet.pages import WebPage


@attrs.define
Expand All @@ -69,8 +77,7 @@ async def to_item(self):
}
"""
)

with MockServer(CustomResource) as server:
with MockServer(CustomResource, pythonpath=_get_pythonpath()) as server:
call_scrapy_command(
str(cwd),
"savefixture",
Expand All @@ -91,11 +98,14 @@ async def to_item(self):
frozen_time_str = json.loads(fixture.meta_path.read_bytes())["frozen_time"]
frozen_time = datetime.datetime.fromisoformat(frozen_time_str)
assert frozen_time.microsecond == 0
os.chdir(cwd)
result = pytester.runpytest_subprocess()
result.assert_outcomes(passed=4)


def test_savefixture_spider(tmp_path) -> None:
def test_savefixture_spider(pytester) -> None:
project_name = "foo"
cwd = Path(tmp_path)
cwd = Path(pytester.path)
call_scrapy_command(str(cwd), "startproject", project_name)
cwd /= project_name

Expand All @@ -115,7 +125,7 @@ class MySpider(Spider):
(cwd / project_name / "po.py").write_text(
"""
import json
from web_poet.pages import WebPage
from web_poet import WebPage


class HeadersPage(WebPage):
Expand All @@ -136,18 +146,21 @@ async def to_item(self):
assert fixture.is_valid()
item = json.loads(fixture.output_path.read_bytes())
assert item == {"ua": ["scrapy/savefixture"]}
os.chdir(cwd)
result = pytester.runpytest_subprocess()
result.assert_outcomes(passed=3)


def test_savefixture_expected_exception(tmp_path) -> None:
def test_savefixture_expected_exception(pytester) -> None:
project_name = "foo"
cwd = Path(tmp_path)
cwd = Path(pytester.path)
call_scrapy_command(str(cwd), "startproject", project_name)
cwd /= project_name
type_name = "foo.po.SamplePage"
(cwd / project_name / "po.py").write_text(
"""
from web_poet import WebPage
from web_poet.exceptions import UseFallback
from web_poet.pages import WebPage


class SamplePage(WebPage):
Expand All @@ -166,11 +179,14 @@ def to_item(self):
json.loads(fixture.exception_path.read_bytes())["import_path"]
== "web_poet.exceptions.core.UseFallback"
)
os.chdir(cwd)
result = pytester.runpytest_subprocess()
result.assert_outcomes(passed=1)


def test_savefixture_adapter(tmp_path) -> None:
def test_savefixture_adapter(pytester) -> None:
project_name = "foo"
cwd = Path(tmp_path)
cwd = Path(pytester.path)
call_scrapy_command(str(cwd), "startproject", project_name)
cwd /= project_name
type_name = "foo.po.BTSBookPage"
Expand Down Expand Up @@ -222,3 +238,93 @@ class CustomItemAdapter(ItemAdapter):
assert fixture.is_valid()
item = json.loads(fixture.output_path.read_bytes())
assert item == {"name": "chocolate"}
os.chdir(cwd)
result = pytester.runpytest_subprocess()
result.assert_outcomes(passed=3)


# Checks that the "savefixture" command works for a page object whose
# dependency is declared with typing.Annotated: the saved fixture must contain
# the dependency wrapped in an AnnotatedInstance.
@pytest.mark.skipif(
sys.version_info < (3, 9), reason="No Annotated support in Python < 3.9"
)
def test_savefixture_annotated(pytester) -> None:
project_name = "foo"
cwd = Path(pytester.path)
call_scrapy_command(str(cwd), "startproject", project_name)
cwd /= project_name
type_name = "foo.po.BTSBookPage"
# Provider that accepts Annotated[HttpResponse, ...]: it strips the annotation
# in is_provided() and wraps the built HttpResponse in an AnnotatedInstance
# whenever the requested type carries __metadata__.
(cwd / project_name / "providers.py").write_text(
"""
from andi.typeutils import strip_annotated
from scrapy.http import Response
from scrapy_poet import HttpResponseProvider
from web_poet import HttpResponse, HttpResponseHeaders
from web_poet.annotated import AnnotatedInstance


class AnnotatedHttpResponseProvider(HttpResponseProvider):
def is_provided(self, type_) -> bool:
return super().is_provided(strip_annotated(type_))

def __call__(self, to_provide, response: Response):
result = []
for cls in to_provide:
obj = HttpResponse(
url=response.url,
body=response.body,
status=response.status,
headers=HttpResponseHeaders.from_bytes_dict(response.headers),
)
if metadata := getattr(cls, "__metadata__", None):
obj = AnnotatedInstance(obj, metadata)
result.append(obj)
return result
"""
)
# Page object under test: its response dependency is an annotated HttpResponse.
(cwd / project_name / "po.py").write_text(
"""
from typing import Annotated

import attrs
from web_poet import HttpResponse, WebPage


@attrs.define
class BTSBookPage(WebPage):

response: Annotated[HttpResponse, "foo", 42]

async def to_item(self):
return {
'url': self.url,
'name': self.css("h1.name::text").get(),
}
"""
)
# Register the custom provider by appending to the generated project settings.
with (cwd / project_name / "settings.py").open("a") as f:
f.write(
f"""
SCRAPY_POET_PROVIDERS = {{"{project_name}.providers.AnnotatedHttpResponseProvider": 500}}
"""
)

# Run savefixture for the page object against the mock server's root URL.
with MockServer(CustomResource, pythonpath=_get_pythonpath()) as server:
call_scrapy_command(
str(cwd),
"savefixture",
type_name,
f"{server.root_url}",
)
fixtures_dir = cwd / "fixtures"
fixture_dir = fixtures_dir / type_name / "test-1"
fixture = Fixture(fixture_dir)
assert fixture.is_valid()
# The saved inputs must include both the annotation metadata and the wrapped
# response body, stored under "AnnotatedInstance HttpResponse" file names.
assert (
fixture.input_path / "AnnotatedInstance HttpResponse-metadata.json"
).exists()
assert (
fixture.input_path / "AnnotatedInstance HttpResponse-result-body.html"
).exists()
assert fixture.meta_path.exists()
# Run pytest in a subprocess from the project directory; four tests must pass.
os.chdir(cwd)
result = pytester.runpytest_subprocess()
result.assert_outcomes(passed=4)
6 changes: 3 additions & 3 deletions tests/test_injection.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@
from url_matcher import Patterns
from url_matcher.util import get_domain
from web_poet import Injectable, ItemPage, RulesRegistry, field
from web_poet.annotated import AnnotatedInstance
from web_poet.mixins import ResponseShortcutsMixin
from web_poet.rules import ApplyRule

from scrapy_poet import DummyResponse, HttpResponseProvider, PageObjectInputProvider
from scrapy_poet.injection import (
AnnotatedResult,
Injector,
check_all_providers_are_callable,
get_injector_for_testing,
Expand Down Expand Up @@ -50,7 +50,7 @@ def __call__(self, to_provide):
for cls in to_provide:
obj = cls(content) if content else cls()
if metadata := getattr(cls, "__metadata__", None):
obj = AnnotatedResult(obj, metadata)
obj = AnnotatedInstance(obj, metadata)
result.append(obj)
return result

Expand Down Expand Up @@ -445,7 +445,7 @@ def __call__(self, to_provide):
processed_classes.add(cls_stripped)
obj = cls()
if metadata := getattr(cls, "__metadata__", None):
obj = AnnotatedResult(obj, metadata)
obj = AnnotatedInstance(obj, metadata)
result.append(obj)
return result

Expand Down
Loading
Loading