diff --git a/docs/api-reference.rst b/docs/api-reference.rst index 5641c23a..abbfa5ec 100644 --- a/docs/api-reference.rst +++ b/docs/api-reference.rst @@ -27,6 +27,12 @@ Page Inputs :inherited-members: str,bytes,MultiDict :show-inheritance: +.. automodule:: web_poet.page_inputs.response + :members: + :undoc-members: + :inherited-members: str + :show-inheritance: + .. automodule:: web_poet.page_inputs.page_params :members: :undoc-members: diff --git a/docs/page-objects/inputs.rst b/docs/page-objects/inputs.rst index f9a0190a..cb6ff600 100644 --- a/docs/page-objects/inputs.rst +++ b/docs/page-objects/inputs.rst @@ -66,6 +66,11 @@ define as inputs for a page object class, including: status code and :class:`~web_poet.page_inputs.browser.BrowserHtml` of a rendered web page. +- :class:`~web_poet.page_inputs.response.AnyResponse`, which either holds + :class:`~web_poet.page_inputs.browser.BrowserResponse` or + :class:`~web_poet.page_inputs.http.HttpResponse` as the ``.response`` + instance, depending on which one is available or is more appropriate. + .. _Document Object Model: https://developer.mozilla.org/en-US/docs/Web/API/Document_Object_Model diff --git a/tests/test_page_inputs.py b/tests/test_page_inputs.py index 3bd70d4e..ed4e156a 100644 --- a/tests/test_page_inputs.py +++ b/tests/test_page_inputs.py @@ -8,6 +8,7 @@ from web_poet import BrowserResponse, RequestUrl, ResponseUrl from web_poet.page_inputs import ( + AnyResponse, BrowserHtml, HttpRequest, HttpRequestBody, @@ -642,3 +643,36 @@ def test_stats() -> None: stats.inc("c") assert stats._stats._stats == {"a": "1", "b": 8, "c": 1} + + +def test_http_or_browser_response() -> None: + url = "http://example.com" + html = "
<p>Hello, </p><p>world!</p>
" + + browser_response = BrowserResponse(url=url, html=html) + response_1 = AnyResponse(response=browser_response) + assert isinstance(response_1.response, BrowserResponse) + assert response_1.response == browser_response + + http_response = HttpResponse(url=url, body=html.encode()) + response_2 = AnyResponse(response=http_response) + assert isinstance(response_2.response, HttpResponse) + assert response_2.response == http_response + + for response in [response_1, response_2]: + assert isinstance(response.url, ResponseUrl) + assert str(response.url) == url + assert response.text == html + assert response.xpath("//p/text()").getall() == ["Hello, ", "world!"] + assert response.css("p::text").getall() == ["Hello, ", "world!"] + assert isinstance(response.selector, parsel.Selector) + assert str(response.urljoin("products")) == "http://example.com/products" + assert response.status is None + + response = AnyResponse(response=BrowserResponse(url=url, html=html, status=200)) + assert response.status == 200 + + response = AnyResponse( + response=HttpResponse(url=url, body=html.encode(), status=200) + ) + assert response.status == 200 diff --git a/tests/test_rules.py b/tests/test_rules.py index f36f01a7..7994d4ce 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -162,7 +162,7 @@ def test_apply_rule_kwargs_only() -> None: ApplyRule( "example.com", *[params[r] for r in remove], - **{k: v for k, v in params.items() if k not in remove}, # type: ignore[arg-type] + **{k: v for k, v in params.items() if k not in remove}, # type: ignore[arg-type] # noqa: B038 ) diff --git a/web_poet/__init__.py b/web_poet/__init__.py index e387c878..88c05e7a 100644 --- a/web_poet/__init__.py +++ b/web_poet/__init__.py @@ -1,5 +1,6 @@ from .fields import field, item_from_fields, item_from_fields_sync from .page_inputs import ( + AnyResponse, BrowserHtml, BrowserResponse, HttpClient, diff --git a/web_poet/page_inputs/__init__.py b/web_poet/page_inputs/__init__.py index 4fd9712c..9c781e29 
100644 --- a/web_poet/page_inputs/__init__.py +++ b/web_poet/page_inputs/__init__.py @@ -9,5 +9,6 @@ HttpResponseHeaders, ) from .page_params import PageParams +from .response import AnyResponse from .stats import Stats from .url import RequestUrl, ResponseUrl diff --git a/web_poet/page_inputs/response.py b/web_poet/page_inputs/response.py new file mode 100644 index 00000000..2b7d6a4c --- /dev/null +++ b/web_poet/page_inputs/response.py @@ -0,0 +1,35 @@ +from typing import Optional, Union + +import attrs + +from web_poet.mixins import SelectableMixin, UrlShortcutsMixin +from web_poet.page_inputs.browser import BrowserResponse +from web_poet.page_inputs.http import HttpResponse +from web_poet.page_inputs.url import ResponseUrl + + +@attrs.define +class AnyResponse(SelectableMixin, UrlShortcutsMixin): + """A container that holds either :class:`~.BrowserResponse` or :class:`~.HttpResponse`.""" + + response: Union[BrowserResponse, HttpResponse] + + @property + def url(self) -> ResponseUrl: + """URL of the response.""" + return self.response.url + + @property + def text(self) -> str: + """Text or HTML contents of the response.""" + if isinstance(self.response, BrowserResponse): + return self.response.html + return self.response.text + + @property + def status(self) -> Optional[int]: + """The int status code of the HTTP response, if available.""" + return self.response.status + + def _selector_input(self) -> str: + return self.text