From 6b9f421ad061c6fffee628040acb5be6a9491a37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Setla?= <38295919+setla@users.noreply.github.com> Date: Tue, 13 Feb 2024 06:32:00 +0100 Subject: [PATCH] Implement granular URL error hierarchy in the HTTP client This patch introduces 5 granular user-facing exceptions that may occur when HTTP requests are made: * `InvalidUrlClientError` * `RedirectClientError` * `NonHttpUrlClientError` * `InvalidUrlRedirectClientError` * `NonHttpUrlRedirectClientError` Previously `ValueError` or `InvalidURL` was raised and screening out was complicated (a valid URL that redirects to invalid one raised the same error as an invalid URL). Ref: https://github.com/aio-libs/aiohttp/pull/6722#discussion_r1477103562 PR #6722 Resolves #2507 Resolves #2630 Resolves #3315 Co-authored-by: Sviatoslav Sydorenko (cherry picked from commit fb465e155b872f01489173d11e35f02ccbf3a940) --- CHANGES/2507.feature.rst | 1 + CHANGES/3315.feature.rst | 1 + CHANGES/6722.feature | 12 +++ CONTRIBUTORS.txt | 1 + aiohttp/__init__.py | 10 +++ aiohttp/client.py | 53 +++++++++--- aiohttp/client_exceptions.py | 54 +++++++++++-- docs/client_reference.rst | 49 ++++++++++++ tests/test_client_exceptions.py | 25 +++++- tests/test_client_functional.py | 137 +++++++++++++++++++++++++++++++- 10 files changed, 321 insertions(+), 22 deletions(-) create mode 120000 CHANGES/2507.feature.rst create mode 120000 CHANGES/3315.feature.rst create mode 100644 CHANGES/6722.feature diff --git a/CHANGES/2507.feature.rst b/CHANGES/2507.feature.rst new file mode 120000 index 00000000000..f569cd92882 --- /dev/null +++ b/CHANGES/2507.feature.rst @@ -0,0 +1 @@ +6722.feature \ No newline at end of file diff --git a/CHANGES/3315.feature.rst b/CHANGES/3315.feature.rst new file mode 120000 index 00000000000..f569cd92882 --- /dev/null +++ b/CHANGES/3315.feature.rst @@ -0,0 +1 @@ +6722.feature \ No newline at end of file diff --git a/CHANGES/6722.feature b/CHANGES/6722.feature new file mode 100644 index 00000000000..1dd253a0997 --- /dev/null +++ b/CHANGES/6722.feature @@ -0,0 +1,12 @@ +Added 5 new exceptions: :py:exc:`~aiohttp.InvalidUrlClientError`, :py:exc:`~aiohttp.RedirectClientError`, +:py:exc:`~aiohttp.NonHttpUrlClientError`, :py:exc:`~aiohttp.InvalidUrlRedirectClientError`, +:py:exc:`~aiohttp.NonHttpUrlRedirectClientError` + +:py:exc:`~aiohttp.InvalidUrlRedirectClientError`, :py:exc:`~aiohttp.NonHttpUrlRedirectClientError` +are raised instead of :py:exc:`ValueError` or :py:exc:`~aiohttp.InvalidURL` when the redirect URL is invalid. Classes +:py:exc:`~aiohttp.InvalidUrlClientError`, :py:exc:`~aiohttp.RedirectClientError`, +:py:exc:`~aiohttp.NonHttpUrlClientError` are base for them. + +The :py:exc:`~aiohttp.InvalidURL` now exposes a ``description`` property with the text explanation of the error details. + +-- by :user:`setla` diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 8df68497dbe..c7e18d955e5 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -366,5 +366,6 @@ Yuvi Panda Zainab Lawal Zeal Wierslee Zlatan Sičanica +Łukasz Setla Марк Коренберг Семён Марьясин diff --git a/aiohttp/__init__.py b/aiohttp/__init__.py index 3f8b2728863..5064b043006 100644 --- a/aiohttp/__init__.py +++ b/aiohttp/__init__.py @@ -25,7 +25,12 @@ ContentTypeError, Fingerprint, InvalidURL, + InvalidUrlClientError, + InvalidUrlRedirectClientError, NamedPipeConnector, + NonHttpUrlClientError, + NonHttpUrlRedirectClientError, + RedirectClientError, RequestInfo, ServerConnectionError, ServerDisconnectedError, @@ -137,6 +142,11 @@ "ContentTypeError", "Fingerprint", "InvalidURL", + "InvalidUrlClientError", + "InvalidUrlRedirectClientError", + "NonHttpUrlClientError", + "NonHttpUrlRedirectClientError", + "RedirectClientError", "RequestInfo", "ServerConnectionError", "ServerDisconnectedError", diff --git a/aiohttp/client.py b/aiohttp/client.py index 36dbf6a7119..8d8d13f25f7 100644 --- a/aiohttp/client.py +++ b/aiohttp/client.py @@ -52,6 +52,11 @@ ConnectionTimeoutError, ContentTypeError, InvalidURL, + InvalidUrlClientError, + InvalidUrlRedirectClientError, + NonHttpUrlClientError, + NonHttpUrlRedirectClientError, + RedirectClientError, ServerConnectionError, ServerDisconnectedError, ServerFingerprintMismatch, @@ -109,6 +114,11 @@ "ConnectionTimeoutError", "ContentTypeError", "InvalidURL", + "InvalidUrlClientError", + "RedirectClientError", + "NonHttpUrlClientError", + "InvalidUrlRedirectClientError", + "NonHttpUrlRedirectClientError", "ServerConnectionError", "ServerDisconnectedError", "ServerFingerprintMismatch", @@ -168,6 +178,7 @@ class ClientTimeout: # https://www.rfc-editor.org/rfc/rfc9110#section-9.2.2 IDEMPOTENT_METHODS = frozenset({"GET", "HEAD", "OPTIONS", "TRACE", "PUT", "DELETE"}) +HTTP_SCHEMA_SET = frozenset({"http", "https", ""}) _RetType = TypeVar("_RetType") _CharsetResolver = Callable[[ClientResponse, bytes], str] @@ -455,7 +466,10 @@ async def _request( try: url = self._build_url(str_or_url) except ValueError as e: - raise InvalidURL(str_or_url) from e + raise InvalidUrlClientError(str_or_url) from e + + if url.scheme not in HTTP_SCHEMA_SET: + raise NonHttpUrlClientError(url) skip_headers = set(self._skip_auto_headers) if skip_auto_headers is not None: @@ -513,6 +527,15 @@ async def _request( retry_persistent_connection = method in IDEMPOTENT_METHODS while True: url, auth_from_url = strip_auth_from_url(url) + if not url.raw_host: + # NOTE: Bail early, otherwise, causes `InvalidURL` through + # NOTE: `self._request_class()` below. + err_exc_cls = ( + InvalidUrlRedirectClientError + if redirects + else InvalidUrlClientError + ) + raise err_exc_cls(url) if auth and auth_from_url: raise ValueError( "Cannot combine AUTH argument with " @@ -670,25 +693,35 @@ async def _request( resp.release() try: - parsed_url = URL( + parsed_redirect_url = URL( r_url, encoded=not self._requote_redirect_url ) - except ValueError as e: - raise InvalidURL(r_url) from e + raise InvalidUrlRedirectClientError( + r_url, + "Server attempted redirecting to a location that does not look like a URL", + ) from e - scheme = parsed_url.scheme - if scheme not in ("http", "https", ""): + scheme = parsed_redirect_url.scheme + if scheme not in HTTP_SCHEMA_SET: resp.close() - raise ValueError("Can redirect only to http or https") + raise NonHttpUrlRedirectClientError(r_url) elif not scheme: - parsed_url = url.join(parsed_url) + parsed_redirect_url = url.join(parsed_redirect_url) - if url.origin() != parsed_url.origin(): + try: + redirect_origin = parsed_redirect_url.origin() + except ValueError as origin_val_err: + raise InvalidUrlRedirectClientError( + parsed_redirect_url, + "Invalid redirect URL origin", + ) from origin_val_err + + if url.origin() != redirect_origin: auth = None headers.pop(hdrs.AUTHORIZATION, None) - url = parsed_url + url = parsed_redirect_url params = {} resp.release() continue diff --git a/aiohttp/client_exceptions.py b/aiohttp/client_exceptions.py index 60bf058e887..f15a9ee3d3e 100644 --- a/aiohttp/client_exceptions.py +++ b/aiohttp/client_exceptions.py @@ -2,10 +2,10 @@ import asyncio import warnings -from typing import TYPE_CHECKING, Any, Optional, Tuple, Union +from typing import TYPE_CHECKING, Optional, Tuple, Union from .http_parser import RawResponseMessage -from .typedefs import LooseHeaders +from .typedefs import LooseHeaders, StrOrURL try: import ssl @@ -41,6 +41,11 @@ "ContentTypeError", "ClientPayloadError", "InvalidURL", + "InvalidUrlClientError", + "RedirectClientError", + "NonHttpUrlClientError", + "InvalidUrlRedirectClientError", + "NonHttpUrlRedirectClientError", ) @@ -281,17 +286,52 @@ class InvalidURL(ClientError, ValueError): # Derive from ValueError for backward compatibility - def __init__(self, url: Any) -> None: + def __init__(self, url: StrOrURL, description: Union[str, None] = None) -> None: # The type of url is not yarl.URL because the exception can be raised # on URL(url) call - super().__init__(url) + self._url = url + self._description = description + + if description: + super().__init__(url, description) + else: + super().__init__(url) + + @property + def url(self) -> StrOrURL: + return self._url @property - def url(self) -> Any: - return self.args[0] + def description(self) -> "str | None": + return self._description def __repr__(self) -> str: - return f"<{self.__class__.__name__} {self.url}>" + return f"<{self.__class__.__name__} {self}>" + + def __str__(self) -> str: + if self._description: + return f"{self._url} - {self._description}" + return str(self._url) + + +class InvalidUrlClientError(InvalidURL): + """Invalid URL client error.""" + + +class RedirectClientError(ClientError): + """Client redirect error.""" + + +class NonHttpUrlClientError(ClientError): + """Non http URL client error.""" + + +class InvalidUrlRedirectClientError(InvalidUrlClientError, RedirectClientError): + """Invalid URL redirect client error.""" + + +class NonHttpUrlRedirectClientError(NonHttpUrlClientError, RedirectClientError): + """Non http URL redirect client error.""" class ClientSSLError(ClientConnectorError): diff --git a/docs/client_reference.rst b/docs/client_reference.rst index 93b3459ba7c..838aee0c7d6 100644 --- a/docs/client_reference.rst +++ b/docs/client_reference.rst @@ -2115,6 +2115,41 @@ All exceptions are available as members of *aiohttp* module. Invalid URL, :class:`yarl.URL` instance. + .. attribute:: description + + Invalid URL description, :class:`str` instance or :data:`None`. + +.. exception:: InvalidUrlClientError + + Base class for all errors related to client url. + + Derived from :exc:`InvalidURL` + +.. exception:: RedirectClientError + + Base class for all errors related to client redirects. + + Derived from :exc:`ClientError` + +.. exception:: NonHttpUrlClientError + + Base class for all errors related to non http client urls. + + Derived from :exc:`ClientError` + +.. exception:: InvalidUrlRedirectClientError + + Redirect URL is malformed, e.g. it does not contain host part. + + Derived from :exc:`InvalidUrlClientError` and :exc:`RedirectClientError` + +.. exception:: NonHttpUrlRedirectClientError + + Redirect URL does not contain http schema. + + Derived from :exc:`RedirectClientError` and :exc:`NonHttpUrlClientError` + + .. class:: ContentDisposition Represent Content-Disposition header @@ -2331,3 +2366,17 @@ Hierarchy of exceptions * :exc:`WSServerHandshakeError` * :exc:`InvalidURL` + + * :exc:`InvalidUrlClientError` + + * :exc:`InvalidUrlRedirectClientError` + + * :exc:`NonHttpUrlClientError` + + * :exc:`NonHttpUrlRedirectClientError` + + * :exc:`RedirectClientError` + + * :exc:`InvalidUrlRedirectClientError` + + * :exc:`NonHttpUrlRedirectClientError` diff --git a/tests/test_client_exceptions.py b/tests/test_client_exceptions.py index f70ba5d09a6..d863d6674a3 100644 --- a/tests/test_client_exceptions.py +++ b/tests/test_client_exceptions.py @@ -5,6 +5,7 @@ from unittest import mock import pytest +from yarl import URL from aiohttp import client, client_reqrep @@ -298,8 +299,9 @@ def test_repr(self) -> None: class TestInvalidURL: def test_ctor(self) -> None: - err = client.InvalidURL(url=":wrong:url:") + err = client.InvalidURL(url=":wrong:url:", description=":description:") assert err.url == ":wrong:url:" + assert err.description == ":description:" def test_pickle(self) -> None: err = client.InvalidURL(url=":wrong:url:") @@ -310,10 +312,27 @@ def test_pickle(self) -> None: assert err2.url == ":wrong:url:" assert err2.foo == "bar" - def test_repr(self) -> None: + def test_repr_no_description(self) -> None: err = client.InvalidURL(url=":wrong:url:") + assert err.args == (":wrong:url:",) assert repr(err) == "" - def test_str(self) -> None: + def test_repr_yarl_URL(self) -> None: + err = client.InvalidURL(url=URL(":wrong:url:")) + assert repr(err) == "" + + def test_repr_with_description(self) -> None: + err = client.InvalidURL(url=":wrong:url:", description=":description:") + assert repr(err) == "" + + def test_str_no_description(self) -> None: err = client.InvalidURL(url=":wrong:url:") assert str(err) == ":wrong:url:" + + def test_none_description(self) -> None: + err = client.InvalidURL(":wrong:url:") + assert err.description is None + + def test_str_with_description(self) -> None: + err = client.InvalidURL(url=":wrong:url:", description=":description:") + assert str(err) == ":wrong:url: - :description:" diff --git a/tests/test_client_functional.py b/tests/test_client_functional.py index 654788afa72..4d804a31ddc 100644 --- a/tests/test_client_functional.py +++ b/tests/test_client_functional.py @@ -20,7 +20,14 @@ import aiohttp from aiohttp import Fingerprint, ServerFingerprintMismatch, hdrs, web from aiohttp.abc import AbstractResolver -from aiohttp.client_exceptions import SocketTimeoutError, TooManyRedirects +from aiohttp.client_exceptions import ( + InvalidUrlClientError, + InvalidUrlRedirectClientError, + NonHttpUrlClientError, + NonHttpUrlRedirectClientError, + SocketTimeoutError, + TooManyRedirects, +) from aiohttp.pytest_plugin import AiohttpClient, TestClient from aiohttp.test_utils import unused_port @@ -1121,7 +1128,7 @@ async def redirect(request): app.router.add_get("/redirect", redirect) client = await aiohttp_client(app) - with pytest.raises(ValueError): + with pytest.raises(NonHttpUrlRedirectClientError): await client.get("/redirect") @@ -2497,6 +2504,132 @@ async def handler_redirect(request): await client.post("/", chunked=1024) +INVALID_URL_WITH_ERROR_MESSAGE_YARL_NEW = ( + # yarl.URL.__new__ raises ValueError + ("http://:/", "http://:/"), + ("http://example.org:non_int_port/", "http://example.org:non_int_port/"), +) + +INVALID_URL_WITH_ERROR_MESSAGE_YARL_ORIGIN = ( + # # yarl.URL.origin raises ValueError + ("http:/", "http:///"), + ("http:/example.com", "http:///example.com"), + ("http:///example.com", "http:///example.com"), +) + +NON_HTTP_URL_WITH_ERROR_MESSAGE = ( + ("call:+380123456789", r"call:\+380123456789"), + ("skype:handle", "skype:handle"), + ("slack://instance/room", "slack://instance/room"), + ("steam:code", "steam:code"), + ("twitter://handle", "twitter://handle"), + ("bluesky://profile/d:i:d", "bluesky://profile/d:i:d"), +) + + +@pytest.mark.parametrize( + ("url", "error_message_url", "expected_exception_class"), + ( + *( + (url, message, InvalidUrlClientError) + for (url, message) in INVALID_URL_WITH_ERROR_MESSAGE_YARL_NEW + ), + *( + (url, message, InvalidUrlClientError) + for (url, message) in INVALID_URL_WITH_ERROR_MESSAGE_YARL_ORIGIN + ), + *( + (url, message, NonHttpUrlClientError) + for (url, message) in NON_HTTP_URL_WITH_ERROR_MESSAGE + ), + ), +) +async def test_invalid_and_non_http_url( + url: Any, error_message_url: Any, expected_exception_class: Any +) -> None: + async with aiohttp.ClientSession() as http_session: + with pytest.raises( + expected_exception_class, match=rf"^{error_message_url}( - [A-Za-z ]+)?" + ): + await http_session.get(url) + + +@pytest.mark.parametrize( + ("invalid_redirect_url", "error_message_url", "expected_exception_class"), + ( + *( + (url, message, InvalidUrlRedirectClientError) + for (url, message) in INVALID_URL_WITH_ERROR_MESSAGE_YARL_ORIGIN + + INVALID_URL_WITH_ERROR_MESSAGE_YARL_NEW + ), + *( + (url, message, NonHttpUrlRedirectClientError) + for (url, message) in NON_HTTP_URL_WITH_ERROR_MESSAGE + ), + ), +) +async def test_invalid_redirect_url( + aiohttp_client: Any, + invalid_redirect_url: Any, + error_message_url: str, + expected_exception_class: Any, +) -> None: + headers = {hdrs.LOCATION: invalid_redirect_url} + + async def generate_redirecting_response(request): + return web.Response(status=301, headers=headers) + + app = web.Application() + app.router.add_get("/redirect", generate_redirecting_response) + client = await aiohttp_client(app) + + with pytest.raises( + expected_exception_class, match=rf"^{error_message_url}( - [A-Za-z ]+)?" + ): + await client.get("/redirect") + + +@pytest.mark.parametrize( + ("invalid_redirect_url", "error_message_url", "expected_exception_class"), + ( + *( + (url, message, InvalidUrlRedirectClientError) + for (url, message) in INVALID_URL_WITH_ERROR_MESSAGE_YARL_ORIGIN + + INVALID_URL_WITH_ERROR_MESSAGE_YARL_NEW + ), + *( + (url, message, NonHttpUrlRedirectClientError) + for (url, message) in NON_HTTP_URL_WITH_ERROR_MESSAGE + ), + ), +) +async def test_invalid_redirect_url_multiple_redirects( + aiohttp_client: Any, + invalid_redirect_url: Any, + error_message_url: str, + expected_exception_class: Any, +) -> None: + app = web.Application() + + for path, location in [ + ("/redirect", "/redirect1"), + ("/redirect1", "/redirect2"), + ("/redirect2", invalid_redirect_url), + ]: + + async def generate_redirecting_response(request): + return web.Response(status=301, headers={hdrs.LOCATION: location}) + + app.router.add_get(path, generate_redirecting_response) + + client = await aiohttp_client(app) + + with pytest.raises( + expected_exception_class, match=rf"^{error_message_url}( - [A-Za-z ]+)?" + ): + await client.get("/redirect") + + @pytest.mark.parametrize( ("status", "expected_ok"), (