From b7bc5ce874944df9bcc776362f7ea186257dbb82 Mon Sep 17 00:00:00 2001 From: Arun Babu Neelicattu Date: Wed, 28 Feb 2024 01:48:18 +0100 Subject: [PATCH] harden lazy wheel wheel error handling This change attempts to provide a catch-all safety net for circumstances where an index server is behaving badly when ranged requests are made. Additionally, this change also helps guard against missing "Content-Range" headers in specific scenarios. --- src/poetry/inspection/lazy_wheel.py | 41 ++++- src/poetry/repositories/http_repository.py | 4 +- tests/conftest.py | 12 ++ tests/helpers.py | 26 ++++ tests/inspection/test_lazy_wheel.py | 173 ++++++++++++++++----- tests/types.py | 12 ++ 6 files changed, 223 insertions(+), 45 deletions(-) diff --git a/src/poetry/inspection/lazy_wheel.py b/src/poetry/inspection/lazy_wheel.py index 54f566bb3c6..dbedeafdeb5 100644 --- a/src/poetry/inspection/lazy_wheel.py +++ b/src/poetry/inspection/lazy_wheel.py @@ -41,15 +41,19 @@ logger = logging.getLogger(__name__) -class HTTPRangeRequestUnsupported(Exception): +class LazyWheelUnsupportedError(Exception): + """Raised when a lazy wheel is unsupported.""" + + +class HTTPRangeRequestUnsupported(LazyWheelUnsupportedError): """Raised when the remote server appears unable to support byte ranges.""" -class UnsupportedWheel(Exception): +class UnsupportedWheel(LazyWheelUnsupportedError): """Unsupported wheel.""" -class InvalidWheel(Exception): +class InvalidWheel(LazyWheelUnsupportedError): """Invalid (e.g. corrupt) wheel.""" def __init__(self, location: str, name: str) -> None: @@ -85,6 +89,24 @@ def metadata_from_wheel_url( # themselves are invalid, not because we've messed up our bookkeeping # and produced an invalid file. raise InvalidWheel(url, name) + except Exception as e: + if isinstance(e, LazyWheelUnsupportedError): + # this is expected when the code handles issues with lazy wheel metadata retrieval correctly + raise e + + logger.debug( + "There was an unexpected %s when handling lazy wheel metadata retrieval for %s from %s: %s", + type(e).__name__, + name, + url, + e, + ) + + # Catch all exception to handle any issues that may have occurred during + # attempts to use Lazy Wheel. + raise LazyWheelUnsupportedError( + f"Attempts to use lazy wheel metadata retrieval for {name} from {url} failed" + ) from e class MergeIntervals: @@ -590,6 +612,12 @@ def _try_initial_chunk_request( f"did not receive partial content: got code {code}" ) + if "Content-Range" not in tail.headers: + raise LazyWheelUnsupportedError( + f"file length cannot be determined for {self._url}, " + f"did not receive content range header from server" + ) + file_length = self._parse_full_length_from_content_range( tail.headers["Content-Range"] ) @@ -614,10 +642,13 @@ def _extract_content_length( # This indicates that the requested range from the end was larger than the # actual file size: https://www.rfc-editor.org/rfc/rfc9110#status.416. - if code == codes.requested_range_not_satisfiable: + if ( + code == codes.requested_range_not_satisfiable + and resp is not None + and "Content-Range" in resp.headers + ): # In this case, we don't have any file content yet, but we do know the # size the file will be, so we can return that and exit here. - assert resp is not None file_length = self._parse_full_length_from_content_range( resp.headers["Content-Range"] ) diff --git a/src/poetry/repositories/http_repository.py b/src/poetry/repositories/http_repository.py index 788f7cef9ae..cd7801fdd9d 100644 --- a/src/poetry/repositories/http_repository.py +++ b/src/poetry/repositories/http_repository.py @@ -20,7 +20,7 @@ from poetry.config.config import Config from poetry.inspection.info import PackageInfo -from poetry.inspection.lazy_wheel import HTTPRangeRequestUnsupported +from poetry.inspection.lazy_wheel import LazyWheelUnsupportedError from poetry.inspection.lazy_wheel import metadata_from_wheel_url from poetry.repositories.cached_repository import CachedRepository from poetry.repositories.exceptions import PackageNotFound @@ -122,7 +122,7 @@ def _get_info_from_wheel(self, link: Link) -> PackageInfo: package_info = PackageInfo.from_metadata( metadata_from_wheel_url(link.filename, link.url, self.session) ) - except HTTPRangeRequestUnsupported: + except LazyWheelUnsupportedError: # Do not set to False if we already know that the domain supports # range requests for some URLs! raise_accepts_ranges = False diff --git a/tests/conftest.py b/tests/conftest.py index 796df5f7a55..5955c81dacd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -37,6 +37,7 @@ from tests.helpers import TestLocker from tests.helpers import TestRepository from tests.helpers import get_package +from tests.helpers import http_setup_redirect from tests.helpers import isolated_environment from tests.helpers import mock_clone from tests.helpers import mock_download @@ -321,6 +322,11 @@ def http() -> Iterator[type[httpretty.httpretty]]: yield httpretty +@pytest.fixture +def http_redirector(http: type[httpretty.httpretty]) -> None: + http_setup_redirect(http, http.HEAD, http.GET, http.PUT, http.POST) + + @pytest.fixture def project_root() -> Path: return Path(__file__).parent.parent @@ -513,3 +519,9 @@ def venv_flags_default() -> dict[str, bool]: "no-pip": False, "no-setuptools": False, } + + +@pytest.fixture(autouse=(os.name == "nt")) +def httpretty_windows_mock_urllib3_wait_for_socket(mocker: MockerFixture) -> None: + # this is a workaround for https://github.com/gabrielfalcao/HTTPretty/issues/442 + mocker.patch("urllib3.util.wait.select_wait_for_socket", returns=True) diff --git a/tests/helpers.py b/tests/helpers.py index c7f9576df2a..d222a7be0b5 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -29,6 +29,9 @@ from typing import Any from typing import Mapping + import httpretty + + from httpretty.core import HTTPrettyRequest from poetry.core.constraints.version import Version from poetry.core.packages.dependency import Dependency from pytest_mock import MockerFixture @@ -38,6 +41,7 @@ from poetry.installation.operations.operation import Operation from poetry.poetry import Poetry from poetry.utils.authenticator import Authenticator + from tests.types import HTTPrettyResponse FIXTURE_PATH = Path(__file__).parent / "fixtures" @@ -321,3 +325,25 @@ def recurse_keys(obj: Mapping[str, Any]) -> Iterator[tuple[list[str], Any]]: yield ([], obj) return {delimiter.join(path): value for path, value in recurse_keys(obj)} + + +def http_setup_redirect( + http: type[httpretty.httpretty], *methods: str, status_code: int = 301 +) -> None: + redirect_uri_regex = re.compile("^(?Phttps?)://redirect.(?P.*)$") + + def redirect_request_callback( + request: HTTPrettyRequest, uri: str, headers: dict[str, Any] + ) -> HTTPrettyResponse: + redirect_uri_match = redirect_uri_regex.match(uri) + assert redirect_uri_match is not None + redirect_uri = f"{redirect_uri_match.group('protocol')}://{redirect_uri_match.group('uri')}" + return status_code, {"Location": redirect_uri}, b"" + + for method in methods: + http.register_uri( + method, + redirect_uri_regex, + status=status_code, + body=redirect_request_callback, + ) diff --git a/tests/inspection/test_lazy_wheel.py b/tests/inspection/test_lazy_wheel.py index 0d22fcc8e65..332384fe94c 100644 --- a/tests/inspection/test_lazy_wheel.py +++ b/tests/inspection/test_lazy_wheel.py @@ -5,12 +5,9 @@ from pathlib import Path from typing import TYPE_CHECKING from typing import Any -from typing import Dict from typing import Protocol -from typing import Tuple from urllib.parse import urlparse -import httpretty import pytest import requests @@ -18,20 +15,21 @@ from poetry.inspection.lazy_wheel import HTTPRangeRequestUnsupported from poetry.inspection.lazy_wheel import InvalidWheel +from poetry.inspection.lazy_wheel import LazyWheelUnsupportedError from poetry.inspection.lazy_wheel import metadata_from_wheel_url +from tests.helpers import http_setup_redirect if TYPE_CHECKING: - from collections.abc import Callable + import httpretty from httpretty.core import HTTPrettyRequest + from pytest_mock import MockerFixture from tests.types import FixtureDirGetter - - HTTPrettyResponse = Tuple[int, Dict[str, Any], bytes] # status code, headers, body - HTTPrettyRequestCallback = Callable[ - [HTTPrettyRequest, str, Dict[str, Any]], HTTPrettyResponse - ] + from tests.types import HTTPPrettyRequestCallbackWrapper + from tests.types import HTTPrettyRequestCallback + from tests.types import HTTPrettyResponse class RequestCallbackFactory(Protocol): def __call__( @@ -41,6 +39,17 @@ def __call__( negative_offset_error: tuple[int, bytes] | None = None, ) -> HTTPrettyRequestCallback: ... + class AssertMetadataFromWheelUrl(Protocol): + def __call__( + self, + *, + accept_ranges: str | None = "bytes", + negative_offset_error: tuple[int, bytes] | None = None, + expected_requests: int = 3, + request_callback_wrapper: HTTPPrettyRequestCallbackWrapper | None = None, + redirect: bool = True, + ) -> None: ... + NEGATIVE_OFFSET_AS_POSITIVE = -1 @@ -152,6 +161,55 @@ def handle_request( return _factory +@pytest.fixture +def assert_metadata_from_wheel_url( + http: type[httpretty.httpretty], + handle_request_factory: RequestCallbackFactory, +) -> AssertMetadataFromWheelUrl: + def _assertion( + *, + accept_ranges: str | None = "bytes", + negative_offset_error: tuple[int, bytes] | None = None, + expected_requests: int = 3, + request_callback_wrapper: HTTPPrettyRequestCallbackWrapper | None = None, + redirect: bool = False, + ) -> None: + latest_requests = http.latest_requests() + latest_requests.clear() + + domain = ( + f"lazy-wheel-{negative_offset_error[0] if negative_offset_error else 0}.com" + ) + uri_regex = re.compile(f"^https://{domain}/.*$") + request_callback = handle_request_factory( + accept_ranges=accept_ranges, negative_offset_error=negative_offset_error + ) + if request_callback_wrapper is not None: + request_callback = request_callback_wrapper(request_callback) + + http.register_uri(http.GET, uri_regex, body=request_callback) + http.register_uri(http.HEAD, uri_regex, body=request_callback) + + if redirect: + http_setup_redirect(http, http.GET, http.HEAD) + + url_prefix = "redirect." if redirect else "" + url = f"https://{url_prefix}{domain}/poetry_core-1.5.0-py3-none-any.whl" + + metadata = metadata_from_wheel_url("poetry-core", url, requests.Session()) + + assert metadata["name"] == "poetry-core" + assert metadata["version"] == "1.5.0" + assert metadata["author"] == "Sébastien Eustace" + assert metadata["requires_dist"] == [ + 'importlib-metadata (>=1.7.0) ; python_version < "3.8"' + ] + + assert len(latest_requests) == expected_requests + + return _assertion + + @pytest.mark.parametrize( "negative_offset_error", [ @@ -165,31 +223,9 @@ def handle_request( ], ) def test_metadata_from_wheel_url( - http: type[httpretty.httpretty], - handle_request_factory: RequestCallbackFactory, + assert_metadata_from_wheel_url: AssertMetadataFromWheelUrl, negative_offset_error: tuple[int, bytes] | None, ) -> None: - domain = ( - f"lazy-wheel-{negative_offset_error[0] if negative_offset_error else 0}.com" - ) - uri_regex = re.compile(f"^https://{domain}/.*$") - request_callback = handle_request_factory( - negative_offset_error=negative_offset_error - ) - http.register_uri(http.GET, uri_regex, body=request_callback) - http.register_uri(http.HEAD, uri_regex, body=request_callback) - - url = f"https://{domain}/poetry_core-1.5.0-py3-none-any.whl" - - metadata = metadata_from_wheel_url("poetry-core", url, requests.Session()) - - assert metadata["name"] == "poetry-core" - assert metadata["version"] == "1.5.0" - assert metadata["author"] == "Sébastien Eustace" - assert metadata["requires_dist"] == [ - 'importlib-metadata (>=1.7.0) ; python_version < "3.8"' - ] - # negative offsets supported: # 1. end of central directory # 2. whole central directory @@ -207,15 +243,60 @@ def test_metadata_from_wheel_url( expected_requests += 1 else: expected_requests += 2 - latest_requests = http.latest_requests() - assert len(latest_requests) == expected_requests + + assert_metadata_from_wheel_url( + negative_offset_error=negative_offset_error, expected_requests=expected_requests + ) # second wheel -> one less request if negative offsets are not supported - latest_requests.clear() - metadata_from_wheel_url("poetry-core", url, requests.Session()) expected_requests = min(expected_requests, 4) - latest_requests = httpretty.latest_requests() - assert len(latest_requests) == expected_requests + assert_metadata_from_wheel_url( + negative_offset_error=negative_offset_error, expected_requests=expected_requests + ) + + +def test_metadata_from_wheel_url_416_missing_content_range( + assert_metadata_from_wheel_url: AssertMetadataFromWheelUrl, +) -> None: + def request_callback_wrapper( + request_callback: HTTPrettyRequestCallback, + ) -> HTTPrettyRequestCallback: + def _wrapped( + request: HTTPrettyRequest, uri: str, response_headers: dict[str, Any] + ) -> HTTPrettyResponse: + status_code, response_headers, body = request_callback( + request, uri, response_headers + ) + return ( + status_code, + { + header: response_headers[header] + for header in response_headers + if header.lower() != "content-range" + }, + body, + ) + + return _wrapped + + assert_metadata_from_wheel_url( + negative_offset_error=( + codes.requested_range_not_satisfiable, + b"Requested range not satisfiable", + ), + expected_requests=5, + request_callback_wrapper=request_callback_wrapper, + ) + + +def test_metadata_from_wheel_url_with_redirect( + assert_metadata_from_wheel_url: AssertMetadataFromWheelUrl, +) -> None: + assert_metadata_from_wheel_url( + negative_offset_error=None, + expected_requests=6, + redirect=True, + ) @pytest.mark.parametrize("negative_offset_as_positive", [False, True]) @@ -320,3 +401,19 @@ def test_metadata_from_wheel_url_invalid_wheel( latest_requests = http.latest_requests() assert len(latest_requests) == 1 assert latest_requests[0].method == "GET" + + +def test_metadata_from_wheel_url_handles_unexpected_errors( + mocker: MockerFixture, +) -> None: + mocker.patch( + "poetry.inspection.lazy_wheel.LazyWheelOverHTTP.read_metadata", + side_effect=RuntimeError(), + ) + + with pytest.raises(LazyWheelUnsupportedError): + metadata_from_wheel_url( + "demo-missing-dist-info", + "https://runtime-error.com/demo_missing_dist_info-0.1.0-py2.py3-none-any.whl", + requests.Session(), + ) diff --git a/tests/types.py b/tests/types.py index 0d9605c56dd..4fc279abe06 100644 --- a/tests/types.py +++ b/tests/types.py @@ -6,12 +6,16 @@ if TYPE_CHECKING: + from collections.abc import Callable from pathlib import Path + from typing import Dict + from typing import Tuple import requests from cleo.io.io import IO from cleo.testers.command_tester import CommandTester + from httpretty.core import HTTPrettyRequest from poetry.config.config import Config from poetry.config.source import Source @@ -20,6 +24,14 @@ from poetry.poetry import Poetry from poetry.utils.env import Env + HTTPrettyResponse = Tuple[int, Dict[str, Any], bytes] # status code, headers, body + HTTPrettyRequestCallback = Callable[ + [HTTPrettyRequest, str, Dict[str, Any]], HTTPrettyResponse + ] + HTTPPrettyRequestCallbackWrapper = Callable[ + [HTTPrettyRequestCallback], HTTPrettyRequestCallback + ] + class CommandTesterFactory(Protocol): def __call__(