Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support wheel cache when using --require-hashes #11042

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/5037.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Support wheel cache when using ``--require-hashes``.
6 changes: 6 additions & 0 deletions src/pip/_internal/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
import json
import logging
import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Set

from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version
from pip._vendor.packaging.utils import canonicalize_name

from pip._internal.exceptions import InvalidWheelFilename
from pip._internal.models.direct_url import DirectUrl
from pip._internal.models.format_control import FormatControl
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
Expand Down Expand Up @@ -204,6 +206,10 @@ def __init__(
):
self.link = link
self.persistent = persistent
self.origin: Optional[DirectUrl] = None
origin_direct_url_path = Path(self.link.file_path).parent / "origin.json"
if origin_direct_url_path.exists():
self.origin = DirectUrl.from_json(origin_direct_url_path.read_text())


class WheelCache(Cache):
Expand Down
16 changes: 12 additions & 4 deletions src/pip/_internal/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,20 @@
import configparser
import re
from itertools import chain, groupby, repeat
from typing import TYPE_CHECKING, Dict, List, Optional, Union
from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Union

from pip._vendor.requests.models import Request, Response
from pip._vendor.rich.console import Console, ConsoleOptions, RenderResult
from pip._vendor.rich.markup import escape
from pip._vendor.rich.text import Text

if TYPE_CHECKING:
from hashlib import _Hash
from typing import Literal
from typing import Literal, Protocol

from pip._internal.metadata import BaseDistribution
from pip._internal.req.req_install import InstallRequirement
else:
Protocol = object


#
Expand Down Expand Up @@ -570,6 +571,11 @@ class HashUnpinned(HashError):
)


class SupportsHexDigest(Protocol):
    """Structural type for anything exposing a ``hexdigest()`` method.

    Matches ``hashlib`` hash objects as well as project wrappers that carry
    an already-computed digest (see ``HexDigestStr`` in utils/hashes.py).
    """

    def hexdigest(self) -> str:
        ...


class HashMismatch(HashError):
"""
Distribution file hash values don't match.
Expand All @@ -588,7 +594,9 @@ class HashMismatch(HashError):
"someone may have tampered with them."
)

def __init__(self, allowed: Dict[str, List[str]], gots: Dict[str, "_Hash"]) -> None:
def __init__(
self, allowed: Dict[str, List[str]], gots: Mapping[str, SupportsHexDigest]
) -> None:
"""
:param allowed: A dict of algorithm names pointing to lists of allowed
hex digests
Expand Down
32 changes: 28 additions & 4 deletions src/pip/_internal/operations/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,12 @@
from pip._internal.req.req_install import InstallRequirement
from pip._internal.utils.hashes import Hashes, MissingHashes
from pip._internal.utils.logging import indent_log
from pip._internal.utils.misc import display_path, hide_url, is_installable_dir
from pip._internal.utils.misc import (
display_path,
hash_file,
hide_url,
is_installable_dir,
)
from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.utils.unpacking import unpack_file
from pip._internal.vcs import vcs
Expand Down Expand Up @@ -98,7 +103,10 @@ def get_http_url(


def get_file_url(
link: Link, download_dir: Optional[str] = None, hashes: Optional[Hashes] = None
link: Link,
download_dir: Optional[str] = None,
hashes: Optional[Hashes] = None,
archive_hash: Optional[str] = None,
) -> File:
"""Get file and optionally check its hash."""
# If a download dir is specified, is the file already there and valid?
Expand All @@ -117,7 +125,13 @@ def get_file_url(
# hash in `hashes` matching: a URL-based or an option-based
# one; no internet-sourced hash will be in `hashes`.
if hashes:
hashes.check_against_path(from_path)
if archive_hash:
# When we get a wheel from the cache, we don't check the file hash but
# rather compare expected hash against the hash of the original archive
# that was downloaded to build the cached wheel.
hashes.check_against_hash(archive_hash)
else:
hashes.check_against_path(from_path)
return File(from_path, None)


Expand All @@ -128,6 +142,7 @@ def unpack_url(
verbosity: int,
download_dir: Optional[str] = None,
hashes: Optional[Hashes] = None,
archive_hash: Optional[str] = None,
Copy link
Member Author

@sbidoul sbidoul Apr 18, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Passing this archive_hash to unpack_url and then to get_file_url is the ugly part.
I think we could clean this up by avoiding calling unpack_url when the url points to a wheel in the wheel cache (as it does not unpack anyway).

If people are OK with the general approach, I'll investigate that next.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Passing archive_hash around like this feels wrong, especially get_file_url (if it’s passed, a bulk of the logic in the function isn’t used at all). This should be refactored a bit.

) -> Optional[File]:
"""Unpack link into location, downloading if required.

Expand All @@ -145,7 +160,9 @@ def unpack_url(

# file urls
if link.is_file:
file = get_file_url(link, download_dir, hashes=hashes)
file = get_file_url(
link, download_dir, hashes=hashes, archive_hash=archive_hash
)

# http urls
else:
Expand Down Expand Up @@ -470,6 +487,7 @@ def _prepare_linked_requirement(
self.verbosity,
self.download_dir,
hashes,
req.archive_hash,
)
except NetworkConnectionError as exc:
raise InstallationError(
Expand All @@ -486,6 +504,12 @@ def _prepare_linked_requirement(
# preserve the file path on the requirement.
if local_file:
req.local_file_path = local_file.path
# Also compute and preserve the hash of the file we downloaded.
# Note: as an optimization we may use link.hash if it is a sha256,
# as we verify elsewhere that it matches the downloaded content.
# TODO Should we use hashes.FAVORITE_HASH type ?
hash = hash_file(local_file.path)[0].hexdigest()
req.archive_hash = f"sha256={hash}"

dist = _get_prepared_distribution(
req,
Expand Down
6 changes: 6 additions & 0 deletions src/pip/_internal/req/req_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,12 @@ def __init__(
# PEP 508 URL requirement
link = Link(req.url)
self.link = self.original_link = link

# The locally computed hash of the (source) archive we downloaded. If no
# download occurred because a corresponding wheel was found in the local wheel
# cache, this is the hash that was recorded in the cache entry.
self.archive_hash: Optional[str] = None

self.original_link_is_in_wheel_cache = False

# Path to any downloaded or already-existing package.
Expand Down
13 changes: 6 additions & 7 deletions src/pip/_internal/resolution/legacy/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
)
from pip._internal.index.package_finder import PackageFinder
from pip._internal.metadata import BaseDistribution
from pip._internal.models import direct_url
from pip._internal.models.link import Link
from pip._internal.operations.prepare import RequirementPreparer
from pip._internal.req.req_install import (
Expand Down Expand Up @@ -289,17 +290,11 @@ def _populate_link(self, req: InstallRequirement) -> None:
Note that req.link may still be None - if the requirement is already
installed and not needed to be upgraded based on the return value of
_is_upgrade_allowed().

If preparer.require_hashes is True, don't use the wheel cache, because
cached wheels, always built locally, have different hashes than the
files downloaded from the index server and thus throw false hash
mismatches. Furthermore, cached wheels at present have undeterministic
contents due to file modification times.
"""
if req.link is None:
req.link = self._find_requirement_link(req)

if self.wheel_cache is None or self.preparer.require_hashes:
if self.wheel_cache is None:
return
cache_entry = self.wheel_cache.get_cache_entry(
link=req.link,
Expand All @@ -310,6 +305,10 @@ def _populate_link(self, req: InstallRequirement) -> None:
logger.debug("Using cached wheel link: %s", cache_entry.link)
if req.link is req.original_link and cache_entry.persistent:
req.original_link_is_in_wheel_cache = True
if cache_entry.origin is not None and isinstance(
cache_entry.origin.info, direct_url.ArchiveInfo
):
req.archive_hash = cache_entry.origin.info.hash
req.link = cache_entry.link

def _get_dist_for(self, req: InstallRequirement) -> BaseDistribution:
Expand Down
5 changes: 5 additions & 0 deletions src/pip/_internal/resolution/resolvelib/candidates.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
MetadataInconsistent,
)
from pip._internal.metadata import BaseDistribution
from pip._internal.models import direct_url
from pip._internal.models.link import Link, links_equivalent
from pip._internal.models.wheel import Wheel
from pip._internal.req.constructors import (
Expand Down Expand Up @@ -284,6 +285,10 @@ def __init__(
and template.link is template.original_link
):
ireq.original_link_is_in_wheel_cache = True
if cache_entry.origin is not None and isinstance(
cache_entry.origin.info, direct_url.ArchiveInfo
):
ireq.archive_hash = cache_entry.origin.info.hash

super().__init__(
link=link,
Expand Down
11 changes: 2 additions & 9 deletions src/pip/_internal/resolution/resolvelib/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,15 +536,8 @@ def make_requires_python_requirement(
def get_wheel_cache_entry(
self, link: Link, name: Optional[str]
) -> Optional[CacheEntry]:
"""Look up the link in the wheel cache.

If ``preparer.require_hashes`` is True, don't use the wheel cache,
because cached wheels, always built locally, have different hashes
than the files downloaded from the index server and thus throw false
hash mismatches. Furthermore, cached wheels at present have
nondeterministic contents due to file modification times.
"""
if self._wheel_cache is None or self.preparer.require_hashes:
"""Look up the link in the wheel cache."""
if self._wheel_cache is None:
return None
return self._wheel_cache.get_cache_entry(
link=link,
Expand Down
27 changes: 20 additions & 7 deletions src/pip/_internal/utils/hashes.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import hashlib
from typing import TYPE_CHECKING, BinaryIO, Dict, Iterable, List

from pip._internal.exceptions import HashMismatch, HashMissing, InstallationError
from typing import TYPE_CHECKING, BinaryIO, Dict, Iterable, List, Mapping

from pip._internal.exceptions import (
HashMismatch,
HashMissing,
InstallationError,
SupportsHexDigest,
)
from pip._internal.utils.misc import read_chunks

if TYPE_CHECKING:
from hashlib import _Hash

# NoReturn introduced in 3.6.2; imported only for type checking to maintain
# pip compatibility with older patch versions of Python 3.6
from typing import NoReturn
Expand All @@ -22,6 +25,11 @@
STRONG_HASHES = ["sha256", "sha384", "sha512"]


class HexDigestStr(str, SupportsHexDigest):
    """A hex digest that has already been computed, wrapped as a string.

    Lets a plain digest string stand in wherever a hashlib-style object
    with ``hexdigest()`` is expected (e.g. in ``Hashes._raise``).
    """

    def hexdigest(self) -> str:
        # The string *is* the digest, so return it unchanged.
        return self

class Hashes:
"""A wrapper that builds multiple hashes at once and checks them against
known-good values
Expand Down Expand Up @@ -94,7 +102,7 @@ def check_against_chunks(self, chunks: Iterable[bytes]) -> None:
return
self._raise(gots)

def _raise(self, gots: Dict[str, "_Hash"]) -> "NoReturn":
def _raise(self, gots: Mapping[str, SupportsHexDigest]) -> "NoReturn":
    # Abort with a HashMismatch reporting the allowed digests vs. the
    # digests actually computed/observed (``gots``).
    raise HashMismatch(self._allowed, gots)

def check_against_file(self, file: BinaryIO) -> None:
Expand All @@ -109,6 +117,11 @@ def check_against_path(self, path: str) -> None:
with open(path, "rb") as file:
return self.check_against_file(file)

def check_against_hash(self, hash: str) -> None:
    """Check a pre-computed digest of the form ``"<algorithm>=<hexdigest>"``.

    Unlike ``check_against_path``/``check_against_file``, nothing is hashed
    here: the supplied digest (e.g. one recorded in a wheel-cache
    ``origin.json`` entry) is compared directly against the allowed values.

    :param hash: digest string, e.g. ``"sha256=abc123..."``.
    :raises HashMismatch: if the digest is not among the allowed values for
        its algorithm, or the string is malformed.
    """
    # partition() never raises; split("=", 1) would leak a bare ValueError
    # (not a HashError) for a malformed string containing no "=".
    alg, _, value = hash.partition("=")
    # NOTE(review): comparison is case-sensitive; assumes digests are
    # consistently lower-case hex on both sides — TODO confirm.
    if value not in self._allowed.get(alg, []):
        self._raise({alg: HexDigestStr(value)})

def __bool__(self) -> bool:
"""Return whether I know any known-good hashes."""
return bool(self._allowed)
Expand Down Expand Up @@ -144,5 +157,5 @@ def __init__(self) -> None:
# empty list, it will never match, so an error will always raise.
super().__init__(hashes={FAVORITE_HASH: []})

def _raise(self, gots: Dict[str, "_Hash"]) -> "NoReturn":
def _raise(self, gots: Mapping[str, SupportsHexDigest]) -> "NoReturn":
    # MissingHashes allows nothing (empty allowed list), so any computed
    # digest triggers this; report the favorite-hash digest the user should
    # pin in their requirements.
    raise HashMissing(gots[FAVORITE_HASH].hexdigest())
14 changes: 14 additions & 0 deletions src/pip/_internal/wheel_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os.path
import re
import shutil
from pathlib import Path
from typing import Any, Callable, Iterable, List, Optional, Tuple

from pip._vendor.packaging.utils import canonicalize_name, canonicalize_version
Expand All @@ -13,12 +14,14 @@
from pip._internal.cache import WheelCache
from pip._internal.exceptions import InvalidWheelFilename, UnsupportedWheel
from pip._internal.metadata import FilesystemWheel, get_wheel_distribution
from pip._internal.models import direct_url
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
from pip._internal.operations.build.wheel import build_wheel_pep517
from pip._internal.operations.build.wheel_editable import build_wheel_editable
from pip._internal.operations.build.wheel_legacy import build_wheel_legacy
from pip._internal.req.req_install import InstallRequirement
from pip._internal.utils.direct_url_helpers import direct_url_from_link
from pip._internal.utils.logging import indent_log
from pip._internal.utils.misc import ensure_dir, hash_file, is_wheel_installed
from pip._internal.utils.setuptools_build import make_setuptools_clean_args
Expand Down Expand Up @@ -344,6 +347,7 @@ def build(
build_successes, build_failures = [], []
for req in requirements:
assert req.name
assert req.link
cache_dir = _get_cache_dir(req, wheel_cache)
wheel_file = _build_one(
req,
Expand All @@ -354,6 +358,16 @@ def build(
req.editable and req.permit_editable_wheels,
)
if wheel_file:
# Store the origin URL of this cache entry
# TODO move this to cache.py / refactor
origin_direct_url = direct_url_from_link(req.link, req.source_dir)
if isinstance(origin_direct_url.info, direct_url.ArchiveInfo):
# Record the hash of the file that was downloaded.
assert req.archive_hash
origin_direct_url.info.hash = req.archive_hash
Path(cache_dir).joinpath("origin.json").write_text(
origin_direct_url.to_json()
)
# Update the link for this.
req.link = Link(path_to_url(wheel_file))
req.local_file_path = req.link.file_path
Expand Down