Skip to content

Commit

Permalink
Merge pull request #11897 from sbidoul/cache-hash-checking-sbi
Browse files Browse the repository at this point in the history
Support wheel cache when using --require-hashes
  • Loading branch information
sbidoul authored Apr 15, 2023
2 parents f7787f8 + efe2d27 commit dbf4e68
Show file tree
Hide file tree
Showing 10 changed files with 129 additions and 16 deletions.
1 change: 1 addition & 0 deletions news/5037.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Support wheel cache when using ``--require-hashes``.
59 changes: 50 additions & 9 deletions src/pip/_internal/operations/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,10 @@ def unpack_url(


def _check_download_dir(
link: Link, download_dir: str, hashes: Optional[Hashes]
link: Link,
download_dir: str,
hashes: Optional[Hashes],
warn_on_hash_mismatch: bool = True,
) -> Optional[str]:
    """Check download_dir for a previously downloaded file with a correct hash.
    If a correct file is found, return its path; otherwise return None.
Expand All @@ -195,10 +198,11 @@ def _check_download_dir(
try:
hashes.check_against_path(download_path)
except HashMismatch:
logger.warning(
"Previously-downloaded file %s has bad hash. Re-downloading.",
download_path,
)
if warn_on_hash_mismatch:
logger.warning(
"Previously-downloaded file %s has bad hash. Re-downloading.",
download_path,
)
os.unlink(download_path)
return None
return download_path
Expand Down Expand Up @@ -263,7 +267,7 @@ def __init__(

def _log_preparing_link(self, req: InstallRequirement) -> None:
"""Provide context for the requirement being prepared."""
if req.link.is_file and not req.original_link_is_in_wheel_cache:
if req.link.is_file and not req.is_wheel_from_cache:
message = "Processing %s"
information = str(display_path(req.link.file_path))
else:
Expand All @@ -284,7 +288,7 @@ def _log_preparing_link(self, req: InstallRequirement) -> None:
self._previous_requirement_header = (message, information)
logger.info(message, information)

if req.original_link_is_in_wheel_cache:
if req.is_wheel_from_cache:
with indent_log():
logger.info("Using cached %s", req.link.filename)

Expand Down Expand Up @@ -485,7 +489,18 @@ def prepare_linked_requirement(
file_path = None
if self.download_dir is not None and req.link.is_wheel:
hashes = self._get_linked_req_hashes(req)
file_path = _check_download_dir(req.link, self.download_dir, hashes)
file_path = _check_download_dir(
req.link,
self.download_dir,
hashes,
# When a locally built wheel has been found in cache, we don't warn
# about re-downloading when the already downloaded wheel hash does
# not match. This is because the hash must be checked against the
                # original link, not the cached link. In that case the already
# downloaded file will be removed and re-fetched from cache (which
# implies a hash check against the cache entry's origin.json).
warn_on_hash_mismatch=not req.is_wheel_from_cache,
)

if file_path is not None:
# The file is already available, so mark it as downloaded
Expand Down Expand Up @@ -536,9 +551,35 @@ def _prepare_linked_requirement(
assert req.link
link = req.link

self._ensure_link_req_src_dir(req, parallel_builds)
hashes = self._get_linked_req_hashes(req)

if hashes and req.is_wheel_from_cache:
assert req.download_info is not None
assert link.is_wheel
assert link.is_file
# We need to verify hashes, and we have found the requirement in the cache
# of locally built wheels.
if (
isinstance(req.download_info.info, ArchiveInfo)
and req.download_info.info.hashes
and hashes.has_one_of(req.download_info.info.hashes)
):
# At this point we know the requirement was built from a hashable source
# artifact, and we verified that the cache entry's hash of the original
# artifact matches one of the hashes we expect. We don't verify hashes
# against the cached wheel, because the wheel is not the original.
hashes = None
else:
logger.warning(
"The hashes of the source archive found in cache entry "
"don't match, ignoring cached built wheel "
"and re-downloading source."
)
req.link = req.cached_wheel_source_link
link = req.link

self._ensure_link_req_src_dir(req, parallel_builds)

if link.is_existing_dir():
local_file = None
elif link.url not in self._downloaded:
Expand Down
12 changes: 11 additions & 1 deletion src/pip/_internal/req/req_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,11 @@ def __init__(
# PEP 508 URL requirement
link = Link(req.url)
self.link = self.original_link = link
self.original_link_is_in_wheel_cache = False

# When this InstallRequirement is a wheel obtained from the cache of locally
# built wheels, this is the source link corresponding to the cache entry, which
# was used to download and build the cached wheel.
self.cached_wheel_source_link: Optional[Link] = None

        # Information about the location of the artifact that was downloaded. This
# property is guaranteed to be set in resolver results.
Expand Down Expand Up @@ -437,6 +441,12 @@ def is_wheel(self) -> bool:
return False
return self.link.is_wheel

@property
def is_wheel_from_cache(self) -> bool:
    """Whether this InstallRequirement is a local wheel file taken from the
    cache of locally built wheels.

    True exactly when ``cached_wheel_source_link`` is set, i.e. when the
    requirement's link was swapped for a cached built wheel and the original
    source link was recorded.
    """
    return self.cached_wheel_source_link is not None

# Things valid for sdists
@property
def unpacked_source_directory(self) -> str:
Expand Down
2 changes: 1 addition & 1 deletion src/pip/_internal/resolution/legacy/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ def _populate_link(self, req: InstallRequirement) -> None:
if cache_entry is not None:
logger.debug("Using cached wheel link: %s", cache_entry.link)
if req.link is req.original_link and cache_entry.persistent:
req.original_link_is_in_wheel_cache = True
req.cached_wheel_source_link = req.link
if cache_entry.origin is not None:
req.download_info = cache_entry.origin
else:
Expand Down
6 changes: 4 additions & 2 deletions src/pip/_internal/resolution/resolvelib/candidates.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def __init__(
version: Optional[CandidateVersion] = None,
) -> None:
source_link = link
cache_entry = factory.get_wheel_cache_entry(link, name)
cache_entry = factory.get_wheel_cache_entry(source_link, name)
if cache_entry is not None:
logger.debug("Using cached wheel link: %s", cache_entry.link)
link = cache_entry.link
Expand All @@ -277,8 +277,10 @@ def __init__(
)

if cache_entry is not None:
assert ireq.link.is_wheel
assert ireq.link.is_file
if cache_entry.persistent and template.link is template.original_link:
ireq.original_link_is_in_wheel_cache = True
ireq.cached_wheel_source_link = source_link
if cache_entry.origin is not None:
ireq.download_info = cache_entry.origin
else:
Expand Down
2 changes: 1 addition & 1 deletion src/pip/_internal/resolution/resolvelib/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ def get_wheel_cache_entry(
hash mismatches. Furthermore, cached wheels at present have
nondeterministic contents due to file modification times.
"""
if self._wheel_cache is None or self.preparer.require_hashes:
if self._wheel_cache is None:
return None
return self._wheel_cache.get_cache_entry(
link=link,
Expand Down
7 changes: 7 additions & 0 deletions src/pip/_internal/utils/hashes.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,13 @@ def check_against_path(self, path: str) -> None:
with open(path, "rb") as file:
return self.check_against_file(file)

def has_one_of(self, hashes: Dict[str, str]) -> bool:
    """Return whether any of the given hashes are allowed.

    :param hashes: a mapping of hash algorithm name to hex digest.
    :return: True if at least one (name, digest) pair passes
        ``is_hash_allowed``; False otherwise (including for an empty
        mapping).
    """
    # any() short-circuits on the first allowed hash, exactly like the
    # explicit for/return-True loop it replaces.
    return any(
        self.is_hash_allowed(hash_name, hex_digest)
        for hash_name, hex_digest in hashes.items()
    )

def __bool__(self) -> bool:
"""Return whether I know any known-good hashes."""
return bool(self._allowed)
Expand Down
42 changes: 42 additions & 0 deletions tests/functional/test_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,48 @@ def test_bad_link_hash_in_dep_install_failure(
assert "THESE PACKAGES DO NOT MATCH THE HASHES" in result.stderr, result.stderr


def test_hashed_install_from_cache(
    script: PipTestEnvironment, data: TestData, tmpdir: Path
) -> None:
    """
    Test that installing from a cached built wheel works and that the hash is
    verified against the hash of the original source archive stored in the
    cache entry.
    """
    good_req = (
        "simple2==1.0 --hash=sha256:"
        "9336af72ca661e6336eb87bc7de3e8844d853e3848c2b9bbd2e8bf01db88c2c7\n"
    )
    install_opts = ("--use-pep517", "--no-build-isolation", "-r")
    # First install builds the wheel and populates the cache; after an
    # uninstall, the second install must be satisfied from that cache.
    with requirements_file(good_req, tmpdir) as req_path:
        result = script.pip_install_local(*install_opts, req_path.resolve())
        assert "Created wheel for simple2" in result.stdout
        script.pip("uninstall", "simple2", "-y")
        result = script.pip_install_local(*install_opts, req_path.resolve())
        assert "Using cached simple2" in result.stdout
    # With an invalid hash, the cached wheel must be bypassed (its origin hash
    # no longer matches) and the re-downloaded source must fail verification.
    with requirements_file(
        "simple2==1.0 --hash=sha256:invalid\n",
        tmpdir,
    ) as req_path:
        script.pip("uninstall", "simple2", "-y")
        result = script.pip_install_local(
            *install_opts,
            req_path.resolve(),
            expect_error=True,
        )
        assert (
            "WARNING: The hashes of the source archive found in cache entry "
            "don't match, ignoring cached built wheel and re-downloading source."
        ) in result.stderr
        assert "Using cached simple2" in result.stdout
        assert "ERROR: THESE PACKAGES DO NOT MATCH THE HASHES" in result.stderr


def assert_re_match(pattern: str, text: str) -> None:
    """Assert that *pattern* matches somewhere in *text* (re.search semantics)."""
    found = re.search(pattern, text)
    assert found, f"Could not find {pattern!r} in {text!r}"

Expand Down
6 changes: 4 additions & 2 deletions tests/unit/test_req.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,8 @@ def test_download_info_archive_legacy_cache(
reqset = resolver.resolve([ireq], True)
assert len(reqset.all_requirements) == 1
req = reqset.all_requirements[0]
assert req.original_link_is_in_wheel_cache
assert req.is_wheel_from_cache
assert req.cached_wheel_source_link
assert req.download_info
assert req.download_info.url == url
assert isinstance(req.download_info.info, ArchiveInfo)
Expand All @@ -437,7 +438,8 @@ def test_download_info_archive_cache_with_origin(
reqset = resolver.resolve([ireq], True)
assert len(reqset.all_requirements) == 1
req = reqset.all_requirements[0]
assert req.original_link_is_in_wheel_cache
assert req.is_wheel_from_cache
assert req.cached_wheel_source_link
assert req.download_info
assert req.download_info.url == url
assert isinstance(req.download_info.info, ArchiveInfo)
Expand Down
8 changes: 8 additions & 0 deletions tests/unit/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,14 @@ def test_hash(self) -> None:
cache[Hashes({"sha256": ["ab", "cd"]})] = 42
assert cache[Hashes({"sha256": ["ab", "cd"]})] == 42

def test_has_one_of(self) -> None:
    """has_one_of is True iff at least one of the given digests is allowed."""
    allowed = Hashes({"sha256": ["abcd", "efgh"], "sha384": ["ijkl"]})
    assert allowed.has_one_of({"sha256": "abcd"})
    assert allowed.has_one_of({"sha256": "efgh"})
    assert not allowed.has_one_of({"sha256": "xyzt"})
    # An empty Hashes object allows nothing.
    assert not Hashes().has_one_of({"sha256": "xyzt"})


class TestEncoding:
"""Tests for pip._internal.utils.encoding"""
Expand Down

0 comments on commit dbf4e68

Please sign in to comment.