Skip to content

Commit dbf4e68

Browse files
authored
Merge pull request #11897 from sbidoul/cache-hash-checking-sbi
Support wheel cache when using --require-hashes
2 parents f7787f8 + efe2d27 commit dbf4e68

File tree

10 files changed

+129
-16
lines changed

10 files changed

+129
-16
lines changed

news/5037.feature.rst

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Support wheel cache when using ``--require-hashes``.

src/pip/_internal/operations/prepare.py

+50-9
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,10 @@ def unpack_url(
179179

180180

181181
def _check_download_dir(
182-
link: Link, download_dir: str, hashes: Optional[Hashes]
182+
link: Link,
183+
download_dir: str,
184+
hashes: Optional[Hashes],
185+
warn_on_hash_mismatch: bool = True,
183186
) -> Optional[str]:
184187
"""Check download_dir for previously downloaded file with correct hash
185188
If a correct file is found return its path else None
@@ -195,10 +198,11 @@ def _check_download_dir(
195198
try:
196199
hashes.check_against_path(download_path)
197200
except HashMismatch:
198-
logger.warning(
199-
"Previously-downloaded file %s has bad hash. Re-downloading.",
200-
download_path,
201-
)
201+
if warn_on_hash_mismatch:
202+
logger.warning(
203+
"Previously-downloaded file %s has bad hash. Re-downloading.",
204+
download_path,
205+
)
202206
os.unlink(download_path)
203207
return None
204208
return download_path
@@ -263,7 +267,7 @@ def __init__(
263267

264268
def _log_preparing_link(self, req: InstallRequirement) -> None:
265269
"""Provide context for the requirement being prepared."""
266-
if req.link.is_file and not req.original_link_is_in_wheel_cache:
270+
if req.link.is_file and not req.is_wheel_from_cache:
267271
message = "Processing %s"
268272
information = str(display_path(req.link.file_path))
269273
else:
@@ -284,7 +288,7 @@ def _log_preparing_link(self, req: InstallRequirement) -> None:
284288
self._previous_requirement_header = (message, information)
285289
logger.info(message, information)
286290

287-
if req.original_link_is_in_wheel_cache:
291+
if req.is_wheel_from_cache:
288292
with indent_log():
289293
logger.info("Using cached %s", req.link.filename)
290294

@@ -485,7 +489,18 @@ def prepare_linked_requirement(
485489
file_path = None
486490
if self.download_dir is not None and req.link.is_wheel:
487491
hashes = self._get_linked_req_hashes(req)
488-
file_path = _check_download_dir(req.link, self.download_dir, hashes)
492+
file_path = _check_download_dir(
493+
req.link,
494+
self.download_dir,
495+
hashes,
496+
# When a locally built wheel has been found in cache, we don't warn
497+
# about re-downloading when the already downloaded wheel hash does
498+
# not match. This is because the hash must be checked against the
499+
# original link, not the cached link. In that case the already
500+
# downloaded file will be removed and re-fetched from cache (which
501+
# implies a hash check against the cache entry's origin.json).
502+
warn_on_hash_mismatch=not req.is_wheel_from_cache,
503+
)
489504

490505
if file_path is not None:
491506
# The file is already available, so mark it as downloaded
@@ -536,9 +551,35 @@ def _prepare_linked_requirement(
536551
assert req.link
537552
link = req.link
538553

539-
self._ensure_link_req_src_dir(req, parallel_builds)
540554
hashes = self._get_linked_req_hashes(req)
541555

556+
if hashes and req.is_wheel_from_cache:
557+
assert req.download_info is not None
558+
assert link.is_wheel
559+
assert link.is_file
560+
# We need to verify hashes, and we have found the requirement in the cache
561+
# of locally built wheels.
562+
if (
563+
isinstance(req.download_info.info, ArchiveInfo)
564+
and req.download_info.info.hashes
565+
and hashes.has_one_of(req.download_info.info.hashes)
566+
):
567+
# At this point we know the requirement was built from a hashable source
568+
# artifact, and we verified that the cache entry's hash of the original
569+
# artifact matches one of the hashes we expect. We don't verify hashes
570+
# against the cached wheel, because the wheel is not the original.
571+
hashes = None
572+
else:
573+
logger.warning(
574+
"The hashes of the source archive found in cache entry "
575+
"don't match, ignoring cached built wheel "
576+
"and re-downloading source."
577+
)
578+
req.link = req.cached_wheel_source_link
579+
link = req.link
580+
581+
self._ensure_link_req_src_dir(req, parallel_builds)
582+
542583
if link.is_existing_dir():
543584
local_file = None
544585
elif link.url not in self._downloaded:

src/pip/_internal/req/req_install.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,11 @@ def __init__(
108108
# PEP 508 URL requirement
109109
link = Link(req.url)
110110
self.link = self.original_link = link
111-
self.original_link_is_in_wheel_cache = False
111+
112+
# When this InstallRequirement is a wheel obtained from the cache of locally
113+
# built wheels, this is the source link corresponding to the cache entry, which
114+
# was used to download and build the cached wheel.
115+
self.cached_wheel_source_link: Optional[Link] = None
112116

113117
# Information about the location of the artifact that was downloaded. This
114118
# property is guaranteed to be set in resolver results.
@@ -437,6 +441,12 @@ def is_wheel(self) -> bool:
437441
return False
438442
return self.link.is_wheel
439443

444+
@property
445+
def is_wheel_from_cache(self) -> bool:
446+
# When True, it means that this InstallRequirement is a local wheel file in the
447+
# cache of locally built wheels.
448+
return self.cached_wheel_source_link is not None
449+
440450
# Things valid for sdists
441451
@property
442452
def unpacked_source_directory(self) -> str:

src/pip/_internal/resolution/legacy/resolver.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,7 @@ def _populate_link(self, req: InstallRequirement) -> None:
431431
if cache_entry is not None:
432432
logger.debug("Using cached wheel link: %s", cache_entry.link)
433433
if req.link is req.original_link and cache_entry.persistent:
434-
req.original_link_is_in_wheel_cache = True
434+
req.cached_wheel_source_link = req.link
435435
if cache_entry.origin is not None:
436436
req.download_info = cache_entry.origin
437437
else:

src/pip/_internal/resolution/resolvelib/candidates.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ def __init__(
259259
version: Optional[CandidateVersion] = None,
260260
) -> None:
261261
source_link = link
262-
cache_entry = factory.get_wheel_cache_entry(link, name)
262+
cache_entry = factory.get_wheel_cache_entry(source_link, name)
263263
if cache_entry is not None:
264264
logger.debug("Using cached wheel link: %s", cache_entry.link)
265265
link = cache_entry.link
@@ -277,8 +277,10 @@ def __init__(
277277
)
278278

279279
if cache_entry is not None:
280+
assert ireq.link.is_wheel
281+
assert ireq.link.is_file
280282
if cache_entry.persistent and template.link is template.original_link:
281-
ireq.original_link_is_in_wheel_cache = True
283+
ireq.cached_wheel_source_link = source_link
282284
if cache_entry.origin is not None:
283285
ireq.download_info = cache_entry.origin
284286
else:

src/pip/_internal/resolution/resolvelib/factory.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,7 @@ def get_wheel_cache_entry(
535535
hash mismatches. Furthermore, cached wheels at present have
536536
nondeterministic contents due to file modification times.
537537
"""
538-
if self._wheel_cache is None or self.preparer.require_hashes:
538+
if self._wheel_cache is None:
539539
return None
540540
return self._wheel_cache.get_cache_entry(
541541
link=link,

src/pip/_internal/utils/hashes.py

+7
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,13 @@ def check_against_path(self, path: str) -> None:
105105
with open(path, "rb") as file:
106106
return self.check_against_file(file)
107107

108+
def has_one_of(self, hashes: Dict[str, str]) -> bool:
109+
"""Return whether any of the given hashes are allowed."""
110+
for hash_name, hex_digest in hashes.items():
111+
if self.is_hash_allowed(hash_name, hex_digest):
112+
return True
113+
return False
114+
108115
def __bool__(self) -> bool:
109116
"""Return whether I know any known-good hashes."""
110117
return bool(self._allowed)

tests/functional/test_install.py

+42
Original file line numberDiff line numberDiff line change
@@ -729,6 +729,48 @@ def test_bad_link_hash_in_dep_install_failure(
729729
assert "THESE PACKAGES DO NOT MATCH THE HASHES" in result.stderr, result.stderr
730730

731731

732+
def test_hashed_install_from_cache(
733+
script: PipTestEnvironment, data: TestData, tmpdir: Path
734+
) -> None:
735+
"""
736+
Test that installing from a cached built wheel works and that the hash is verified
737+
against the hash of the original source archive stored in the cache entry.
738+
"""
739+
with requirements_file(
740+
"simple2==1.0 --hash=sha256:"
741+
"9336af72ca661e6336eb87bc7de3e8844d853e3848c2b9bbd2e8bf01db88c2c7\n",
742+
tmpdir,
743+
) as reqs_file:
744+
result = script.pip_install_local(
745+
"--use-pep517", "--no-build-isolation", "-r", reqs_file.resolve()
746+
)
747+
assert "Created wheel for simple2" in result.stdout
748+
script.pip("uninstall", "simple2", "-y")
749+
result = script.pip_install_local(
750+
"--use-pep517", "--no-build-isolation", "-r", reqs_file.resolve()
751+
)
752+
assert "Using cached simple2" in result.stdout
753+
# now try with an invalid hash
754+
with requirements_file(
755+
"simple2==1.0 --hash=sha256:invalid\n",
756+
tmpdir,
757+
) as reqs_file:
758+
script.pip("uninstall", "simple2", "-y")
759+
result = script.pip_install_local(
760+
"--use-pep517",
761+
"--no-build-isolation",
762+
"-r",
763+
reqs_file.resolve(),
764+
expect_error=True,
765+
)
766+
assert (
767+
"WARNING: The hashes of the source archive found in cache entry "
768+
"don't match, ignoring cached built wheel and re-downloading source."
769+
) in result.stderr
770+
assert "Using cached simple2" in result.stdout
771+
assert "ERROR: THESE PACKAGES DO NOT MATCH THE HASHES" in result.stderr
772+
773+
732774
def assert_re_match(pattern: str, text: str) -> None:
733775
assert re.search(pattern, text), f"Could not find {pattern!r} in {text!r}"
734776

tests/unit/test_req.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,8 @@ def test_download_info_archive_legacy_cache(
411411
reqset = resolver.resolve([ireq], True)
412412
assert len(reqset.all_requirements) == 1
413413
req = reqset.all_requirements[0]
414-
assert req.original_link_is_in_wheel_cache
414+
assert req.is_wheel_from_cache
415+
assert req.cached_wheel_source_link
415416
assert req.download_info
416417
assert req.download_info.url == url
417418
assert isinstance(req.download_info.info, ArchiveInfo)
@@ -437,7 +438,8 @@ def test_download_info_archive_cache_with_origin(
437438
reqset = resolver.resolve([ireq], True)
438439
assert len(reqset.all_requirements) == 1
439440
req = reqset.all_requirements[0]
440-
assert req.original_link_is_in_wheel_cache
441+
assert req.is_wheel_from_cache
442+
assert req.cached_wheel_source_link
441443
assert req.download_info
442444
assert req.download_info.url == url
443445
assert isinstance(req.download_info.info, ArchiveInfo)

tests/unit/test_utils.py

+8
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,14 @@ def test_hash(self) -> None:
425425
cache[Hashes({"sha256": ["ab", "cd"]})] = 42
426426
assert cache[Hashes({"sha256": ["ab", "cd"]})] == 42
427427

428+
def test_has_one_of(self) -> None:
429+
hashes = Hashes({"sha256": ["abcd", "efgh"], "sha384": ["ijkl"]})
430+
assert hashes.has_one_of({"sha256": "abcd"})
431+
assert hashes.has_one_of({"sha256": "efgh"})
432+
assert not hashes.has_one_of({"sha256": "xyzt"})
433+
empty_hashes = Hashes()
434+
assert not empty_hashes.has_one_of({"sha256": "xyzt"})
435+
428436

429437
class TestEncoding:
430438
"""Tests for pip._internal.utils.encoding"""

0 commit comments

Comments
 (0)