Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: perform a single hash validation of wheel files #8027

Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 88 additions & 11 deletions src/poetry/installation/wheel_installer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@

from pathlib import Path
from typing import TYPE_CHECKING
from typing import Collection
from typing import Iterable

from installer import install
from installer.destinations import SchemeDictionaryDestination
from installer.records import RecordEntry
from installer.records import parse_record_file
from installer.sources import WheelFile
from installer.sources import _WheelFileValidationError

Expand All @@ -18,15 +22,33 @@
if TYPE_CHECKING:
from typing import BinaryIO

from installer.records import RecordEntry
from installer.scripts import LauncherKind
from installer.utils import Scheme

from poetry.utils.env import Env


class WheelDestination(SchemeDictionaryDestination):
""" """
def __init__(
self,
source: WheelFile,
scheme_dict: dict[str, str],
interpreter: str,
script_kind: LauncherKind,
hash_algorithm: str = "sha256",
bytecode_optimization_levels: Collection[int] = (),
destdir: str | None = None,
) -> None:
super().__init__(
scheme_dict=scheme_dict,
interpreter=interpreter,
script_kind=script_kind,
hash_algorithm=hash_algorithm,
bytecode_optimization_levels=bytecode_optimization_levels,
destdir=destdir,
)
self._source = source
self.issues: list[str] = []

def write_to_fs(
self,
Expand All @@ -36,7 +58,6 @@ def write_to_fs(
is_executable: bool,
) -> RecordEntry:
from installer.records import Hash
from installer.records import RecordEntry
from installer.utils import copyfileobj_with_hashing
from installer.utils import make_file_executable

Expand Down Expand Up @@ -66,12 +87,52 @@ def for_source(self, source: WheelFile) -> WheelDestination:
scheme_dict["headers"] = str(Path(scheme_dict["headers"]) / source.distribution)

return self.__class__(
scheme_dict,
source=source,
scheme_dict=scheme_dict,
interpreter=self.interpreter,
script_kind=self.script_kind,
bytecode_optimization_levels=self.bytecode_optimization_levels,
)

def _validate_hash_and_size(
self, records: Iterable[tuple[Scheme, RecordEntry]]
) -> None:
record_lines = self._source.read_dist_info("RECORD").splitlines()
record_mapping = {
record[0]: record for record in parse_record_file(record_lines)
}
for item in self._source._zipfile.infolist():
record_args = record_mapping.pop(item.filename, None)
if not record_args:
continue

file_record = RecordEntry.from_elements(*record_args)
computed_record = next(
record for _, record in records if record.path == item.filename
)
Comment on lines +110 to +112
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't always work. I tested a large project and got a StopIteration for some packages.

You can reproduce it by running poetry add fonttools for example.

What's even worth: The package is installed, but the RECORD file is not written.

poetry run pip uninstall fonttools now throws ERROR: Cannot uninstall fonttools 4.39.4, RECORD file not found. Hint: The package was installed by Poetry 1.6.0.dev0. and poetry run pip install fonttools gives Requirement already satisfied.

You can only "manually" uninstall the package.

if (
file_record.hash_ is not None
and computed_record.hash_ is not None
and file_record.hash_ != computed_record.hash_
) or (
file_record.size is not None
and computed_record.size is not None
and file_record.size != computed_record.size
):
self.issues.append(
f"In {self._source._zipfile.filename}, hash / size of"
f" {item.filename} didn't match RECORD"
)

def finalize_installation(
self,
scheme: Scheme,
record_file_path: str,
records: Iterable[tuple[Scheme, RecordEntry]],
) -> None:
self._validate_hash_and_size(records)
return super().finalize_installation(scheme, record_file_path, records)
Comment on lines +133 to +134
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
self._validate_hash_and_size(records)
return super().finalize_installation(scheme, record_file_path, records)
try:
self._validate_hash_and_size(records)
finally:
return super().finalize_installation(scheme, record_file_path, records)

I wonder if we should do something like that to make sure the RECORD file is written despite of bugs in _validate_hash_and_size(). However, when I tried this, the exception was swallowed, which not good either. Thus, we would have to put more effort here...



class WheelInstaller:
def __init__(self, env: Env) -> None:
Expand All @@ -89,26 +150,42 @@ def __init__(self, env: Env) -> None:
schemes = self._env.paths
schemes["headers"] = schemes["include"]

self._destination = WheelDestination(
schemes, interpreter=str(self._env.python), script_kind=script_kind
)

self._script_kind = script_kind
self._schemes = schemes
self._bytecode_compilation_enabled = False
self.invalid_wheels: dict[Path, list[str]] = {}

def enable_bytecode_compilation(self, enable: bool = True) -> None:
self._destination.bytecode_optimization_levels = (-1,) if enable else ()
self._bytecode_compilation_enabled = enable

def install(self, wheel: Path) -> None:
with WheelFile.open(wheel) as source:
destination = WheelDestination(
source=source,
scheme_dict=self._schemes,
interpreter=str(self._env.python),
script_kind=self._script_kind,
)
destination.bytecode_optimization_levels = (
(-1,) if self._bytecode_compilation_enabled else ()
)
destination = destination.for_source(source)
try:
source.validate_record()
# Content validation is disabled to avoid performing hash
# computation on files twice. We perform this kind of validation
# while installing the wheel. See _validate_hash_and_size.
source.validate_record(validate_contents=False)
except _WheelFileValidationError as e:
self.invalid_wheels[wheel] = e.issues
install(
source=source,
destination=self._destination.for_source(source),
destination=destination,
# Additional metadata that is generated by the installation tool.
additional_metadata={
"INSTALLER": f"Poetry {__version__}".encode(),
},
)
if destination.issues:
self.invalid_wheels[wheel] = (
self.invalid_wheels.get(wheel, []) + destination.issues
)