From 06b8850f35ae67377ad2fe31d62ee1f71ba61eea Mon Sep 17 00:00:00 2001 From: John Sirois Date: Sun, 3 Nov 2024 18:25:33 -0800 Subject: [PATCH] Support `pex3 cache prune --older-than ...`. (#2586) Work towards #2528. --- CHANGES.md | 12 + pex/cache/access.py | 33 +- pex/cache/dirs.py | 714 +++++++++++++++++- pex/cache/prunable.py | 170 +++++ pex/cli/commands/cache/command.py | 493 +++++++++++- pex/hashing.py | 7 +- pex/interpreter.py | 52 +- pex/jobs.py | 7 +- pex/layout.py | 41 +- pex/pex_bootstrapper.py | 23 +- pex/pex_builder.py | 29 +- pex/pex_info.py | 7 +- pex/pip/installation.py | 82 +- pex/pip/tool.py | 63 +- pex/pip/vcs.py | 6 +- pex/pip/version.py | 21 +- pex/resolve/downloads.py | 26 +- pex/resolve/lock_resolver.py | 7 +- pex/resolve/lockfile/create.py | 5 +- pex/resolve/lockfile/download_manager.py | 11 +- pex/resolver.py | 73 +- pex/scie/science.py | 4 +- pex/sh_boot.py | 2 +- pex/third_party/__init__.py | 13 +- pex/util.py | 27 +- pex/variables.py | 18 +- pex/venv/venv_pex.py | 2 + pex/version.py | 2 +- .../cli/commands/test_cache_prune.py | 558 ++++++++++++++ tests/integration/test_integration.py | 6 +- tests/integration/test_pex_bootstrapper.py | 12 +- .../resolve/lockfile/test_download_manager.py | 5 +- tests/test_interpreter.py | 12 +- tests/test_pip.py | 2 +- .../commands/test_interpreter_command.py | 59 +- 35 files changed, 2301 insertions(+), 303 deletions(-) create mode 100644 pex/cache/prunable.py create mode 100644 tests/integration/cli/commands/test_cache_prune.py diff --git a/CHANGES.md b/CHANGES.md index 1a5c9ca17..334a87fad 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,17 @@ # Release Notes +## 2.24.0 + +This release adds `pex3 cache prune` as a likely more useful Pex cache +management command than the existing `pex3 cache purge`. By default +`pex3 cache prune` prunes any cached items not used for the last 2 +weeks and is likely suitable for use as a daily cron job to keep Pex +cache sizes down. The default age of 2 weeks can be overridden by +specifying `--older-than "1 week"` or `--last-access-before 14/3/2024`, +etc. See `pex3 cache prune --help` for more details. + +* Support `pex3 cache prune --older-than ...`. (#2586) + ## 2.23.0 This release adds support for drawing requirements from diff --git a/pex/cache/access.py b/pex/cache/access.py index 847300de5..faa85232d 100644 --- a/pex/cache/access.py +++ b/pex/cache/access.py @@ -4,7 +4,9 @@ from __future__ import absolute_import, print_function import fcntl +import itertools import os +import time from contextlib import contextmanager from pex.common import safe_mkdir @@ -12,7 +14,9 @@ from pex.variables import ENV if TYPE_CHECKING: - from typing import Iterator, Optional, Tuple + from typing import Iterator, Optional, Tuple, Union + + from pex.cache.dirs import AtomicCacheDir, UnzipDir, VenvDirs # noqa # N.B.: The lock file path is last in the lock state tuple to allow for a simple encoding scheme in @@ -99,3 +103,30 @@ def await_delete_lock(): lock_file = _lock(exclusive=False) yield lock_file _lock(exclusive=True) + + +def record_access( + atomic_cache_dir, # type: AtomicCacheDir + last_access=None, # type: Optional[float] +): + # type: (...) -> None + + # N.B.: We explicitly set atime and do not rely on the filesystem implicitly setting it when the + # directory is read since filesystems may be mounted noatime, nodiratime or relatime on Linux + # and similar toggles exist, at least in part, for some macOS file systems. 
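+    # N.B.: os.utime sets atime and mtime as a pair; the current mtime is read below so it can
+    # be passed back through unchanged while the atime is refreshed.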
+ atime = last_access or time.time() + mtime = os.stat(atomic_cache_dir.path).st_mtime + os.utime(atomic_cache_dir.path, (atime, mtime)) + + +def iter_all_cached_pex_dirs(): + # type: () -> Iterator[Tuple[Union[UnzipDir, VenvDirs], float]] + + from pex.cache.dirs import UnzipDir, VenvDirs + + pex_dirs = itertools.chain( + UnzipDir.iter_all(), VenvDirs.iter_all() + ) # type: Iterator[Union[UnzipDir, VenvDirs]] + for pex_dir in pex_dirs: + last_access = os.stat(pex_dir.path).st_atime + yield pex_dir, last_access diff --git a/pex/cache/dirs.py b/pex/cache/dirs.py index bd773cd47..a43c85674 100644 --- a/pex/cache/dirs.py +++ b/pex/cache/dirs.py @@ -3,14 +3,26 @@ from __future__ import absolute_import +import glob import os +from pex.common import is_exe, safe_rmtree +from pex.compatibility import commonpath from pex.enum import Enum -from pex.typing import TYPE_CHECKING +from pex.exceptions import production_assert +from pex.orderedset import OrderedSet +from pex.typing import TYPE_CHECKING, cast from pex.variables import ENV, Variables if TYPE_CHECKING: - from typing import Iterable, Iterator, Union + from typing import Any, Iterable, Iterator, List, Optional, Type, TypeVar, Union + + from pex.dist_metadata import ProjectNameAndVersion + from pex.interpreter import PythonInterpreter + from pex.pep_440 import Version + from pex.pep_503 import ProjectName + from pex.pip.version import PipVersionValue + from pex.targets import Target class CacheDir(Enum["CacheDir.Value"]): @@ -95,7 +107,7 @@ def iter_transitive_dependents(self): DOWNLOADS = Value( "downloads", - version=0, + version=1, name="Lock Artifact Downloads", description="Distributions downloaded when resolving from a Pex lock file.", ) @@ -139,6 +151,7 @@ def iter_transitive_dependents(self): version=1, name="Pip Versions", description="Isolated Pip caches and Pip PEXes Pex uses to resolve distributions.", + dependencies=[INSTALLED_WHEELS], ) PLATFORMS = Value( @@ -189,3 +202,698 @@ def iter_transitive_dependents(self): description="Virtual environments generated at runtime for `--venv` mode PEXes.", dependencies=[INSTALLED_WHEELS], ) + + +if TYPE_CHECKING: + _AtomicCacheDir = TypeVar("_AtomicCacheDir", bound="AtomicCacheDir") + + +class AtomicCacheDir(str): + @staticmethod + def __new__( + cls, # type: Type[_AtomicCacheDir] + path, # type: str + *args, # type: Any + **kwargs # type: Any + ): + # type: (...) -> _AtomicCacheDir + # MyPy incorrectly flags the call to str.__new__(cls, path) for Python 2.7. + return cast("_AtomicCacheDir", str.__new__(cls, path)) # type: ignore[call-arg] + + def __init__( + self, + path, # type: str + *args, # type: Any + **kwargs # type: Any + ): + # type: (...) -> None + self.path = path + + def __repr__(self): + # type: () -> str + return "{clazz}(path={path})".format(clazz=self.__class__.__name__, path=self.path) + + +class UnzipDir(AtomicCacheDir): + @classmethod + def create( + cls, + pex_hash, # type: str + pex_root=ENV, # type: Union[str, Variables] + ): + # type: (...) 
-> UnzipDir + unzip_dir = CacheDir.UNZIPPED_PEXES.path(pex_hash, pex_root=pex_root) + return cls(path=unzip_dir, pex_hash=pex_hash, pex_root=pex_root) + + @classmethod + def iter_all(cls, pex_root=ENV): + # type: (Union[str, Variables]) -> Iterator[UnzipDir] + for unzip_dir in glob.glob(CacheDir.UNZIPPED_PEXES.path("*", pex_root=pex_root)): + if os.path.isdir(unzip_dir): + pex_hash = os.path.basename(unzip_dir) + yield UnzipDir(path=unzip_dir, pex_hash=pex_hash, pex_root=pex_root) + + def __init__( + self, + path, # type: str + pex_hash, # type: str + pex_root=ENV, # type: Union[str, Variables] + ): + # type: (...) -> None + super(UnzipDir, self).__init__(path) + self.pex_hash = pex_hash + self._pex_root = pex_root + + def iter_deps(self): + # type: () -> Iterator[Union[BootstrapDir, UserCodeDir, InstalledWheelDir]] + + from pex.pex_info import PexInfo + + pex_info = PexInfo.from_pex(self.path) + if pex_info.bootstrap_hash: + yield BootstrapDir.create( + bootstrap_hash=pex_info.bootstrap_hash, pex_root=self._pex_root + ) + if pex_info.code_hash: + yield UserCodeDir.create(code_hash=pex_info.code_hash, pex_root=self._pex_root) + for wheel_name, install_hash in pex_info.distributions.items(): + installed_wheel_dir = InstalledWheelDir.create( + wheel_name=wheel_name, install_hash=install_hash, pex_root=self._pex_root + ) + # N.B.: Not all installed wheels in a PEX's .deps will be extracted for a given + # interpreter if the PEX is multiplatform. + if os.path.exists(installed_wheel_dir): + yield installed_wheel_dir + + +class VenvDir(AtomicCacheDir): + @classmethod + def create( + cls, + pex_hash, # type: str + contents_hash, # type: str + pex_root=ENV, # type: Union[str, Variables] + ): + # type: (...) -> VenvDir + venv_dir = CacheDir.VENVS.path(pex_hash, contents_hash, pex_root=pex_root) + return cls(path=venv_dir, pex_hash=pex_hash, contents_hash=contents_hash, pex_root=pex_root) + + def __init__( + self, + path, # type: str + pex_hash, # type: str + contents_hash, # type: str + pex_root=ENV, # type: Union[str, Variables] + ): + # type: (...) -> None + super(VenvDir, self).__init__(path) + self.pex_hash = pex_hash + self.contents_hash = contents_hash + self.pex_root = pex_root + + +class VenvDirs(AtomicCacheDir): + SHORT_SYMLINK_NAME = "venv" + + @classmethod + def create( + cls, + short_hash, # type: str + pex_hash, # type: str + contents_hash, # type: str + pex_root=ENV, # type: Union[str, Variables] + ): + # type: (...) -> VenvDirs + venv_dir = VenvDir.create(pex_hash, contents_hash, pex_root=pex_root) + return cls(venv_dir=venv_dir, short_hash=short_hash) + + @classmethod + def iter_all(cls, pex_root=ENV): + # type: (Union[str, Variables]) -> Iterator[VenvDirs] + for venv_short_dir_symlink in glob.glob( + CacheDir.VENVS.path("s", "*", cls.SHORT_SYMLINK_NAME, pex_root=pex_root) + ): + if not os.path.isdir(venv_short_dir_symlink): + continue + + head, _venv = os.path.split(venv_short_dir_symlink) + short_hash = os.path.basename(head) + + venv_dir_path = os.path.realpath(venv_short_dir_symlink) + head, contents_hash = os.path.split(venv_dir_path) + pex_hash = os.path.basename(head) + venv_dir = VenvDir(path=venv_dir_path, pex_hash=pex_hash, contents_hash=contents_hash) + + yield VenvDirs(venv_dir=venv_dir, short_hash=short_hash) + + @staticmethod + def __new__( + cls, + venv_dir, # type: VenvDir + short_hash, # type: str + ): + # type: (...) 
-> VenvDirs + return cast(VenvDirs, super(VenvDirs, cls).__new__(cls, venv_dir.path)) + + def __getnewargs__(self): + return VenvDir.create(self.pex_hash, self.contents_hash, self._pex_root), self.short_hash + + def __init__( + self, + venv_dir, # type: VenvDir + short_hash, # type: str + ): + # type: (...) -> None + super(VenvDirs, self).__init__(venv_dir.path) + self.short_hash = short_hash + self.pex_hash = venv_dir.pex_hash + self.contents_hash = venv_dir.contents_hash + self._pex_root = venv_dir.pex_root + + @property + def short_dir(self): + # type: () -> str + return CacheDir.VENVS.path("s", self.short_hash, pex_root=self._pex_root) + + def iter_deps(self): + # type: () -> Iterator[InstalledWheelDir] + + from pex.pex_info import PexInfo + + pex_info = PexInfo.from_pex(self.path) + if not pex_info.venv_site_packages_copies: + for wheel_name, install_hash in pex_info.distributions.items(): + installed_wheel_dir = InstalledWheelDir.create( + wheel_name=wheel_name, install_hash=install_hash, pex_root=self._pex_root + ) + # N.B.: Not all installed wheels in a PEX's .deps will be installed in a given + # venv if the PEX is multiplatform. + if os.path.exists(installed_wheel_dir): + yield installed_wheel_dir + + +class InstalledWheelDir(AtomicCacheDir): + @classmethod + def iter_all(cls, pex_root=ENV): + # type: (Union[str, Variables]) -> Iterator[InstalledWheelDir] + + from pex.dist_metadata import ProjectNameAndVersion + + symlinks = [] # type: List[str] + dirs = OrderedSet() # type: OrderedSet[str] + for path in glob.glob(CacheDir.INSTALLED_WHEELS.path("*", "*.whl", pex_root=pex_root)): + if not os.path.isdir(path): + continue + if os.path.islink(path): + symlinks.append(path) + else: + dirs.add(path) + + for symlink in symlinks: + wheel_dir = os.path.realpath(symlink) + dirs.discard(wheel_dir) + wheel_hash = os.path.basename(os.path.dirname(wheel_dir)) + symlink_dir = os.path.dirname(symlink) + install_hash = os.path.basename(symlink_dir) + wheel_name = os.path.basename(wheel_dir) + pnav = ProjectNameAndVersion.from_filename(wheel_name) + yield InstalledWheelDir( + wheel_dir, + wheel_name=wheel_name, + project_name=pnav.canonicalized_project_name, + version=pnav.canonicalized_version, + install_hash=install_hash, + wheel_hash=wheel_hash, + symlink_dir=symlink_dir, + ) + for wheel_dir in dirs: + install_hash = os.path.basename(os.path.dirname(wheel_dir)) + wheel_name = os.path.basename(wheel_dir) + pnav = ProjectNameAndVersion.from_filename(wheel_name) + yield InstalledWheelDir( + wheel_dir, + wheel_name=wheel_name, + project_name=pnav.canonicalized_project_name, + version=pnav.canonicalized_version, + install_hash=install_hash, + ) + + @classmethod + def create( + cls, + wheel_name, # type: str + install_hash, # type: str + wheel_hash=None, # type: Optional[str] + pex_root=ENV, # type: Union[str, Variables] + ): + # type: (...) 
-> InstalledWheelDir + + from pex.dist_metadata import ProjectNameAndVersion + + pnav = ProjectNameAndVersion.from_filename(wheel_name) + wheel_dir = CacheDir.INSTALLED_WHEELS.path(install_hash, wheel_name, pex_root=pex_root) + symlink_dir = None # type: Optional[str] + if os.path.islink(wheel_dir): + symlink_dir = os.path.dirname(wheel_dir) + wheel_dir = os.path.realpath(wheel_dir) + recorded_wheel_hash = os.path.basename(os.path.dirname(wheel_dir)) + if wheel_hash: + production_assert(wheel_hash == recorded_wheel_hash) + else: + wheel_hash = recorded_wheel_hash + elif wheel_hash is not None: + symlink_dir = os.path.dirname(wheel_dir) + wheel_dir = CacheDir.INSTALLED_WHEELS.path(wheel_hash, wheel_name, pex_root=pex_root) + + return cls( + path=wheel_dir, + wheel_name=wheel_name, + project_name=pnav.canonicalized_project_name, + version=pnav.canonicalized_version, + install_hash=install_hash, + wheel_hash=wheel_hash, + symlink_dir=symlink_dir, + ) + + def __init__( + self, + path, # type: str + wheel_name, # type: str + project_name, # type: ProjectName + version, # type: Version + install_hash, # type: str + wheel_hash=None, # type: Optional[str] + symlink_dir=None, # type: Optional[str] + ): + # type: (...) -> None + super(InstalledWheelDir, self).__init__(path) + self.wheel_name = wheel_name + self.project_name = project_name + self.version = version + self.install_hash = install_hash + self.wheel_hash = wheel_hash + self.symlink_dir = symlink_dir + + +class BootstrapDir(AtomicCacheDir): + @classmethod + def iter_all(cls, pex_root=ENV): + # type: (Union[str, Variables]) -> Iterator[BootstrapDir] + + for path in glob.glob(CacheDir.BOOTSTRAPS.path("*", pex_root=pex_root)): + bootstrap_hash = os.path.basename(path) + yield cls(path=path, bootstrap_hash=bootstrap_hash) + + @classmethod + def create( + cls, + bootstrap_hash, # type: str + pex_root=ENV, # type: Union[str, Variables] + ): + # type: (...) -> BootstrapDir + bootstrap_dir = CacheDir.BOOTSTRAPS.path(bootstrap_hash, pex_root=pex_root) + return cls(path=bootstrap_dir, bootstrap_hash=bootstrap_hash) + + def __init__( + self, + path, # type: str + bootstrap_hash, # type: str + ): + # type: (...) -> None + super(BootstrapDir, self).__init__(path) + self.bootstrap_hash = bootstrap_hash + + +class UserCodeDir(AtomicCacheDir): + @classmethod + def iter_all(cls, pex_root=ENV): + # type: (Union[str, Variables]) -> Iterator[UserCodeDir] + + for path in glob.glob(CacheDir.USER_CODE.path("*", pex_root=pex_root)): + code_hash = os.path.basename(path) + yield cls(path=path, code_hash=code_hash) + + @classmethod + def create( + cls, + code_hash, # type: str + pex_root=ENV, # type: Union[str, Variables] + ): + # type: (...) -> UserCodeDir + user_code_dir = CacheDir.USER_CODE.path(code_hash, pex_root=pex_root) + return cls(path=user_code_dir, code_hash=code_hash) + + def __init__( + self, + path, # type: str + code_hash, # type: str + ): + # type: (...) 
-> None + super(UserCodeDir, self).__init__(path) + self.code_hash = code_hash + + +class PipPexDir(AtomicCacheDir): + @classmethod + def iter_all(cls, pex_root=ENV): + # type: (Union[str, Variables]) -> Iterator[PipPexDir] + + from pex.pip.version import PipVersion + + for base_dir in glob.glob(CacheDir.PIP.path("*", pex_root=pex_root)): + version = PipVersion.for_value(os.path.basename(base_dir)) + cache_dir = os.path.join(base_dir, "pip_cache") + for pex_dir in glob.glob(os.path.join(base_dir, "pip.pex", "*", "*")): + yield cls(path=pex_dir, version=version, base_dir=base_dir, cache_dir=cache_dir) + + @classmethod + def create( + cls, + version, # type: PipVersionValue + fingerprint, # type: str + ): + # type: (...) -> PipPexDir + + from pex.third_party import isolated + + base_dir = CacheDir.PIP.path(str(version)) + return cls( + path=os.path.join(base_dir, "pip.pex", isolated().pex_hash, fingerprint), + version=version, + base_dir=base_dir, + cache_dir=os.path.join(base_dir, "pip_cache"), + ) + + def __init__( + self, + path, # type: str + version, # type: PipVersionValue + base_dir, # type: str + cache_dir, # type: str + ): + # type: (...) -> None + super(PipPexDir, self).__init__(path) + self.version = version + self.base_dir = base_dir + self.cache_dir = cache_dir + + +class DownloadDir(AtomicCacheDir): + @classmethod + def iter_all(cls, pex_root=ENV): + # type: (Union[str, Variables]) -> Iterator[DownloadDir] + + from pex.dist_metadata import is_sdist, is_wheel + + for file_path in glob.glob(CacheDir.DOWNLOADS.path("*", "*", pex_root=pex_root)): + if os.path.isdir(file_path): + continue + if not is_sdist(file_path) and not is_wheel(file_path): + continue + download_dir, file_name = os.path.split(file_path) + yield cls(path=download_dir, file_name=file_name) + + @classmethod + def create( + cls, + file_hash, # type: str + file_name=None, # type: Optional[str] + pex_root=ENV, # type: Union[str, Variables] + ): + # type: (...) -> DownloadDir + return cls(path=CacheDir.DOWNLOADS.path(file_hash, pex_root=pex_root), file_name=file_name) + + def __init__( + self, + path, # type: str + file_name=None, # type: Optional[str] + ): + # type: (...) 
-> None + super(DownloadDir, self).__init__(path) + self._file_name = file_name + self.__pnav = None # type: Optional[ProjectNameAndVersion] + + @property + def file_name(self): + # type: () -> str + from pex.dist_metadata import is_sdist, is_wheel + + if self._file_name is None: + potential_file_names = [ + file_name + for file_name in os.listdir(self.path) + if not os.path.isdir(os.path.join(self.path, file_name)) + and (is_sdist(file_name) or is_wheel(file_name)) + ] + production_assert(len(potential_file_names) == 1) + self._file_name = potential_file_names[0] + return self._file_name + + @property + def _pnav(self): + # type: () -> ProjectNameAndVersion + if self.__pnav is None: + from pex.dist_metadata import ProjectNameAndVersion + + self.__pnav = ProjectNameAndVersion.from_filename(self.file_name) + return self.__pnav + + @property + def project_name(self): + # type: () -> ProjectName + return self._pnav.canonicalized_project_name + + @property + def version(self): + # type: () -> Version + return self._pnav.canonicalized_version + + +class BuiltWheelDir(AtomicCacheDir): + @classmethod + def iter_all(cls, pex_root=ENV): + # type: (Union[str, Variables]) -> Iterator[BuiltWheelDir] + + from pex.dist_metadata import ProjectNameAndVersion, UnrecognizedDistributionFormat + + for path in glob.glob(CacheDir.BUILT_WHEELS.path("sdists", "*", "*")): + sdist, fingerprint = os.path.split(path) + try: + pnav = ProjectNameAndVersion.from_filename(sdist) + yield BuiltWheelDir.create( + sdist=sdist, fingerprint=fingerprint, pnav=pnav, pex_root=pex_root + ) + except UnrecognizedDistributionFormat: + # This is a source distribution that does not follow sdist naming patterns / is not + # distributed via PyPI; e.g.: a GitHub source tarball or zip. + for built_wheel in glob.glob(os.path.join(path, "*", "*")): + file_name = os.path.basename(built_wheel) + dist_dir = os.path.dirname(built_wheel) + yield BuiltWheelDir(path=dist_dir, dist_dir=dist_dir, file_name=file_name) + + for built_wheel in glob.glob( + CacheDir.BUILT_WHEELS.path("local_projects", "*", "*", "*", "*") + ): + file_name = os.path.basename(built_wheel) + dist_dir = os.path.dirname(built_wheel) + yield BuiltWheelDir(path=dist_dir, dist_dir=dist_dir, file_name=file_name) + + @classmethod + def create( + cls, + sdist, # type: str + fingerprint=None, # type: Optional[str] + pnav=None, # type: Optional[ProjectNameAndVersion] + target=None, # type: Optional[Target] + pex_root=ENV, # type: Union[str, Variables] + ): + # type: (...) -> BuiltWheelDir + + import hashlib + + from pex import targets + from pex.dist_metadata import is_sdist + from pex.util import CacheHelper + + if is_sdist(sdist): + dist_type = "sdists" + fingerprint = fingerprint or CacheHelper.hash(sdist, hasher=hashlib.sha256) + file_name = os.path.basename(sdist) + else: + dist_type = "local_projects" + fingerprint = fingerprint or CacheHelper.dir_hash(sdist, hasher=hashlib.sha256) + file_name = None + + # For the purposes of building a wheel from source, the product should be uniqued by the + # wheel name which is unique on the host os up to the python and abi tags. In other words, + # the product of a CPython 2.7.6 wheel build and a CPython 2.7.18 wheel build should be + # functionally interchangeable if the two CPython interpreters have matching abis. + # + # However, this is foiled by at least two scenarios: + # 1. Running a vm / container with shared storage mounted. This can introduce a different + # platform on the host. + # 2. 
On macOS the same host can report / use different OS versions (c.f.: the + # MACOSX_DEPLOYMENT_TARGET environment variable and the 10.16 / 11.0 macOS Big Sur + # transitional case in particular). + # + # As such, we must be pessimistic and assume the wheel will be platform specific to the + # full extent possible. + interpreter = (target or targets.current()).get_interpreter() + target_tags = "{python_tag}-{abi_tag}-{platform_tag}".format( + python_tag=interpreter.identity.python_tag, + abi_tag=interpreter.identity.abi_tag, + platform_tag=interpreter.identity.platform_tag, + ) + sdist_dir = CacheDir.BUILT_WHEELS.path( + dist_type, os.path.basename(sdist), pex_root=pex_root + ) + dist_dir = os.path.join(sdist_dir, fingerprint, target_tags) + + if is_sdist(sdist): + return cls(path=sdist_dir, dist_dir=dist_dir, file_name=file_name, pnav=pnav) + else: + return cls(path=dist_dir, dist_dir=dist_dir, file_name=file_name, pnav=pnav) + + def __init__( + self, + path, # type: str + dist_dir, # type: str + file_name=None, # type: Optional[str] + pnav=None, # type: Optional[ProjectNameAndVersion] + ): + # type: (...) -> None + super(BuiltWheelDir, self).__init__(path) + self.dist_dir = dist_dir + self._file_name = file_name + self.__pnav = pnav + + @property + def file_name(self): + # type: () -> str + from pex.dist_metadata import is_wheel + + if self._file_name is None: + potential_file_names = [ + file_name + for file_name in os.listdir(self.dist_dir) + if not os.path.isdir(os.path.join(self.dist_dir, file_name)) and is_wheel(file_name) + ] + production_assert(len(potential_file_names) == 1) + self._file_name = potential_file_names[0] + return self._file_name + + @property + def _pnav(self): + # type: () -> ProjectNameAndVersion + if self.__pnav is None: + from pex.dist_metadata import ProjectNameAndVersion + + self.__pnav = ProjectNameAndVersion.from_filename(self.file_name) + return self.__pnav + + @property + def project_name(self): + # type: () -> ProjectName + return self._pnav.canonicalized_project_name + + @property + def version(self): + # type: () -> Version + return self._pnav.canonicalized_version + + +class InterpreterDir(AtomicCacheDir): + INTERP_INFO_FILE = "INTERP-INFO" + + @classmethod + def iter_all(cls, pex_root=ENV): + # type: (Union[str, Variables]) -> Iterator[InterpreterDir] + + for interp_info_file in glob.glob( + CacheDir.INTERPRETERS.path("*", "*", "*", cls.INTERP_INFO_FILE, pex_root=pex_root) + ): + yield cls(path=os.path.dirname(interp_info_file), interp_info_file=interp_info_file) + + @classmethod + def create(cls, binary): + # type: (str) -> InterpreterDir + + import hashlib + import platform + + from pex.tracer import TRACER + from pex.util import CacheHelper + + # Part of the PythonInterpreter data are environment markers that depend on the current OS + # release. That data can change when the OS is upgraded but (some of) the installed + # interpreters remain the same. As such, include the OS in the hash structure for cached + # interpreters. 
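+        # For example, an OS upgrade that changes platform.release() or platform.version()
+        # produces a new os_hash, and the interpreter cache from the prior OS version is GC'd
+        # below.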
+ os_digest = hashlib.sha1() + for os_identifier in platform.release(), platform.version(): + os_digest.update(os_identifier.encode("utf-8")) + os_hash = os_digest.hexdigest() + + interpreter_cache_dir = CacheDir.INTERPRETERS.path() + os_cache_dir = os.path.join(interpreter_cache_dir, os_hash) + if os.path.isdir(interpreter_cache_dir) and not os.path.isdir(os_cache_dir): + with TRACER.timed("GCing interpreter cache from prior OS version"): + safe_rmtree(interpreter_cache_dir) + + interpreter_hash = CacheHelper.hash(binary) + + # Some distributions include more than one copy of the same interpreter via a hard link + # (e.g.: python3.7 is a hardlink to python3.7m). To ensure a deterministic INTERP-INFO file + # we must emit a separate INTERP-INFO for each link since INTERP-INFO contains the + # interpreter path and would otherwise be unstable. + # + # See PythonInterpreter._REGEXEN for a related affordance. + # + # N.B.: The path for --venv mode interpreters can be quite long; so we just used a fixed + # length hash of the interpreter binary path to ensure uniqueness and not run afoul of file + # name length limits. + path_id = hashlib.sha1(binary.encode("utf-8")).hexdigest() + + cache_dir = os.path.join(os_cache_dir, interpreter_hash, path_id) + cache_file = os.path.join(cache_dir, cls.INTERP_INFO_FILE) + + return cls(path=cache_dir, interp_info_file=cache_file) + + def __init__( + self, + path, # type: str + interp_info_file, # type: str + pex_root=ENV, # type: Union[str, Variables] + ): + # type: (...) -> None + super(InterpreterDir, self).__init__(path) + self.interp_info_file = interp_info_file + self._interpreter = None # type: Optional[PythonInterpreter] + self._pex_root = pex_root + + @property + def interpreter(self): + # type: () -> PythonInterpreter + if self._interpreter is None: + with open(self.interp_info_file) as fp: + from pex.interpreter import PythonIdentity, PythonInterpreter + + self._interpreter = PythonInterpreter(PythonIdentity.decode(fp.read())) + return self._interpreter + + def valid(self): + # type: () -> bool + return is_exe(self.interpreter.binary) + + def venv_dir(self): + # type: () -> Optional[VenvDir] + + if not self.interpreter.is_venv: + return None + cached_venv_root = CacheDir.VENVS.path() + if cached_venv_root != commonpath((cached_venv_root, self.interpreter.prefix)): + return None + head, contents_hash = os.path.split(self.interpreter.prefix) + pex_hash = os.path.basename(head) + return VenvDir.create( + pex_hash=pex_hash, contents_hash=contents_hash, pex_root=self._pex_root + ) diff --git a/pex/cache/prunable.py b/pex/cache/prunable.py new file mode 100644 index 000000000..ea544aa29 --- /dev/null +++ b/pex/cache/prunable.py @@ -0,0 +1,170 @@ +# Copyright 2024 Pex project contributors. +# Licensed under the Apache License, Version 2.0 (see LICENSE). 
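+#
+# N.B.: This module works out which cache directories `pex3 cache prune` can safely delete:
+# cached PEX zipapps and venvs last accessed before the cutoff, the dependencies (bootstraps,
+# user code and installed wheels) referenced only by those PEXes, and any Pip PEXes and cached
+# interpreters orphaned as a result.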
+ +from __future__ import absolute_import + +import os.path +from collections import OrderedDict +from datetime import datetime + +from pex.cache import access +from pex.cache.dirs import ( + BootstrapDir, + InstalledWheelDir, + InterpreterDir, + UnzipDir, + UserCodeDir, + VenvDirs, +) +from pex.orderedset import OrderedSet +from pex.pip.installation import iter_all as iter_all_pips +from pex.pip.tool import Pip +from pex.pip.version import PipVersionValue +from pex.typing import TYPE_CHECKING + +if TYPE_CHECKING: + # MyPy run for 2.7 does not recognize the Collection type + from typing import ( # type: ignore[attr-defined] + Collection, + Container, + Dict, + Iterator, + List, + Mapping, + Set, + Tuple, + Union, + ) + + import attr # vendor:skip +else: + from pex.third_party import attr + + +@attr.s(frozen=True) +class PrunablePipCache(object): + pip = attr.ib() # type: Pip + pex_dir = attr.ib() # type: Union[UnzipDir, VenvDirs] + last_access = attr.ib() # type: float + + +@attr.s(frozen=True) +class Pips(object): + @classmethod + def scan(cls, pex_dirs_by_hash): + # type: (Mapping[str, Tuple[Union[UnzipDir, VenvDirs], float, bool]]) -> Pips + + # True to prune the Pip version completely, False to just prune the Pip PEX. + pips_to_prune = OrderedDict() # type: OrderedDict[Pip, bool] + + # N.B.: We just need 1 Pip per version (really per paired cache). Whether a Pip has + # extra requirements installed does not affect cache management. + pip_caches_to_prune = OrderedDict() # type: OrderedDict[PipVersionValue, PrunablePipCache] + for pip in iter_all_pips(): + pex_dir, last_access, prunable = pex_dirs_by_hash[pip.pex_hash] + if prunable: + pips_to_prune[pip] = False + else: + pip_caches_to_prune[pip.version] = PrunablePipCache(pip, pex_dir, last_access) + for pip in pips_to_prune: + if pip.version not in pip_caches_to_prune: + pips_to_prune[pip] = True + + pip_paths_to_prune = tuple( + (pip.pex_dir.base_dir if prune_version else pip.pex_dir.path) + for pip, prune_version in pips_to_prune.items() + ) + return cls(paths=pip_paths_to_prune, caches=tuple(pip_caches_to_prune.values())) + + paths = attr.ib() # type: Tuple[str, ...] + caches = attr.ib() # type: Tuple[PrunablePipCache, ...] + + +@attr.s(frozen=True) +class Prunable(object): + @classmethod + def scan(cls, cutoff): + # type: (datetime) -> Prunable + + venv_dir_paths = [] # type: List[str] + prunable_pex_dirs = set() # type: Set[Union[UnzipDir, VenvDirs]] + for pex_dir, last_access in access.iter_all_cached_pex_dirs(): + if isinstance(pex_dir, VenvDirs): + venv_dir_paths.append(pex_dir.path) + + # Before a --venv installs, it 1st unzips itself. The unzipped instance of the + # PEX is not needed past the initial install; so we remove it regardless of cutoff. 
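+                # (UnzipDir.create derives the unzip dir path purely from the pex_hash, so the
+                # corresponding unzip dir can be computed directly from this venv entry.)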
+ unzip_dir = UnzipDir.create(pex_dir.pex_hash) + if os.path.exists(unzip_dir.path): + prunable_pex_dirs.add(unzip_dir) + + prunable = datetime.fromtimestamp(last_access) < cutoff + if prunable: + prunable_pex_dirs.add(pex_dir) + + pex_dirs = [] # type: List[Union[UnzipDir, VenvDirs]] + pex_deps = ( + OrderedSet() + ) # type: OrderedSet[Union[BootstrapDir, UserCodeDir, InstalledWheelDir]] + unprunable_deps = [] # type: List[Union[BootstrapDir, UserCodeDir, InstalledWheelDir]] + pex_dirs_by_hash = {} # type: Dict[str, Tuple[Union[UnzipDir, VenvDirs], float, bool]] + for pex_dir, last_access in access.iter_all_cached_pex_dirs(): + prunable = pex_dir in prunable_pex_dirs + if prunable: + pex_dirs.append(pex_dir) + pex_deps.update(pex_dir.iter_deps()) + else: + unprunable_deps.extend(pex_dir.iter_deps()) + pex_dirs_by_hash[pex_dir.pex_hash] = pex_dir, last_access, prunable + pips = Pips.scan(pex_dirs_by_hash) + + return cls( + pex_dirs=tuple(pex_dirs), + pex_deps=pex_deps, + venv_dir_paths=frozenset(venv_dir_paths), + unprunable_deps=frozenset(unprunable_deps), + pips=pips, + ) + + pex_dirs = attr.ib() # type: Tuple[Union[UnzipDir, VenvDirs], ...] + _pex_deps = attr.ib() # type: Collection[Union[BootstrapDir, UserCodeDir, InstalledWheelDir]] + _venv_dir_paths = attr.ib() # type: Container[str] + _unprunable_deps = ( + attr.ib() + ) # type: Container[Union[BootstrapDir, UserCodeDir, InstalledWheelDir]] + pips = attr.ib() # type: Pips + + def iter_pex_unused_deps(self): + # type: () -> Iterator[Union[BootstrapDir, UserCodeDir, InstalledWheelDir]] + for dep in self._pex_deps: + if dep not in self._unprunable_deps: + yield dep + + def iter_other_unused_deps(self): + # type: () -> Iterator[Union[BootstrapDir, UserCodeDir, InstalledWheelDir]] + + for bootstrap_dir in BootstrapDir.iter_all(): + if bootstrap_dir not in self._pex_deps and bootstrap_dir not in self._unprunable_deps: + yield bootstrap_dir + + for user_code_dir in UserCodeDir.iter_all(): + if user_code_dir not in self._pex_deps and user_code_dir not in self._unprunable_deps: + yield user_code_dir + + for installed_wheel_dir in InstalledWheelDir.iter_all(): + if (installed_wheel_dir not in self._pex_deps) and ( + installed_wheel_dir not in self._unprunable_deps + ): + yield installed_wheel_dir + + def iter_interpreters(self): + # type: () -> Iterator[InterpreterDir] + for interpreter_dir in InterpreterDir.iter_all(): + if not interpreter_dir.valid(): + yield interpreter_dir + else: + venv_dir = interpreter_dir.venv_dir() + if not venv_dir: + continue + if venv_dir.path in self._venv_dir_paths: + yield interpreter_dir diff --git a/pex/cli/commands/cache/command.py b/pex/cli/commands/cache/command.py index 4c4c10145..3b4030ce6 100644 --- a/pex/cli/commands/cache/command.py +++ b/pex/cli/commands/cache/command.py @@ -3,26 +3,58 @@ from __future__ import absolute_import, print_function +import functools +import itertools import os +import re from argparse import Action, ArgumentError, _ActionsContainer -from datetime import datetime +from collections import Counter, defaultdict +from datetime import datetime, timedelta from pex.cache import access as cache_access -from pex.cache.dirs import CacheDir +from pex.cache.dirs import ( + AtomicCacheDir, + BootstrapDir, + BuiltWheelDir, + CacheDir, + DownloadDir, + InstalledWheelDir, + VenvDirs, +) +from pex.cache.prunable import Prunable, PrunablePipCache from pex.cli.command import BuildTimeCommand from pex.cli.commands.cache.bytes import ByteAmount, ByteUnits from pex.cli.commands.cache.du import 
DiskUsage
 from pex.commands.command import OutputMixin
 from pex.common import pluralize, safe_rmtree
+from pex.dist_metadata import ProjectNameAndVersion
 from pex.exceptions import reportable_unexpected_error_msg
-from pex.jobs import iter_map_parallel, map_parallel
+from pex.jobs import SpawnedJob, execute_parallel, iter_map_parallel, map_parallel
 from pex.orderedset import OrderedSet
+from pex.pep_440 import Version
+from pex.pep_503 import ProjectName
 from pex.result import Error, Ok, Result
 from pex.typing import TYPE_CHECKING
 from pex.variables import ENV

 if TYPE_CHECKING:
-    from typing import IO, Dict, Iterable, List, Optional, Tuple, Union
+    import typing
+    from typing import (
+        IO,
+        DefaultDict,
+        Dict,
+        Iterable,
+        Iterator,
+        List,
+        Mapping,
+        Optional,
+        Tuple,
+        Union,
+    )
+
+    import attr  # vendor:skip
+else:
+    from pex.third_party import attr


 class HandleAmountAction(Action):
@@ -45,6 +77,80 @@ def __call__(self, parser, namespace, value, option_str=None):
         setattr(namespace, self.dest, amount_func)


+@attr.s(frozen=True)
+class Cutoff(object):
+    @classmethod
+    def parse(cls, spec):
+        # type: (str) -> Cutoff
+        match = re.match(
+            r"(?P<amount>\d+)\s+(?P<unit>second|minute|hour|day|week)s?(\s+ago)?",
+            spec.strip(),
+            re.IGNORECASE,
+        )
+        if match:
+            args = {match.group("unit") + "s": int(match.group("amount"))}
+            cutoff = datetime.now() - timedelta(**args)
+        else:
+            cutoff = datetime.strptime(spec.strip(), "%d/%m/%Y")
+        return cls(spec=spec, cutoff=cutoff)
+
+    spec = attr.ib()  # type: str
+    cutoff = attr.ib()  # type: datetime
+
+
+def _prune_cache_dir(
+    dry_run,  # type: bool
+    additional_cache_dirs_by_project_name_and_version,  # type: Mapping[Tuple[ProjectName, Version], Iterable[AtomicCacheDir]]
+    cache_dir,  # type: AtomicCacheDir
+):
+    # type: (...) -> DiskUsage
+    paths_to_prune = []  # type: List[str]
+
+    def prune_if_exists(path):
+        # type: (Optional[str]) -> None
+        if path and os.path.exists(path):
+            paths_to_prune.append(path)
+
+    if isinstance(cache_dir, InstalledWheelDir):
+        paths_to_prune.append(os.path.dirname(cache_dir.path))
+        prune_if_exists(CacheDir.PACKED_WHEELS.path(cache_dir.install_hash))
+        for additional_dir in additional_cache_dirs_by_project_name_and_version.get(
+            (cache_dir.project_name, cache_dir.version), ()
+        ):
+            prune_if_exists(additional_dir)
+    elif isinstance(cache_dir, BootstrapDir):
+        paths_to_prune.append(cache_dir.path)
+        prune_if_exists(CacheDir.BOOTSTRAP_ZIPS.path(cache_dir.bootstrap_hash))
+    else:
+        paths_to_prune.append(cache_dir.path)
+
+    disk_usages = [DiskUsage.collect(path) for path in paths_to_prune]
+    if not dry_run:
+        for path in paths_to_prune:
+            safe_rmtree(path)
+        if isinstance(cache_dir, InstalledWheelDir) and cache_dir.symlink_dir:
+            safe_rmtree(cache_dir.symlink_dir)
+        elif isinstance(cache_dir, VenvDirs):
+            safe_rmtree(cache_dir.short_dir)
+
+    return (
+        disk_usages[0]
+        if len(disk_usages) == 1
+        else DiskUsage.aggregate(cache_dir.path, disk_usages)
+    )
+
+
+def _prune_pip(
+    dry_run,  # type: bool
+    pip_path_to_prune,  # type: str
+):
+    # type: (...) -> DiskUsage
+    du = DiskUsage.collect(pip_path_to_prune)
+    if not dry_run:
+        safe_rmtree(pip_path_to_prune)
+    return du
+
+
 class Cache(OutputMixin, BuildTimeCommand):
     """Interact with the Pex cache."""

@@ -90,6 +196,21 @@ def _add_info_arguments(cls, parser):
         )
         cls.add_output_option(parser, entity="Pex cache information")

+    @staticmethod
+    def _add_dry_run_option(parser):
+        # type: (_ActionsContainer) -> None
+
+        parser.add_argument(
+            "-n",
+            "--dry-run",
+            dest="dry_run",
+            action="store_true",
+            help=(
+                "Don't actually purge cache entries; instead, perform a dry run that just prints "
+                "out what actions would be taken"
+            ),
+        )
+
     @classmethod
     def _add_purge_arguments(cls, parser):
         # type: (_ActionsContainer) -> None
@@ -107,17 +228,31 @@ def _add_purge_arguments(cls, parser):
                 "other cache entries dependent on those) will be purged."
             ),
         )
+        cls._add_dry_run_option(parser)
+        cls.add_output_option(parser, entity="Pex purge results")
+
+    @classmethod
+    def _add_prune_arguments(cls, parser):
+        # type: (_ActionsContainer) -> None
+
+        cls._add_amount_argument(parser)
         parser.add_argument(
-            "-n",
-            "--dry-run",
-            dest="dry_run",
-            action="store_true",
+            "--older-than",
+            "--last-access",
+            "--last-access-before",
+            dest="cutoff",
+            type=Cutoff.parse,
+            default=Cutoff.parse("2 weeks ago"),
             help=(
-                "Don't actually purge cache entries; instead, perform a dry run that just prints "
-                "out what actions would be taken"
+                "Prune zipapp and venv caches (amongst others) last accessed before the specified "
+                "time. If the dependencies of the selected zipapps and venvs (e.g.: installed "
+                "wheels) are unused by other zipapps and venvs, those dependencies are pruned as "
+                "well. The cutoff time can be specified as a date in the format "
+                "`<day>/<month>/<4 digit year>` or as a relative time in the format "
+                "`<amount> [second(s)|minute(s)|hour(s)|day(s)|week(s)]`."
), ) - + cls._add_dry_run_option(parser) cls.add_output_option(parser, entity="Pex purge results") @classmethod @@ -151,6 +286,14 @@ def add_extra_arguments(cls, parser): ) as purge_parser: cls._add_purge_arguments(purge_parser) + with subcommands.parser( + name="prune", + help="Prune the Pex cache safely.", + func=cls._prune, + include_verbosity=False, + ) as prune_parser: + cls._add_prune_arguments(prune_parser) + def _dir(self): # type: () -> Result @@ -360,7 +503,7 @@ def _purge(self): disk_usages = [] # type: List[DiskUsage] for cache_dir, du in iter_map_parallel( - cache_dirs, self._purge_cache_dir, noun="entries", verb="purge", verb_past="purged" + cache_dirs, self._purge_cache_dir, noun="entry", verb="purge", verb_past="purged" ): print( "{purged} cache {name} from {rel_path}".format( @@ -378,3 +521,329 @@ def _purge(self): print(file=fp) return Ok() + + def _prune(self): + # type: () -> Result + + with self.output(self.options) as fp: + if not self.options.dry_run: + try: + with cache_access.await_delete_lock() as lock_file: + self._log_delete_start(lock_file, out=fp) + print( + "Attempting to acquire cache write lock (press CTRL-C to abort) ...", + file=fp, + ) + except KeyboardInterrupt: + return Error("No cache entries purged.") + finally: + print(file=fp) + + cutoff = self.options.cutoff + prunable = Prunable.scan(cutoff.cutoff) + unused_deps = tuple(prunable.iter_other_unused_deps()) + unused_wheels = tuple(dep for dep in unused_deps if isinstance(dep, InstalledWheelDir)) + + additional_cache_dirs_by_project_name_and_version = defaultdict( + list + ) # type: DefaultDict[Tuple[ProjectName, Version], List[AtomicCacheDir]] + cached_artifact_dirs = itertools.chain( + BuiltWheelDir.iter_all(), DownloadDir.iter_all() + ) # type: Iterator[Union[BuiltWheelDir, DownloadDir]] + for cache_dir in cached_artifact_dirs: + additional_cache_dirs_by_project_name_and_version[ + (cache_dir.project_name, cache_dir.version) + ].append(cache_dir) + + prune_cache_dir = functools.partial( + _prune_cache_dir, + self.options.dry_run, + additional_cache_dirs_by_project_name_and_version, + ) + prune_pip = functools.partial(_prune_pip, self.options.dry_run) + + def prune_unused_deps(additional=False): + # type: (bool) -> None + + if not unused_deps: + return + disk_usages = tuple( + iter_map_parallel( + unused_deps, + prune_cache_dir, + noun="cached PEX dependency", + verb="prune", + verb_past="pruned", + ) + ) + if disk_usages: + print( + "Pruned {count} {additional}unused PEX {dependencies}.".format( + count=len(disk_usages), + additional="additional " if additional else "", + dependencies=pluralize(disk_usages, "dependency"), + ), + file=fp, + ) + print(self._render_usage(disk_usages)) + print(file=fp) + + def prune_pips(): + # type: () -> None + if not prunable.pips.paths: + return + + print( + "{pruned} {count} {cached_pex}.".format( + pruned="Would have pruned" if self.options.dry_run else "Pruned", + count=len(prunable.pips.paths), + cached_pex=pluralize(prunable.pips.paths, "Pip PEX"), + ), + file=fp, + ) + print( + self._render_usage( + tuple( + iter_map_parallel( + prunable.pips.paths, + function=prune_pip, + noun="Pip", + verb="prune", + verb_past="pruned", + ) + ) + ), + file=fp, + ) + print(file=fp) + + def prune_pip_caches(): + # type: () -> None + + prunable_wheels = set() + for wheel in unused_wheels: + prunable_pnav = ProjectNameAndVersion.from_filename(wheel.wheel_name) + prunable_wheels.add( + (prunable_pnav.canonicalized_project_name, prunable_pnav.canonicalized_version) + ) + if not 
prunable_wheels: + return + + def spawn_list(prunable_pip_cache): + # type: (PrunablePipCache) -> SpawnedJob[Tuple[ProjectNameAndVersion, ...]] + return SpawnedJob.stdout( + job=prunable_pip_cache.pip.spawn_cache_list(), + result_func=lambda stdout: tuple( + ProjectNameAndVersion.from_filename(wheel_file) + for wheel_file in stdout.decode("utf-8").splitlines() + if wheel_file + ), + ) + + pip_removes = [] # type: List[Tuple[PrunablePipCache, str]] + for prunable_pip_cache, project_name_and_versions in zip( + prunable.pips.caches, + execute_parallel(inputs=prunable.pips.caches, spawn_func=spawn_list), + ): + for pnav in project_name_and_versions: + if ( + pnav.canonicalized_project_name, + pnav.canonicalized_version, + ) in prunable_wheels: + pip_removes.append( + ( + prunable_pip_cache, + "{project_name}-{version}*".format( + project_name=pnav.project_name, version=pnav.version + ), + ) + ) + + def parse_remove(stdout): + # type: (bytes) -> int + + # The output from `pip cache remove` is a line like: + # Files removed: 42 + _, sep, count = stdout.decode("utf-8").partition(":") + if sep != ":" or not count: + return 0 + try: + return int(count) + except ValueError: + return 0 + + def spawn_remove(args): + # type: (Tuple[PrunablePipCache, str]) -> SpawnedJob[int] + prunable_pip_cache, wheel_name_glob = args + return SpawnedJob.stdout( + job=prunable_pip_cache.pip.spawn_cache_remove(wheel_name_glob), + result_func=parse_remove, + ) + + removes_by_pip = Counter() # type: typing.Counter[str] + for prunable_pip_cache, remove_count in zip( + [prunable_pip_cache for prunable_pip_cache, _ in pip_removes], + execute_parallel(inputs=pip_removes, spawn_func=spawn_remove), + ): + removes_by_pip[prunable_pip_cache.pip.version.value] += remove_count + cache_access.record_access( + prunable_pip_cache.pex_dir, last_access=prunable_pip_cache.last_access + ) + if removes_by_pip: + total = sum(removes_by_pip.values()) + print( + "Pruned {total} cached {wheels} from {count} Pip {version}:".format( + total=total, + wheels=pluralize(total, "wheel"), + count=len(removes_by_pip), + version=pluralize(removes_by_pip, "version"), + ), + file=fp, + ) + for pip_version, remove_count in sorted(removes_by_pip.items()): + print( + "Pip {version}: removed {remove_count} {wheels}".format( + version=pip_version, + remove_count=remove_count, + wheels=pluralize(remove_count, "wheel"), + ), + file=fp, + ) + print(file=fp) + + def prune_interpreters(): + # type: () -> None + + interpreters_to_prune = tuple(prunable.iter_interpreters()) + if not interpreters_to_prune: + return + + print( + "{pruned} {count} {cached_interpreter}.".format( + pruned="Would have pruned" if self.options.dry_run else "Pruned", + count=len(interpreters_to_prune), + cached_interpreter=pluralize(interpreters_to_prune, "cached interpreter"), + ), + file=fp, + ) + print( + self._render_usage( + tuple( + iter_map_parallel( + interpreters_to_prune, + function=prune_cache_dir, + noun="interpreter", + verb="prune", + verb_past="pruned", + ) + ) + ), + file=fp, + ) + print(file=fp) + + if not prunable.pex_dirs: + print( + "There are no cached PEX zipapps or venvs last accessed prior to {cutoff}.".format( + cutoff=( + cutoff.spec + if cutoff.spec.endswith("ago") or cutoff.spec[-1].isdigit() + else "{cutoff} ago".format(cutoff=cutoff.spec) + ), + ), + file=fp, + ) + print(file=fp) + prune_unused_deps() + prune_pip_caches() + prune_interpreters() + return Ok() + + print( + "{pruned} {count} {cached_pex}.".format( + pruned="Would have pruned" if self.options.dry_run 
else "Pruned", + count=len(prunable.pex_dirs), + cached_pex=pluralize(prunable.pex_dirs, "cached PEX"), + ), + file=fp, + ) + print( + self._render_usage( + tuple( + iter_map_parallel( + prunable.pex_dirs, + prune_cache_dir, + noun="cached PEX", + verb="prune", + verb_past="pruned", + ) + ) + ), + file=fp, + ) + print(file=fp) + + deps = tuple(prunable.iter_pex_unused_deps()) + if self.options.dry_run: + print( + "Might have pruned up to {count} {cached_pex_dependency}.".format( + count=len(deps), + cached_pex_dependency=pluralize(deps, "cached PEX dependency"), + ), + file=fp, + ) + print( + self._render_usage( + tuple( + iter_map_parallel( + deps, + prune_cache_dir, + noun="cached PEX dependency", + verb="prune", + verb_past="pruned", + ) + ) + ) + ) + print(file=fp) + prune_pips() + prune_interpreters() + else: + disk_usages = tuple( + iter_map_parallel( + deps, + prune_cache_dir, + noun="cached PEX dependency", + verb="prune", + verb_past="pruned", + ) + ) + if deps and not disk_usages: + print( + "No cached PEX dependencies were able to be pruned; all have un-pruned " + "cached PEX dependents.", + file=fp, + ) + elif len(deps) == 1: + print("Pruned the 1 cached PEX dependency.", file=fp) + elif len(deps) > 1 and len(deps) == len(disk_usages): + print( + "Pruned all {count} cached PEX dependencies.".format(count=len(deps)), + file=fp, + ) + elif len(deps) > 1: + print( + "Pruned {count} of {total} cached PEX dependencies.".format( + count=len(disk_usages), total=len(deps) + ), + file=fp, + ) + if disk_usages: + print(self._render_usage(disk_usages)) + if deps or disk_usages: + print(file=fp) + prune_unused_deps(additional=len(disk_usages) > 0) + prune_pip_caches() + prune_pips() + prune_interpreters() + return Ok() diff --git a/pex/hashing.py b/pex/hashing.py index 74712878d..784d62176 100644 --- a/pex/hashing.py +++ b/pex/hashing.py @@ -200,10 +200,11 @@ def dir_hash( def iter_files(): # type: () -> Iterator[Text] for root, dirs, files in os.walk(top, followlinks=True): - dirs[:] = [d for d in dirs if dir_filter(d)] + dirs[:] = [d for d in dirs if dir_filter(os.path.join(root, d))] for f in files: - if file_filter(f): - yield os.path.join(root, f) + path = os.path.join(root, f) + if file_filter(path): + yield path file_paths = sorted(iter_files()) diff --git a/pex/interpreter.py b/pex/interpreter.py index 70f081039..761d38619 100644 --- a/pex/interpreter.py +++ b/pex/interpreter.py @@ -5,10 +5,8 @@ from __future__ import absolute_import -import hashlib import json import os -import platform import re import subprocess import sys @@ -18,7 +16,7 @@ from textwrap import dedent from pex import third_party -from pex.cache.dirs import CacheDir +from pex.cache.dirs import InterpreterDir from pex.common import is_exe, safe_mkdtemp, safe_rmtree from pex.executor import Executor from pex.jobs import Job, Retain, SpawnedJob, execute_parallel @@ -32,7 +30,6 @@ from pex.third_party.packaging import tags from pex.tracer import TRACER from pex.typing import TYPE_CHECKING, cast, overload -from pex.util import CacheHelper if TYPE_CHECKING: from typing import ( @@ -109,7 +106,7 @@ class Platlib(SitePackagesDir): pass -_PATH_MAPPINGS = {} +_PATH_MAPPINGS = {} # type: Dict[str, str] @contextmanager @@ -1053,8 +1050,6 @@ def _resolve_pyenv_shim( return python return binary - INTERP_INFO_FILE = "INTERP-INFO" - @classmethod def _spawn_from_binary_external(cls, binary): # type: (str) -> SpawnedJob[PythonInterpreter] @@ -1078,39 +1073,10 @@ def create_interpreter( ) return interpreter - # Part of the 
PythonInterpreter data are environment markers that depend on the current OS - # release. That data can change when the OS is upgraded but (some of) the installed interpreters - # remain the same. As such, include the OS in the hash structure for cached interpreters. - os_digest = hashlib.sha1() - for os_identifier in platform.release(), platform.version(): - os_digest.update(os_identifier.encode("utf-8")) - os_hash = os_digest.hexdigest() - - interpreter_cache_dir = CacheDir.INTERPRETERS.path() - os_cache_dir = os.path.join(interpreter_cache_dir, os_hash) - if os.path.isdir(interpreter_cache_dir) and not os.path.isdir(os_cache_dir): - with TRACER.timed("GCing interpreter cache from prior OS version"): - safe_rmtree(interpreter_cache_dir) - - interpreter_hash = CacheHelper.hash(binary) - - # Some distributions include more than one copy of the same interpreter via a hard link (e.g.: - # python3.7 is a hardlink to python3.7m). To ensure a deterministic INTERP-INFO file we must - # emit a separate INTERP-INFO for each link since INTERP-INFO contains the interpreter path and - # would otherwise be unstable. - # - # See cls._REGEXEN for a related affordance. - # - # N.B.: The path for --venv mode interpreters can be quite long; so we just used a fixed - # length hash of the interpreter binary path to ensure uniqueness and not run afoul of file - # name length limits. - path_id = hashlib.sha1(binary.encode("utf-8")).hexdigest() - - cache_dir = os.path.join(os_cache_dir, interpreter_hash, path_id) - cache_file = os.path.join(cache_dir, cls.INTERP_INFO_FILE) - if os.path.isfile(cache_file): + cache_dir = InterpreterDir.create(binary) + if os.path.isfile(cache_dir.interp_info_file): try: - with open(cache_file, "rb") as fp: + with open(cache_dir.interp_info_file, "rb") as fp: return SpawnedJob.completed(create_interpreter(fp.read(), check_binary=True)) except (IOError, OSError, cls.Error, PythonIdentity.Error): safe_rmtree(cache_dir) @@ -1143,8 +1109,8 @@ def create_interpreter( """.format( path_mappings=_PATH_MAPPINGS, binary=binary, - cache_dir=cache_dir, - info_file=cls.INTERP_INFO_FILE, + cache_dir=cache_dir.path, + info_file=InterpreterDir.INTERP_INFO_FILE, ) ), ], @@ -1157,7 +1123,9 @@ def create_interpreter( cmd, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd ) job = Job(command=cmd, process=process, finalizer=lambda _: safe_rmtree(cwd)) - return SpawnedJob.file(job, output_file=cache_file, result_func=create_interpreter) + return SpawnedJob.file( + job, output_file=cache_dir.interp_info_file, result_func=create_interpreter + ) @classmethod def _expand_path(cls, path): diff --git a/pex/jobs.py b/pex/jobs.py index bc75fe03b..2fa1234f4 100644 --- a/pex/jobs.py +++ b/pex/jobs.py @@ -754,8 +754,11 @@ def iter_map_parallel( slots = defaultdict(list) # type: DefaultDict[int, List[float]] with TRACER.timed( - "Using {pool_size} parallel jobs to {verb} {count} items".format( - pool_size=pool_size, verb=verb, count=len(input_items) + "Using {pool_size} parallel jobs to {verb} {count} {inputs}".format( + pool_size=pool_size, + verb=verb, + count=len(input_items), + inputs=pluralize(input_items, noun), ) ): with _mp_pool(size=pool_size) as pool: diff --git a/pex/layout.py b/pex/layout.py index d8ee4b54c..30724e73b 100644 --- a/pex/layout.py +++ b/pex/layout.py @@ -10,7 +10,7 @@ from pex.atomic_directory import atomic_directory from pex.cache import access as cache_access -from pex.cache.dirs import CacheDir +from pex.cache.dirs import BootstrapDir, InstalledWheelDir, UserCodeDir from 
pex.common import ZipFileEx, is_script, open_zip, safe_copy, safe_mkdir, safe_mkdtemp from pex.enum import Enum from pex.tracer import TRACER @@ -147,6 +147,11 @@ def extract_main(self, dest_dir): # type: (str) -> None raise NotImplementedError() + @abstractmethod + def extract_import_hook(self, dest_dir): + # type: (str) -> None + raise NotImplementedError() + def record(self, dest_dir): # type: (str) -> None self._layout.record(dest_dir) @@ -163,7 +168,9 @@ def _install_distribution( location, sha = distribution_info is_wheel_file = pex_info.deps_are_wheel_files - spread_dest = CacheDir.INSTALLED_WHEELS.path(sha, location, pex_root=pex_info.pex_root) + spread_dest = InstalledWheelDir.create( + wheel_name=location, install_hash=sha, pex_root=pex_info.pex_root + ) dist_relpath = os.path.join(DEPS_DIR, location) source = None if is_wheel_file else layout.dist_strip_prefix(location) symlink_src = os.path.relpath( @@ -313,6 +320,8 @@ def _ensure_installed( if not os.path.exists(install_to): with ENV.patch(PEX_ROOT=pex_root): cache_access.read_write() + else: + cache_access.record_access(install_to) with atomic_directory(install_to) as chroot: if not chroot.is_finalized(): with ENV.patch(PEX_ROOT=pex_root), TRACER.timed( @@ -327,16 +336,14 @@ def _ensure_installed( raise AssertionError( "Expected bootstrap_cache to be populated for {}.".format(layout) ) - bootstrap_cache = CacheDir.BOOTSTRAPS.path( + bootstrap_cache = BootstrapDir.create( pex_info.bootstrap_hash, pex_root=pex_info.pex_root ) if pex_info.code_hash is None: raise AssertionError( "Expected code_hash to be populated for {}.".format(layout) ) - code_cache = CacheDir.USER_CODE.path( - pex_info.code_hash, pex_root=pex_info.pex_root - ) + code_cache = UserCodeDir.create(pex_info.code_hash, pex_root=pex_info.pex_root) with atomic_directory( bootstrap_cache, source=layout.bootstrap_strip_prefix() @@ -483,7 +490,7 @@ def extract_code(self, dest_dir): # type: (str) -> None for name in self.names: if name not in ("__main__.py", PEX_INFO_PATH) and not name.startswith( - (BOOTSTRAP_DIR, DEPS_DIR) + ("__pex__", BOOTSTRAP_DIR, DEPS_DIR) ): self.zfp.extract(name, dest_dir) @@ -495,6 +502,10 @@ def extract_main(self, dest_dir): # type: (str) -> None self.zfp.extract("__main__.py", dest_dir) + def extract_import_hook(self, dest_dir): + # type: (str) -> None + self.zfp.extract("__pex__/__init__.py", dest_dir) + def __str__(self): return "PEX zipfile {}".format(self.path) @@ -544,7 +555,7 @@ def extract_code(self, dest_dir): for root, dirs, files in os.walk(self._path): rel_root = os.path.relpath(root, self._path) if root == self._path: - dirs[:] = [d for d in dirs if d != DEPS_DIR] + dirs[:] = [d for d in dirs if d not in ("__pex__", DEPS_DIR)] files[:] = [ f for f in files if f not in ("__main__.py", PEX_INFO_PATH, BOOTSTRAP_DIR) ] @@ -564,6 +575,12 @@ def extract_main(self, dest_dir): # type: (str) -> None safe_copy(os.path.join(self._path, "__main__.py"), os.path.join(dest_dir, "__main__.py")) + def extract_import_hook(self, dest_dir): + # type: (str) -> None + dest = os.path.join(dest_dir, "__pex__", "__init__.py") + safe_mkdir(os.path.dirname(dest)) + safe_copy(os.path.join(self._path, "__pex__", "__init__.py"), dest) + def __str__(self): return "Spread PEX directory {}".format(self._path) @@ -615,7 +632,7 @@ def extract_code(self, dest_dir): for root, dirs, files in os.walk(self._path): rel_root = os.path.relpath(root, self._path) if root == self._path: - dirs[:] = [d for d in dirs if d not in (DEPS_DIR, BOOTSTRAP_DIR)] + dirs[:] = [d for d 
in dirs if d not in ("__pex__", DEPS_DIR, BOOTSTRAP_DIR)] files[:] = [f for f in files if f not in ("__main__.py", PEX_INFO_PATH)] for d in dirs: safe_mkdir(os.path.join(dest_dir, rel_root, d)) @@ -633,6 +650,12 @@ def extract_main(self, dest_dir): # type: (str) -> None safe_copy(os.path.join(self._path, "__main__.py"), os.path.join(dest_dir, "__main__.py")) + def extract_import_hook(self, dest_dir): + # type: (str) -> None + dest = os.path.join(dest_dir, "__pex__", "__init__.py") + safe_mkdir(os.path.dirname(dest)) + safe_copy(os.path.join(self._path, "__pex__", "__init__.py"), dest) + def __str__(self): return "Loose PEX directory {}".format(self._path) diff --git a/pex/pex_bootstrapper.py b/pex/pex_bootstrapper.py index f07a2569b..0c0bfa680 100644 --- a/pex/pex_bootstrapper.py +++ b/pex/pex_bootstrapper.py @@ -10,7 +10,7 @@ from pex import interpreter, pex_warnings from pex.atomic_directory import atomic_directory from pex.cache import access as cache_access -from pex.cache.dirs import CacheDir +from pex.cache.dirs import VenvDirs from pex.common import CopyMode, die, pluralize from pex.environment import ResolveError from pex.inherit_path import InheritPath @@ -506,6 +506,7 @@ def execv( def ensure_venv( pex, # type: PEX collisions_ok=True, # type: bool + copy_mode=None, # type: Optional[CopyMode.Value] ): # type: (...) -> VenvPex pex_info = pex.pex_info() @@ -523,11 +524,13 @@ def ensure_venv( if not os.path.exists(venv_dir): with ENV.patch(PEX_ROOT=pex_info.pex_root): cache_access.read_write() + else: + cache_access.record_access(venv_dir) with atomic_directory(venv_dir) as venv: if not venv.is_finalized(): from pex.venv.virtualenv import Virtualenv - with interpreter.path_mapping(venv.work_dir, venv_dir): + with interpreter.path_mapping(venv.work_dir, venv_dir.path): virtualenv = Virtualenv.create_atomic( venv_dir=venv, interpreter=pex.interpreter, @@ -547,10 +550,10 @@ def ensure_venv( collisions = [] for chars in range(8, len(venv_hash) + 1): entropy = venv_hash[:chars] - short_venv_dir = CacheDir.VENVS.path("s", entropy, pex_root=pex_info.pex_root) - with atomic_directory(short_venv_dir) as short_venv: + venv_dirs = VenvDirs(venv_dir=venv_dir, short_hash=entropy) + with atomic_directory(venv_dirs.short_dir) as short_venv: if short_venv.is_finalized(): - collisions.append(short_venv_dir) + collisions.append(venv_dirs.short_dir) if entropy == venv_hash: raise RuntimeError( "The venv for {pex} at {venv} has hash collisions with {count} " @@ -569,17 +572,17 @@ def ensure_venv( ) continue - with interpreter.path_mapping(short_venv.work_dir, short_venv_dir): + with interpreter.path_mapping(short_venv.work_dir, venv_dirs.short_dir): os.symlink( - os.path.relpath(venv_dir, short_venv_dir), - os.path.join(short_venv.work_dir, "venv"), + os.path.relpath(venv_dirs, venv_dirs.short_dir), + os.path.join(short_venv.work_dir, venv_dirs.SHORT_SYMLINK_NAME), ) # Loose PEXes don't need to unpack themselves to the PEX_ROOT before # running; so we'll not have a stable base there to symlink from. As # such, always copy for loose PEXes to ensure the PEX_ROOT venv is # stable in the face of modification of the source loose PEX. 
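+        # N.B.: An explicit copy_mode argument (e.g.: the CopyMode.SYMLINK Pex uses when
+        # creating Pip venvs) takes precedence over this layout-derived default.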
- copy_mode = ( + copy_mode = copy_mode or ( CopyMode.SYMLINK if ( pex.layout != Layout.LOOSE @@ -593,7 +596,7 @@ def ensure_venv( pex, bin_path=pex_info.venv_bin_path, python=os.path.join( - short_venv_dir, + venv_dirs.short_dir, "venv", "bin", os.path.basename(virtualenv.interpreter.binary), diff --git a/pex/pex_builder.py b/pex/pex_builder.py index f351357b0..d8a207282 100644 --- a/pex/pex_builder.py +++ b/pex/pex_builder.py @@ -11,7 +11,7 @@ from textwrap import dedent from zipimport import ZipImportError -from pex import pex_warnings +from pex import layout, pex_warnings from pex.atomic_directory import atomic_directory from pex.cache.dirs import CacheDir from pex.common import ( @@ -35,6 +35,7 @@ from pex.interpreter import PythonInterpreter from pex.layout import Layout from pex.orderedset import OrderedSet +from pex.pep_376 import InstalledWheel from pex.pex import PEX from pex.pex_info import PexInfo from pex.sh_boot import create_sh_boot_script @@ -373,9 +374,16 @@ def _add_dist( relpath = os.path.relpath(filename, path) target = os.path.join(target_dir, relpath) self._copy_or_link(filename, target, label=dist_name) - return fingerprint or ( - CacheHelper.hash(path) if is_wheel_file else CacheHelper.dir_hash(path) - ) + if fingerprint: + return fingerprint + if not is_wheel_file: + try: + installed_wheel = InstalledWheel.load(path) + if installed_wheel.fingerprint: + return installed_wheel.fingerprint + except InstalledWheel.LoadError: + pass + return CacheHelper.hash(path) if is_wheel_file else CacheHelper.dir_hash(path) def add_distribution( self, @@ -453,7 +461,13 @@ def _precompile_source(self): self._chroot.touch(compiled, label="bytecode") def _prepare_code(self): - self._pex_info.code_hash = CacheHelper.pex_code_hash(self._chroot.path()) + chroot_path = self._chroot.path() + self._pex_info.code_hash = CacheHelper.pex_code_hash( + chroot_path, + exclude_dirs=tuple( + os.path.join(chroot_path, d) for d in (layout.BOOTSTRAP_DIR, layout.DEPS_DIR) + ), + ) self._pex_info.pex_hash = hashlib.sha1(self._pex_info.dump().encode("utf-8")).hexdigest() self._chroot.write(self._pex_info.dump().encode("utf-8"), PexInfo.PATH, label="manifest") @@ -537,6 +551,11 @@ def _prepare_bootstrap(self): bootstrap_packages = ["cache", "repl", "third_party", "venv"] if self._pex_info.includes_tools: bootstrap_packages.extend(["commands", "tools"]) + + # TODO(John Sirois): Switch to a symlink model, isolate(), then symlink from there? + # The bootstraps, as it stands, are ~4.5 MB for each loose dogfood PEX. For the Pex ITs, + # this ends up taking up a significant amount of disk space. + for root, dirs, files in deterministic_walk(_ABS_PEX_PACKAGE_DIR): if root == _ABS_PEX_PACKAGE_DIR: dirs[:] = bootstrap_packages diff --git a/pex/pex_info.py b/pex/pex_info.py index 47832531d..b90fc899a 100644 --- a/pex/pex_info.py +++ b/pex/pex_info.py @@ -24,6 +24,7 @@ from typing import Collection # type: ignore[attr-defined] from typing import Any, Dict, Iterable, Mapping, Optional, Text, Tuple, Union + from pex.cache.dirs import VenvDir from pex.dist_metadata import Requirement # N.B.: These are expensive imports and PexInfo is used during PEX bootstrapping which we want @@ -275,7 +276,7 @@ def _venv_dir( interpreter=None, # type: Optional[PythonInterpreter] expand_pex_root=True, # type: bool ): - # type: (...) -> Optional[str] + # type: (...) 
-> Optional[VenvDir]
        if not self.venv:
            return None
        if self.pex_hash is None:
@@ -295,7 +296,7 @@ def runtime_venv_dir(
         pex_file,  # type: str
         interpreter=None,  # type: Optional[PythonInterpreter]
     ):
-        # type: (...) -> Optional[str]
+        # type: (...) -> Optional[VenvDir]
         return self._venv_dir(self.pex_root, pex_file, interpreter)
 
     def raw_venv_dir(
@@ -303,7 +304,7 @@ def raw_venv_dir(
         self,
         pex_file,  # type: str
         interpreter=None,  # type: Optional[PythonInterpreter]
     ):
-        # type: (...) -> Optional[str]
+        # type: (...) -> Optional[VenvDir]
         """Distinguished from ``venv_dir`` by use of the raw_pex_root.
 
         We don't expand the pex_root at build time in case the pex_root is not writable or
         doesn't exist at build time.
diff --git a/pex/pip/installation.py b/pex/pip/installation.py
index 709c39276..1fc282257 100644
--- a/pex/pip/installation.py
+++ b/pex/pip/installation.py
@@ -11,9 +11,10 @@
 from pex import pep_427, pex_warnings, third_party
 from pex.atomic_directory import atomic_directory
-from pex.cache.dirs import CacheDir
+from pex.cache.dirs import InstalledWheelDir, PipPexDir
 from pex.common import REPRODUCIBLE_BUILDS_ENV, CopyMode, pluralize, safe_mkdtemp
 from pex.dist_metadata import Requirement
+from pex.exceptions import production_assert
 from pex.executor import Executor
 from pex.interpreter import PythonInterpreter
 from pex.jobs import iter_map_parallel
@@ -26,11 +27,10 @@
 from pex.resolve.resolvers import Resolver
 from pex.result import Error, try_
 from pex.targets import LocalInterpreter, RequiresPythonError, Targets
-from pex.third_party import isolated
 from pex.tracer import TRACER
-from pex.typing import TYPE_CHECKING
+from pex.typing import TYPE_CHECKING, cast
 from pex.util import CacheHelper
-from pex.variables import ENV
+from pex.variables import ENV, Variables
 from pex.venv.virtualenv import InstallationChoice, Virtualenv
 
 if TYPE_CHECKING:
@@ -41,6 +41,29 @@
     from pex.third_party import attr
 
 
+def _create_pip(
+    pip_pex,  # type: PipPexDir
+    interpreter=None,  # type: Optional[PythonInterpreter]
+    use_system_time=False,  # type: bool
+):
+    # type: (...) -> Pip
+
+    production_assert(os.path.exists(pip_pex.path))
+
+    pip_interpreter = interpreter or PythonInterpreter.get()
+    pex = PEX(pip_pex.path, interpreter=pip_interpreter)
+    venv_pex = ensure_venv(pex, copy_mode=CopyMode.SYMLINK)
+    pex_hash = pex.pex_info().pex_hash
+    production_assert(pex_hash is not None)
+    pip_venv = PipVenv(
+        venv_dir=venv_pex.venv_dir,
+        pex_hash=cast(str, pex_hash),
+        execute_env=tuple(REPRODUCIBLE_BUILDS_ENV.items()) if not use_system_time else (),
+        execute_args=tuple(venv_pex.execute_args()),
+    )
+    return Pip(pip_pex=pip_pex, pip_venv=pip_venv)
+
+
 def _pip_installation(
     version,  # type: PipVersionValue
     iter_distribution_locations,  # type: Callable[[], Iterator[str]]
     fingerprint,  # type: str
     interpreter=None,  # type: Optional[PythonInterpreter]
     use_system_time=False,  # type: bool
 ):
     # type: (...) 
-> Pip - pip_root = CacheDir.PIP.path(str(version)) - path = os.path.join(pip_root, "pip.pex") - pip_interpreter = interpreter or PythonInterpreter.get() - pip_pex_path = os.path.join(path, isolated().pex_hash, fingerprint) - with atomic_directory(pip_pex_path) as chroot: + + pip_pex = PipPexDir.create(version, fingerprint) + with atomic_directory(pip_pex.path) as chroot: if not chroot.is_finalized(): from pex.pex_builder import PEXBuilder @@ -82,14 +103,7 @@ def _pip_installation( ) isolated_pip_builder.set_executable(fp.name, "exe.py") isolated_pip_builder.freeze() - pip_cache = os.path.join(pip_root, "pip_cache") - pip_pex = ensure_venv(PEX(pip_pex_path, interpreter=pip_interpreter)) - pip_venv = PipVenv( - venv_dir=pip_pex.venv_dir, - execute_env=REPRODUCIBLE_BUILDS_ENV if not use_system_time else {}, - execute_args=tuple(pip_pex.execute_args()), - ) - return Pip(pip=pip_venv, version=version, pip_cache=pip_cache) + return _create_pip(pip_pex, interpreter=interpreter, use_system_time=use_system_time) def _fingerprint(requirements): @@ -198,22 +212,29 @@ def _install_wheel(wheel_path): # https://github.com/pex-tool/pex/issues/2556 wheel_hash = CacheHelper.hash(wheel_path, hasher=hashlib.sha256) wheel_name = os.path.basename(wheel_path) - destination = CacheDir.INSTALLED_WHEELS.path(wheel_hash, wheel_name) - with atomic_directory(destination) as atomic_dir: + installed_wheel_dir = InstalledWheelDir.create(wheel_name=wheel_name, install_hash=wheel_hash) + with atomic_directory(installed_wheel_dir) as atomic_dir: if not atomic_dir.is_finalized(): installed_wheel = pep_427.install_wheel_chroot( wheel_path=wheel_path, destination=atomic_dir.work_dir ) - runtime_key_dir = CacheDir.INSTALLED_WHEELS.path( - installed_wheel.fingerprint - or CacheHelper.dir_hash(atomic_dir.work_dir, hasher=hashlib.sha256) + runtime_key_dir = InstalledWheelDir.create( + wheel_name=wheel_name, + install_hash=( + installed_wheel.fingerprint + or CacheHelper.dir_hash(atomic_dir.work_dir, hasher=hashlib.sha256) + ), + wheel_hash=wheel_hash, ) - with atomic_directory(runtime_key_dir) as runtime_atomic_dir: + production_assert(runtime_key_dir.symlink_dir is not None) + with atomic_directory(cast(str, runtime_key_dir.symlink_dir)) as runtime_atomic_dir: if not runtime_atomic_dir.is_finalized(): source_path = os.path.join(runtime_atomic_dir.work_dir, wheel_name) - relative_target_path = os.path.relpath(destination, runtime_key_dir) + relative_target_path = os.path.relpath( + installed_wheel_dir, runtime_key_dir.symlink_dir + ) os.symlink(relative_target_path, source_path) - return destination + return installed_wheel_dir def _bootstrap_pip( @@ -485,3 +506,14 @@ def get_pip( ) _PIP[installation] = pip return pip + + +def iter_all( + interpreter=None, # type: Optional[PythonInterpreter] + use_system_time=False, # type: bool + pex_root=ENV, # type: Union[str, Variables] +): + # type: (...) 
-> Iterator[Pip] + + for pip_pex in PipPexDir.iter_all(pex_root=pex_root): + yield _create_pip(pip_pex, interpreter=interpreter, use_system_time=use_system_time) diff --git a/pex/pip/tool.py b/pex/pip/tool.py index 4b5d5dc5c..925e560b9 100644 --- a/pex/pip/tool.py +++ b/pex/pip/tool.py @@ -15,6 +15,7 @@ from pex import targets from pex.atomic_directory import atomic_directory from pex.auth import PasswordEntry +from pex.cache.dirs import PipPexDir from pex.common import safe_mkdir, safe_mkdtemp from pex.compatibility import get_stderr_bytes_buffer, shlex_quote, urlparse from pex.dependency_configuration import DependencyConfiguration @@ -272,8 +273,9 @@ def analyze(self, line): @attr.s(frozen=True) class PipVenv(object): venv_dir = attr.ib() # type: str - execute_env = attr.ib() # type: Mapping[str, str] - _execute_args = attr.ib() # type: Tuple[str, ...] + pex_hash = attr.ib() # type: str + execute_env = attr.ib(default=()) # type: Tuple[Tuple[str, str], ...] + _execute_args = attr.ib(default=()) # type: Tuple[str, ...] def execute_args(self, *args): # type: (*str) -> List[str] @@ -289,14 +291,33 @@ class Pip(object): _PATCHES_PACKAGE_ENV_VAR_NAME = "_PEX_PIP_RUNTIME_PATCHES_PACKAGE" _PATCHES_PACKAGE_NAME = "_pex_pip_patches" - _pip = attr.ib() # type: PipVenv - version = attr.ib() # type: PipVersionValue - _pip_cache = attr.ib() # type: str + _pip_pex = attr.ib() # type: PipPexDir + _pip_venv = attr.ib() # type: PipVenv @property def venv_dir(self): # type: () -> str - return self._pip.venv_dir + return self._pip_venv.venv_dir + + @property + def pex_hash(self): + # type: () -> str + return self._pip_venv.pex_hash + + @property + def version(self): + # type: () -> PipVersionValue + return self._pip_pex.version + + @property + def pex_dir(self): + # type: () -> PipPexDir + return self._pip_pex + + @property + def cache_dir(self): + # type: () -> str + return self._pip_pex.cache_dir @staticmethod def _calculate_resolver_version(package_index_configuration=None): @@ -384,7 +405,7 @@ def _spawn_pip_isolated( else: pip_args.append("-q") - pip_args.extend(["--cache-dir", self._pip_cache]) + pip_args.extend(["--cache-dir", self.cache_dir]) command = pip_args + list(args) @@ -403,7 +424,7 @@ def _spawn_pip_isolated( # since Pip relies upon `shutil.move` which is only atomic when `os.rename` can be used. # See https://github.com/pex-tool/pex/issues/1776 for an example of the issues non-atomic # moves lead to in the `pip wheel` case. 
- pip_tmpdir = os.path.join(self._pip_cache, ".tmp") + pip_tmpdir = os.path.join(self.cache_dir, ".tmp") safe_mkdir(pip_tmpdir) extra_env.update(TMPDIR=pip_tmpdir) @@ -432,8 +453,8 @@ def _spawn_pip_isolated( popen_kwargs["stdout"] = sys.stderr.fileno() popen_kwargs.update(stderr=subprocess.PIPE) - env.update(self._pip.execute_env) - args = self._pip.execute_args(*command) + env.update(self._pip_venv.execute_env) + args = self._pip_venv.execute_args(*command) rendered_env = " ".join( "{}={}".format(key, shlex_quote(value)) for key, value in env.items() @@ -634,10 +655,10 @@ def finalizer(_): def _ensure_wheel_installed(self, package_index_configuration=None): # type: (Optional[PackageIndexConfiguration]) -> None - pip_interpreter = self._pip.get_interpreter() + pip_interpreter = self._pip_venv.get_interpreter() with atomic_directory( os.path.join( - self._pip_cache, + self.cache_dir, ".wheel-install", hashlib.sha1(pip_interpreter.binary.encode("utf-8")).hexdigest(), ) @@ -712,3 +733,21 @@ def spawn_debug( return self._spawn_pip_isolated_job( debug_command, log=log, pip_verbosity=1, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) + + def spawn_cache_remove(self, wheel_name_glob): + # type: (str) -> Job + return self._spawn_pip_isolated_job( + args=["cache", "remove", wheel_name_glob], + pip_verbosity=1, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + def spawn_cache_list(self): + # type: () -> Job + return self._spawn_pip_isolated_job( + args=["cache", "list", "--format", "abspath"], + pip_verbosity=1, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) diff --git a/pex/pip/vcs.py b/pex/pip/vcs.py index b5a5260db..e954590f3 100644 --- a/pex/pip/vcs.py +++ b/pex/pip/vcs.py @@ -102,6 +102,10 @@ def digest_vcs_archive( hashing.dir_hash( directory=chroot, digest=digest, - dir_filter=lambda dir_path: not is_pyc_dir(dir_path) and dir_path != vcs_control_dir, + dir_filter=( + lambda dir_path: ( + not is_pyc_dir(dir_path) and os.path.basename(dir_path) != vcs_control_dir + ) + ), file_filter=lambda f: not is_pyc_file(f), ) diff --git a/pex/pip/version.py b/pex/pip/version.py index c22db9bd7..1bdb3f668 100644 --- a/pex/pip/version.py +++ b/pex/pip/version.py @@ -45,11 +45,12 @@ def overridden(cls): def __init__( self, version, # type: str + setuptools_version, # type: str + wheel_version, # type: str + requires_python, # type: str name=None, # type: Optional[str] requirement=None, # type: Optional[str] - setuptools_version=None, # type: Optional[str] - wheel_version=None, # type: Optional[str] - requires_python=None, # type: Optional[str] + setuptools_requirement=None, # type: Optional[str] hidden=False, # type: bool ): # type: (...) -> None @@ -57,22 +58,26 @@ def __init__( def to_requirement( project_name, # type: str - project_version=None, # type: Optional[str] + project_version, # type: str ): # type: (...) 
-> Requirement return Requirement.parse( "{project_name}=={project_version}".format( project_name=project_name, project_version=project_version ) - if project_version - else project_name ) self.version = Version(version) self.requirement = ( Requirement.parse(requirement) if requirement else to_requirement("pip", version) ) - self.setuptools_requirement = to_requirement("setuptools", setuptools_version) + self.setuptools_version = setuptools_version + self.setuptools_requirement = ( + Requirement.parse(setuptools_requirement) + if setuptools_requirement + else to_requirement("setuptools", setuptools_version) + ) + self.wheel_version = wheel_version self.wheel_requirement = to_requirement("wheel", wheel_version) self.requires_python = SpecifierSet(requires_python) if requires_python else None self.hidden = hidden @@ -174,6 +179,8 @@ def values(cls): name="20.3.4-patched", version="20.3.4+patched", requirement=vendor.PIP_SPEC.requirement, + setuptools_version="44.0.0+3acb925dd708430aeaf197ea53ac8a752f7c1863", + setuptools_requirement="setuptools", wheel_version="0.37.1", requires_python="<3.12", ) diff --git a/pex/resolve/downloads.py b/pex/resolve/downloads.py index dc3c761bb..bc5d0018c 100644 --- a/pex/resolve/downloads.py +++ b/pex/resolve/downloads.py @@ -8,7 +8,7 @@ from pex import hashing from pex.atomic_directory import atomic_directory -from pex.cache.dirs import CacheDir +from pex.cache.dirs import CacheDir, DownloadDir from pex.common import safe_mkdir, safe_mkdtemp from pex.hashing import Sha256 from pex.jobs import Job, Raise, SpawnedJob, execute_parallel @@ -23,10 +23,9 @@ from pex.result import Error from pex.targets import LocalInterpreter, Target from pex.typing import TYPE_CHECKING -from pex.variables import ENV if TYPE_CHECKING: - from typing import Dict, Iterable, Iterator, Optional, Union + from typing import Iterable, Iterator, Optional, Union import attr # vendor:skip @@ -35,20 +34,6 @@ from pex.third_party import attr -_DOWNLOADS_DIRS = {} # type: Dict[str, str] - - -def get_downloads_dir(pex_root=None): - # type: (Optional[str]) -> str - root_dir = pex_root or ENV.PEX_ROOT - downloads_dir = _DOWNLOADS_DIRS.get(root_dir) - if downloads_dir is None: - downloads_dir = CacheDir.DOWNLOADS.path(pex_root=root_dir) - safe_mkdir(downloads_dir) - _DOWNLOADS_DIRS[root_dir] = downloads_dir - return downloads_dir - - @attr.s(frozen=True) class ArtifactDownloader(object): resolver = attr.ib() # type: Resolver @@ -75,7 +60,7 @@ def _fingerprint_and_move(path): digest = Sha256() hashing.file_hash(path, digest) fingerprint = digest.hexdigest() - target_dir = os.path.join(get_downloads_dir(), fingerprint) + target_dir = DownloadDir.create(file_hash=fingerprint) with atomic_directory(target_dir) as atomic_dir: if not atomic_dir.is_finalized(): shutil.move(path, os.path.join(atomic_dir.work_dir, os.path.basename(path))) @@ -129,8 +114,9 @@ def _download( def _download_and_fingerprint(self, url): # type: (ArtifactURL) -> SpawnedJob[FileArtifact] - downloads = get_downloads_dir() - download_dir = safe_mkdtemp(prefix="fingerprint_artifact.", dir=downloads) + download_dir = safe_mkdtemp( + prefix="fingerprint_artifact.", dir=safe_mkdir(CacheDir.DOWNLOADS.path(".tmp")) + ) src_file = url.path temp_dest = os.path.join(download_dir, os.path.basename(src_file)) diff --git a/pex/resolve/lock_resolver.py b/pex/resolve/lock_resolver.py index 475cc06d2..1b7f58e47 100644 --- a/pex/resolve/lock_resolver.py +++ b/pex/resolve/lock_resolver.py @@ -43,6 +43,7 @@ from pex.targets import Target, Targets 
from pex.tracer import TRACER from pex.typing import TYPE_CHECKING +from pex.variables import ENV, Variables if TYPE_CHECKING: from typing import Dict, Iterable, Mapping, Optional, Sequence, Tuple, Union @@ -59,7 +60,7 @@ def __init__( self, file_lock_style, # type: FileLockStyle.Value downloader, # type: ArtifactDownloader - pex_root=None, # type: Optional[str] + pex_root=ENV, # type: Union[str, Variables] ): super(FileArtifactDownloadManager, self).__init__( pex_root=pex_root, file_lock_style=file_lock_style @@ -89,7 +90,7 @@ def __init__( password_entries=(), # type: Iterable[PasswordEntry] cache=None, # type: Optional[str] build_configuration=BuildConfiguration(), # type: BuildConfiguration - pex_root=None, # type: Optional[str] + pex_root=ENV, # type: Union[str, Variables] pip_version=None, # type: Optional[PipVersionValue] resolver=None, # type: Optional[Resolver] use_pip_config=False, # type: bool @@ -177,7 +178,7 @@ def __init__( file_lock_style, # type: FileLockStyle.Value resolver, # type: Resolver pip_version=None, # type: Optional[PipVersionValue] - pex_root=None, # type: Optional[str] + pex_root=ENV, # type: Union[str, Variables] ): super(LocalProjectDownloadManager, self).__init__( pex_root=pex_root, file_lock_style=file_lock_style diff --git a/pex/resolve/lockfile/create.py b/pex/resolve/lockfile/create.py index 868933cfa..5d153121c 100644 --- a/pex/resolve/lockfile/create.py +++ b/pex/resolve/lockfile/create.py @@ -46,6 +46,7 @@ from pex.targets import Target, Targets from pex.tracer import TRACER from pex.typing import TYPE_CHECKING +from pex.variables import ENV, Variables from pex.version import __version__ if TYPE_CHECKING: @@ -67,7 +68,7 @@ def create( cls, download_dir, # type: str locked_resolves, # type: Iterable[LockedResolve] - pex_root=None, # type: Optional[str] + pex_root=ENV, # type: Union[str, Variables] ): # type: (...) -> CreateLockDownloadManager @@ -107,7 +108,7 @@ def create( def __init__( self, path_by_artifact_and_project_name, # type: Mapping[Tuple[Artifact, ProjectName], str] - pex_root=None, # type: Optional[str] + pex_root=ENV, # type: Union[str, Variables] ): # type: (...) -> None super(CreateLockDownloadManager, self).__init__(pex_root=pex_root) diff --git a/pex/resolve/lockfile/download_manager.py b/pex/resolve/lockfile/download_manager.py index eae38f2fa..b748472d1 100644 --- a/pex/resolve/lockfile/download_manager.py +++ b/pex/resolve/lockfile/download_manager.py @@ -9,16 +9,17 @@ from pex import hashing from pex.atomic_directory import FileLockStyle, atomic_directory +from pex.cache.dirs import DownloadDir from pex.common import safe_rmtree from pex.pep_503 import ProjectName -from pex.resolve.downloads import get_downloads_dir from pex.resolve.locked_resolve import Artifact from pex.result import Error, ResultError, try_ from pex.tracer import TRACER from pex.typing import TYPE_CHECKING, Generic +from pex.variables import ENV, Variables if TYPE_CHECKING: - from typing import List, Optional, TypeVar, Union + from typing import List, TypeVar, Union import attr # vendor:skip @@ -110,7 +111,7 @@ def load(cls, artifact_dir): class DownloadManager(Generic["_A"]): def __init__( self, - pex_root=None, # type: Optional[str] + pex_root=ENV, # type: Union[str, Variables] file_lock_style=FileLockStyle.POSIX, # type: FileLockStyle.Value ): # type: (...) -> None @@ -125,8 +126,8 @@ def store( ): # type: (...) 
-> DownloadedArtifact - download_dir = os.path.join( - get_downloads_dir(pex_root=self._pex_root), artifact.fingerprint.hash + download_dir = DownloadDir.create( + file_hash=artifact.fingerprint.hash, pex_root=self._pex_root ) with atomic_directory(download_dir, lock_style=self._file_lock_style) as atomic_dir: if atomic_dir.is_finalized(): diff --git a/pex/resolver.py b/pex/resolver.py index bbf091fae..9dacac413 100644 --- a/pex/resolver.py +++ b/pex/resolver.py @@ -16,7 +16,7 @@ from pex import targets from pex.atomic_directory import AtomicDirectory, atomic_directory from pex.auth import PasswordEntry -from pex.cache.dirs import CacheDir +from pex.cache.dirs import BuiltWheelDir, CacheDir from pex.common import pluralize, safe_mkdir, safe_mkdtemp, safe_open from pex.compatibility import url_unquote, urlparse from pex.dependency_configuration import DependencyConfiguration @@ -33,7 +33,6 @@ from pex.pip.tool import PackageIndexConfiguration from pex.pip.version import PipVersionValue from pex.requirements import LocalProjectRequirement -from pex.resolve.downloads import get_downloads_dir from pex.resolve.requirement_configuration import RequirementConfiguration from pex.resolve.resolver_configuration import BuildConfiguration, PipLog, ResolverVersion from pex.resolve.resolvers import ( @@ -174,7 +173,7 @@ def download_distributions(self, dest=None, max_parallel_jobs=None): return [] dest = dest or safe_mkdtemp( - prefix="resolver_download.", dir=safe_mkdir(get_downloads_dir()) + prefix="resolver_download.", dir=safe_mkdir(CacheDir.DOWNLOADS.path(".tmp")) ) log_manager = PipLogManager.create(self.pip_log, self.targets) @@ -316,51 +315,22 @@ def create( source_path = attr.ib() # type: str fingerprint = attr.ib() # type: str - def result(self, dist_root): - # type: (str) -> BuildResult - return BuildResult.from_request(self, dist_root=dist_root) + def result(self): + # type: () -> BuildResult + return BuildResult.from_request(self) @attr.s(frozen=True) class BuildResult(object): @classmethod - def from_request( - cls, - build_request, # type: BuildRequest - dist_root, # type: str - ): - # type: (...) -> BuildResult - dist_type = "sdists" if os.path.isfile(build_request.source_path) else "local_projects" - - # For the purposes of building a wheel from source, the product should be uniqued by the - # wheel name which is unique on the host os up to the python and abi tags. In other words, - # the product of a CPython 2.7.6 wheel build and a CPython 2.7.18 wheel build should be - # functionally interchangeable if the two CPython interpreters have matching abis. - # - # However, this is foiled by at least two scenarios: - # 1. Running a vm / container with shared storage mounted. This can introduce a different - # platform on the host. - # 2. On macOS the same host can report / use different OS versions (c.f.: the - # MACOSX_DEPLOYMENT_TARGET environment variable and the 10.16 / 11.0 macOS Big Sur - # transitional case in particular). - # - # As such, we must be pessimistic and assume the wheel will be platform specific to the - # full extent possible. 
- interpreter = build_request.target.get_interpreter() - target_tags = "{python_tag}-{abi_tag}-{platform_tag}".format( - python_tag=interpreter.identity.python_tag, - abi_tag=interpreter.identity.abi_tag, - platform_tag=interpreter.identity.platform_tag, + def from_request(cls, build_request): + # type: (BuildRequest) -> BuildResult + built_wheel = BuiltWheelDir.create( + sdist=build_request.source_path, + fingerprint=build_request.fingerprint, + target=build_request.target, ) - - dist_dir = os.path.join( - dist_root, - dist_type, - os.path.basename(build_request.source_path), - build_request.fingerprint, - target_tags, - ) - return cls(request=build_request, atomic_dir=AtomicDirectory(dist_dir)) + return cls(request=build_request, atomic_dir=AtomicDirectory(built_wheel.dist_dir)) request = attr.ib() # type: BuildRequest _atomic_dir = attr.ib() # type: AtomicDirectory @@ -596,7 +566,6 @@ def __init__( @staticmethod def _categorize_build_requests( build_requests, # type: Iterable[BuildRequest] - dist_root, # type: str check_compatible=True, # type: bool ): # type: (...) -> Tuple[Iterable[BuildRequest], DefaultDict[str, OrderedSet[InstallRequest]]] @@ -605,7 +574,7 @@ def _categorize_build_requests( OrderedSet ) # type: DefaultDict[str, OrderedSet[InstallRequest]] for build_request in build_requests: - build_result = build_request.result(dist_root) + build_result = build_request.result() if not build_result.is_built: TRACER.log( "Building {} to {}".format(build_request.source_path, build_result.dist_dir) @@ -622,13 +591,9 @@ def _categorize_build_requests( ) return unsatisfied_build_requests, build_results - def _spawn_wheel_build( - self, - built_wheels_dir, # type: str - build_request, # type: BuildRequest - ): - # type: (...) -> SpawnedJob[BuildResult] - build_result = build_request.result(built_wheels_dir) + def _spawn_wheel_build(self, build_request): + # type: (BuildRequest) -> SpawnedJob[BuildResult] + build_result = build_request.result() build_job = get_pip( interpreter=build_request.target.get_interpreter(), version=self._pip_version, @@ -660,21 +625,17 @@ def build_wheels( # Nothing to build or install. 
return {} - built_wheels_dir = CacheDir.BUILT_WHEELS.path() - spawn_wheel_build = functools.partial(self._spawn_wheel_build, built_wheels_dir) - with TRACER.timed( "Building distributions for:" "\n {}".format("\n ".join(map(str, build_requests))) ): build_requests, build_results = self._categorize_build_requests( build_requests=build_requests, - dist_root=built_wheels_dir, check_compatible=check_compatible, ) for build_result in execute_parallel( inputs=build_requests, - spawn_func=spawn_wheel_build, + spawn_func=self._spawn_wheel_build, error_handler=Raise[BuildRequest, BuildResult](Untranslatable), max_jobs=max_parallel_jobs, ): diff --git a/pex/scie/science.py b/pex/scie/science.py index 63fd8b313..64d01072c 100644 --- a/pex/scie/science.py +++ b/pex/scie/science.py @@ -12,7 +12,7 @@ from pex import toml from pex.atomic_directory import atomic_directory -from pex.cache.dirs import CacheDir +from pex.cache.dirs import CacheDir, UnzipDir from pex.common import chmod_plus_x, is_exe, pluralize, safe_mkdtemp, safe_open from pex.compatibility import shlex_quote from pex.dist_metadata import NamedEntryPoint, parse_entry_point @@ -131,7 +131,7 @@ def create_manifests( else: production_assert(pex_info.pex_hash is not None) pex_hash = cast(str, pex_info.pex_hash) - configure_binding_args.append(CacheDir.UNZIPPED_PEXES.path(pex_hash, pex_root=pex_root)) + configure_binding_args.append(UnzipDir.create(pex_hash, pex_root=pex_root).path) commands = [] # type: List[Dict[str, Any]] entrypoints = configuration.options.busybox_entrypoints diff --git a/pex/sh_boot.py b/pex/sh_boot.py index 644c609f5..f932da7dd 100644 --- a/pex/sh_boot.py +++ b/pex/sh_boot.py @@ -168,7 +168,7 @@ def create_sh_boot_script( venv_dir = pex_info.raw_venv_dir(pex_file=pex_name, interpreter=interpreter) if venv_dir: - pex_installed_path = venv_dir + pex_installed_path = venv_dir.path else: pex_hash = pex_info.pex_hash if pex_hash is None: diff --git a/pex/third_party/__init__.py b/pex/third_party/__init__.py index 3c1bea0ea..a6c103a72 100644 --- a/pex/third_party/__init__.py +++ b/pex/third_party/__init__.py @@ -24,6 +24,7 @@ if TYPE_CHECKING: from typing import Container, Dict, Iterable, Iterator, List, Optional, Tuple + from pex.cache.dirs import InstalledWheelDir # noqa from pex.interpreter import PythonInterpreter @@ -634,10 +635,10 @@ def expose_installed_wheels( dists, # type: Iterable[str] interpreter=None, # type: Optional[PythonInterpreter] ): - # type: (...) -> Iterator[str] + # type: (...) -> Iterator[InstalledWheelDir] from pex.atomic_directory import atomic_directory - from pex.cache.dirs import CacheDir + from pex.cache.dirs import InstalledWheelDir from pex.pep_376 import InstalledWheel for path in expose(dists, interpreter=interpreter): @@ -648,12 +649,14 @@ def expose_installed_wheels( install_hash = installed_wheel.fingerprint or CacheHelper.dir_hash( path, hasher=hashlib.sha256 ) - wheel_path = CacheDir.INSTALLED_WHEELS.path(install_hash, wheel_file_name) - with atomic_directory(wheel_path) as atomic_dir: + installed_wheel_dir = InstalledWheelDir.create( + wheel_name=wheel_file_name, install_hash=install_hash + ) + with atomic_directory(installed_wheel_dir) as atomic_dir: if not atomic_dir.is_finalized(): for _src, _dst in iter_copytree(path, atomic_dir.work_dir, copy_mode=CopyMode.LINK): pass - yield wheel_path + yield installed_wheel_dir # Implicitly install an importer for vendored code on the first import of pex.third_party. 
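
The `expose_installed_wheels` change here and the `_install_wheel` change in pex/pip/installation.py converge on the same `InstalledWheelDir` idiom: the installed wheel chroot is stored once under one hash key, and a second runtime-key directory points back at that canonical chroot through a relative symlink. A minimal sketch of the idiom follows; the hash values and cache root are hypothetical stand-ins for what `InstalledWheelDir.create` actually computes:

    import os

    # Hypothetical stand-ins; the real values are sha256 hex digests and the
    # real directory layout is owned by InstalledWheelDir.
    cache_root = "/tmp/pex_root/installed_wheels"
    wheel_name = "ansicolors-1.1.8-py2.py3-none-any.whl"
    wheel_hash = "deadbeef"   # hash of the .whl file itself
    chroot_hash = "cafebabe"  # hash of the installed wheel chroot contents

    installed_wheel_dir = os.path.join(cache_root, wheel_hash, wheel_name)
    runtime_key_dir = os.path.join(cache_root, chroot_hash)
    os.makedirs(installed_wheel_dir)
    os.makedirs(runtime_key_dir)

    # A relative symlink ties the runtime key back to the canonical chroot, so
    # the link survives relocation of the cache root as a whole.
    os.symlink(
        os.path.relpath(installed_wheel_dir, runtime_key_dir),
        os.path.join(runtime_key_dir, wheel_name),
    )
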
diff --git a/pex/util.py b/pex/util.py
index 6ddb88b6e..0f935a040 100644
--- a/pex/util.py
+++ b/pex/util.py
@@ -22,7 +22,7 @@
 from pex.typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
-    from typing import IO, Any, Callable, Iterator, Optional, Text
+    from typing import IO, Any, Callable, Container, Iterator, Optional, Text
 
     from pex.hashing import Hasher
 
@@ -80,16 +80,29 @@ def hash(cls, path, digest=None, hasher=sha1):
         return digest.hexdigest()
 
     @classmethod
-    def pex_code_hash(cls, directory):
-        # type: (str) -> str
-        """Return a reproducible hash of the contents of a loose PEX; excluding all `.pyc` files."""
+    def pex_code_hash(
+        cls,
+        directory,  # type: str
+        exclude_dirs=(),  # type: Container[str]
+        exclude_files=(),  # type: Container[str]
+    ):
+        # type: (...) -> str
+        """Return a reproducible hash of the user code of a loose PEX; excluding all `.pyc` files.
+
+        Paths in `exclude_dirs` and `exclude_files` are omitted from the hash.
+        """
         digest = hashlib.sha1()
         hashing.dir_hash(
             directory=directory,
             digest=digest,
-            dir_filter=lambda d: not is_pyc_dir(d),
-            file_filter=lambda file_path: not is_pyc_file(file_path)
-            and not file_path.startswith("."),
+            dir_filter=lambda d: not is_pyc_dir(d) and d not in exclude_dirs,
+            file_filter=(
+                lambda f: (
+                    not is_pyc_file(f)
+                    and not os.path.basename(f).startswith(".")
+                    and f not in exclude_files
+                )
+            ),
         )
         return digest.hexdigest()
 
diff --git a/pex/variables.py b/pex/variables.py
index b22f041ad..aca8aec62 100644
--- a/pex/variables.py
+++ b/pex/variables.py
@@ -27,7 +27,11 @@
     _O = TypeVar("_O")
     _P = TypeVar("_P")
 
-    # N.B.: This is an expensive import and we only need it for type checking.
+    # N.B.: This import is circular, and we import lazily below as a result, but we also need the
+    # import eagerly for type checking.
+    from pex.cache.dirs import UnzipDir, VenvDir  # noqa
+
+    # N.B.: This is an expensive import, and we only need it for type checking.
     from pex.interpreter import PythonInterpreter
 
 
@@ -805,13 +809,13 @@ def unzip_dir(
     pex_hash,  # type: str
     expand_pex_root=True,  # type: bool
 ):
-    # type: (...) -> str
+    # type: (...) -> UnzipDir
 
     # N.B.: We need lazy import gymnastics here since CacheType uses Variables for PEX_ROOT.
-    from pex.cache.dirs import CacheDir
+    from pex.cache.dirs import UnzipDir
 
     pex_root = _expand_pex_root(pex_root) if expand_pex_root else pex_root
-    return CacheDir.UNZIPPED_PEXES.path(pex_hash, pex_root=pex_root)
+    return UnzipDir.create(pex_hash=pex_hash, pex_root=pex_root)
 
 
 def venv_dir(
@@ -823,10 +827,10 @@ def venv_dir(
     pex_root,  # type: str
     pex_file,  # type: str
     pex_hash,  # type: str
     has_interpreter_constraints,  # type: bool
     interpreter=None,  # type: Optional[PythonInterpreter]
     pex_path=(),  # type: Tuple[str, ...]
     expand_pex_root=True,  # type: bool
 ):
-    # type: (...) -> str
+    # type: (...) -> VenvDir
 
     # N.B.: We need lazy import gymnastics here since CacheType uses Variables for PEX_ROOT.
-    from pex.cache.dirs import CacheDir
+    from pex.cache.dirs import VenvDir
 
     # The venv contents are affected by which PEX files are in play as well as which interpreter
     # is selected. 
The former is influenced via PEX_PATH and the latter is influenced by interpreter @@ -889,7 +893,7 @@ def add_pex_path_items(pexes): json.dumps(venv_contents, sort_keys=True).encode("utf-8") ).hexdigest() pex_root = _expand_pex_root(pex_root) if expand_pex_root else pex_root - venv_path = CacheDir.VENVS.path(pex_hash, venv_contents_hash, pex_root=pex_root) + venv_path = VenvDir.create(pex_hash, venv_contents_hash, pex_root=pex_root) def warn(message): # type: (str) -> None diff --git a/pex/venv/venv_pex.py b/pex/venv/venv_pex.py index 787f941e2..daed27e8e 100644 --- a/pex/venv/venv_pex.py +++ b/pex/venv/venv_pex.py @@ -169,6 +169,8 @@ def maybe_log(*message): "_PEX_SCIE_INSTALLED_PEX_DIR", # This is used to override PBS distribution URLs in lazy PEX scies. "PEX_BOOTSTRAP_URLS", + # This is used to support `pex3 cache {prune,purge}`. + "_PEX_CACHE_ACCESS_LOCK", ) ] if ignored_pex_env_vars: diff --git a/pex/version.py b/pex/version.py index d82a4583f..c56fb3667 100644 --- a/pex/version.py +++ b/pex/version.py @@ -1,4 +1,4 @@ # Copyright 2015 Pex project contributors. # Licensed under the Apache License, Version 2.0 (see LICENSE). -__version__ = "2.23.0" +__version__ = "2.24.0" diff --git a/tests/integration/cli/commands/test_cache_prune.py b/tests/integration/cli/commands/test_cache_prune.py new file mode 100644 index 000000000..a76a6082e --- /dev/null +++ b/tests/integration/cli/commands/test_cache_prune.py @@ -0,0 +1,558 @@ +# Copyright 2024 Pex project contributors. +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +from __future__ import absolute_import + +import os.path +import shutil +import subprocess +import time +from datetime import datetime, timedelta +from textwrap import dedent +from typing import Dict, Tuple + +import attr # vendor:skip +import colors # vendor:skip +import pytest + +from pex.cache import access +from pex.cache.dirs import ( + BootstrapDir, + CacheDir, + InstalledWheelDir, + InterpreterDir, + PipPexDir, + UnzipDir, + UserCodeDir, + VenvDirs, +) +from pex.cli.commands.cache.du import DiskUsage +from pex.common import safe_open +from pex.pep_503 import ProjectName +from pex.pex_info import PexInfo +from pex.pip.version import PipVersion, PipVersionValue +from pex.typing import TYPE_CHECKING +from pex.variables import ENV +from testing import environment_as, run_pex_command +from testing.cli import run_pex3 +from testing.pytest.tmp import Tempdir + +if TYPE_CHECKING: + from typing import Iterable, Iterator, Optional + + +@pytest.fixture(autouse=True) +def pex_root(tmpdir): + # type: (Tempdir) -> Iterator[str] + _pex_root = tmpdir.join("pex_root") + with ENV.patch(PEX_ROOT=_pex_root) as env, environment_as(**env): + yield _pex_root + + +@pytest.fixture +def pex(tmpdir): + # type: (Tempdir) -> str + return tmpdir.join("pex") + + +@pytest.fixture +def lock(tmpdir): + # type: (Tempdir) -> str + return tmpdir.join("lock.json") + + +def test_nothing_prunable( + pex, # type: str + pex_root, # type: str +): + # type: (...) -> None + + run_pex_command(args=["-o", pex]).assert_success() + pex_size = os.path.getsize(pex) + + subprocess.check_call(args=[pex, "-c", ""]) + pre_prune_du = DiskUsage.collect(pex_root) + assert ( + pre_prune_du.size > pex_size + ), "Expected the unzipped PEX to be larger than the zipped pex." + + # The default prune threshold should be high enough to never trigger in a test run (it's 2 + # weeks old at the time of writing). 
+ run_pex3("cache", "prune").assert_success() + assert pre_prune_du == DiskUsage.collect(pex_root) + + +def test_installed_wheel_prune_build_time(pex): + # type: (str) -> None + + run_pex_command(args=["ansicolors==1.1.8", "-o", pex]).assert_success() + installed_wheels_size = DiskUsage.collect(CacheDir.INSTALLED_WHEELS.path()).size + assert installed_wheels_size > 0 + assert 0 == DiskUsage.collect(CacheDir.UNZIPPED_PEXES.path()).size + assert 0 == DiskUsage.collect(CacheDir.BOOTSTRAPS.path()).size + assert 0 == DiskUsage.collect(CacheDir.USER_CODE.path()).size + + run_pex3("cache", "prune", "--older-than", "0 seconds").assert_success() + assert 0 == DiskUsage.collect(CacheDir.UNZIPPED_PEXES.path()).size + assert 0 == DiskUsage.collect(CacheDir.INSTALLED_WHEELS.path()).size + assert 0 == DiskUsage.collect(CacheDir.BOOTSTRAPS.path()).size + assert 0 == DiskUsage.collect(CacheDir.USER_CODE.path()).size + + +def test_installed_wheel_prune_run_time( + pex, # type: str + pex_root, # type: str +): + # type: (...) -> None + + run_pex_command(args=["cowsay==5.0", "-c", "cowsay", "-o", pex]).assert_success() + pex_size = os.path.getsize(pex) + + shutil.rmtree(pex_root) + assert 0 == DiskUsage.collect(pex_root).size + + assert b"| Moo! |" in subprocess.check_output(args=[pex, "Moo!"]) + pre_prune_du = DiskUsage.collect(pex_root) + assert DiskUsage.collect(CacheDir.INSTALLED_WHEELS.path()).size > 0 + assert DiskUsage.collect(CacheDir.UNZIPPED_PEXES.path()).size > 0 + assert DiskUsage.collect(CacheDir.BOOTSTRAPS.path()).size > 0 + assert ( + 0 == DiskUsage.collect(CacheDir.USER_CODE.path()).size + ), "There is no user code in the PEX." + assert ( + pre_prune_du.size > pex_size + ), "Expected the unzipped PEX to be larger than the zipped pex." + + run_pex3("cache", "prune", "--older-than", "0 seconds").assert_success() + assert 0 == DiskUsage.collect(CacheDir.UNZIPPED_PEXES.path()).size + assert 0 == DiskUsage.collect(CacheDir.INSTALLED_WHEELS.path()).size + assert 0 == DiskUsage.collect(CacheDir.BOOTSTRAPS.path()).size + assert 0 == DiskUsage.collect(CacheDir.USER_CODE.path()).size + + +@attr.s(frozen=True) +class AnsicolorsPex(object): + path = attr.ib() # type: str + + +def write_app_py(path): + # type: (str) -> None + with safe_open(path, "w") as fp: + fp.write( + dedent( + """\ + try: + from colors import green + except ImportError: + def green(text): + return text + + + if __name__ == "__main__": + print(green("Hello Cache!")) + """ + ) + ) + + +def create_ansicolors_pex( + tmpdir, # type: Tempdir + *extra_args # type: str +): + # type: (...) -> AnsicolorsPex + pex = tmpdir.join("ansicolors.pex") + write_app_py(tmpdir.join("src", "app.py")) + run_pex_command( + args=["ansicolors==1.1.8", "-D", "src", "-m" "app", "-o", pex] + list(extra_args), + cwd=tmpdir.path, + ).assert_success() + return AnsicolorsPex(pex) + + +@pytest.fixture +def ansicolors_zipapp_pex(tmpdir): + # type: (Tempdir) -> AnsicolorsPex + + return create_ansicolors_pex(tmpdir) + + +def execute_ansicolors_pex(pex): + # type: (AnsicolorsPex) -> AnsicolorsPex + + assert ( + colors.green("Hello Cache!") + == subprocess.check_output(args=[pex.path]).decode("utf-8").strip() + ) + return pex + + +def test_app_prune( + pex_root, # type: str + ansicolors_zipapp_pex, # type: AnsicolorsPex + tmpdir, # type: Tempdir +): + # type: (...) 
-> None + + pex_size = os.path.getsize(ansicolors_zipapp_pex.path) + installed_wheels_size = DiskUsage.collect(CacheDir.INSTALLED_WHEELS.path()).size + assert installed_wheels_size > 0 + assert 0 == DiskUsage.collect(CacheDir.UNZIPPED_PEXES.path()).size + assert 0 == DiskUsage.collect(CacheDir.BOOTSTRAPS.path()).size + assert 0 == DiskUsage.collect(CacheDir.USER_CODE.path()).size + + execute_ansicolors_pex(ansicolors_zipapp_pex) + pre_prune_du = DiskUsage.collect(pex_root) + assert ( + DiskUsage.collect(CacheDir.INSTALLED_WHEELS.path()).size > installed_wheels_size + ), "Expected .pyc files to be compiled leading to more disk space usage" + assert DiskUsage.collect(CacheDir.UNZIPPED_PEXES.path()).size > 0 + assert DiskUsage.collect(CacheDir.BOOTSTRAPS.path()).size > 0 + assert DiskUsage.collect(CacheDir.USER_CODE.path()).size > 0 + assert ( + pre_prune_du.size > pex_size + ), "Expected the unzipped PEX to be larger than the zipped pex." + + run_pex3("cache", "prune", "--older-than", "0 seconds").assert_success() + assert 0 == DiskUsage.collect(CacheDir.UNZIPPED_PEXES.path()).size + assert 0 == DiskUsage.collect(CacheDir.INSTALLED_WHEELS.path()).size + assert 0 == DiskUsage.collect(CacheDir.BOOTSTRAPS.path()).size + assert 0 == DiskUsage.collect(CacheDir.USER_CODE.path()).size + + +def set_last_access_ago( + pex, # type: str + ago, # type: timedelta +): + # type: (...) -> None + + one_day_ago = time.mktime((datetime.now() - ago).timetuple()) + pex_info = PexInfo.from_pex(pex) + if pex_info.venv: + pex_dir = pex_info.runtime_venv_dir(pex) + assert pex_dir is not None + access.record_access(pex_dir, one_day_ago) + else: + assert pex_info.pex_hash is not None + access.record_access(UnzipDir.create(pex_info.pex_hash), one_day_ago) + + +def set_last_access_one_day_ago(pex): + # type: (str) -> None + set_last_access_ago(pex, timedelta(days=1)) + + +def set_last_access_one_second_ago(pex): + # type: (str) -> None + set_last_access_ago(pex, timedelta(seconds=1)) + + +def assert_installed_wheels( + names, # type: Iterable[str] + message=None, # type: Optional[str] +): + expected = set(map(ProjectName, names)) + actual = {iwd.project_name for iwd in InstalledWheelDir.iter_all()} + if message: + assert expected == actual, message + else: + assert expected == actual + + +def expected_pip_wheels(): + # type: () -> Iterable[str] + if PipVersion.DEFAULT is PipVersion.VENDORED: + return "pip", "setuptools" + else: + return "pip", "setuptools", "wheel" + + +def expected_pip_wheels_plus(*names): + # type: (*str) -> Iterable[str] + wheels = list(expected_pip_wheels()) + wheels.extend(names) + return wheels + + +def test_zipapp_prune_shared_bootstrap( + ansicolors_zipapp_pex, # type: AnsicolorsPex + tmpdir, # type: Tempdir +): + # type: (...) -> None + + execute_ansicolors_pex(ansicolors_zipapp_pex) + + empty_pex = tmpdir.join("empty.pex") + run_pex_command(args=["-o", empty_pex]).assert_success() + subprocess.check_call(args=[empty_pex, "-c", ""]) + + bootstraps = list(BootstrapDir.iter_all()) + assert len(bootstraps) == 1, "Expected a shared bootstrap between pex and empty.pex." + bootstrap = bootstraps[0] + + assert_installed_wheels( + expected_pip_wheels_plus("ansicolors"), + message=( + "There should be an ansicolors wheel for the pex as well as pip, setuptools and wheel wheels " + "for at least 1 Pip." 
+ ), + ) + + set_last_access_one_day_ago(ansicolors_zipapp_pex.path) + run_pex3("cache", "prune", "--older-than", "1 hour").assert_success() + assert [bootstrap] == list(BootstrapDir.iter_all()) + assert_installed_wheels(expected_pip_wheels()) + + +def test_zipapp_prune_shared_code( + ansicolors_zipapp_pex, # type: AnsicolorsPex + tmpdir, # type: Tempdir +): + # type: (...) -> None + + execute_ansicolors_pex(ansicolors_zipapp_pex) + code_hash = PexInfo.from_pex(ansicolors_zipapp_pex.path).code_hash + assert code_hash is not None + + all_user_code = list(UserCodeDir.iter_all()) + assert len(all_user_code) == 1 + assert code_hash == all_user_code[0].code_hash + + write_app_py(tmpdir.join("app.py")) + no_colors_pex = tmpdir.join("no-colors.pex") + run_pex_command( + args=["-M" "app", "-m", "app", "-o", no_colors_pex], cwd=tmpdir.path + ).assert_success() + assert b"Hello Cache!\n" == subprocess.check_output(args=[no_colors_pex]) + assert all_user_code == list( + UserCodeDir.iter_all() + ), "Expected the shared code cache to be re-used since the code is the same for both PEXes." + + set_last_access_one_day_ago(ansicolors_zipapp_pex.path) + run_pex3("cache", "prune", "--older-than", "1 hour").assert_success() + assert all_user_code == list( + UserCodeDir.iter_all() + ), "Expected the shared code cache to be un-pruned since no_colors_pex still needs it." + + run_pex3("cache", "prune", "--older-than", "0 seconds").assert_success() + assert len(list(UserCodeDir.iter_all())) == 0, ( + "Expected the shared code cache to be pruned since the last remaining user, no_colors_pex," + "is now pruned." + ) + + +@attr.s(frozen=True) +class CowsayPex(object): + path = attr.ib() # type: str + + +def execute_cowsay_pex(pex): + # type: (CowsayPex) -> CowsayPex + + assert "| {msg} |".format(msg=colors.yellow("Moo?!")) in subprocess.check_output( + args=[pex.path, "Moo?!"] + ).decode("utf-8") + return pex + + +@pytest.fixture +def cowsay_pex(tmpdir): + # type: (Tempdir) -> CowsayPex + + cowsay_pex = tmpdir.join("cowsay.pex") + with safe_open(tmpdir.join("exe.py"), "w") as fp: + fp.write( + dedent( + """\ + import sys + + import colors + import cowsay + + + if __name__ == "__main__": + cowsay.tux(colors.yellow(" ".join(sys.argv[1:]))) + """ + ) + ) + run_pex_command( + args=["ansicolors==1.1.8", "cowsay==5.0", "--exe", fp.name, "-o", cowsay_pex] + ).assert_success() + return execute_cowsay_pex(CowsayPex(cowsay_pex)) + + +def test_zipapp_prune_shared_deps( + ansicolors_zipapp_pex, # type: AnsicolorsPex + cowsay_pex, # type: CowsayPex + tmpdir, # type: Tempdir +): + # type: (...) -> None + + execute_ansicolors_pex(ansicolors_zipapp_pex) + assert_installed_wheels(expected_pip_wheels_plus("ansicolors", "cowsay")) + + set_last_access_one_day_ago(cowsay_pex.path) + run_pex3("cache", "prune", "--older-than", "1 hour").assert_success() + assert_installed_wheels(expected_pip_wheels_plus("ansicolors")) + + # The PEXes should still work post-prune. + execute_ansicolors_pex(ansicolors_zipapp_pex) + execute_cowsay_pex(cowsay_pex) + + +def test_venv_prune_wheel_symlinks( + tmpdir, # type: Tempdir + cowsay_pex, # type: CowsayPex +): + # type: (...) -> None + + # By default, a --venv PEX uses symlinks from site-packages to installed wheel chroot contents + # which means a --venv PEX should hold a strong dependency on the installed wheels it symlinks. 
+ + ansicolors_venv_pex = execute_ansicolors_pex(create_ansicolors_pex(tmpdir, "--venv")) + assert_installed_wheels(expected_pip_wheels_plus("ansicolors", "cowsay")) + + set_last_access_one_day_ago(cowsay_pex.path) + run_pex3("cache", "prune", "--older-than", "1 hour").assert_success() + assert_installed_wheels(expected_pip_wheels_plus("ansicolors")) + assert 0 == len( + list(UnzipDir.iter_all()) + ), "Expected the cowsay unzip dir and the --venv intermediary unzip dir to be removed." + + # And the --venv PEX should still run after a prune, but without creating the intermediary + # unzipped PEX. + execute_ansicolors_pex(ansicolors_venv_pex) + assert 0 == len(list(UnzipDir.iter_all())) + + # The cowsay PEX should also work post-prune. + execute_cowsay_pex(cowsay_pex) + + +def test_venv_prune_wheel_copies( + tmpdir, # type: Tempdir + cowsay_pex, # type: CowsayPex +): + # type: (...) -> None + + # A --venv --venv-site-packages-copies PEX uses hard links (or copies) of installed wheel chroot + # contents and so has no dependencies on those. + + ansicolors_venv_pex = execute_ansicolors_pex( + create_ansicolors_pex(tmpdir, "--venv", "--venv-site-packages-copies") + ) + assert_installed_wheels(expected_pip_wheels_plus("ansicolors", "cowsay")) + + set_last_access_one_day_ago(cowsay_pex.path) + run_pex3("cache", "prune", "--older-than", "1 hour").assert_success() + assert_installed_wheels(expected_pip_wheels()) + assert 0 == len( + list(UnzipDir.iter_all()) + ), "Expected the cowsay unzip dir and the --venv intermediary unzip dir to be removed." + + # And the --venv PEX should still run after a prune, but without creating the intermediary + # unzipped PEX. + execute_ansicolors_pex(ansicolors_venv_pex) + assert 0 == len(list(UnzipDir.iter_all())) + + # The cowsay PEX should also work post-prune. + execute_cowsay_pex(cowsay_pex) + + +def test_venv_prune_interpreter(tmpdir): + # type: (Tempdir) -> None + + ansicolors_venv_pex = create_ansicolors_pex(tmpdir, "--venv") + pre_execute_interpreters = set(InterpreterDir.iter_all()) + assert len(pre_execute_interpreters) > 0 + ansicolors_pex_info = PexInfo.from_pex(ansicolors_venv_pex.path) + + execute_ansicolors_pex(ansicolors_venv_pex) + post_execute_interpreters = set(InterpreterDir.iter_all()) + venv_interpreters = post_execute_interpreters - pre_execute_interpreters + assert len(venv_interpreters) == 1 + venv_interpreter = venv_interpreters.pop() + + assert ( + ansicolors_pex_info.runtime_venv_dir(ansicolors_venv_pex.path) + == venv_interpreter.interpreter.prefix + ) + + run_pex3("cache", "prune", "--older-than", "0 seconds").assert_success() + assert venv_interpreter not in set( + InterpreterDir.iter_all() + ), "Expected the venv interpreter to be pruned when the venv was pruned." + + +@pytest.fixture +def applicable_non_vendored_pips(): + # type: () -> Tuple[PipVersionValue, ...] 
+    return tuple(
+        pv
+        for pv in PipVersion.values()
+        if pv is not PipVersion.VENDORED and pv.requires_python_applies()
+    )
+
+
+@pytest.fixture
+def pip1(applicable_non_vendored_pips):
+    # type: (Tuple[PipVersionValue, ...]) -> PipVersionValue
+    if not applicable_non_vendored_pips:
+        pytest.skip(
+            "This test requires at least 1 non-vendored Pip `--version` to be applicable, but none are"
+        )
+    return applicable_non_vendored_pips[0]
+
+
+@pytest.fixture
+def pip2(applicable_non_vendored_pips):
+    # type: (Tuple[PipVersionValue, ...]) -> PipVersionValue
+    if len(applicable_non_vendored_pips) < 2:
+        pytest.skip(
+            "This test requires at least 2 non-vendored Pip `--version`s to be applicable, but only "
+            "the following are: {pips}".format(pips=" ".join(map(str, applicable_non_vendored_pips)))
+        )
+    return applicable_non_vendored_pips[1]
+
+
+def test_pip_prune(
+    tmpdir,  # type: Tempdir
+    pip1,  # type: PipVersionValue
+    pip2,  # type: PipVersionValue
+):
+    # type: (...) -> None
+
+    create_ansicolors_pex(tmpdir, "--pip-version", str(pip1))
+    create_ansicolors_pex(tmpdir, "--pip-version", str(pip2), "--no-wheel")
+
+    pips_by_version = {pip_dir.version: pip_dir for pip_dir in PipPexDir.iter_all()}
+    assert {pip1, pip2}.issubset(pips_by_version)
+
+    pip_venvs_by_version = {}  # type: Dict[PipVersionValue, VenvDirs]
+    venv_dirs_by_pex_hash = {venv_dirs.pex_hash: venv_dirs for venv_dirs in VenvDirs.iter_all()}
+    for pip_dir in pips_by_version.values():
+        pex_info = PexInfo.from_pex(pip_dir.path)
+        assert pex_info.pex_hash is not None
+        pip_venvs_by_version[pip_dir.version] = venv_dirs_by_pex_hash.pop(pex_info.pex_hash)
+    assert not venv_dirs_by_pex_hash, "Expected all venv dirs to be Pip venv dirs."
+
+    for pip_version, venv_dirs in pip_venvs_by_version.items():
+        if pip_version is pip1:
+            set_last_access_one_day_ago(venv_dirs.path)
+        else:
+            set_last_access_one_second_ago(venv_dirs.path)
+    pex_dir_to_last_access = dict(access.iter_all_cached_pex_dirs())
+    result = run_pex3("cache", "prune", "--older-than", "1 hour")
+    result.assert_success()
+    assert not os.path.exists(pips_by_version[pip1].base_dir), "Expected a full prune of pip1"
+
+    pip1_venv_dirs = pip_venvs_by_version.pop(pip1)
+    pex_dir_to_last_access.pop(pip1_venv_dirs)
+    assert pex_dir_to_last_access == dict(access.iter_all_cached_pex_dirs()), (
+        "Expected other Pips to have their last access reset after calling `pip cache ...` to "
+        "prune Pip wheels.\n" + result.error
+    )
+    assert set(pip_venvs_by_version) == {
+        pip_dir.version for pip_dir in PipPexDir.iter_all()
+    }, "Expected pip1 to be pruned along with the pip1 venv."
+    assert set(pip_venvs_by_version.values()) == set(
+        VenvDirs.iter_all()
+    ), "Expected the pip1 venv to be pruned along with pip1 itself."
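
These tests can drive `--older-than` deterministically because prune eligibility keys off a cache directory's recorded last-access time; the `set_last_access_ago` helper above simply backdates that timestamp. A self-contained sketch of the same backdating trick (the cache path in the trailing comment is illustrative):

    import os
    import time
    from datetime import datetime, timedelta

    def backdate_atime(path, ago):
        # type: (str, timedelta) -> None
        # Rewind the directory's atime while preserving its mtime, just as the
        # tests do to age a cache entry past a prune cutoff.
        atime = time.mktime((datetime.now() - ago).timetuple())
        mtime = os.stat(path).st_mtime
        os.utime(path, (atime, mtime))

    # E.g.: backdate_atime("/tmp/pex_root/unzipped_pexes/<pex_hash>", timedelta(days=1))
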
diff --git a/tests/integration/test_integration.py b/tests/integration/test_integration.py index f5a898cf1..bea301fe3 100644 --- a/tests/integration/test_integration.py +++ b/tests/integration/test_integration.py @@ -18,7 +18,7 @@ import pytest from pex import targets -from pex.cache.dirs import CacheDir +from pex.cache.dirs import CacheDir, InterpreterDir from pex.common import is_exe, safe_mkdir, safe_open, safe_rmtree, temporary_dir, touch from pex.compatibility import WINDOWS, commonpath from pex.dist_metadata import Distribution, Requirement, is_wheel @@ -77,8 +77,8 @@ def test_pex_raise(): def assert_interpreters(label, pex_root): # type: (str, str) -> None - assert os.listdir( - CacheDir.INTERPRETERS.path(pex_root=pex_root) + assert ( + len(list(InterpreterDir.iter_all(pex_root=pex_root))) > 0 ), "Expected {label} pex root to be populated with interpreters.".format(label=label) diff --git a/tests/integration/test_pex_bootstrapper.py b/tests/integration/test_pex_bootstrapper.py index a3f49b402..9769cc17d 100644 --- a/tests/integration/test_pex_bootstrapper.py +++ b/tests/integration/test_pex_bootstrapper.py @@ -1,6 +1,5 @@ # Copyright 2021 Pex project contributors. # Licensed under the Apache License, Version 2.0 (see LICENSE). -import glob import json import os.path import re @@ -10,10 +9,10 @@ import pytest -from pex.cache.dirs import CacheDir +from pex.cache.dirs import CacheDir, InterpreterDir from pex.common import safe_open from pex.compatibility import commonpath -from pex.interpreter import PythonIdentity, PythonInterpreter +from pex.interpreter import PythonInterpreter from pex.interpreter_constraints import InterpreterConstraint from pex.pex import PEX from pex.pex_bootstrapper import ensure_venv @@ -444,11 +443,8 @@ def test_cached_venv_interpreter_paths(tmpdir): expected_prefix = os.path.dirname(json.loads(result.output)["pex"]) actual_prefixes = [] # type: List[str] - for interp_info in glob.glob( - CacheDir.INTERPRETERS.path("*", "*", "*", "INTERP-INFO", pex_root=pex_root) - ): - with open(interp_info) as fp: - actual_prefixes.append(PythonIdentity.decode(fp.read()).prefix) + for interp_dir in InterpreterDir.iter_all(pex_root=pex_root): + actual_prefixes.append(interp_dir.interpreter.prefix) assert expected_prefix in actual_prefixes, ( "Expected venv prefix of {expected_prefix} not found in actual cached python interpreter " diff --git a/tests/resolve/lockfile/test_download_manager.py b/tests/resolve/lockfile/test_download_manager.py index 4da32469a..fd39dbb33 100644 --- a/tests/resolve/lockfile/test_download_manager.py +++ b/tests/resolve/lockfile/test_download_manager.py @@ -16,9 +16,10 @@ from pex.resolve.resolved_requirement import ArtifactURL, Fingerprint from pex.result import Error, catch from pex.typing import TYPE_CHECKING +from pex.variables import ENV, Variables if TYPE_CHECKING: - from typing import Any, List, Optional, Union + from typing import Any, List, Union import attr # vendor:skip @@ -31,7 +32,7 @@ class FakeDownloadManager(DownloadManager[FileArtifact]): def __init__( self, content, # type: bytes - pex_root=None, # type: Optional[str] + pex_root=ENV, # type: Union[str, Variables] ): # type: (...) 
-> None super(FakeDownloadManager, self).__init__(pex_root=pex_root) diff --git a/tests/test_interpreter.py b/tests/test_interpreter.py index 777b0d6ef..8a8a80bda 100644 --- a/tests/test_interpreter.py +++ b/tests/test_interpreter.py @@ -13,6 +13,7 @@ import pytest +from pex.cache.dirs import InterpreterDir from pex.common import chmod_plus_x, safe_mkdir, safe_mkdtemp, temporary_dir, touch from pex.executor import Executor from pex.interpreter import PythonInterpreter, create_shebang @@ -423,14 +424,9 @@ def test_identify_cwd_isolation_issues_1231(tmpdir): ), PythonInterpreter._cleared_memory_cache(): interp = PythonInterpreter.from_binary(python38) - interp_info_files = { - os.path.join(root, f) - for root, _, files in os.walk(pex_root) - for f in files - if f == PythonInterpreter.INTERP_INFO_FILE - } - assert 1 == len(interp_info_files) - with open(interp_info_files.pop()) as fp: + interpreter_dirs = list(InterpreterDir.iter_all(pex_root=pex_root)) + assert 1 == len(interpreter_dirs) + with open(interpreter_dirs[0].interp_info_file) as fp: assert interp.binary == json.load(fp)["binary"] diff --git a/tests/test_pip.py b/tests/test_pip.py index 0db2a3e75..ca66c9a3f 100644 --- a/tests/test_pip.py +++ b/tests/test_pip.py @@ -352,7 +352,7 @@ def test_pip_pex_interpreter_venv_hash_issue_1885( sort_keys=True, ).encode("utf-8") ).hexdigest() - assert venv_contents_hash in pip_w_linked_ppp._pip.venv_dir + assert venv_contents_hash in pip_w_linked_ppp.venv_dir @applicable_pip_versions diff --git a/tests/tools/commands/test_interpreter_command.py b/tests/tools/commands/test_interpreter_command.py index d58a42991..bcc1e61e9 100644 --- a/tests/tools/commands/test_interpreter_command.py +++ b/tests/tools/commands/test_interpreter_command.py @@ -14,7 +14,6 @@ from pex.pex_builder import PEXBuilder from pex.typing import TYPE_CHECKING from pex.venv.virtualenv import Virtualenv -from testing import PY38, PY310, ensure_python_interpreter if TYPE_CHECKING: from typing import Any, Dict, Iterable @@ -24,18 +23,6 @@ from pex.third_party import attr -@pytest.fixture(scope="module") -def python38(): - # type: () -> PythonInterpreter - return PythonInterpreter.from_binary(ensure_python_interpreter(PY38)) - - -@pytest.fixture(scope="module") -def python310(): - # type: () -> PythonInterpreter - return PythonInterpreter.from_binary(ensure_python_interpreter(PY310)) - - @attr.s(frozen=True) class InterpreterTool(object): tools_pex = attr.ib() # type: str @@ -81,13 +68,13 @@ def run( return stdout -@pytest.fixture(scope="module") +@pytest.fixture def interpreter_tool( - python38, # type: PythonInterpreter - python310, # type: PythonInterpreter + py38, # type: PythonInterpreter + py310, # type: PythonInterpreter ): # type: (...) -> InterpreterTool - return InterpreterTool.create(python38, python310) + return InterpreterTool.create(py38, py310) def expected_basic(interpreter): @@ -96,24 +83,22 @@ def expected_basic(interpreter): def test_basic( - python38, # type: PythonInterpreter + py38, # type: PythonInterpreter interpreter_tool, # type: InterpreterTool ): # type: (...) -> None output = interpreter_tool.run() - assert expected_basic(python38) == output.strip() + assert expected_basic(py38) == output.strip() def test_basic_all( - python38, # type: PythonInterpreter - python310, # type: PythonInterpreter + py38, # type: PythonInterpreter + py310, # type: PythonInterpreter interpreter_tool, # type: InterpreterTool ): # type: (...) 
-> None output = interpreter_tool.run("-a") - assert [ - expected_basic(interpreter) for interpreter in (python38, python310) - ] == output.splitlines() + assert [expected_basic(interpreter) for interpreter in (py38, py310)] == output.splitlines() def expected_verbose(interpreter): @@ -126,22 +111,22 @@ def expected_verbose(interpreter): def test_verbose( - python38, # type: PythonInterpreter + py38, # type: PythonInterpreter interpreter_tool, # type: InterpreterTool ): # type: (...) -> None output = interpreter_tool.run("-v") - assert expected_verbose(python38) == json.loads(output) + assert expected_verbose(py38) == json.loads(output) def test_verbose_all( - python38, # type: PythonInterpreter - python310, # type: PythonInterpreter + py38, # type: PythonInterpreter + py310, # type: PythonInterpreter interpreter_tool, # type: InterpreterTool ): # type: (...) -> None output = interpreter_tool.run("-va") - assert [expected_verbose(interpreter) for interpreter in (python38, python310)] == [ + assert [expected_verbose(interpreter) for interpreter in (py38, py310)] == [ json.loads(line) for line in output.splitlines() ] @@ -154,30 +139,30 @@ def expected_verbose_verbose(interpreter): def test_verbose_verbose( - python38, # type: PythonInterpreter + py38, # type: PythonInterpreter interpreter_tool, # type: InterpreterTool ): # type: (...) -> None output = interpreter_tool.run("-vv") - assert expected_verbose_verbose(python38) == json.loads(output) + assert expected_verbose_verbose(py38) == json.loads(output) def test_verbose_verbose_verbose( - python38, # type: PythonInterpreter + py38, # type: PythonInterpreter interpreter_tool, # type: InterpreterTool ): # type: (...) -> None output = interpreter_tool.run("-vvv") - expected = expected_verbose_verbose(python38) - expected.update(env_markers=python38.identity.env_markers.as_dict(), venv=False) + expected = expected_verbose_verbose(py38) + expected.update(env_markers=py38.identity.env_markers.as_dict(), venv=False) assert expected == json.loads(output) def test_verbose_verbose_verbose_venv( - python310, # type: PythonInterpreter + py310, # type: PythonInterpreter ): # type: (...) -> None - venv = Virtualenv.create(venv_dir=safe_mkdtemp(), interpreter=python310, force=True) + venv = Virtualenv.create(venv_dir=safe_mkdtemp(), interpreter=py310, force=True) assert venv.interpreter.is_venv # N.B.: Non-venv-mode PEXes always escape venvs to prevent `sys.path` contamination unless @@ -188,6 +173,6 @@ def test_verbose_verbose_verbose_venv( expected.update( env_markers=venv.interpreter.identity.env_markers.as_dict(), venv=True, - base_interpreter=python310.binary, + base_interpreter=py310.binary, ) assert expected == json.loads(output)
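
One closing observation: `test_pip_prune` treats `access.iter_all_cached_pex_dirs()` as the pruner's view of the cache, pairing each cached unzipped PEX or venv with its last-access time. The same API makes for a quick diagnostic of what a given cutoff would claim; the sketch below is not part of the patch, assumes this Pex release is importable, and its two-week default mirrors the prune threshold the tests deliberately leave untriggered:

    import time
    from datetime import timedelta

    from pex.cache import access

    def report_prunable(older_than=timedelta(weeks=2)):
        # type: (timedelta) -> None
        # Flag cached PEX dirs whose last access predates the cutoff: the same
        # dirs a `pex3 cache prune --older-than ...` run would consider.
        cutoff = time.time() - older_than.total_seconds()
        for pex_dir, last_access in access.iter_all_cached_pex_dirs():
            flag = "prunable" if last_access <= cutoff else "fresh"
            print("{flag:<8} {path}".format(flag=flag, path=pex_dir.path))

    if __name__ == "__main__":
        report_prunable()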