Skip to content

Commit

Permalink
Prune downloads.
Browse files Browse the repository at this point in the history
  • Loading branch information
jsirois committed Oct 18, 2024
1 parent e5fade4 commit 0f2428a
Show file tree
Hide file tree
Showing 8 changed files with 133 additions and 47 deletions.
76 changes: 74 additions & 2 deletions pex/cache/dirs.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
if TYPE_CHECKING:
from typing import Any, Iterable, Iterator, List, Optional, Type, TypeVar, Union

from pex.pep_440 import Version
from pex.pep_503 import ProjectName
from pex.pip.version import PipVersionValue


Expand Down Expand Up @@ -99,7 +101,7 @@ def iter_transitive_dependents(self):

DOWNLOADS = Value(
"downloads",
version=0,
version=1,
name="Lock Artifact Downloads",
description="Distributions downloaded when resolving from a Pex lock file.",
)
Expand Down Expand Up @@ -349,6 +351,9 @@ class InstalledWheelDir(AtomicCacheDir):
@classmethod
def iter_all(cls, pex_root=ENV):
# type: (Union[str, Variables]) -> Iterator[InstalledWheelDir]

from pex.dist_metadata import ProjectNameAndVersion

symlinks = [] # type: List[str]
dirs = [] # type: List[str]
for path in glob.glob(CacheDir.INSTALLED_WHEELS.path("*", "*.whl", pex_root=pex_root)):
Expand All @@ -366,9 +371,12 @@ def iter_all(cls, pex_root=ENV):
wheel_dir = os.path.realpath(symlink)
wheel_hash = os.path.basename(os.path.dirname(wheel_dir))
wheel_name = os.path.basename(wheel_dir)
pnav = ProjectNameAndVersion.from_filename(wheel_name)
installed_wheel_dir = InstalledWheelDir(
wheel_dir,
wheel_name=wheel_name,
project_name=pnav.canonicalized_project_name,
version=pnav.canonicalized_version,
install_hash=install_hash,
wheel_hash=wheel_hash,
symlink_dir=symlink_dir,
Expand All @@ -379,8 +387,13 @@ def iter_all(cls, pex_root=ENV):
for wheel_dir in dirs:
install_hash = os.path.basename(os.path.dirname(wheel_dir))
wheel_name = os.path.basename(wheel_dir)
pnav = ProjectNameAndVersion.from_filename(wheel_name)
installed_wheel_dir = InstalledWheelDir(
wheel_dir, wheel_name=wheel_name, install_hash=install_hash
wheel_dir,
wheel_name=wheel_name,
project_name=pnav.canonicalized_project_name,
version=pnav.canonicalized_version,
install_hash=install_hash,
)
if installed_wheel_dir not in seen:
seen.add(installed_wheel_dir)
Expand All @@ -396,6 +409,9 @@ def create(
):
# type: (...) -> InstalledWheelDir

from pex.dist_metadata import ProjectNameAndVersion

pnav = ProjectNameAndVersion.from_filename(wheel_name)
wheel_dir = CacheDir.INSTALLED_WHEELS.path(install_hash, wheel_name, pex_root=pex_root)
symlink_dir = None # type: Optional[str]
if os.path.islink(wheel_dir):
Expand All @@ -413,6 +429,8 @@ def create(
return cls(
path=wheel_dir,
wheel_name=wheel_name,
project_name=pnav.canonicalized_project_name,
version=pnav.canonicalized_version,
install_hash=install_hash,
wheel_hash=wheel_hash,
symlink_dir=symlink_dir,
Expand All @@ -422,13 +440,17 @@ def __init__(
self,
path, # type: str
wheel_name, # type: str
project_name, # type: ProjectName
version, # type: Version
install_hash, # type: str
wheel_hash=None, # type: Optional[str]
symlink_dir=None, # type: Optional[str]
):
# type: (...) -> None
super(InstalledWheelDir, self).__init__(path)
self.wheel_name = wheel_name
self.project_name = project_name
self.version = version
self.install_hash = install_hash
self.wheel_hash = wheel_hash
self.symlink_dir = symlink_dir
Expand Down Expand Up @@ -519,3 +541,53 @@ def __init__(
self.version = version
self.base_dir = base_dir
self.cache_dir = cache_dir


class DownloadDir(AtomicCacheDir):
    """A directory in the ``CacheDir.DOWNLOADS`` cache holding one downloaded distribution.

    The directory is named by ``file_hash`` and contains the sdist or wheel named ``file_name``.
    """

    @classmethod
    def iter_all(cls, pex_root=ENV):
        # type: (Union[str, Variables]) -> Iterator[DownloadDir]
        """Yield a `DownloadDir` for each sdist or wheel file found in the downloads cache.

        :param pex_root: The PEX_ROOT whose downloads cache should be scanned.
        """

        from pex.dist_metadata import is_sdist, is_wheel

        for file_path in glob.glob(CacheDir.DOWNLOADS.path("*", "*", pex_root=pex_root)):
            # Only distribution files are of interest: skip sub-directories and any file that is
            # neither an sdist nor a wheel.
            if os.path.isdir(file_path):
                continue
            if not is_sdist(file_path) and not is_wheel(file_path):
                continue
            directory, file_name = os.path.split(file_path)
            file_hash = os.path.basename(directory)
            # Propagate `pex_root` so the yielded directory lives under the same root that was
            # scanned instead of silently falling back to the default root.
            yield cls.create(file_hash=file_hash, file_name=file_name, pex_root=pex_root)

    @classmethod
    def create(
        cls,
        file_hash,  # type: str
        file_name,  # type: str
        pex_root=ENV,  # type: Union[str, Variables]
    ):
        # type: (...) -> DownloadDir
        """Create the `DownloadDir` describing a downloaded distribution file.

        :param file_hash: The name of the directory under the downloads cache holding the file.
        :param file_name: The sdist or wheel file name; the project name and version are parsed
            from it.
        :param pex_root: The PEX_ROOT the downloads cache lives under; defaults to the ambient
            environment's, which matches the previous behavior for existing callers.
        """
        from pex.dist_metadata import ProjectNameAndVersion, is_wheel

        pnav = ProjectNameAndVersion.from_filename(file_name)
        return cls(
            path=CacheDir.DOWNLOADS.path(file_hash, pex_root=pex_root),
            file_name=file_name,
            project_name=pnav.canonicalized_project_name,
            version=pnav.canonicalized_version,
            is_wheel=is_wheel(file_name),
        )

    def __init__(
        self,
        path,  # type: str
        file_name,  # type: str
        project_name,  # type: ProjectName
        version,  # type: Version
        is_wheel,  # type: bool
    ):
        # type: (...) -> None
        """Record the download directory path along with metadata about the file it holds.

        :param path: The path of the download directory.
        :param file_name: The name of the distribution file inside the directory.
        :param project_name: The project name of the distribution.
        :param version: The version of the distribution.
        :param is_wheel: `True` if the distribution file is a wheel.
        """
        super(DownloadDir, self).__init__(path)
        self.file_name = file_name
        self.project_name = project_name
        self.version = version
        self.is_wheel = is_wheel
49 changes: 38 additions & 11 deletions pex/cli/commands/cache/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from __future__ import absolute_import, print_function

import functools
import os
import re
from argparse import Action, ArgumentError, _ActionsContainer
Expand All @@ -11,7 +12,14 @@

from pex.cache import access as cache_access
from pex.cache import data as cache_data
from pex.cache.dirs import AtomicCacheDir, BootstrapDir, CacheDir, InstalledWheelDir, VenvDirs
from pex.cache.dirs import (
AtomicCacheDir,
BootstrapDir,
CacheDir,
DownloadDir,
InstalledWheelDir,
VenvDirs,
)
from pex.cli.command import BuildTimeCommand
from pex.cli.commands.cache.bytes import ByteAmount, ByteUnits
from pex.cli.commands.cache.du import DiskUsage
Expand All @@ -21,6 +29,8 @@
from pex.exceptions import reportable_unexpected_error_msg
from pex.jobs import SpawnedJob, execute_parallel, iter_map_parallel, map_parallel
from pex.orderedset import OrderedSet
from pex.pep_440 import Version
from pex.pep_503 import ProjectName
from pex.pip.installation import iter_all as iter_all_pips
from pex.pip.tool import Pip
from pex.pip.version import PipVersionValue
Expand All @@ -30,7 +40,7 @@

if TYPE_CHECKING:
import typing
from typing import IO, Dict, Iterable, List, Optional, Tuple, Union
from typing import IO, Dict, Iterable, List, Mapping, Optional, Tuple, Union

import attr # vendor:skip
else:
Expand Down Expand Up @@ -447,18 +457,26 @@ def _purge(self):

return Ok()

def _prune_cache_dir(self, cache_dir):
# type: (AtomicCacheDir) -> DiskUsage
def _prune_cache_dir(
self,
additional_cache_dirs_by_project_name_and_version, # type: Mapping[Tuple[ProjectName, Version], Iterable[AtomicCacheDir]]
cache_dir, # type: AtomicCacheDir
):
# type: (...) -> DiskUsage
paths_to_prune = [] # type: List[str]

def prune_if_exists(path):
if os.path.exists(path):
# type: (Optional[str]) -> None
if path and os.path.exists(path):
paths_to_prune.append(path)

if isinstance(cache_dir, InstalledWheelDir):
paths_to_prune.append(os.path.dirname(cache_dir.path))
packed_wheel_dir = CacheDir.PACKED_WHEELS.path(cache_dir.install_hash)
prune_if_exists(packed_wheel_dir)
prune_if_exists(CacheDir.PACKED_WHEELS.path(cache_dir.install_hash))
for additional_dir in additional_cache_dirs_by_project_name_and_version.get(
(cache_dir.project_name, cache_dir.version), ()
):
prune_if_exists(additional_dir)
elif isinstance(cache_dir, BootstrapDir):
paths_to_prune.append(cache_dir.path)
prune_if_exists(CacheDir.BOOTSTRAP_ZIPS.path(cache_dir.bootstrap_hash))
Expand Down Expand Up @@ -506,6 +524,15 @@ def _prune(self):
finally:
print(file=fp)

additional_cache_dirs_by_project_name_and_version = {
(download_dir.project_name, download_dir.version): [download_dir]
for download_dir in DownloadDir.iter_all()
} # type: Dict[Tuple[ProjectName, Version], List[AtomicCacheDir]]

prune_cache_dir = functools.partial(
self._prune_cache_dir, additional_cache_dirs_by_project_name_and_version
)

def prune_unused_deps(additional=False):
# type: (bool) -> Iterable[InstalledWheelDir]
with cache_data.prune(tuple(InstalledWheelDir.iter_all())) as unused_deps_iter:
Expand All @@ -515,7 +542,7 @@ def prune_unused_deps(additional=False):
disk_usages = tuple(
iter_map_parallel(
unused_wheels,
self._prune_cache_dir,
prune_cache_dir,
noun="cached PEX dependency",
verb="prune",
verb_past="pruned",
Expand Down Expand Up @@ -703,7 +730,7 @@ def spawn_remove(args):
tuple(
iter_map_parallel(
pex_dirs,
self._prune_cache_dir,
prune_cache_dir,
noun="cached PEX",
verb="prune",
verb_past="pruned",
Expand All @@ -727,7 +754,7 @@ def spawn_remove(args):
tuple(
iter_map_parallel(
deps,
self._prune_cache_dir,
prune_cache_dir,
noun="cached PEX dependency",
verb="prune",
verb_past="pruned",
Expand All @@ -748,7 +775,7 @@ def spawn_remove(args):
disk_usages = tuple(
iter_map_parallel(
unused_deps,
self._prune_cache_dir,
prune_cache_dir,
noun="cached PEX dependency",
verb="prune",
verb_past="pruned",
Expand Down
24 changes: 5 additions & 19 deletions pex/resolve/downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,9 @@
from pex.result import Error
from pex.targets import LocalInterpreter, Target
from pex.typing import TYPE_CHECKING
from pex.variables import ENV

if TYPE_CHECKING:
from typing import Dict, Iterable, Iterator, Optional, Union
from typing import Iterable, Iterator, Optional, Union

import attr # vendor:skip

Expand All @@ -35,20 +34,6 @@
from pex.third_party import attr


_DOWNLOADS_DIRS = {} # type: Dict[str, str]


def get_downloads_dir(pex_root=None):
# type: (Optional[str]) -> str
root_dir = pex_root or ENV.PEX_ROOT
downloads_dir = _DOWNLOADS_DIRS.get(root_dir)
if downloads_dir is None:
downloads_dir = CacheDir.DOWNLOADS.path(pex_root=root_dir)
safe_mkdir(downloads_dir)
_DOWNLOADS_DIRS[root_dir] = downloads_dir
return downloads_dir


@attr.s(frozen=True)
class ArtifactDownloader(object):
resolver = attr.ib() # type: Resolver
Expand All @@ -75,7 +60,7 @@ def _fingerprint_and_move(path):
digest = Sha256()
hashing.file_hash(path, digest)
fingerprint = digest.hexdigest()
target_dir = os.path.join(get_downloads_dir(), fingerprint)
target_dir = CacheDir.DOWNLOADS.path(fingerprint)
with atomic_directory(target_dir) as atomic_dir:
if not atomic_dir.is_finalized():
shutil.move(path, os.path.join(atomic_dir.work_dir, os.path.basename(path)))
Expand Down Expand Up @@ -129,8 +114,9 @@ def _download(

def _download_and_fingerprint(self, url):
# type: (ArtifactURL) -> SpawnedJob[FileArtifact]
downloads = get_downloads_dir()
download_dir = safe_mkdtemp(prefix="fingerprint_artifact.", dir=downloads)
download_dir = safe_mkdtemp(
prefix="fingerprint_artifact.", dir=safe_mkdir(CacheDir.DOWNLOADS.path(".tmp"))
)

src_file = url.path
temp_dest = os.path.join(download_dir, os.path.basename(src_file))
Expand Down
7 changes: 4 additions & 3 deletions pex/resolve/lock_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
from pex.targets import Target, Targets
from pex.tracer import TRACER
from pex.typing import TYPE_CHECKING
from pex.variables import ENV, Variables

if TYPE_CHECKING:
from typing import Dict, Iterable, Mapping, Optional, Sequence, Tuple, Union
Expand All @@ -59,7 +60,7 @@ def __init__(
self,
file_lock_style, # type: FileLockStyle.Value
downloader, # type: ArtifactDownloader
pex_root=None, # type: Optional[str]
pex_root=ENV, # type: Union[str, Variables]
):
super(FileArtifactDownloadManager, self).__init__(
pex_root=pex_root, file_lock_style=file_lock_style
Expand Down Expand Up @@ -89,7 +90,7 @@ def __init__(
password_entries=(), # type: Iterable[PasswordEntry]
cache=None, # type: Optional[str]
build_configuration=BuildConfiguration(), # type: BuildConfiguration
pex_root=None, # type: Optional[str]
pex_root=ENV, # type: Union[str, Variables]
pip_version=None, # type: Optional[PipVersionValue]
resolver=None, # type: Optional[Resolver]
use_pip_config=False, # type: bool
Expand Down Expand Up @@ -177,7 +178,7 @@ def __init__(
file_lock_style, # type: FileLockStyle.Value
resolver, # type: Resolver
pip_version=None, # type: Optional[PipVersionValue]
pex_root=None, # type: Optional[str]
pex_root=ENV, # type: Union[str, Variables]
):
super(LocalProjectDownloadManager, self).__init__(
pex_root=pex_root, file_lock_style=file_lock_style
Expand Down
5 changes: 3 additions & 2 deletions pex/resolve/lockfile/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
from pex.targets import Target, Targets
from pex.tracer import TRACER
from pex.typing import TYPE_CHECKING
from pex.variables import ENV, Variables
from pex.version import __version__

if TYPE_CHECKING:
Expand All @@ -67,7 +68,7 @@ def create(
cls,
download_dir, # type: str
locked_resolves, # type: Iterable[LockedResolve]
pex_root=None, # type: Optional[str]
pex_root=ENV, # type: Union[str, Variables]
):
# type: (...) -> CreateLockDownloadManager

Expand Down Expand Up @@ -107,7 +108,7 @@ def create(
def __init__(
self,
path_by_artifact_and_project_name, # type: Mapping[Tuple[Artifact, ProjectName], str]
pex_root=None, # type: Optional[str]
pex_root=ENV, # type: Union[str, Variables]
):
# type: (...) -> None
super(CreateLockDownloadManager, self).__init__(pex_root=pex_root)
Expand Down
Loading

0 comments on commit 0f2428a

Please sign in to comment.