Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Add a pip cache command to inspect the wheel cache #3146

Closed
wants to merge 13 commits into from
3 changes: 3 additions & 0 deletions pip/commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""
from __future__ import absolute_import

from pip.commands.cache import CacheCommand
from pip.commands.completion import CompletionCommand
from pip.commands.download import DownloadCommand
from pip.commands.freeze import FreezeCommand
Expand All @@ -17,6 +18,7 @@


commands_dict = {
CacheCommand.name: CacheCommand,
CompletionCommand.name: CompletionCommand,
FreezeCommand.name: FreezeCommand,
HashCommand.name: HashCommand,
Expand All @@ -41,6 +43,7 @@
SearchCommand,
WheelCommand,
HashCommand,
CacheCommand,
CompletionCommand,
HelpCommand,
]
Expand Down
176 changes: 176 additions & 0 deletions pip/commands/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
from __future__ import absolute_import

from datetime import datetime, timedelta
import logging
import os.path

from pip._vendor.packaging import version
from pip._vendor.packaging.utils import canonicalize_name
from pip._vendor import pkg_resources
from pip.basecommand import Command, SUCCESS
from pip.exceptions import InvalidWheelFilename
from pip.utils import ask, cached_property
from pip.wheel import Wheel


logger = logging.getLogger(__name__)


class WheelCacheRecord(object):
    """Description of one ``.whl`` file found on disk.

    Attributes set by the constructor:

    * ``file_path`` -- path of the wheel file, as given.
    * ``name`` -- base name of ``file_path``.
    * ``link_path`` -- directory containing the wheel file.
    * ``wheel`` -- ``Wheel`` parsed from ``name``, or ``None`` when the file
      name is not a valid wheel file name.
    * ``project_name`` / ``version`` -- canonicalized project name and parsed
      version taken from ``wheel``; both ``None`` when ``wheel`` is ``None``.
    * ``size`` -- file size in bytes (``st_size``).
    * ``last_access_time`` -- ``datetime`` built from ``st_atime``.
    * ``possible_creation_time`` -- ``datetime`` built from ``st_mtime``
      (the modification time is only an approximation of creation time).
    """

    def __init__(self, file_path):
        """Collect name, version and ``os.stat`` data for ``file_path``.

        An invalid wheel file name is logged as a warning and leaves
        ``wheel``, ``project_name`` and ``version`` set to ``None``.
        ``os.stat`` errors are not caught here.
        """
        self.file_path = file_path
        self.name = os.path.basename(file_path)
        self.link_path = os.path.dirname(file_path)

        try:
            self.wheel = Wheel(self.name)
        except InvalidWheelFilename:
            logger.warning('Invalid wheel name for: %s', file_path)
            self.wheel = None

        if self.wheel:
            self.project_name = canonicalize_name(self.wheel.name)
            self.version = version.parse(self.wheel.version)
        else:
            self.project_name = None
            self.version = None

        stat = os.stat(file_path)
        self.size = stat.st_size
        self.last_access_time = datetime.fromtimestamp(stat.st_atime)
        self.possible_creation_time = datetime.fromtimestamp(stat.st_mtime)

    @cached_property
    def link_origin(self):
        """Content of the ``link`` file next to the wheel, or ``None``.

        The file is looked up as ``<link_path>/link``; ``None`` is returned
        when it does not exist.
        """
        link_origin_path = os.path.join(self.link_path, 'link')
        if not os.path.exists(link_origin_path):
            return None
        with open(link_origin_path) as fl:
            return fl.read()

    def match_reqs(self, reqs):
        """Return True if this wheel satisfies at least one of ``reqs``.

        ``reqs`` is an iterable of requirement objects exposing
        ``project_name`` and ``specifier``.  A record whose file name could
        not be parsed never matches.
        """
        if self.wheel is None:
            return False
        # ``version in specifier`` silently rejects pre-releases unless the
        # specifier itself mentions one, which would hide cached pre-release
        # wheels from a plain "name" query.  Every file already present in
        # the cache is a legitimate candidate, so allow them explicitly.
        return any(
            self.project_name == canonicalize_name(req.project_name) and
            req.specifier.contains(self.version, prereleases=True)
            for req in reqs)

    def remove(self):
        """Delete the wheel file from disk (``os.remove`` errors propagate)."""
        os.remove(self.file_path)


# NOTE(review): the class docstring and ``usage`` below are kept exactly as
# found; they are presumably surfaced to the user as help text by the base
# ``Command`` class -- confirm before rewording them.
#
# Behaviour of ``run``:
#   * every ``*.whl`` file under ``<cache_dir>/wheels`` becomes a
#     ``WheelCacheRecord``;
#   * ``--not-accessed-since N`` keeps records last accessed more than N days
#     ago;
#   * positional arguments are parsed as requirements and keep only the
#     records matching at least one of them;
#   * ``--remove`` deletes the selection (after confirmation unless ``--yes``),
#     ``--summary`` prints a count and total size, otherwise every record is
#     listed.
class CacheCommand(Command):
    """Utility command to inspect and deal with the cache (wheels)"""
    name = 'cache'
    usage = """
%prog [options] <query>"""
    summary = 'Cache utility'

    def __init__(self, *args, **kw):
        """Register the command-specific options on top of the base ones."""
        super(CacheCommand, self).__init__(*args, **kw)

        self.cmd_opts.add_option(
            '--summary',
            dest='summary',
            action='store_true',
            default=False,
            help='Only print a summary')
        self.cmd_opts.add_option(
            '--remove',
            dest='remove',
            action='store_true',
            default=False,
            help='Remove found cached wheels')
        self.cmd_opts.add_option(
            '-y', '--yes',
            dest='yes',
            action='store_true',
            default=False,
            help="Don't ask for confirmation of deletions.")
        self.cmd_opts.add_option(
            '--not-accessed-since',
            dest='not_accessed_since',
            type=int,
            default=None,
            help='Select all wheels not accessed since X days')

        self.parser.insert_option_group(0, self.cmd_opts)

    def run(self, options, args):
        """Select cached wheels and list, summarise or remove them.

        ``args`` are requirement strings used as a query; ``options`` carries
        ``cache_dir`` plus the flags registered in ``__init__``.  Always
        returns ``SUCCESS``.
        """
        # Build real lists everywhere below: on Python 3 ``map``/``filter``
        # return one-shot iterators, which are always truthy, have no
        # ``len()``/``sort()`` and are empty on a second pass.
        reqs = [pkg_resources.Requirement.parse(arg) for arg in args]

        records = []
        wheels_root = os.path.join(options.cache_dir, 'wheels')
        for dirpath, _dirnames, filenames in os.walk(wheels_root):
            # Should we filter on the paths and ignore those that
            # do not conform to the xx/yy/zz/hhhh...hhhh/ pattern?
            for filename in filenames:
                if filename.endswith('.whl'):
                    records.append(
                        WheelCacheRecord(os.path.join(dirpath, filename)))

        # ``is not None`` so that an explicit ``0`` is honoured as well.
        if options.not_accessed_since is not None:
            # check if possible to have:
            # --not-accessed-since and --not-accessed-since-days
            min_last_access = datetime.now() - timedelta(
                days=options.not_accessed_since)
            records = [
                record for record in records
                if record.last_access_time < min_last_access]

        if reqs:
            records = [
                record for record in records if record.match_reqs(reqs)]

        if options.remove:
            if not records:
                # Nothing selected: do not prompt for an empty deletion.
                logger.info('No cached wheels to delete')
                return SUCCESS
            wheel_paths = [record.file_path for record in records]
            logger.info('Deleting:\n- %s', '\n- '.join(wheel_paths))
            if options.yes:
                response = 'yes'
            else:
                response = ask('Proceed (yes/no)? ', ('yes', 'no'))
            if response == 'yes':
                for record in records:
                    record.remove()
                # Should we try to cleanup empty dirs and link files ?
        elif options.summary:
            total_size = sum(record.size for record in records)
            logger.info(
                'Found %s cached wheels for %s',
                len(records), human_readable_size(total_size))
        else:
            log_results(records)

        return SUCCESS


def sort_key(record):
    """Return the ordering key of a cache record.

    The key is the tuple (wheel name, wheel version, link directory), read
    from ``record.wheel.name``, ``record.wheel.version`` and
    ``record.link_path``.
    """
    wheel = record.wheel
    return wheel.name, wheel.version, record.link_path


def _log_record(record, display_name):
    """Log the detail lines (name, path, origin, size, last use) of a record."""
    logger.info(' - %s', display_name)
    logger.info(' Path: %s', record.link_path)
    if record.link_origin:
        logger.info(' Original link: %s', record.link_origin)
    logger.info(
        ' Size: %s - Last used: %s',
        human_readable_size(record.size), record.last_access_time)


def log_results(records):
    """Log every record, grouped under its wheel name.

    ``records`` may be any iterable of cache records; it is copied, so the
    caller's sequence is neither required to be a list nor reordered.
    Records are ordered with ``sort_key`` and the wheel name is logged once
    as a heading before the records sharing it.

    Records whose ``wheel`` attribute is ``None`` (file name not parseable
    as a wheel) have no wheel name or version to group and sort by; they are
    listed last, by file name, under their own heading instead of raising
    ``AttributeError``.
    """
    records = list(records)
    parsed = sorted(
        (record for record in records if record.wheel is not None),
        key=sort_key)
    unparsed = sorted(
        (record for record in records if record.wheel is None),
        key=lambda record: record.file_path)

    current_name = None
    for record in parsed:
        if record.wheel.name != current_name:
            current_name = record.wheel.name
            logger.info(current_name)
        _log_record(record, record.wheel.filename)

    if unparsed:
        logger.info('Files with an invalid wheel name')
        for record in unparsed:
            _log_record(record, record.name)


def human_readable_size(nb_bytes):
    """Format a byte count using the largest fitting binary unit.

    The value is divided by 1024 until it drops below 1024 or the largest
    known unit (Tb) is reached.  Plain bytes are rendered as an integer
    (``'512b'``); every other unit with one decimal (``'1.5kb'``).

    :param nb_bytes: size in bytes.
    :returns: the formatted size as a string.
    """
    unit_formatter = ('%db', '%.1fkb', '%.1fMb', '%.1fGb', '%.1fTb')
    last_unit = len(unit_formatter) - 1
    unit_index = 0
    # ``>=`` so that exactly 1024 of a unit rolls over to the next one
    # (1024 bytes is '1.0kb', not '1024b').
    while nb_bytes >= 1024 and unit_index < last_unit:
        nb_bytes = nb_bytes / 1024.0
        unit_index += 1
    return unit_formatter[unit_index] % (nb_bytes,)
5 changes: 5 additions & 0 deletions pip/wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -800,6 +800,11 @@ def build(self, autobuilding=False):
output_dir = _cache_for_link(self._cache_root, req.link)
try:
ensure_dir(output_dir)
# Write a link file
link_filename = os.path.join(output_dir, 'link')
if not os.path.exists(link_filename) and req.link.url:
with open(link_filename, 'w') as lf:
lf.write(req.link.url)
except OSError as e:
logger.warn("Building wheel for %s failed: %s",
req.name, e)
Expand Down
9 changes: 5 additions & 4 deletions tests/functional/test_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,17 +890,17 @@ def test_install_builds_wheels(script, data):
# Must have installed it all
assert expected in str(res), str(res)
root = appdirs.user_cache_dir('pip')
wheels = []
files_in_cache = []
for top, dirs, files in os.walk(os.path.join(root, "wheels")):
wheels.extend(files)
files_in_cache.extend(files)
# and built wheels for upper and wheelbroken
assert "Running setup.py bdist_wheel for upper" in str(res), str(res)
assert "Running setup.py bdist_wheel for wheelb" in str(res), str(res)
# But not requires_wheel... which is a local dir and thus uncachable.
assert "Running setup.py bdist_wheel for requir" not in str(res), str(res)
# wheelbroken has to run install
# into the cache
assert wheels != [], str(res)
assert files_in_cache != [], str(res)
# and installed from the wheel
assert "Running setup.py install for upper" not in str(res), str(res)
# the local tree can't build a wheel (because we can't assume that every
Expand All @@ -909,8 +909,9 @@ def test_install_builds_wheels(script, data):
# wheelbroken has to run install
assert "Running setup.py install for wheelb" in str(res), str(res)
# We want to make sure we used the correct implementation tag
assert wheels == [
assert files_in_cache == [
"Upper-2.0-{0}-none-any.whl".format(pep425tags.implementation_tag),
"link",
]


Expand Down