From 877241d14d84a97801e98f4be473e5c63de5e717 Mon Sep 17 00:00:00 2001
From: mete0r
Date: Sat, 2 Sep 2017 02:27:40 +0900
Subject: [PATCH 1/2] Generating hashes of distributions should not interfere with each other

Some distributions ship a file while others ship a directory under the
same name at the root of the archive, so their unpacked contents are
incompatible. For example, matplotlib-2.0.2.tar.gz has a directory
named "LICENSE" at its root, which may conflict with files of the same
name from many other distributions, causing errors like:

    IOError: [Errno 20] Not a directory: u'/tmp/tmpz/LICENSE/LICENSE_STIX'

So each distribution needs to be unpacked into its own isolated
directory.
---
 tests/test_repository_pypi.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tests/test_repository_pypi.py b/tests/test_repository_pypi.py
index ca4f0fc17..cea0479b9 100644
--- a/tests/test_repository_pypi.py
+++ b/tests/test_repository_pypi.py
@@ -53,3 +53,12 @@ def test_generate_hashes_all_platforms(from_line):
     repository = PyPIRepository(pip_options, session)
     ireq = from_line('cffi==1.9.1')
     assert repository.get_hashes(ireq) == expected
+
+
+def test_generate_hashes_without_interfering_with_each_other(from_line):
+    pip_command = get_pip_command()
+    pip_options, _ = pip_command.parse_args([])
+    session = pip_command._build_session(pip_options)
+    repository = PyPIRepository(pip_options, session)
+    repository.get_hashes(from_line('cffi==1.9.1'))
+    repository.get_hashes(from_line('matplotlib==2.0.2'))

From 627dbaf9b09b2f9d453bea1defb8a7905ec7f23d Mon Sep 17 00:00:00 2001
From: Tuomas Suutari
Date: Sat, 2 Sep 2017 17:09:32 +0300
Subject: [PATCH 2/2] Hash packages without unpacking

PyPIRepository._get_file_hash used to call unpack_url when generating
the hash. It only needed the side effect of the downloaded package
being left in the download directory; the unpacking step itself was
unnecessary.

Change it to open the (local or remote) package as a file object and
hash the contents without unpacking. This is faster and lighter, since
unpacking consumes CPU cycles and disk space. More importantly, it
avoids the failures that occur when one distribution has a directory
with the same name as a file in another: unpacking both packages into
the same directory then fails. E.g. matplotlib-2.0.2.tar.gz has a
directory named LICENSE, but many other packages have a file named
LICENSE.

Fixes #512, #544
---
 CHANGELOG.md                  |  1 +
 piptools/repositories/pypi.py | 51 ++++++++++++++++++++++++-----------
 2 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1de8180b7..9130eabf9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ when `--allow-unsafe` was not set. ([#517](https://github.com/jazzband/pip-tools
   (thus losing their VCS directory) and `python setup.py egg_info` fails. ([#385](https://github.com/jazzband/pip-tools/pull/385#) and [#538](https://github.com/jazzband/pip-tools/pull/538)). Thanks @blueyed and @dfee
 - Fixed bug where some primary dependencies were annotated with "via" info comments. ([#542](https://github.com/jazzband/pip-tools/pull/542)). Thanks @quantus
 - Fixed bug where pkg-resources would be removed by pip-sync in Ubuntu. ([#555](https://github.com/jazzband/pip-tools/pull/555)).
   Thanks @cemsbr
+- Fixed package hashing doing unnecessary unpacking
 
 # 1.9.0 (2017-04-12)

diff --git a/piptools/repositories/pypi.py b/piptools/repositories/pypi.py
index 1d41ecae3..f3b879457 100644
--- a/piptools/repositories/pypi.py
+++ b/piptools/repositories/pypi.py
@@ -4,9 +4,10 @@
 
 import hashlib
 import os
+from contextlib import contextmanager
 from shutil import rmtree
 
-from pip.download import unpack_url
+from pip.download import is_file_url, url_to_path
 from pip.index import PackageFinder
 from pip.req.req_set import RequirementSet
 from pip.wheel import Wheel
@@ -194,18 +195,38 @@ def get_hashes(self, ireq):
         }
 
     def _get_file_hash(self, location):
-        with TemporaryDirectory() as tmpdir:
-            unpack_url(
-                location, self.build_dir,
-                download_dir=tmpdir, only_download=True, session=self.session
-            )
-            files = os.listdir(tmpdir)
-            assert len(files) == 1
-            filename = os.path.abspath(os.path.join(tmpdir, files[0]))
-
-            h = hashlib.new(FAVORITE_HASH)
-            with open(filename, "rb") as fp:
-                for chunk in iter(lambda: fp.read(8096), b""):
-                    h.update(chunk)
-
+        h = hashlib.new(FAVORITE_HASH)
+        with open_local_or_remote_file(location, self.session) as fp:
+            for chunk in iter(lambda: fp.read(8096), b""):
+                h.update(chunk)
         return ":".join([FAVORITE_HASH, h.hexdigest()])
+
+
+@contextmanager
+def open_local_or_remote_file(link, session):
+    """
+    Open local or remote file for reading.
+
+    :type link: pip.index.Link
+    :type session: requests.Session
+    :raises ValueError: If link points to a local directory.
+    :return: a context manager to the opened file-like object
+    """
+    url = link.url_without_fragment
+
+    if is_file_url(link):
+        # Local URL
+        local_path = url_to_path(url)
+        if os.path.isdir(local_path):
+            raise ValueError("Cannot open directory for read: {}".format(url))
+        else:
+            with open(local_path, 'rb') as local_file:
+                yield local_file
+    else:
+        # Remote URL
+        headers = {"Accept-Encoding": "identity"}
+        response = session.get(url, headers=headers, stream=True)
+        try:
+            yield response.raw
+        finally:
+            response.close()
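
The approach taken in PATCH 2/2 (hash the streamed bytes of a local or remote archive instead of unpacking it first) can be sketched outside of pip's internals. The following is a minimal illustration, not part of the patches above: the helper names open_path_or_url and hash_distribution are hypothetical, and only hashlib, os, and requests are assumed.

# Standalone sketch of the "hash without unpacking" approach from PATCH 2/2.
# The names open_path_or_url and hash_distribution are hypothetical; pip-tools
# itself uses open_local_or_remote_file() with pip's Link objects instead.
import hashlib
import os
from contextlib import contextmanager

import requests


@contextmanager
def open_path_or_url(target, session=None):
    """Yield a binary file-like object for a local path or an http(s) URL."""
    if target.startswith(("http://", "https://")):
        session = session or requests.Session()
        # Ask for identity encoding so the hashed bytes match the stored file,
        # not a compressed transfer encoding.
        response = session.get(
            target, headers={"Accept-Encoding": "identity"}, stream=True)
        try:
            response.raise_for_status()
            yield response.raw
        finally:
            response.close()
    else:
        if os.path.isdir(target):
            raise ValueError("Cannot open directory for read: {}".format(target))
        with open(target, "rb") as fp:
            yield fp


def hash_distribution(target, algorithm="sha256"):
    """Hash the raw archive bytes; no temporary unpack directory is needed."""
    h = hashlib.new(algorithm)
    with open_path_or_url(target) as fp:
        for chunk in iter(lambda: fp.read(8192), b""):
            h.update(chunk)
    return "{}:{}".format(algorithm, h.hexdigest())


if __name__ == "__main__":
    # Example: a local sdist; a PyPI download URL would work the same way.
    print(hash_distribution("cffi-1.9.1.tar.gz"))

Streaming keeps memory use flat regardless of archive size, and because nothing is written to a shared temporary directory, hashing one distribution can no longer collide with the files of another.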