From 41772d8e7c5a6b80a3da3355928d63ffa6ff27cf Mon Sep 17 00:00:00 2001 From: Pradyun Gedam Date: Tue, 2 Jul 2024 19:51:22 +0100 Subject: [PATCH 1/4] Merge pull request #12799 from encukou/gh-12781-tar-hardlink untar_file: remove common leading directory before unpacking --- news/12781.bugfix.rst | 1 + src/pip/_internal/utils/unpacking.py | 14 ++- tests/functional/test_install.py | 129 +++++++++++++++++++++++++++ tests/unit/test_utils_unpacking.py | 41 +++++++++ 4 files changed, 183 insertions(+), 2 deletions(-) create mode 100644 news/12781.bugfix.rst diff --git a/news/12781.bugfix.rst b/news/12781.bugfix.rst new file mode 100644 index 00000000000..6bd43d347db --- /dev/null +++ b/news/12781.bugfix.rst @@ -0,0 +1 @@ +Fix finding hardlink targets in tar files with an ignored top-level directory. diff --git a/src/pip/_internal/utils/unpacking.py b/src/pip/_internal/utils/unpacking.py index 341269550ce..875e30e13ab 100644 --- a/src/pip/_internal/utils/unpacking.py +++ b/src/pip/_internal/utils/unpacking.py @@ -190,9 +190,19 @@ def untar_file(filename: str, location: str) -> None: else: default_mode_plus_executable = _get_default_mode_plus_executable() + if leading: + # Strip the leading directory from all files in the archive, + # including hardlink targets (which are relative to the + # unpack location). + for member in tar.getmembers(): + name_lead, name_rest = split_leading_dir(member.name) + member.name = name_rest + if member.islnk(): + lnk_lead, lnk_rest = split_leading_dir(member.linkname) + if lnk_lead == name_lead: + member.linkname = lnk_rest + def pip_filter(member: tarfile.TarInfo, path: str) -> tarfile.TarInfo: - if leading: - member.name = split_leading_dir(member.name)[1] orig_mode = member.mode try: try: diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py index eaea12a163c..4e220328d29 100644 --- a/tests/functional/test_install.py +++ b/tests/functional/test_install.py @@ -1,9 +1,11 @@ import hashlib +import io import os import re import ssl import sys import sysconfig +import tarfile import textwrap from os.path import curdir, join, pardir from pathlib import Path @@ -2590,3 +2592,130 @@ def test_install_pip_prints_req_chain_pypi(script: PipTestEnvironment) -> None: f"Collecting python-openid " f"(from Paste[openid]==1.7.5.1->-r {req_path} (line 1))" in result.stdout ) + + +@pytest.mark.parametrize("common_prefix", ("", "linktest-1.0/")) +def test_install_sdist_links(script: PipTestEnvironment, common_prefix: str) -> None: + """ + Test installing an sdist with hard and symbolic links. + """ + + # Build an unpack an sdist that contains data files: + # - root.dat + # - sub/inner.dat + # and links (symbolic and hard) to both of those, both in the top-level + # and 'sub/' directories. That's 8 links total. + + # We build the sdist from in-memory data, since the filesystem + # might not support both kinds of links. + + sdist_path = script.scratch_path.joinpath("linktest-1.0.tar.gz") + + def add_file(tar: tarfile.TarFile, name: str, content: str) -> None: + info = tarfile.TarInfo(common_prefix + name) + content_bytes = content.encode("utf-8") + info.size = len(content_bytes) + tar.addfile(info, io.BytesIO(content_bytes)) + + def add_link(tar: tarfile.TarFile, name: str, linktype: str, target: str) -> None: + info = tarfile.TarInfo(common_prefix + name) + info.type = {"sym": tarfile.SYMTYPE, "hard": tarfile.LNKTYPE}[linktype] + info.linkname = target + tar.addfile(info) + + with tarfile.open(sdist_path, "w:gz") as sdist_tar: + add_file( + sdist_tar, + "PKG-INFO", + textwrap.dedent( + """ + Metadata-Version: 2.1 + Name: linktest + Version: 1.0 + """ + ), + ) + + add_file(sdist_tar, "src/linktest/__init__.py", "") + add_file(sdist_tar, "src/linktest/root.dat", "Data") + add_file(sdist_tar, "src/linktest/sub/__init__.py", "") + add_file(sdist_tar, "src/linktest/sub/inner.dat", "Data") + linknames = [] + + # Windows requires native path separators in symlink targets. + # (see https://github.com/python/cpython/issues/57911) + # (This is not needed for hardlinks, nor for the workaround tarfile + # uses if symlinking is disabled.) + SEP = os.path.sep + + pkg_root = f"{common_prefix}src/linktest" + for prefix, target_tag, linktype, target in [ + ("", "root", "sym", "root.dat"), + ("", "root", "hard", f"{pkg_root}/root.dat"), + ("", "inner", "sym", f"sub{SEP}inner.dat"), + ("", "inner", "hard", f"{pkg_root}/sub/inner.dat"), + ("sub/", "root", "sym", f"..{SEP}root.dat"), + ("sub/", "root", "hard", f"{pkg_root}/root.dat"), + ("sub/", "inner", "sym", "inner.dat"), + ("sub/", "inner", "hard", f"{pkg_root}/sub/inner.dat"), + ]: + name = f"{prefix}link.{target_tag}.{linktype}.dat" + add_link(sdist_tar, "src/linktest/" + name, linktype, target) + linknames.append(name) + + add_file( + sdist_tar, + "pyproject.toml", + textwrap.dedent( + """ + [build-system] + requires = ["setuptools"] + build-backend = "setuptools.build_meta" + [project] + name = "linktest" + version = "1.0" + [tool.setuptools] + include-package-data = true + [tool.setuptools.packages.find] + where = ["src"] + [tool.setuptools.package-data] + "*" = ["*.dat"] + """ + ), + ) + + add_file( + sdist_tar, + "src/linktest/__main__.py", + textwrap.dedent( + f""" + from pathlib import Path + linknames = {linknames!r} + + # we could use importlib.resources here once + # it has stable convenient API across supported versions + res_path = Path(__file__).parent + + for name in linknames: + data_text = res_path.joinpath(name).read_text() + assert data_text == "Data" + print(str(len(linknames)) + ' files checked') + """ + ), + ) + + # Show sdist content, for debugging the test + result = script.run("python", "-m", "tarfile", "-vl", str(sdist_path)) + print(result) + + # Install the package + result = script.pip("install", str(sdist_path)) + print(result) + + # Show installed content, for debugging the test + result = script.pip("show", "-f", "linktest") + print(result) + + # Run the internal test + result = script.run("python", "-m", "linktest") + assert result.stdout.strip() == "8 files checked" diff --git a/tests/unit/test_utils_unpacking.py b/tests/unit/test_utils_unpacking.py index 3fdd822e739..efccbdccb0d 100644 --- a/tests/unit/test_utils_unpacking.py +++ b/tests/unit/test_utils_unpacking.py @@ -197,6 +197,47 @@ def test_unpack_tar_filter(self) -> None: assert "is outside the destination" in str(e.value) + @pytest.mark.parametrize( + ("input_prefix", "unpack_prefix"), + [ + ("", ""), + ("dir/", ""), # pip ignores a common leading directory + ("dir/sub/", "sub/"), # pip ignores *one* common leading directory + ], + ) + def test_unpack_tar_links(self, input_prefix: str, unpack_prefix: str) -> None: + """ + Test unpacking a *.tar with file containing hard & soft links + """ + test_tar = os.path.join(self.tempdir, "test_tar_links.tar") + content = b"file content" + with tarfile.open(test_tar, "w") as mytar: + file_tarinfo = tarfile.TarInfo(input_prefix + "regular_file.txt") + file_tarinfo.size = len(content) + mytar.addfile(file_tarinfo, io.BytesIO(content)) + + hardlink_tarinfo = tarfile.TarInfo(input_prefix + "hardlink.txt") + hardlink_tarinfo.type = tarfile.LNKTYPE + hardlink_tarinfo.linkname = input_prefix + "regular_file.txt" + mytar.addfile(hardlink_tarinfo) + + symlink_tarinfo = tarfile.TarInfo(input_prefix + "symlink.txt") + symlink_tarinfo.type = tarfile.SYMTYPE + symlink_tarinfo.linkname = "regular_file.txt" + mytar.addfile(symlink_tarinfo) + + untar_file(test_tar, self.tempdir) + + unpack_dir = os.path.join(self.tempdir, unpack_prefix) + with open(os.path.join(unpack_dir, "regular_file.txt"), "rb") as f: + assert f.read() == content + + with open(os.path.join(unpack_dir, "hardlink.txt"), "rb") as f: + assert f.read() == content + + with open(os.path.join(unpack_dir, "symlink.txt"), "rb") as f: + assert f.read() == content + def test_unpack_tar_unicode(tmpdir: Path) -> None: test_tar = tmpdir / "test.tar" From a56129c58be6608e000d1510341a8e9372b9b4ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Bidoul?= Date: Wed, 3 Jul 2024 08:29:27 +0200 Subject: [PATCH 2/4] Merge pull request #12787 from mgorny/no-isol-tests Use `--no-build-isolation` in tests to avoid using Internet --- tests/conftest.py | 1 + tests/functional/test_config_settings.py | 1 + tests/functional/test_install.py | 13 +++++++++++-- tests/functional/test_self_update.py | 4 ++-- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 35101cef2c3..5934e9f95d4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -395,6 +395,7 @@ def pip_editable_parts( "-m", "pip", "install", + "--no-build-isolation", "--target", pip_self_install_path, "-e", diff --git a/tests/functional/test_config_settings.py b/tests/functional/test_config_settings.py index 3f88d9c3924..857722dd10d 100644 --- a/tests/functional/test_config_settings.py +++ b/tests/functional/test_config_settings.py @@ -118,6 +118,7 @@ def test_config_settings_implies_pep517( ) result = script.pip( "wheel", + "--no-build-isolation", "--config-settings", "FOO=Hello", pkg_path, diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py index 4e220328d29..0b377167bfe 100644 --- a/tests/functional/test_install.py +++ b/tests/functional/test_install.py @@ -687,7 +687,9 @@ def test_link_hash_in_dep_fails_require_hashes( # Build a wheel for pkga and compute its hash. wheelhouse = tmp_path / "wheehouse" wheelhouse.mkdir() - script.pip("wheel", "--no-deps", "-w", wheelhouse, project_path) + script.pip( + "wheel", "--no-build-isolation", "--no-deps", "-w", wheelhouse, project_path + ) digest = hashlib.sha256( wheelhouse.joinpath("pkga-1.0-py3-none-any.whl").read_bytes() ).hexdigest() @@ -905,7 +907,14 @@ def test_editable_install__local_dir_setup_requires_with_pyproject( "setup(name='dummy', setup_requires=['simplewheel'])\n" ) - script.pip("install", "--find-links", shared_data.find_links, "-e", local_dir) + script.pip( + "install", + "--no-build-isolation", + "--find-links", + shared_data.find_links, + "-e", + local_dir, + ) def test_install_pre__setup_requires_with_pyproject( diff --git a/tests/functional/test_self_update.py b/tests/functional/test_self_update.py index c507552208a..1331a87c319 100644 --- a/tests/functional/test_self_update.py +++ b/tests/functional/test_self_update.py @@ -11,12 +11,12 @@ def test_self_update_editable(script: Any, pip_src: Any) -> None: # Step 1. Install pip as non-editable. This is expected to succeed as # the existing pip in the environment is installed in editable mode, so # it only places a .pth file in the environment. - proc = script.pip("install", pip_src) + proc = script.pip("install", "--no-build-isolation", pip_src) assert proc.returncode == 0 # Step 2. Using the pip we just installed, install pip *again*, but # in editable mode. This could fail, as we'll need to uninstall the running # pip in order to install the new copy, and uninstalling pip while it's # running could fail. This test is specifically to ensure that doesn't # happen... - proc = script.pip("install", "-e", pip_src) + proc = script.pip("install", "--no-build-isolation", "-e", pip_src) assert proc.returncode == 0 From 76e82a43f8fb04695e834810df64f2d9a2ff6020 Mon Sep 17 00:00:00 2001 From: Pradyun Gedam Date: Sun, 7 Jul 2024 19:33:03 +0100 Subject: [PATCH 3/4] Bump for release --- NEWS.rst | 8 ++++++++ news/12781.bugfix.rst | 1 - src/pip/__init__.py | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) delete mode 100644 news/12781.bugfix.rst diff --git a/NEWS.rst b/NEWS.rst index d541e2b552c..3e7940c5f41 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -9,6 +9,14 @@ .. towncrier release notes start +24.1.2 (2024-07-07) +=================== + +Bug Fixes +--------- + +- Fix finding hardlink targets in tar files with an ignored top-level directory. (`#12781 `_) + 24.1.1 (2024-06-26) =================== diff --git a/news/12781.bugfix.rst b/news/12781.bugfix.rst deleted file mode 100644 index 6bd43d347db..00000000000 --- a/news/12781.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Fix finding hardlink targets in tar files with an ignored top-level directory. diff --git a/src/pip/__init__.py b/src/pip/__init__.py index e4b9a52f4b4..60c2d0e20bd 100644 --- a/src/pip/__init__.py +++ b/src/pip/__init__.py @@ -1,6 +1,6 @@ from typing import List, Optional -__version__ = "24.1.1" +__version__ = "24.1.2" def main(args: Optional[List[str]] = None) -> int: From 412211edd445d9313569457cdf2687c0c0eca4f2 Mon Sep 17 00:00:00 2001 From: Pradyun Gedam Date: Sun, 7 Jul 2024 19:33:04 +0100 Subject: [PATCH 4/4] Bump for development --- src/pip/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pip/__init__.py b/src/pip/__init__.py index 60c2d0e20bd..531eec2d928 100644 --- a/src/pip/__init__.py +++ b/src/pip/__init__.py @@ -1,6 +1,6 @@ from typing import List, Optional -__version__ = "24.1.2" +__version__ = "24.2.dev0" def main(args: Optional[List[str]] = None) -> int: