From d7890631620b231ad68a15d8bff23abf4884c185 Mon Sep 17 00:00:00 2001 From: John Sirois Date: Fri, 19 Aug 2022 11:17:25 -0700 Subject: [PATCH] Ensure `--output` always overwrites destination. (#1883) This was historically how Pex worked when there was only one layout and it partly worked with the introduction of the `loose` and `packed` layouts but failed either silently or loudly when transitioning from a `zipapp` layout to a directory-based layout or vice-versa. Fixes #1879 --- pex/layout.py | 26 +++++++++--- pex/pex_builder.py | 63 +++++++++++++--------------- tests/integration/test_issue_1879.py | 44 +++++++++++++++++++ 3 files changed, 93 insertions(+), 40 deletions(-) create mode 100644 tests/integration/test_issue_1879.py diff --git a/pex/layout.py b/pex/layout.py index e54b51d09..b11c84588 100644 --- a/pex/layout.py +++ b/pex/layout.py @@ -30,6 +30,22 @@ class Value(Enum.Value): PACKED = Value("packed") LOOSE = Value("loose") + @classmethod + def identify(cls, pex): + # type: (str) -> Layout.Value + """Assumes pex is a valid PEX and identifies its layout.""" + if zipfile.is_zipfile(pex) and is_script( + pex, + # N.B.: A PEX file need not be executable since it can always be run via `python a.pex`. + check_executable=False, + ): + return cls.ZIPAPP + + if os.path.isdir(pex) and zipfile.is_zipfile(os.path.join(pex, BOOTSTRAP_DIR)): + return cls.PACKED + + return cls.LOOSE + class _Layout(object): def __init__(self, path): @@ -253,14 +269,12 @@ def __str__(self): @contextmanager def _identify_layout(pex): # type: (str) -> Iterator[Optional[_Layout]] - if zipfile.is_zipfile(pex) and is_script( - pex, - # N.B.: A PEX file need not be executable since it can always be run via `python a.pex`. - check_executable=False, - ): + + layout = Layout.identify(pex) + if Layout.ZIPAPP is layout: with open_zip(pex) as zfp: yield _ZipAppPEX(pex, zfp) - elif os.path.isdir(pex) and zipfile.is_zipfile(os.path.join(pex, BOOTSTRAP_DIR)): + elif Layout.PACKED is layout: yield _PackedPEX(pex) else: # A loose PEX which needs no layout. diff --git a/pex/pex_builder.py b/pex/pex_builder.py index b275cc12c..e298127d1 100644 --- a/pex/pex_builder.py +++ b/pex/pex_builder.py @@ -15,10 +15,10 @@ chmod_plus_x, is_pyc_temporary_file, safe_copy, + safe_delete, safe_mkdir, safe_mkdtemp, safe_open, - safe_rmtree, ) from pex.compatibility import to_bytes from pex.compiler import Compiler @@ -645,29 +645,36 @@ def build( """ if not self._frozen: self.freeze(bytecode_compile=bytecode_compile) - if layout in (Layout.LOOSE, Layout.PACKED): - safe_rmtree(path) - - # N.B.: We want an atomic directory, but we don't expect a user to race themselves - # building to a single non-PEX_ROOT user-requested output path; so we don't grab an - # exclusive lock and dirty the target directory with a `.lck` file. - with atomic_directory(path, source="app", exclusive=False) as app_chroot: - if not app_chroot.is_finalized(): - dirname = os.path.join(app_chroot.work_dir, "app") - if layout == Layout.LOOSE: - shutil.copytree(self.path(), dirname) - else: - os.mkdir(dirname) - self._build_packedapp( - dirname=dirname, - deterministic_timestamp=deterministic_timestamp, - compress=compress, - ) + + # The PEX building proceeds assuming a user will not race themselves building to a single + # non-PEX_ROOT output path they requested; + tmp_pex = path + "~" + if os.path.exists(tmp_pex): + self._logger.warning("Previous binary unexpectedly exists, cleaning: {}".format(path)) + if os.path.isfile(tmp_pex): + os.unlink(tmp_pex) + else: + shutil.rmtree(tmp_pex, True) + + if layout == Layout.LOOSE: + shutil.copytree(self.path(), tmp_pex) + elif layout == Layout.PACKED: + self._build_packedapp( + dirname=tmp_pex, + deterministic_timestamp=deterministic_timestamp, + compress=compress, + ) else: self._build_zipapp( - filename=path, deterministic_timestamp=deterministic_timestamp, compress=compress + filename=tmp_pex, deterministic_timestamp=deterministic_timestamp, compress=compress ) + if os.path.isdir(path): + shutil.rmtree(path, True) + elif os.path.isdir(tmp_pex): + safe_delete(path) + os.rename(tmp_pex, path) + def _build_packedapp( self, dirname, # type: str @@ -756,23 +763,14 @@ def _build_zipapp( compress=True, # type: bool ): # type: (...) -> None - tmp_zip = filename + "~" - try: - os.unlink(tmp_zip) - self._logger.warning( - "Previous binary unexpectedly exists, cleaning: {}".format(tmp_zip) - ) - except OSError: - # The expectation is that the file does not exist, so continue - pass - with safe_open(tmp_zip, "ab") as pexfile: + with safe_open(filename, "wb") as pexfile: assert os.path.getsize(pexfile.name) == 0 pexfile.write(to_bytes("{}\n".format(self._shebang))) if self._header: pexfile.write(to_bytes(self._header)) with TRACER.timed("Zipping PEX file."): self._chroot.zip( - tmp_zip, + filename, mode="a", deterministic_timestamp=deterministic_timestamp, # When configured with a `copy_mode` of `CopyMode.SYMLINK`, we symlink distributions @@ -785,7 +783,4 @@ def _build_zipapp( exclude_file=is_pyc_temporary_file, compress=compress, ) - if os.path.exists(filename): - os.unlink(filename) - os.rename(tmp_zip, filename) chmod_plus_x(filename) diff --git a/tests/integration/test_issue_1879.py b/tests/integration/test_issue_1879.py new file mode 100644 index 000000000..cf5522c3d --- /dev/null +++ b/tests/integration/test_issue_1879.py @@ -0,0 +1,44 @@ +# Copyright 2022 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +import os.path + +import pytest + +from pex.layout import Layout +from pex.pex_info import PexInfo +from pex.testing import run_pex_command +from pex.typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any + + +# N.B.: The ordering of decorators is just so to get the test ids to match with what the test does +# for sanity's sake. +# +# So, we get "test_overwrite[zipapp-loose]" which indicates a test of the transition from +# zipapp (layout1) to loose (layout2) and "test_overwrite[packed-packed]" to indicate an overwrite +# of the packed layout by another packed layout, etc. +@pytest.mark.parametrize( + "layout2", [pytest.param(layout, id=layout.value) for layout in Layout.values()] +) +@pytest.mark.parametrize( + "layout1", [pytest.param(layout, id=layout.value) for layout in Layout.values()] +) +def test_overwrite( + tmpdir, # type: Any + layout1, # type: Layout.Value + layout2, # type: Layout.Value +): + # type: (...) -> None + + pex = os.path.join(str(tmpdir), "pex") + + run_pex_command(args=["-e", "one", "-o", pex, "--layout", layout1.value]).assert_success() + assert layout1 is Layout.identify(pex) + assert "one" == PexInfo.from_pex(pex).entry_point + + run_pex_command(args=["-e", "two", "-o", pex, "--layout", layout2.value]).assert_success() + assert layout2 is Layout.identify(pex) + assert "two" == PexInfo.from_pex(pex).entry_point