From b899f742f9dd93e2c8975115ed0a4d6b3b01610c Mon Sep 17 00:00:00 2001 From: John Sirois Date: Sun, 13 Dec 2020 14:32:07 -0800 Subject: [PATCH] Initial --venv support that needs polish only. No --seed for pex.bin.pex yet and no support for hashbang rewriting. --- pex/bin/pex.py | 69 +++++- pex/common.py | 18 +- pex/environment.py | 11 +- pex/inherit_path.py | 2 +- pex/pex_bootstrapper.py | 46 +++- pex/pex_builder.py | 106 +++++--- pex/pex_info.py | 71 +++++- pex/tools/commands/graph.py | 1 - pex/tools/commands/venv.py | 371 ++++++++++++++++------------ pex/tools/commands/virtualenv.py | 12 +- pex/util.py | 2 +- pex/variables.py | 83 ++++++- pex/venv_bin_path.py | 37 +++ tests/test_integration.py | 6 +- tests/test_pex_info.py | 9 - tests/test_unified_install_cache.py | 4 +- 16 files changed, 612 insertions(+), 236 deletions(-) create mode 100644 pex/venv_bin_path.py diff --git a/pex/bin/pex.py b/pex/bin/pex.py index 877b9463f..f788fcbe5 100755 --- a/pex/bin/pex.py +++ b/pex/bin/pex.py @@ -11,12 +11,11 @@ import os import sys import tempfile -import zipfile from argparse import Action, ArgumentDefaultsHelpFormatter, ArgumentParser, ArgumentTypeError from textwrap import TextWrapper from pex import pex_warnings -from pex.common import die, safe_delete, safe_mkdtemp +from pex.common import atomic_directory, die, open_zip, safe_mkdtemp from pex.inherit_path import InheritPath from pex.interpreter import PythonInterpreter from pex.interpreter_constraints import ( @@ -27,7 +26,7 @@ from pex.network_configuration import NetworkConfiguration from pex.orderedset import OrderedSet from pex.pex import PEX -from pex.pex_bootstrapper import iter_compatible_interpreters +from pex.pex_bootstrapper import ensure_venv, iter_compatible_interpreters from pex.pex_builder import PEXBuilder from pex.pip import ResolverVersion from pex.platforms import Platform @@ -35,6 +34,7 @@ from pex.tracer import TRACER from pex.typing import TYPE_CHECKING from pex.variables import ENV, Variables +from pex.venv_bin_path import BinPath from pex.version import __version__ if TYPE_CHECKING: @@ -88,6 +88,17 @@ def __call__(self, parser, namespace, value, option_str=None): setattr(namespace, self.dest, option_str == "--transitive") +class HandleVenvAction(Action): + def __init__(self, *args, **kwargs): + kwargs["nargs"] = "?" + kwargs["choices"] = (BinPath.PREPEND.value, BinPath.APPEND.value) + super(HandleVenvAction, self).__init__(*args, **kwargs) + + def __call__(self, parser, namespace, value, option_str=None): + bin_path = BinPath.FALSE if value is None else BinPath.for_value(value) + setattr(namespace, self.dest, bin_path) + + class PrintVariableHelpAction(Action): def __call__(self, parser, namespace, values, option_str=None): for variable_name, variable_type, variable_help in Variables.iter_help(): @@ -337,6 +348,19 @@ def configure_clp_pex_options(parser): "performed once and subsequent runs will enjoy lower startup latency.", ) + group.add_argument( + "--venv", + dest="venv", + metavar="{prepend,append}", + default=False, + action=HandleVenvAction, + help="Convert the pex file to a venv before executing it. If 'prepend' or 'append' is " + "specified then all scripts and console scripts provided by distributions in the pex file " + "will be added to the PATH. If the the pex file will be run multiple times under a stable " + "runtime PEX_ROOT the venv creation will only be done once and subsequent runs will enjoy " + "lower startup latency.", + ) + group.add_argument( "--always-write-cache", dest="always_write_cache", @@ -712,6 +736,16 @@ def configure_clp(): help="Specify the temporary directory Pex and its subprocesses should use.", ) + parser.add_argument( + "--seed", + "--no-seed", + dest="seed", + action=HandleBoolAction, + default=False, + help="Seed local Pex caches for the generated PEX and print out the command line to run " + "directly from the seed with.", + ) + parser.add_argument( "--help-variables", action=PrintVariableHelpAction, @@ -824,7 +858,7 @@ def to_python_interpreter(full_path_or_basename): path=safe_mkdtemp(), interpreter=interpreter, preamble=preamble, - include_tools=options.include_tools, + include_tools=options.include_tools or options.venv, ) if options.resources_directory: @@ -844,6 +878,8 @@ def to_python_interpreter(full_path_or_basename): pex_info = pex_builder.info pex_info.zip_safe = options.zip_safe pex_info.unzip = options.unzip + pex_info.venv = bool(options.venv) + pex_info.venv_bin_path = options.venv pex_info.pex_path = options.pex_path pex_info.always_write_cache = options.always_write_cache pex_info.ignore_errors = options.ignore_errors @@ -1005,14 +1041,31 @@ def warn_ignore_pex_root(set_via): if options.pex_name is not None: log("Saving PEX file to %s" % options.pex_name, V=options.verbosity) - tmp_name = options.pex_name + "~" - safe_delete(tmp_name) pex_builder.build( - tmp_name, + options.pex_name, bytecode_compile=options.compile, deterministic_timestamp=not options.use_system_time, ) - os.rename(tmp_name, options.pex_name) + if options.seed: + pex_path = pex.path() + with TRACER.timed("Seeding local caches for {}".format(pex_path)): + if options.unzip: + unzip_dir = pex.pex_info().unzip_dir + with atomic_directory(unzip_dir, exclusive=True) as chroot: + if chroot: + with TRACER.timed("Extracting {}".format(pex_path)): + with open_zip(options.pex_name) as pex_zip: + pex_zip.extractall(chroot) + print("{} {}".format(pex.interpreter.binary, unzip_dir)) + elif options.venv: + with TRACER.timed("Creating venv from {}".format(pex_path)): + print(ensure_venv(pex)) + else: + with TRACER.timed( + "Extracting code and distributions for {}".format(pex_path) + ): + pex.activate() + print(os.path.abspath(options.pex_name)) else: if not _compatible_with_current_platform(interpreter, options.platforms): log("WARNING: attempting to run PEX with incompatible platforms!", V=1) diff --git a/pex/common.py b/pex/common.py index 3910ba752..d4d8e421e 100644 --- a/pex/common.py +++ b/pex/common.py @@ -24,7 +24,7 @@ from pex.typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Any, DefaultDict, Iterable, Iterator, NoReturn, Optional, Set + from typing import Any, DefaultDict, Iterable, Iterator, NoReturn, Optional, Set, Sized # We use the start of MS-DOS time, which is what zipfiles use (see section 4.4.6 of # https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT). @@ -60,6 +60,22 @@ def die(msg, exit_code=1): sys.exit(exit_code) +def pluralize( + subject, # type: Sized + noun, # type: str +): + # type: (...) -> str + if noun == "": + return "" + count = len(subject) + if count == 1: + return noun + if noun[-1] in ("s", "x", "z") or noun[-2:] in ("sh", "ch"): + return noun + "es" + else: + return noun + "s" + + def safe_copy(source, dest, overwrite=False): # type: (str, str, bool) -> None def do_copy(): diff --git a/pex/environment.py b/pex/environment.py index da17e1f1b..5c078e1b5 100644 --- a/pex/environment.py +++ b/pex/environment.py @@ -25,7 +25,7 @@ from pex.util import CacheHelper, DistributionHelper if TYPE_CHECKING: - from typing import Container, Optional + from typing import Container, Iterator, Optional, Tuple, Iterable def _import_pkg_resources(): @@ -114,7 +114,7 @@ def explode_code( dest_dir, # type: str exclude=(), # type: Container[str] ): - # type: (...) -> None + # type: (...) -> Iterable[Tuple[str, str]] with TRACER.timed("Unzipping {}".format(pex_file)): with open_zip(pex_file) as pex_zip: pex_files = ( @@ -125,6 +125,13 @@ def explode_code( and name not in exclude ) pex_zip.extractall(dest_dir, pex_files) + return [ + ( + "{pex_file}:{zip_path}".format(pex_file=pex_file, zip_path=f), + os.path.join(dest_dir, f), + ) + for f in pex_files + ] @classmethod def _force_local(cls, pex_file, pex_info): diff --git a/pex/inherit_path.py b/pex/inherit_path.py index d2efd5d11..461e3a452 100644 --- a/pex/inherit_path.py +++ b/pex/inherit_path.py @@ -6,7 +6,7 @@ from pex.typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Tuple, Union + from typing import Union class InheritPath(object): diff --git a/pex/pex_bootstrapper.py b/pex/pex_bootstrapper.py index 1a7c695db..05ac63129 100644 --- a/pex/pex_bootstrapper.py +++ b/pex/pex_bootstrapper.py @@ -7,7 +7,7 @@ import sys from pex import pex_warnings -from pex.common import die +from pex.common import atomic_directory, die from pex.inherit_path import InheritPath from pex.interpreter import PythonInterpreter from pex.interpreter_constraints import UnsatisfiableInterpreterConstraintsError @@ -29,6 +29,8 @@ Callable, ) + from pex.pex import PEX + InterpreterIdentificationError = Tuple[str, str] InterpreterOrError = Union[PythonInterpreter, InterpreterIdentificationError] PathFilter = Callable[[str], bool] @@ -361,15 +363,51 @@ def _bootstrap(entry_point): return pex_info +def ensure_venv(pex): + # type: (PEX) -> str + pex_info = pex.pex_info() + venv_dir = pex_info.venv_dir + if venv_dir is None: + raise AssertionError( + "Expected PEX-INFO for {} to have the components of a venv directory".format(pex.path()) + ) + with atomic_directory(venv_dir, exclusive=True) as venv: + if venv: + from .tools.commands.venv import populate_venv_with_pex + from .tools.commands.virtualenv import Virtualenv + + virtualenv = Virtualenv.create(venv_dir=venv, interpreter=pex.interpreter) + populate_venv_with_pex( + virtualenv, + pex, + bin_path=pex_info.venv_bin_path, + python=os.path.join(venv_dir, "bin", os.path.basename(pex.interpreter.binary)), + collisions_ok=True, + ) + return os.path.join(venv_dir, "pex") + + # NB: This helper is used by the PEX bootstrap __main__.py code. def bootstrap_pex(entry_point): # type: (str) -> None pex_info = _bootstrap(entry_point) - maybe_reexec_pex(pex_info.interpreter_constraints) - from . import pex + if not ENV.PEX_TOOLS and pex_info.venv: + try: + target = find_compatible_interpreter( + interpreter_constraints=pex_info.interpreter_constraints, + ) + except UnsatisfiableInterpreterConstraintsError as e: + die(str(e)) + from . import pex + + venv_pex = ensure_venv(pex.PEX(entry_point, interpreter=target)) + os.execv(venv_pex, [venv_pex] + sys.argv[1:]) + else: + maybe_reexec_pex(pex_info.interpreter_constraints) + from . import pex - pex.PEX(entry_point).execute() + pex.PEX(entry_point).execute() # NB: This helper is used by third party libs - namely https://github.com/wickman/lambdex. diff --git a/pex/pex_builder.py b/pex/pex_builder.py index 4f50d23a3..814a2b221 100644 --- a/pex/pex_builder.py +++ b/pex/pex_builder.py @@ -3,11 +3,12 @@ from __future__ import absolute_import +import hashlib import logging import os from pex import pex_warnings -from pex.common import Chroot, chmod_plus_x, open_zip, safe_mkdir, safe_mkdtemp, temporary_dir +from pex.common import Chroot, chmod_plus_x, open_zip, safe_mkdtemp, safe_open, temporary_dir from pex.compatibility import to_bytes from pex.compiler import Compiler from pex.distribution_target import DistributionTarget @@ -21,30 +22,17 @@ BOOTSTRAP_DIR = ".bootstrap" -UNZIPPED_DIR = "unzipped_pexes" - -BOOTSTRAP_ENVIRONMENT = """ +BOOTSTRAP_ENVIRONMENT = """\ import os import sys def __maybe_run_unzipped__(pex_zip): - from pex.pex_info import PexInfo - pex_info = PexInfo.from_pex(pex_zip) - pex_info.update(PexInfo.from_env()) - if not pex_info.unzip: - return - - import hashlib from pex.common import atomic_directory, open_zip from pex.tracer import TRACER - from pex.variables import ENV + from pex.variables import unzip_dir - with TRACER.timed('Checking extraction for {{}}'.format(pex_zip)): - hasher = hashlib.sha1() - with open(pex_zip, 'rb') as fp: - hasher.update(fp.read()) - unzip_to = os.path.join(pex_info.pex_root, {unzipped_dir!r}, hasher.hexdigest()) + unzip_to = unzip_dir({pex_root!r}, {pex_hash!r}) with atomic_directory(unzip_to, exclusive=True) as chroot: if chroot: with TRACER.timed('Extracting {{}} to {{}}'.format(pex_zip, unzip_to)): @@ -59,6 +47,26 @@ def __maybe_run_unzipped__(pex_zip): os.execv(sys.executable, [sys.executable, unzip_to] + sys.argv[1:]) +def __maybe_run_venv__(pex): + from pex.common import is_exe + from pex.tracer import TRACER + from pex.variables import venv_dir + + venv_home = venv_dir({pex_root!r}, {pex_hash!r}, {interpreter_constraints!r}) + venv_pex = os.path.join(venv_home, 'pex') + if not is_exe(venv_pex): + # Code in bootstrap_pex will (re)create the venv after selecting the correct interpreter. + return + + TRACER.log('Executing pex venv for {{}} at {{}}'.format(pex, venv_pex)) + + # N.B.: This is read by pex.PEX and used to point sys.argv[0] back to the original pex before + # unconditionally scrubbing the env var and handing off to user code. + os.environ['__PEX_EXE__'] = pex + + os.execv(venv_pex, [venv_pex] + sys.argv[1:]) + + __entry_point__ = None if '__file__' in locals() and __file__ is not None: __entry_point__ = os.path.dirname(__file__) @@ -76,15 +84,24 @@ def __maybe_run_unzipped__(pex_zip): sys.path[0] = os.path.abspath(sys.path[0]) sys.path.insert(0, os.path.abspath(os.path.join(__entry_point__, {bootstrap_dir!r}))) -import zipfile -if zipfile.is_zipfile(__entry_point__): - __maybe_run_unzipped__(__entry_point__) +from pex.variables import ENV, Variables +if Variables.PEX_VENV.value_or(ENV, {is_venv!r}): + if not {is_venv!r}: + from pex.common import die + die( + "The PEX_VENV environment variable was set, but this PEX was not built with venv support " + "(Re-build the PEX file with `pex --venv ...`):" + ) + if not ENV.PEX_TOOLS: # We need to run from the PEX for access to tools. + __maybe_run_venv__(__entry_point__) +elif Variables.PEX_UNZIP.value_or(ENV, {is_unzip!r}): + import zipfile + if zipfile.is_zipfile(__entry_point__): + __maybe_run_unzipped__(__entry_point__) from pex.pex_bootstrapper import bootstrap_pex bootstrap_pex(__entry_point__) -""".format( - unzipped_dir=UNZIPPED_DIR, bootstrap_dir=BOOTSTRAP_DIR -) +""" class PEXBuilder(object): @@ -377,9 +394,6 @@ def _add_dist_wheel_file(self, path, dist_name): ).wait() return self._add_dist_dir(install_dir, dist_name) - def _prepare_code_hash(self): - self._pex_info.code_hash = CacheHelper.pex_hash(self._chroot.path()) - def add_distribution(self, dist, dist_name=None): """Add a :class:`pkg_resources.Distribution` from its handle. @@ -444,13 +458,26 @@ def _precompile_source(self): for compiled in compiled_relpaths: self._chroot.touch(compiled, label="bytecode") - def _prepare_manifest(self): + def _prepare_code(self): + self._pex_info.code_hash = CacheHelper.pex_code_hash(self._chroot.path()) + + hasher = hashlib.sha1() + hasher.update("code:{}".format(self._pex_info.code_hash).encode("utf-8")) + for location, sha in sorted(self._pex_info.distributions.items()): + hasher.update("{}:{}".format(location, sha).encode("utf-8")) + self._pex_info.pex_hash = hasher.hexdigest() + self._chroot.write(self._pex_info.dump().encode("utf-8"), PexInfo.PATH, label="manifest") - def _prepare_main(self): - self._chroot.write( - to_bytes(self._preamble + "\n" + BOOTSTRAP_ENVIRONMENT), "__main__.py", label="main" + bootstrap = BOOTSTRAP_ENVIRONMENT.format( + bootstrap_dir=BOOTSTRAP_DIR, + pex_root=self._pex_info.raw_pex_root, + pex_hash=self._pex_info.pex_hash, + interpreter_constraints=self._pex_info.interpreter_constraints, + is_unzip=self._pex_info.unzip, + is_venv=self._pex_info.venv, ) + self._chroot.write(to_bytes(self._preamble + "\n" + bootstrap), "__main__.py", label="main") def _copy_or_link(self, src, dst, label=None): if src is None: @@ -501,10 +528,8 @@ def freeze(self, bytecode_compile=True): only be called once and renders the PEXBuilder immutable. """ self._ensure_unfrozen("Freezing the environment") - self._prepare_code_hash() - self._prepare_manifest() self._prepare_bootstrap() - self._prepare_main() + self._prepare_code() if bytecode_compile: self._precompile_source() self._frozen = True @@ -521,21 +546,20 @@ def build(self, filename, bytecode_compile=True, deterministic_timestamp=False): """ if not self._frozen: self.freeze(bytecode_compile=bytecode_compile) + tmp_zip = filename + "~" try: - os.unlink(filename + "~") + os.unlink(tmp_zip) self._logger.warning( - "Previous binary unexpectedly exists, cleaning: %s" % (filename + "~") + "Previous binary unexpectedly exists, cleaning: {}".format(tmp_zip) ) except OSError: # The expectation is that the file does not exist, so continue pass - if os.path.dirname(filename): - safe_mkdir(os.path.dirname(filename)) - with open(filename + "~", "ab") as pexfile: + with safe_open(tmp_zip, "ab") as pexfile: assert os.path.getsize(pexfile.name) == 0 - pexfile.write(to_bytes("%s\n" % self._shebang)) - self._chroot.zip(filename + "~", mode="a", deterministic_timestamp=deterministic_timestamp) + pexfile.write(to_bytes("{}\n".format(self._shebang))) + self._chroot.zip(tmp_zip, mode="a", deterministic_timestamp=deterministic_timestamp) if os.path.exists(filename): os.unlink(filename) - os.rename(filename + "~", filename) + os.rename(tmp_zip, filename) chmod_plus_x(filename) diff --git a/pex/pex_info.py b/pex/pex_info.py index 5a9dcb43c..5a691dd2a 100644 --- a/pex/pex_info.py +++ b/pex/pex_info.py @@ -6,7 +6,7 @@ import json import os -from pex import pex_warnings +from pex import pex_warnings, variables from pex.common import can_write_dir, open_zip, safe_mkdtemp from pex.compatibility import PY2 from pex.compatibility import string as compatibility_string @@ -14,6 +14,7 @@ from pex.orderedset import OrderedSet from pex.typing import TYPE_CHECKING, cast from pex.variables import ENV, Variables +from pex.venv_bin_path import BinPath from pex.version import __version__ as pex_version if TYPE_CHECKING: @@ -32,6 +33,7 @@ class PexInfo(object): code_hash: str # sha1 hash of all names/code in the archive distributions: {dist_name: str} # map from distribution name (i.e. path in # the internal cache) to its cache key (sha1) + pex_hash: str # sha1 hash of all names/code and distributions in the pex requirements: list # list of requirements for this environment # Environment options @@ -120,6 +122,7 @@ def from_env(cls, env=ENV): "script": env.PEX_SCRIPT, "zip_safe": zip_safe, "unzip": Variables.PEX_UNZIP.strip_default(env), + "venv": Variables.PEX_VENV.strip_default(env), "inherit_path": inherit_path, "ignore_errors": Variables.PEX_IGNORE_ERRORS.strip_default(env), "always_write_cache": Variables.PEX_ALWAYS_CACHE.strip_default(env), @@ -212,6 +215,50 @@ def unzip(self): def unzip(self, value): self._pex_info["unzip"] = bool(value) + @property + def unzip_dir(self): + # type: () -> Optional[str] + if not self.unzip: + return None + if self.pex_hash is None: + raise ValueError("The unzip_dir was requested but no pex_hash was set.") + return variables.unzip_dir(self.pex_root, self.pex_hash) + + @property + def venv(self): + # type: () -> bool + """Whether or not PEX should be converted to a venv before it's executed. + + Creating a venv from a PEX is a operation that can be cached on the 1st run of a given PEX + file which results in lower startup latency in subsequent runs. + """ + return self._pex_info.get("venv", False) + + @venv.setter + def venv(self, value): + # type: (bool) -> None + self._pex_info["venv"] = bool(value) + + @property + def venv_bin_path(self): + # type: () -> BinPath.Value + """When run as a venv, whether or not to include `bin/` scripts on the PATH.""" + return BinPath.for_value(self._pex_info.get("venv_bin_path", BinPath.FALSE.value)) + + @venv_bin_path.setter + def venv_bin_path(self, value): + # type: (BinPath.Value) -> None + self._pex_info["venv_bin_path"] = str(value) + + @property + def venv_dir(self): + # type: () -> Optional[str] + if not self.venv: + return None + if self.pex_hash is None: + raise ValueError("The venv_dir was requested but no pex_hash was set.") + return variables.venv_dir(self.pex_root, self.pex_hash, self.interpreter_constraints) + @property def strip_pex_env(self): """Whether or not this PEX should strip `PEX_*` env vars before executing its entrypoint. @@ -291,12 +338,24 @@ def emit_warnings(self, value): @property def code_hash(self): + # type: () -> Optional[str] return self._pex_info.get("code_hash") @code_hash.setter def code_hash(self, value): + # type: (str) -> None self._pex_info["code_hash"] = value + @property + def pex_hash(self): + # type: () -> Optional[str] + return self._pex_info.get("pex_hash") + + @pex_hash.setter + def pex_hash(self, value): + # type: (str) -> None + self._pex_info["pex_hash"] = value + @property def entry_point(self): return self._get_safe("entry_point") @@ -335,9 +394,15 @@ def always_write_cache(self): def always_write_cache(self, value): self._pex_info["always_write_cache"] = bool(value) + @property + def raw_pex_root(self): + # type: () -> str + return cast(str, self._pex_info.get("pex_root", os.path.join("~", ".pex"))) + @property def pex_root(self): - pex_root = os.path.expanduser(self._pex_info.get("pex_root", os.path.join("~", ".pex"))) + # type: () -> str + pex_root = os.path.expanduser(self.raw_pex_root) if not can_write_dir(pex_root): tmp_root = safe_mkdtemp() pex_warnings.warn( @@ -350,6 +415,7 @@ def pex_root(self): @pex_root.setter def pex_root(self, value): + # type: (Optional[str]) -> None if value is None: self._pex_info.pop("pex_root", None) else: @@ -365,6 +431,7 @@ def install_cache(self): @property def zip_unsafe_cache(self): + #: type: () -> str return os.path.join(self.pex_root, "code") def update(self, other): diff --git a/pex/tools/commands/graph.py b/pex/tools/commands/graph.py index b2b688975..a0386c221 100644 --- a/pex/tools/commands/graph.py +++ b/pex/tools/commands/graph.py @@ -12,7 +12,6 @@ from pex.common import safe_mkdir from pex.dist_metadata import requires_dists -from pex.interpreter import PythonInterpreter from pex.pex import PEX from pex.tools.command import Command, Ok, OutputMixin, Result, try_open_file, try_run_program from pex.tools.commands.digraph import DiGraph diff --git a/pex/tools/commands/venv.py b/pex/tools/commands/venv.py index bf3f6c1f7..378ba957c 100644 --- a/pex/tools/commands/venv.py +++ b/pex/tools/commands/venv.py @@ -8,19 +8,21 @@ import shutil import zipfile from argparse import ArgumentParser, Namespace +from collections import defaultdict from textwrap import dedent from pex import pex_builder, pex_warnings -from pex.common import chmod_plus_x, safe_mkdir +from pex.common import chmod_plus_x, pluralize, safe_mkdir from pex.environment import PEXEnvironment from pex.pex import PEX from pex.tools.command import Command, Error, Ok, Result from pex.tools.commands.virtualenv import PipUnavailableError, Virtualenv from pex.tracer import TRACER from pex.typing import TYPE_CHECKING +from pex.venv_bin_path import BinPath if TYPE_CHECKING: - from typing import Tuple + from typing import Iterable, Iterator, Optional, Tuple # N.B.: We can't use shutil.copytree since we copy from multiple source locations to the same site @@ -30,9 +32,8 @@ def _copytree( src, # type: str dst, # type: str exclude=(), # type: Tuple[str, ...] - collisions_ok=False, # type: bool ): - # type: (...) -> None + # type: (...) -> Iterator[Tuple[str, str]] safe_mkdir(dst) link = True for root, dirs, files in os.walk(src, topdown=True, followlinks=False): @@ -50,6 +51,7 @@ def _copytree( for f in files: src_entry = os.path.join(root, f) dst_entry = os.path.join(dst, os.path.relpath(src_entry, src)) + yield src_entry, dst_entry try: if link: try: @@ -61,12 +63,206 @@ def _copytree( link = False shutil.copy(src_entry, dst_entry) except OSError as e: - if e.errno == errno.EEXIST: - pex_warnings.warn( - "Failed to overwrite {} with {}: {}".format(dst_entry, src_entry, e) + if e.errno != errno.EEXIST: + raise e + + +class CollisionError(Exception): + """Indicates multiple distributions provided the same file when merging a PEX into a venv.""" + + +def populate_venv_with_pex( + venv, # type: Virtualenv + pex, # type: PEX + bin_path=BinPath.FALSE, # type: BinPath.Value + python=None, # type: Optional[str] + collisions_ok=True, # type: bool +): + # type: (...) -> None + + venv_python = python or venv.interpreter.binary + venv_bin_dir = os.path.dirname(python) if python else venv.bin_dir + venv_dir = os.path.dirname(venv_bin_dir) if python else venv.venv_dir + + # 1. Populate the venv with the PEX contents. + provenance = defaultdict(list) + + def record_provenance(src_to_dst): + # type: (Iterable[Tuple[str, str]]) -> None + for src, dst in src_to_dst: + provenance[dst].append(src) + + pex_info = pex.pex_info() + if zipfile.is_zipfile(pex.path()): + record_provenance( + PEXEnvironment.explode_code( + pex.path(), pex_info, venv.site_packages_dir, exclude=("__main__.py",) + ) + ) + else: + record_provenance( + _copytree( + src=pex.path(), + dst=venv.site_packages_dir, + exclude=(pex_info.internal_cache, pex_builder.BOOTSTRAP_DIR, "__main__.py"), + ) + ) + + for dist in pex.activate(): + record_provenance( + _copytree(src=dist.location, dst=venv.site_packages_dir, exclude=("bin",)) + ) + dist_bin_dir = os.path.join(dist.location, "bin") + if os.path.isdir(dist_bin_dir): + record_provenance(_copytree(dist_bin_dir, venv.bin_dir)) + + collisions = {dst: srcs for dst, srcs in provenance.items() if len(srcs) > 1} + if collisions: + message_lines = [ + "Encountered {collision} building venv at {venv_dir} from {pex}:".format( + collision=pluralize(collisions, "collision"), venv_dir=venv_dir, pex=pex.path() + ) + ] + for index, (dst, srcs) in enumerate(collisions.items(), start=1): + message_lines.append( + "{index}. {dst} was provided by:\n\t{srcs}".format( + index=index, dst=dst, srcs="\n\t".join(srcs) + ) + ) + message = "\n".join(message_lines) + if not collisions_ok: + raise CollisionError(message) + pex_warnings.warn(message) + + # 2. Add a __main__ to the root of the venv for running the venv dir like a loose PEX dir + # and a main.py for running as a script. + main_contents = dedent( + """\ + #!{venv_python} -sE + + import os + import sys + + python = {venv_python!r} + if sys.executable != python: + sys.stderr.write("Re-execing from {{}}\\n".format(sys.executable)) + os.execv(python, [python, "-sE"] + sys.argv) + + os.environ["VIRTUAL_ENV"] = {venv_dir!r} + sys.path.extend(os.environ.get("PEX_EXTRA_SYS_PATH", "").split(os.pathsep)) + + bin_dir = {venv_bin_dir!r} + bin_path = os.environ.get("PEX_VENV_BIN_PATH", {bin_path!r}) + if bin_path != "false": + PATH = os.environ.get("PATH", "").split(os.pathsep) + if bin_path == "prepend": + PATH.insert(0, bin_dir) + elif bin_path == "append": + PATH.append(bin_dir) + else: + sys.stderr.write( + "PEX_VENV_BIN_PATH must be one of 'false', 'prepend' or 'append', given: " + "{{!r}}\\n".format( + bin_path ) - if not collisions_ok: - raise e + ) + sys.exit(1) + os.environ["PATH"] = os.pathsep.join(PATH) + + PEX_EXEC_OVERRIDE_KEYS = ("PEX_INTERPRETER", "PEX_SCRIPT", "PEX_MODULE") + pex_overrides = dict( + (key, os.environ.pop(key)) for key in PEX_EXEC_OVERRIDE_KEYS if key in os.environ + ) + if len(pex_overrides) > 1: + sys.stderr.write( + "Can only specify one of {{overrides}}; found: {{found}}\\n".format( + overrides=", ".join(PEX_EXEC_OVERRIDE_KEYS), + found=" ".join("{{}}={{}}".format(k, v) for k, v in pex_overrides.items()) + ) + ) + sys.exit(1) + + pex_script = pex_overrides.get("PEX_SCRIPT") + if pex_script: + script_path = os.path.join(bin_dir, pex_script) + os.execv(script_path, [script_path] + sys.argv[1:]) + + pex_interpreter = pex_overrides.get("PEX_INTERPRETER", "").lower() in ("1", "true") + PEX_INTERPRETER_ENTRYPOINT = "code:interact" + entry_point = ( + PEX_INTERPRETER_ENTRYPOINT + if pex_interpreter + else pex_overrides.get("PEX_MODULE", {entry_point!r} or PEX_INTERPRETER_ENTRYPOINT) + ) + if entry_point == PEX_INTERPRETER_ENTRYPOINT and len(sys.argv) > 1: + args = sys.argv[1:] + arg = args[0] + if arg == "-m": + if len(args) < 2: + sys.stderr.write("Argument expected for the -m option\\n") + sys.exit(2) + entry_point = module = args[1] + sys.argv = args[1:] + # Fall through to entry_point handling below. + else: + filename = arg + sys.argv = args + if arg == "-c": + if len(args) < 2: + sys.stderr.write("Argument expected for the -c option\\n") + sys.exit(2) + filename = "-c " + content = args[1] + sys.argv = ["-c"] + args[2:] + elif arg == "-": + content = sys.stdin.read() + else: + with open(arg) as fp: + content = fp.read() + + ast = compile(content, filename, "exec", flags=0, dont_inherit=1) + globals_map = globals().copy() + globals_map["__name__"] = "__main__" + globals_map["__file__"] = filename + locals_map = globals_map + {exec_ast} + sys.exit(0) + + module_name, _, function = entry_point.partition(":") + if not function: + import runpy + runpy.run_module(module_name, run_name="__main__") + else: + import importlib + module = importlib.import_module(module_name) + # N.B.: Functions may be hung off top-level objects in the module namespace, + # e.g.: Class.method; so we drill down through any attributes to the final function + # object. + namespace, func = module, None + for attr in function.split("."): + func = namespace = getattr(namespace, attr) + func() + """.format( + venv_python=venv_python, + venv_bin_dir=venv_bin_dir, + venv_dir=venv_dir, + bin_path=bin_path, + entry_point=pex_info.entry_point, + exec_ast=( + "exec ast in globals_map, locals_map" + if venv.interpreter.version[0] == 2 + else "exec(ast, globals_map, locals_map)" + ), + ) + ) + with open(venv.join_path("__main__.py"), "w") as fp: + fp.write(main_contents) + chmod_plus_x(fp.name) + os.symlink(os.path.basename(fp.name), venv.join_path("pex")) + + # 3. Re-write any (console) scripts to use the venv Python. + for script in venv.rewrite_scripts(python=python, python_args="-sE"): + TRACER.log("Re-writing {}".format(script)) class Venv(Command): @@ -83,8 +279,8 @@ def add_arguments(self, parser): parser.add_argument( "-b", "--bin-path", - choices=("prepend", "append"), - default=None, + choices=[choice.value for choice in BinPath.values], + default=BinPath.FALSE.value, help="Add the venv bin dir to the PATH in the __main__.py script.", ) parser.add_argument( @@ -100,7 +296,7 @@ def add_arguments(self, parser): default=False, help=( "Don't error if population of the venv encounters distributions in the PEX file " - "with colliding files." + "with colliding files, just emit a warning." ), ) parser.add_argument( @@ -118,154 +314,13 @@ def run( ): # type: (...) -> Result - # 0. Create an empty virtual environment to populate with the PEX code and dependencies. venv = Virtualenv.create(options.venv[0], interpreter=pex.interpreter, force=options.force) - - # 1. Populate the venv with the PEX contents. - pex_info = pex.pex_info() - if zipfile.is_zipfile(pex.path()): - PEXEnvironment.explode_code( - pex.path(), pex_info, venv.site_packages_dir, exclude=("__main__.py",) - ) - else: - _copytree( - src=pex.path(), - dst=venv.site_packages_dir, - exclude=(pex_info.internal_cache, pex_builder.BOOTSTRAP_DIR, "__main__.py"), - ) - - for dist in pex.activate(): - _copytree( - src=dist.location, - dst=venv.site_packages_dir, - exclude=("bin",), - collisions_ok=options.collisions_ok, - ) - dist_bin_dir = os.path.join(dist.location, "bin") - if os.path.isdir(dist_bin_dir): - _copytree(dist_bin_dir, venv.bin_dir, collisions_ok=options.collisions_ok) - - # 2. Add a __main__ to the root of the venv for running the venv dir like a loose PEX dir - # and a main.py for running as a script. - main_contents = dedent( - """\ - #!{venv_python} -sE - - import os - import sys - - python = {venv_python!r} - if sys.executable != python: - os.execv(python, [python, "-sE"] + sys.argv) - - os.environ["VIRTUAL_ENV"] = {venv_dir!r} - sys.path.extend(os.environ.get("PEX_EXTRA_SYS_PATH", "").split(os.pathsep)) - - bin_dir = {venv_bin_dir!r} - bin_path = {bin_path!r} - if bin_path: - PATH = os.environ.get("PATH", "").split(os.pathsep) - if bin_path == "prepend": - PATH = [bin_dir] + PATH - else: - PATH.append(bin_dir) - os.environ["PATH"] = os.pathsep.join(PATH) - - PEX_OVERRIDE_KEYS = ("PEX_INTERPRETER", "PEX_SCRIPT", "PEX_MODULE") - pex_overrides = dict( - (key, os.environ.pop(key)) for key in PEX_OVERRIDE_KEYS if key in os.environ - ) - if len(pex_overrides) > 1: - sys.stderr.write( - "Can only specify one of {{overrides}}; found: {{found}}\\n".format( - overrides=", ".join(PEX_OVERRIDE_KEYS), - found=" ".join("{{}}={{}}".format(k, v) for k, v in pex_overrides.items()) - ) - ) - sys.exit(1) - - pex_script = pex_overrides.get("PEX_SCRIPT") - if pex_script: - script_path = os.path.join(bin_dir, pex_script) - os.execv(script_path, [script_path] + sys.argv[1:]) - - pex_interpreter = pex_overrides.get("PEX_INTERPRETER", "").lower() in ("1", "true") - PEX_INTERPRETER_ENTRYPOINT = "code:interact" - entry_point = ( - PEX_INTERPRETER_ENTRYPOINT - if pex_interpreter - else pex_overrides.get("PEX_MODULE", {entry_point!r} or PEX_INTERPRETER_ENTRYPOINT) - ) - if entry_point == PEX_INTERPRETER_ENTRYPOINT and len(sys.argv) > 1: - args = sys.argv[1:] - arg = args[0] - if arg == "-m": - if len(args) < 2: - sys.stderr.write("Argument expected for the -m option\\n") - sys.exit(2) - entry_point = module = args[1] - sys.argv = args[1:] - # Fall through to entry_point handling below. - else: - filename = arg - sys.argv = args - if arg == "-c": - if len(args) < 2: - sys.stderr.write("Argument expected for the -c option\\n") - sys.exit(2) - filename = "-c " - content = args[1] - sys.argv = ["-c"] + args[2:] - elif arg == "-": - content = sys.stdin.read() - else: - with open(arg) as fp: - content = fp.read() - - ast = compile(content, filename, "exec", flags=0, dont_inherit=1) - globals_map = globals().copy() - globals_map["__name__"] = "__main__" - globals_map["__file__"] = filename - locals_map = globals_map - {exec_ast} - sys.exit(0) - - module_name, _, function = entry_point.partition(":") - if not function: - import runpy - runpy.run_module(module_name, run_name="__main__") - else: - import importlib - module = importlib.import_module(module_name) - # N.B.: Functions may be hung off top-level objects in the module namespace, - # e.g.: Class.method; so we drill down through any attributes to the final function - # object. - namespace, func = module, None - for attr in function.split("."): - func = namespace = getattr(namespace, attr) - func() - """.format( - venv_python=venv.interpreter.binary, - bin_path=options.bin_path, - venv_dir=venv.venv_dir, - venv_bin_dir=venv.bin_dir, - entry_point=pex_info.entry_point, - exec_ast=( - "exec ast in globals_map, locals_map" - if venv.interpreter.version[0] == 2 - else "exec(ast, globals_map, locals_map)" - ), - ) + populate_venv_with_pex( + venv, + pex, + bin_path=BinPath.for_value(options.bin_path), + collisions_ok=options.collisions_ok, ) - with open(venv.join_path("__main__.py"), "w") as fp: - fp.write(main_contents) - chmod_plus_x(fp.name) - os.symlink(os.path.basename(fp.name), venv.join_path("pex")) - - # 3. Re-write any (console) scripts to use the venv Python. - for script in venv.rewrite_scripts(python_args="-sE"): - TRACER.log("Re-writing {}".format(script)) - if options.pip: try: venv.install_pip() diff --git a/pex/tools/commands/virtualenv.py b/pex/tools/commands/virtualenv.py index 73b554801..ded1a0f83 100644 --- a/pex/tools/commands/virtualenv.py +++ b/pex/tools/commands/virtualenv.py @@ -91,7 +91,7 @@ def create( interpreter.execute(args=["-m", "venv", "--without-pip", venv_dir]) else: virtualenv_py = resource_string(__name__, "virtualenv_16.7.10_py") - with named_temporary_file(mode="w") as fp: + with named_temporary_file(mode="wb") as fp: fp.write(virtualenv_py) fp.close() interpreter.execute( @@ -156,8 +156,12 @@ def iter_executables(self): # type: () -> Iterator[str] return _iter_executables(self._bin_dir) - def rewrite_scripts(self, python_args=None): - # type: (Optional[str]) -> Iterator[str] + def rewrite_scripts( + self, + python=None, # type: Optional[str] + python_args=None, # type: Optional[str] + ): + # type: (...) -> Iterator[str] python_scripts = [] for executable in self.iter_executables(): if executable in self._base_executables: @@ -172,7 +176,7 @@ def rewrite_scripts(self, python_args=None): # which is has moved aside. for line in fi: if fi.isfirstline(): - shebang = [self._interpreter.binary] + shebang = [python or self._interpreter.binary] if python_args: shebang.append(python_args) print("#!{shebang}".format(shebang=" ".join(shebang))) diff --git a/pex/util.py b/pex/util.py index ce142b501..3963358f6 100644 --- a/pex/util.py +++ b/pex/util.py @@ -142,7 +142,7 @@ def _iter_non_pyc_files(cls, directory): yield os.path.relpath(os.path.join(root, f), normpath) @classmethod - def pex_hash(cls, d): + def pex_code_hash(cls, d): # type: (str) -> str """Return a reproducible hash of the contents of a loose PEX; excluding all `.pyc` files.""" names = sorted(f for f in cls._iter_non_pyc_files(d) if not f.startswith(".")) diff --git a/pex/variables.py b/pex/variables.py index ffe07e030..e9391604f 100644 --- a/pex/variables.py +++ b/pex/variables.py @@ -6,6 +6,7 @@ from __future__ import absolute_import +import hashlib import os import sys from contextlib import contextmanager @@ -14,9 +15,10 @@ from pex.common import can_write_dir, die, safe_mkdtemp from pex.inherit_path import InheritPath from pex.typing import TYPE_CHECKING, Generic, overload +from pex.venv_bin_path import BinPath if TYPE_CHECKING: - from typing import Callable, Dict, Iterator, Optional, Tuple, TypeVar, Type, Union + from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple, TypeVar, Type, Union _O = TypeVar("_O") _P = TypeVar("_P") @@ -93,6 +95,22 @@ def strip_default(self, instance): except NoValueError: return None + def value_or( + self, + instance, # type: _O + fallback, # type: _P + ): + # type: (...) -> _P + """Return the value of this property without the default value applied or else the fallback. + + If the property is not set `fallback` will be validated and returned. + + :param instance: The instance to check for the non-defaulted property value. + :return: The property value or `fallback` if not set. + """ + value = self.strip_default(instance) + return value if value is not None else self._validate(instance, fallback) + def validator(self, func): # type: (Callable[[_O, _P], _P]) -> Callable[[_O, _P], _P] """Associate a validation function with this defaulted property. @@ -331,6 +349,31 @@ def PEX_UNZIP(self): """ return self._get_bool("PEX_UNZIP") + @defaulted_property(default=False) + def PEX_VENV(self): + # type: () -> bool + """Boolean. + + Force this PEX to create a venv under $PEX_ROOT and re-execute from there. If the pex file + will be run multiple times under a stable $PEX_ROOT the venv creation will only be performed + once and subsequent runs will enjoy lower startup latency. + + Default: false. + """ + return self._get_bool("PEX_VENV") + + @defaulted_property(default=BinPath.FALSE) + def PEX_VENV_BIN_PATH(self): + # type: () -> BinPath.Value + """String (false|prepend|append). + + When running in PEX_VENV mode, optionally add the scripts and console scripts of + distributions in the PEX file to the $PATH. + + Default: false. + """ + return BinPath.for_value(self._get_string("PEX_VENV_BIN_PATH")) + @defaulted_property(default=False) def PEX_IGNORE_ERRORS(self): # type: () -> bool @@ -581,3 +624,41 @@ def __repr__(self): # Global singleton environment ENV = Variables() + + +# TODO(John Sirois): Extract a runtime.modes package to hold code dealing with runtime mode +# calculations. +def _expand_pex_root(pex_root): + # type: (str) -> str + return os.path.expanduser(Variables.PEX_ROOT.value_or(ENV, pex_root)) + + +def unzip_dir( + pex_root, # type: str + pex_hash, # type: str +): + # type: (...) -> str + return os.path.join(_expand_pex_root(pex_root), "unzipped_pexes", pex_hash) + + +def venv_dir( + pex_root, # type: str + pex_hash, # type: str + interpreter_constraints, # type: Iterable[str] +): + # type: (...) -> str + hasher = hashlib.sha1() + hasher.update( + "interpreter_constraints:{}".format(" or ".join(sorted(interpreter_constraints))).encode( + "utf-8" + ) + ) + hasher.update("PEX_PYTHON:{}".format(ENV.PEX_PYTHON).encode("utf-8")) + hasher.update("PEX_PYTHON_PATH:{}".format(ENV.PEX_PYTHON_PATH).encode("utf-8")) + interpreter_selection_hash = hasher.hexdigest() + return os.path.join( + _expand_pex_root(pex_root), + "venvs", + pex_hash, + interpreter_selection_hash, + ) diff --git a/pex/venv_bin_path.py b/pex/venv_bin_path.py new file mode 100644 index 000000000..057c8b8a9 --- /dev/null +++ b/pex/venv_bin_path.py @@ -0,0 +1,37 @@ +# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +from __future__ import absolute_import + + +class BinPath(object): + class Value(object): + def __init__(self, value): + # type: (str) -> None + self.value = value + + def __str__(self): + # type: () -> str + return str(self.value) + + def __repr__(self): + # type: () -> str + return repr(self.value) + + FALSE = Value("false") + PREPEND = Value("prepend") + APPEND = Value("append") + + values = FALSE, PREPEND, APPEND + + @classmethod + def for_value(cls, value): + # type: (str) -> BinPath.Value + for v in cls.values: + if v.value == value: + return v + raise ValueError( + "{!r} of type {} must be one of {}".format( + value, type(value), ", ".join(map(repr, cls.values)) + ) + ) diff --git a/tests/test_integration.py b/tests/test_integration.py index debdbf246..dad436db3 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -21,7 +21,6 @@ import pytest -from pex import pex_builder from pex.common import ( safe_copy, safe_mkdir, @@ -62,6 +61,7 @@ from pex.third_party import pkg_resources from pex.typing import TYPE_CHECKING from pex.util import DistributionHelper, named_temporary_file +from pex.variables import unzip_dir if TYPE_CHECKING: from typing import ( @@ -2325,7 +2325,9 @@ def test_unzip_mode(): ) assert ["quit re-exec", os.path.realpath(pex_file)] == output1.decode("utf-8").splitlines() - unzipped_cache = os.path.join(pex_root, pex_builder.UNZIPPED_DIR) + pex_hash = PexInfo.from_pex(pex_file).pex_hash + assert pex_hash is not None + unzipped_cache = unzip_dir(pex_root, pex_hash) assert os.path.isdir(unzipped_cache) shutil.rmtree(unzipped_cache) diff --git a/tests/test_pex_info.py b/tests/test_pex_info.py index aea4e23b2..e7253303b 100644 --- a/tests/test_pex_info.py +++ b/tests/test_pex_info.py @@ -117,15 +117,6 @@ def test_merge_split(): assert result == ["/pex/path/3", "/pex/path/4"] -def test_pex_root_set_none(): - # type: () -> None - pex_info = PexInfo.default() - pex_info.pex_root = None - - assert PexInfo.default().pex_root == pex_info.pex_root - assert os.path.expanduser("~/.pex") == pex_info.pex_root - - def test_pex_root_set_unwriteable(): # type: () -> None with temporary_dir() as td: diff --git a/tests/test_unified_install_cache.py b/tests/test_unified_install_cache.py index d85aa1592..ecfc6cd4f 100644 --- a/tests/test_unified_install_cache.py +++ b/tests/test_unified_install_cache.py @@ -109,8 +109,10 @@ def run(): # Force the standard pex to extract its code. An external tool like Pants would already know the # orignal source code file paths, but we need to discover here. + code_hash = colorized_isort_pex_info.code_hash + assert code_hash is not None colorized_isort_pex_code_dir = os.path.join( - colorized_isort_pex_info.zip_unsafe_cache, colorized_isort_pex_info.code_hash + colorized_isort_pex_info.zip_unsafe_cache, code_hash ) env = os.environ.copy() env.update(PEX_ROOT=ptex_cache, PEX_INTERPRETER="1", PEX_FORCE_LOCAL="1")