From a9a9871223420bdcac66cccf2da6b55259fa0a78 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Thu, 12 Mar 2020 07:07:58 -0500 Subject: [PATCH] first implement v1 of robot impl (#8793) See pantsbuild/pex#789 for a description of the issue, and https://docs.google.com/document/d/1B_g0Ofs8aQsJtrePPR1PCtSAKgBG1o59AhS_NwfFnbI/edit for a google doc with pros and cons of different approaches. @jsirois was extremely helpful throughout the development of this feature, and pantsbuild/pex#819 and pantsbuild/pex#821 in pex `2.0.3` will help to optimize several other aspects of this process when we can unrevert #8787. **Note:** `src/python/pants/backend/python/subsystems/pex_build_util.py` was removed in this PR, along with all floating references to it. With `--binary-py-generate-ipex`, a `.ipex` file will be created when `./pants binary` is run against a `python_binary()` target. This `.ipex` archive will create a `.pex` file and run it when first executed. The `.ipex` archive contains: - in `IPEX-INFO`: the source files to inject into the resulting `.pex`, and pypi indices to resolve requirements from. - in `BOOTSTRAP-PEX-INFO`: the `PEX-INFO` of the pex file that *would* have been generated if `--generate-ipex` was False. - in `ipex.py`: A bootstrap script which will generate a `.pex` file when the `.ipex` file is first executed. 
For a `.ipex` file which hydrates the `tensorflow==1.14.0` dependency when it is first run, this translates to a >100x decrease in file size: ```bash X> ls dist total 145M -rwxr-xr-x 1 dmcclanahan staff 267k Dec 10 21:11 dehydrated.ipex* -rwxr-xr-x 1 dmcclanahan staff 134M Dec 10 21:11 dehydrated.pex* ``` --- .../python/checks/tasks/python_eval.py | 10 +- examples/3rdparty/python/BUILD | 5 + .../python/example/tensorflow_custom_op/BUILD | 18 + .../tensorflow_custom_op/show_tf_version.py | 6 + .../backend/project_info/tasks/export.py | 2 +- src/python/pants/backend/python/register.py | 7 +- src/python/pants/backend/python/rules/BUILD | 1 + .../backend/python/subsystems/ipex/BUILD | 7 + .../python/subsystems/ipex/ipex_launcher.py | 129 +++++ .../python/subsystems/pex_build_util.py | 62 --- .../python/subsystems/python_native_code.py | 2 +- .../tasks/build_local_python_distributions.py | 2 +- .../backend/python/tasks/gather_sources.py | 8 +- .../local_python_distribution_artifact.py | 2 +- .../python/tasks/python_binary_create.py | 53 ++- .../python/tasks/resolve_requirements.py | 2 +- .../pants/backend/python/tasks/setup_py.py | 2 +- .../backend/python/tasks/unpack_wheels.py | 19 +- src/python/pants/option/custom_types.py | 11 +- src/python/pants/python/BUILD | 1 + src/python/pants/python/pex_build_util.py | 445 +++++++++++++++--- src/python/pants/util/strutil.py | 5 + .../pants_test/backend/python/tasks/BUILD | 1 + .../python/tasks/test_python_binary_create.py | 71 ++- .../tasks/test_python_binary_integration.py | 26 +- 25 files changed, 704 insertions(+), 193 deletions(-) create mode 100644 examples/src/python/example/tensorflow_custom_op/show_tf_version.py create mode 100644 src/python/pants/backend/python/subsystems/ipex/BUILD create mode 100644 src/python/pants/backend/python/subsystems/ipex/ipex_launcher.py delete mode 100644 src/python/pants/backend/python/subsystems/pex_build_util.py diff --git 
a/contrib/python/src/python/pants/contrib/python/checks/tasks/python_eval.py b/contrib/python/src/python/pants/contrib/python/checks/tasks/python_eval.py index 68de0738e8c..5fcf7ddf020 100644 --- a/contrib/python/src/python/pants/contrib/python/checks/tasks/python_eval.py +++ b/contrib/python/src/python/pants/contrib/python/checks/tasks/python_eval.py @@ -6,10 +6,6 @@ import pkgutil from pants.backend.python.interpreter_cache import PythonInterpreterCache -from pants.backend.python.subsystems.pex_build_util import ( - has_python_requirements, - has_python_sources, -) from pants.backend.python.targets.python_binary import PythonBinary from pants.backend.python.targets.python_library import PythonLibrary from pants.backend.python.targets.python_target import PythonTarget @@ -18,7 +14,11 @@ from pants.base.exceptions import TaskError from pants.base.generator import Generator, TemplateData from pants.base.workunit import WorkUnit, WorkUnitLabel -from pants.python.pex_build_util import PexBuilderWrapper +from pants.python.pex_build_util import ( + PexBuilderWrapper, + has_python_requirements, + has_python_sources, +) from pants.task.lint_task_mixin import LintTaskMixin from pants.util.dirutil import safe_concurrent_creation, safe_mkdir from pants.util.memo import memoized_property diff --git a/examples/3rdparty/python/BUILD b/examples/3rdparty/python/BUILD index bc32f95ffda..22b76c24b5e 100644 --- a/examples/3rdparty/python/BUILD +++ b/examples/3rdparty/python/BUILD @@ -33,3 +33,8 @@ unpacked_whls( ], within_data_subdir='purelib/tensorflow', ) + +files( + name='examples_python_3rdparty', + sources=['**/*'], +) diff --git a/examples/src/python/example/tensorflow_custom_op/BUILD b/examples/src/python/example/tensorflow_custom_op/BUILD index 185ed20e2ac..fd26226af0f 100644 --- a/examples/src/python/example/tensorflow_custom_op/BUILD +++ b/examples/src/python/example/tensorflow_custom_op/BUILD @@ -37,3 +37,21 @@ ctypes_compatible_cpp_library( ], 
ctypes_native_library=native_artifact(lib_name='tensorflow-zero-out-operator'), ) + + +python_binary( + name='show-tf-version', + source='show_tf_version.py', + dependencies=[ + 'examples/3rdparty/python:tensorflow', + ], + compatibility=['CPython>=3.6,<4'], +) + +files( + name='show-tf-version-files', + sources=['**/*'], + dependencies=[ + 'examples/3rdparty/python:examples_python_3rdparty', + ], +) diff --git a/examples/src/python/example/tensorflow_custom_op/show_tf_version.py b/examples/src/python/example/tensorflow_custom_op/show_tf_version.py new file mode 100644 index 00000000000..9f4e64e8a84 --- /dev/null +++ b/examples/src/python/example/tensorflow_custom_op/show_tf_version.py @@ -0,0 +1,6 @@ +# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +import tensorflow as tf + +print(f"tf version: {tf.__version__}") diff --git a/src/python/pants/backend/project_info/tasks/export.py b/src/python/pants/backend/project_info/tasks/export.py index dd9c7652c0a..a2f0367cf49 100644 --- a/src/python/pants/backend/project_info/tasks/export.py +++ b/src/python/pants/backend/project_info/tasks/export.py @@ -18,7 +18,6 @@ from pants.backend.jvm.tasks.ivy_task_mixin import IvyTaskMixin from pants.backend.project_info.tasks.export_version import DEFAULT_EXPORT_VERSION from pants.backend.python.interpreter_cache import PythonInterpreterCache -from pants.backend.python.subsystems.pex_build_util import has_python_requirements from pants.backend.python.targets.python_requirement_library import PythonRequirementLibrary from pants.backend.python.targets.python_target import PythonTarget from pants.backend.python.targets.python_tests import PythonTests @@ -32,6 +31,7 @@ from pants.java.distribution.distribution import DistributionLocator from pants.java.executor import SubprocessExecutor from pants.java.jar.jar_dependency_utils import M2Coordinate +from pants.python.pex_build_util import 
has_python_requirements from pants.task.console_task import ConsoleTask from pants.util.memo import memoized_property from pants.util.ordered_set import OrderedSet diff --git a/src/python/pants/backend/python/register.py b/src/python/pants/backend/python/register.py index b7e45d23b5c..19cbe2fce48 100644 --- a/src/python/pants/backend/python/register.py +++ b/src/python/pants/backend/python/register.py @@ -44,11 +44,16 @@ from pants.build_graph.build_file_aliases import BuildFileAliases from pants.build_graph.resources import Resources from pants.goal.task_registrar import TaskRegistrar as task +from pants.python.pex_build_util import PexBuilderWrapper from pants.python.python_requirement import PythonRequirement def global_subsystems(): - return python_native_code.PythonNativeCode, subprocess_environment.SubprocessEnvironment + return { + python_native_code.PythonNativeCode, + subprocess_environment.SubprocessEnvironment, + PexBuilderWrapper.Factory, + } def build_file_aliases(): diff --git a/src/python/pants/backend/python/rules/BUILD b/src/python/pants/backend/python/rules/BUILD index 1a9b8c39cf4..8a3c9ec6782 100644 --- a/src/python/pants/backend/python/rules/BUILD +++ b/src/python/pants/backend/python/rules/BUILD @@ -7,6 +7,7 @@ python_library( '3rdparty/python:setuptools', 'src/python/pants/backend/python/rules/coverage_plugin:plugin', 'src/python/pants/backend/python/subsystems', + 'src/python/pants/backend/python/subsystems/ipex', 'src/python/pants/build_graph', 'src/python/pants/engine/legacy:graph', 'src/python/pants/engine:build_files', diff --git a/src/python/pants/backend/python/subsystems/ipex/BUILD b/src/python/pants/backend/python/subsystems/ipex/BUILD new file mode 100644 index 00000000000..cebac0e31a1 --- /dev/null +++ b/src/python/pants/backend/python/subsystems/ipex/BUILD @@ -0,0 +1,7 @@ +# Copyright 2019 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). 
+ +# NB: This target is written into an .ipex file as the main script, and should not have any +# dependencies on another python code! .ipex files should always contain pex and setuptools +# requirements in order to run the main script! +python_library() diff --git a/src/python/pants/backend/python/subsystems/ipex/ipex_launcher.py b/src/python/pants/backend/python/subsystems/ipex/ipex_launcher.py new file mode 100644 index 00000000000..c2b84872df7 --- /dev/null +++ b/src/python/pants/backend/python/subsystems/ipex/ipex_launcher.py @@ -0,0 +1,129 @@ +# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +"""Entrypoint script for a "dehydrated" .ipex file generated with --generate-ipex. + +This script will "hydrate" a normal .pex file in the same directory, then execute it. +""" + +import json +import os +import sys +import tempfile + +from pex import resolver +from pex.common import open_zip +from pex.interpreter import PythonInterpreter +from pex.pex_builder import PEXBuilder +from pex.pex_info import PexInfo +from pkg_resources import Requirement + +APP_CODE_PREFIX = "user_files/" + + +def _strip_app_code_prefix(path): + if not path.startswith(APP_CODE_PREFIX): + raise ValueError( + "Path {path} in IPEX-INFO did not begin with '{APP_CODE_PREFIX}'.".format( + path=path, APP_CODE_PREFIX=APP_CODE_PREFIX + ) + ) + return path[len(APP_CODE_PREFIX) :] + + +def _log(message): + sys.stderr.write(message + "\n") + + +def _sanitize_requirements(requirements): + """Remove duplicate keys such as setuptools or pex which may be injected multiple times into the + resulting ipex when first executed.""" + project_names = [] + new_requirements = {} + + for r in requirements: + r = Requirement(r) + if r.marker and not r.marker.evaluate(): + continue + if r.name not in new_requirements: + project_names.append(r.name) + new_requirements[r.name] = str(r) + sanitized_requirements = [new_requirements[n] for n 
in project_names] + + return sanitized_requirements + + +def modify_pex_info(pex_info, **kwargs): + new_info = json.loads(pex_info.dump()) + new_info.update(kwargs) + return PexInfo.from_json(json.dumps(new_info)) + + +def _hydrate_pex_file(self, hydrated_pex_file): + # We extract source files into a temporary directory before creating the pex. + td = tempfile.mkdtemp() + + with open_zip(self) as zf: + # Populate the pex with the pinned requirements and distribution names & hashes. + bootstrap_info = PexInfo.from_json(zf.read("BOOTSTRAP-PEX-INFO")) + bootstrap_builder = PEXBuilder(pex_info=bootstrap_info, interpreter=PythonInterpreter.get()) + + # Populate the pex with the needed code. + try: + ipex_info = json.loads(zf.read("IPEX-INFO").decode("utf-8")) + for path in ipex_info["code"]: + unzipped_source = zf.extract(path, td) + bootstrap_builder.add_source( + unzipped_source, env_filename=_strip_app_code_prefix(path) + ) + except Exception as e: + raise ValueError( + "Error: {e}. The IPEX-INFO for this .ipex file was:\n{info}".format( + e=e, info=json.dumps(ipex_info, indent=4) + ) + ) + + # Perform a fully pinned intransitive resolve to hydrate the install cache. + resolver_settings = ipex_info["resolver_settings"] + + sanitized_requirements = _sanitize_requirements(bootstrap_info.requirements) + bootstrap_info = modify_pex_info(bootstrap_info, requirements=sanitized_requirements) + bootstrap_builder.info = bootstrap_info + + resolved_distributions = resolver.resolve( + requirements=bootstrap_info.requirements, + cache=bootstrap_info.pex_root, + platform="current", + transitive=False, + interpreter=bootstrap_builder.interpreter, + **resolver_settings + ) + # TODO: this shouldn't be necessary, as we should be able to use the same 'distributions' from + # BOOTSTRAP-PEX-INFO. When the .ipex is executed, the normal pex bootstrap fails to see these + # requirements or recognize that they should be pulled from the cache for some reason. 
+ for resolved_dist in resolved_distributions: + bootstrap_builder.add_distribution(resolved_dist.distribution) + + bootstrap_builder.build(hydrated_pex_file, bytecode_compile=False) + + +def main(self): + filename_base, ext = os.path.splitext(self) + + # If the ipex (this pex) is already named '.pex', ensure the output filename doesn't collide by + # inserting an intermediate '.ipex'! + if ext == ".pex": + hydrated_pex_file = "{filename_base}.ipex.pex".format(filename_base=filename_base) + else: + hydrated_pex_file = "{filename_base}.pex".format(filename_base=filename_base) + + if not os.path.exists(hydrated_pex_file): + _log("Hydrating {} to {}...".format(self, hydrated_pex_file)) + _hydrate_pex_file(self, hydrated_pex_file) + + os.execv(sys.executable, [sys.executable, hydrated_pex_file] + sys.argv[1:]) + + +if __name__ == "__main__": + self = sys.argv[0] + main(self) diff --git a/src/python/pants/backend/python/subsystems/pex_build_util.py b/src/python/pants/backend/python/subsystems/pex_build_util.py deleted file mode 100644 index a7dab3a7d6e..00000000000 --- a/src/python/pants/backend/python/subsystems/pex_build_util.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright 2017 Pants project contributors (see CONTRIBUTORS.md). -# Licensed under the Apache License, Version 2.0 (see LICENSE). 
- -from collections import defaultdict - -from pants.backend.python.targets.python_binary import PythonBinary -from pants.backend.python.targets.python_distribution import PythonDistribution -from pants.backend.python.targets.python_library import PythonLibrary -from pants.backend.python.targets.python_requirement_library import PythonRequirementLibrary -from pants.backend.python.targets.python_tests import PythonTests -from pants.build_graph.files import Files -from pants.build_graph.target import Target -from pants.util.ordered_set import OrderedSet - - -def is_python_target(tgt: Target) -> bool: - # We'd like to take all PythonTarget subclasses, but currently PythonThriftLibrary and - # PythonAntlrLibrary extend PythonTarget, and until we fix that (which we can't do until - # we remove the old python pipeline entirely) we want to ignore those target types here. - return isinstance(tgt, (PythonLibrary, PythonTests, PythonBinary)) - - -def has_python_sources(tgt: Target) -> bool: - return is_python_target(tgt) and tgt.has_sources() - - -def has_resources(tgt: Target) -> bool: - return isinstance(tgt, Files) and tgt.has_sources() - - -def is_local_python_dist(tgt: Target) -> bool: - return isinstance(tgt, PythonDistribution) - - -def has_python_requirements(tgt: Target) -> bool: - return isinstance(tgt, PythonRequirementLibrary) - - -def always_uses_default_python_platform(tgt: Target) -> bool: - return isinstance(tgt, PythonTests) - - -def may_have_explicit_python_platform(tgt: Target) -> bool: - return isinstance(tgt, PythonBinary) - - -def targets_by_platform(targets, python_setup): - targets_requiring_default_platforms = [] - explicit_platform_settings = defaultdict(OrderedSet) - for target in targets: - if always_uses_default_python_platform(target): - targets_requiring_default_platforms.append(target) - elif may_have_explicit_python_platform(target): - for platform in target.platforms if target.platforms else python_setup.platforms: - 
explicit_platform_settings[platform].add(target) - # There are currently no tests for this because they're super platform specific and it's hard for - # us to express that on CI, but https://github.com/pantsbuild/pants/issues/7616 has an excellent - # repro case for why this is necessary. - for target in targets_requiring_default_platforms: - for platform in python_setup.platforms: - explicit_platform_settings[platform].add(target) - return dict(explicit_platform_settings) diff --git a/src/python/pants/backend/python/subsystems/python_native_code.py b/src/python/pants/backend/python/subsystems/python_native_code.py index bd5c011a955..ff993a9e620 100644 --- a/src/python/pants/backend/python/subsystems/python_native_code.py +++ b/src/python/pants/backend/python/subsystems/python_native_code.py @@ -9,10 +9,10 @@ from pants.backend.native.subsystems.native_toolchain import NativeToolchain from pants.backend.native.targets.native_library import NativeLibrary -from pants.backend.python.subsystems import pex_build_util from pants.backend.python.targets.python_distribution import PythonDistribution from pants.base.exceptions import IncompatiblePlatformsError from pants.engine.rules import rule, subsystem_rule +from pants.python import pex_build_util from pants.python.python_setup import PythonSetup from pants.subsystem.subsystem import Subsystem from pants.util.memo import memoized_property diff --git a/src/python/pants/backend/python/tasks/build_local_python_distributions.py b/src/python/pants/backend/python/tasks/build_local_python_distributions.py index 22c53577853..451545c6d1f 100644 --- a/src/python/pants/backend/python/tasks/build_local_python_distributions.py +++ b/src/python/pants/backend/python/tasks/build_local_python_distributions.py @@ -12,13 +12,13 @@ from pants.backend.native.targets.native_library import NativeLibrary from pants.backend.native.tasks.link_shared_libraries import SharedLibrary -from pants.backend.python.subsystems.pex_build_util import 
is_local_python_dist from pants.backend.python.subsystems.python_native_code import PythonNativeCode from pants.backend.python.targets.python_requirement_library import PythonRequirementLibrary from pants.base.build_environment import get_buildroot from pants.base.exceptions import TargetDefinitionException, TaskError from pants.base.workunit import WorkUnitLabel from pants.build_graph.address import Address +from pants.python.pex_build_util import is_local_python_dist from pants.python.python_requirement import PythonRequirement from pants.python.setup_py_runner import SetupPyRunner from pants.task.task import Task diff --git a/src/python/pants/backend/python/tasks/gather_sources.py b/src/python/pants/backend/python/tasks/gather_sources.py index f27a2bf5438..c417ef27025 100644 --- a/src/python/pants/backend/python/tasks/gather_sources.py +++ b/src/python/pants/backend/python/tasks/gather_sources.py @@ -7,14 +7,14 @@ from pex.pex import PEX from pex.pex_builder import PEXBuilder -from pants.backend.python.subsystems.pex_build_util import ( +from pants.base.exceptions import TaskError +from pants.invalidation.cache_manager import VersionedTargetSet +from pants.python.pex_build_util import ( + PexBuilderWrapper, has_python_sources, has_resources, is_python_target, ) -from pants.base.exceptions import TaskError -from pants.invalidation.cache_manager import VersionedTargetSet -from pants.python.pex_build_util import PexBuilderWrapper from pants.task.task import Task from pants.util.dirutil import safe_concurrent_creation from pants.util.ordered_set import OrderedSet diff --git a/src/python/pants/backend/python/tasks/local_python_distribution_artifact.py b/src/python/pants/backend/python/tasks/local_python_distribution_artifact.py index e8f0e045642..c7b5c72d153 100644 --- a/src/python/pants/backend/python/tasks/local_python_distribution_artifact.py +++ b/src/python/pants/backend/python/tasks/local_python_distribution_artifact.py @@ -3,8 +3,8 @@ import os -from 
pants.backend.python.subsystems.pex_build_util import is_local_python_dist from pants.base.build_environment import get_buildroot +from pants.python.pex_build_util import is_local_python_dist from pants.task.task import Task from pants.util.dirutil import safe_mkdir from pants.util.fileutil import atomic_copy diff --git a/src/python/pants/backend/python/tasks/python_binary_create.py b/src/python/pants/backend/python/tasks/python_binary_create.py index ea638659b21..312959f1511 100644 --- a/src/python/pants/backend/python/tasks/python_binary_create.py +++ b/src/python/pants/backend/python/tasks/python_binary_create.py @@ -2,24 +2,25 @@ # Licensed under the Apache License, Version 2.0 (see LICENSE). import os +from typing import cast from pex.interpreter import PythonInterpreter from pex.pex_builder import PEXBuilder from pex.pex_info import PexInfo -from pants.backend.python.subsystems.pex_build_util import ( - has_python_requirements, - has_python_sources, - has_resources, - is_python_target, -) from pants.backend.python.subsystems.python_native_code import PythonNativeCode from pants.backend.python.targets.python_binary import PythonBinary from pants.backend.python.targets.python_requirement_library import PythonRequirementLibrary from pants.base.build_environment import get_buildroot from pants.base.exceptions import TaskError from pants.build_graph.target_scopes import Scopes -from pants.python.pex_build_util import PexBuilderWrapper +from pants.python.pex_build_util import ( + PexBuilderWrapper, + has_python_requirements, + has_python_sources, + has_resources, + is_python_target, +) from pants.task.task import Task from pants.util.contextutil import temporary_dir from pants.util.dirutil import safe_mkdir_for @@ -42,6 +43,25 @@ def register_options(cls, register): "that the generated PEX will not be reproducible; that is, future runs of `./pants binary` will not " "create the same byte-for-byte identical .pex files.", ) + register( + "--generate-ipex", + 
type=bool, + default=False, + fingerprint=True, + help='Whether to generate a .ipex file, which will "hydrate" its dependencies when ' + "it is first executed, rather than at build time (the normal pex behavior). " + "This option can reduce the size of a shipped pex file by over 100x for common" + "deps such as tensorflow, but it does require access to the network when " + "first executed.", + ) + register( + "--output-file-extension", + type=str, + default=None, + fingerprint=True, + help="What extension to output the file with. This can be used to differentiate " + "ipex files from others.", + ) @classmethod def subsystem_dependencies(cls): @@ -81,6 +101,17 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._distdir = self.get_options().pants_distdir + @property + def _generate_ipex(self) -> bool: + return cast(bool, self.get_options().generate_ipex) + + def _get_output_pex_filename(self, target_name): + file_ext = self.get_options().output_file_extension + if file_ext is None: + file_ext = ".ipex" if self._generate_ipex else ".pex" + + return f"{target_name}{file_ext}" + def execute(self): binaries = self.context.targets(self.is_binary) @@ -99,7 +130,9 @@ def execute(self): python_deployable_archive = self.context.products.get("deployable_archives") python_pex_product = self.context.products.get("pex_archives") for vt in invalidation_check.all_vts: - pex_path = os.path.join(vt.results_dir, f"{vt.target.name}.pex") + pex_path = os.path.join( + vt.results_dir, self._get_output_pex_filename(vt.target.name) + ) if not vt.valid: self.context.log.debug(f"cache for {vt.target} is invalid, rebuilding") self._create_binary(vt.target, vt.results_dir) @@ -142,6 +175,7 @@ def _create_binary(self, binary_tgt, results_dir): path=tmpdir, interpreter=interpreter, pex_info=pex_info, copy=True ), log=self.context.log, + generate_ipex=self._generate_ipex, ) if binary_tgt.shebang: @@ -184,6 +218,7 @@ def _create_binary(self, binary_tgt, results_dir): 
pex_builder.add_requirement_libs_from(req_tgts, platforms=binary_tgt.platforms) # Build the .pex file. - pex_path = os.path.join(results_dir, f"{binary_tgt.name}.pex") + pex_filename = self._get_output_pex_filename(binary_tgt.name) + pex_path = os.path.join(results_dir, pex_filename) pex_builder.build(pex_path) return pex_path diff --git a/src/python/pants/backend/python/tasks/resolve_requirements.py b/src/python/pants/backend/python/tasks/resolve_requirements.py index a2f7c57fcf1..e0ab6dcae34 100644 --- a/src/python/pants/backend/python/tasks/resolve_requirements.py +++ b/src/python/pants/backend/python/tasks/resolve_requirements.py @@ -3,8 +3,8 @@ from pex.interpreter import PythonInterpreter -from pants.backend.python.subsystems.pex_build_util import has_python_requirements, is_python_target from pants.backend.python.tasks.resolve_requirements_task_base import ResolveRequirementsTaskBase +from pants.python.pex_build_util import has_python_requirements, is_python_target class ResolveRequirements(ResolveRequirementsTaskBase): diff --git a/src/python/pants/backend/python/tasks/setup_py.py b/src/python/pants/backend/python/tasks/setup_py.py index 9d8cd96dfab..a83cbe22e8a 100644 --- a/src/python/pants/backend/python/tasks/setup_py.py +++ b/src/python/pants/backend/python/tasks/setup_py.py @@ -19,7 +19,6 @@ from twitter.common.dirutil.chroot import Chroot from pants.backend.python.rules.setup_py_util import distutils_repr -from pants.backend.python.subsystems.pex_build_util import is_local_python_dist from pants.backend.python.targets.python_binary import PythonBinary from pants.backend.python.targets.python_requirement_library import PythonRequirementLibrary from pants.backend.python.targets.python_target import PythonTarget @@ -30,6 +29,7 @@ from pants.build_graph.address_lookup_error import AddressLookupError from pants.build_graph.build_graph import sort_targets from pants.build_graph.resources import Resources +from pants.python.pex_build_util import 
is_local_python_dist from pants.python.setup_py_runner import SetupPyRunner from pants.task.task import Task from pants.util.contextutil import temporary_file diff --git a/src/python/pants/backend/python/tasks/unpack_wheels.py b/src/python/pants/backend/python/tasks/unpack_wheels.py index b32b47953fc..ae8230822e4 100644 --- a/src/python/pants/backend/python/tasks/unpack_wheels.py +++ b/src/python/pants/backend/python/tasks/unpack_wheels.py @@ -48,9 +48,6 @@ def subsystem_dependencies(cls): PythonSetup, ) - class SingleDistExtractionError(Exception): - pass - def _get_matching_wheel(self, pex_path, interpreter, requirements, module_name): """Use PexBuilderWrapper to resolve a single wheel from the requirement specs using pex. @@ -63,22 +60,10 @@ def _get_matching_wheel(self, pex_path, interpreter, requirements, module_name): builder=PEXBuilder(path=chroot, interpreter=interpreter), log=self.context.log ) - resolved_dists = pex_builder.resolve_distributions( - requirements, platforms=["current"] + return pex_builder.extract_single_dist_for_current_platform( + requirements, dist_key=module_name ) - matched_dists = [ - resolved_dist.distribution - for resolved_dist in resolved_dists - if resolved_dist.distribution.key == module_name - ] - if len(matched_dists) != 1: - raise self.SingleDistExtractionError( - f"Exactly one dist was expected to match name {module_name} in requirements " - f"{requirements}, found {matched_dists}" - ) - return matched_dists[0] - @memoized_method def _compatible_interpreter(self, unpacked_whls): constraints = PythonSetup.global_instance().compatibility_or_constraints( diff --git a/src/python/pants/option/custom_types.py b/src/python/pants/option/custom_types.py index 29720ef457b..01c4631fc59 100644 --- a/src/python/pants/option/custom_types.py +++ b/src/python/pants/option/custom_types.py @@ -4,7 +4,7 @@ import os import re from enum import Enum -from typing import Dict, Iterable, List, Pattern, Sequence +from typing import Dict, Iterable, 
List, Optional, Pattern, Sequence, Type, Union from pants.base.deprecated import warn_or_error from pants.option.errors import ParseError @@ -26,6 +26,15 @@ def __init__(self) -> None: "UnsetBool cannot be instantiated. It should only be used as a " "sentinel type." ) + @classmethod + def coerce_bool(cls, value: Optional[Union[Type["UnsetBool"], bool]], default: bool) -> bool: + if value is None: + return default + if value is cls: + return default + assert isinstance(value, bool) + return value + def dict_option(s: str) -> "DictValueComponent": """An option of type 'dict'. diff --git a/src/python/pants/python/BUILD b/src/python/pants/python/BUILD index f6448361851..9f55712b207 100644 --- a/src/python/pants/python/BUILD +++ b/src/python/pants/python/BUILD @@ -8,6 +8,7 @@ python_library( '3rdparty/python:setuptools', 'src/python/pants/base:build_environment', 'src/python/pants/base:exceptions', + 'src/python/pants/backend/python/subsystems/ipex', 'src/python/pants/build_graph', 'src/python/pants/option', 'src/python/pants/subsystem', diff --git a/src/python/pants/python/pex_build_util.py b/src/python/pants/python/pex_build_util.py index 00c4d9f41d3..b6d2bc546f4 100644 --- a/src/python/pants/python/pex_build_util.py +++ b/src/python/pants/python/pex_build_util.py @@ -1,31 +1,92 @@ # Copyright 2017 Pants project contributors (see CONTRIBUTORS.md). # Licensed under the Apache License, Version 2.0 (see LICENSE). 
+import json import logging import os +from collections import defaultdict from pathlib import Path -from typing import Callable, Sequence, Set +from typing import Callable, Dict, List, Optional, Sequence, Set, Tuple +from pex.interpreter import PythonInterpreter from pex.pex_builder import PEXBuilder -from pex.resolver import resolve_multi +from pex.pex_info import PexInfo +from pex.platforms import Platform +from pex.resolver import resolve from pex.util import DistributionHelper - +from pex.version import __version__ as pex_version +from pkg_resources import Distribution, get_provider + +from pants.backend.python.subsystems.ipex import ipex_launcher +from pants.backend.python.targets.python_binary import PythonBinary +from pants.backend.python.targets.python_distribution import PythonDistribution +from pants.backend.python.targets.python_library import PythonLibrary +from pants.backend.python.targets.python_requirement_library import PythonRequirementLibrary +from pants.backend.python.targets.python_tests import PythonTests from pants.base.build_environment import get_buildroot from pants.base.exceptions import TaskError from pants.build_graph.files import Files from pants.build_graph.target import Target +from pants.option.custom_types import UnsetBool from pants.python.python_repos import PythonRepos from pants.python.python_requirement import PythonRequirement from pants.python.python_setup import PythonSetup from pants.subsystem.subsystem import Subsystem +from pants.util.collections import assert_single_element from pants.util.contextutil import temporary_file from pants.util.ordered_set import OrderedSet +from pants.util.strutil import module_dirname + + +def is_python_target(tgt: Target) -> bool: + # We'd like to take all PythonTarget subclasses, but currently PythonThriftLibrary and + # PythonAntlrLibrary extend PythonTarget, and until we fix that (which we can't do until + # we remove the old python pipeline entirely) we want to ignore those target 
types here. + return isinstance(tgt, (PythonLibrary, PythonTests, PythonBinary)) + + +def has_python_sources(tgt: Target) -> bool: + return is_python_target(tgt) and tgt.has_sources() + + +def is_local_python_dist(tgt: Target) -> bool: + return isinstance(tgt, PythonDistribution) def has_resources(tgt: Target) -> bool: return isinstance(tgt, Files) and tgt.has_sources() +def has_python_requirements(tgt: Target) -> bool: + return isinstance(tgt, PythonRequirementLibrary) + + +def always_uses_default_python_platform(tgt: Target) -> bool: + return isinstance(tgt, PythonTests) + + +def may_have_explicit_python_platform(tgt: Target) -> bool: + return isinstance(tgt, PythonBinary) + + +def targets_by_platform(targets, python_setup): + targets_requiring_default_platforms = [] + explicit_platform_settings = defaultdict(OrderedSet) + for target in targets: + if always_uses_default_python_platform(target): + targets_requiring_default_platforms.append(target) + elif may_have_explicit_python_platform(target): + for platform in target.platforms if target.platforms else python_setup.platforms: + explicit_platform_settings[platform].add(target) + # There are currently no tests for this because they're super platform specific and it's hard for + # us to express that on CI, but https://github.com/pantsbuild/pants/issues/7616 has an excellent + # repro case for why this is necessary. 
+ for target in targets_requiring_default_platforms: + for platform in python_setup.platforms: + explicit_platform_settings[platform].add(target) + return dict(explicit_platform_settings) + + def identify_missing_init_files(sources: Sequence[str]) -> Set[str]: """Return the list of paths that would need to be added to ensure that every package has an __init__.py.""" @@ -42,31 +103,6 @@ def identify_missing_init_files(sources: Sequence[str]) -> Set[str]: return {os.path.join(package, "__init__.py") for package in packages} - set(sources) -def _create_source_dumper(builder: PEXBuilder, tgt: Target) -> Callable[[str], None]: - buildroot = get_buildroot() - - def get_chroot_path(relpath: str) -> str: - if type(tgt) == Files: - # Loose `Files`, as opposed to `Resources` or `PythonTarget`s, have no (implied) package - # structure and so we chroot them relative to the build root so that they can be accessed - # via the normal Python filesystem APIs just as they would be accessed outside the - # chrooted environment. NB: This requires we mark the pex as not zip safe so - # these `Files` can still be accessed in the context of a built pex distribution. 
- builder.info.zip_safe = False - return relpath - return str(Path(relpath).relative_to(tgt.target_base)) - - def dump_source(relpath: str) -> None: - source_path = str(Path(buildroot, relpath)) - dest_path = get_chroot_path(relpath) - if has_resources(tgt): - builder.add_resource(filename=source_path, env_filename=dest_path) - else: - builder.add_source(filename=source_path, env_filename=dest_path) - - return dump_source - - class PexBuilderWrapper: """Wraps PEXBuilder to provide an API that consumes targets and other BUILD file entities.""" @@ -76,12 +112,24 @@ class Factory(Subsystem): @classmethod def register_options(cls, register): super(PexBuilderWrapper.Factory, cls).register_options(register) + # TODO: make an analogy to cls.register_jvm_tool that can be overridden for python subsystems + # by a python_requirement_library() target, not just via pants.ini! register( "--setuptools-version", advanced=True, - default="44.0.0", - help="The setuptools version to include in the pex if namespace packages need to be " - "injected.", + default="40.6.3", + fingerprint=True, + help="The setuptools version to include in the pex if namespace packages need " + "to be injected.", + ) + register( + "--pex-version", + advanced=True, + default=pex_version, + fingerprint=True, + help="The pex version to include in any generated ipex files. 
" + "NOTE: This should ideally be the same as the pex version which pants " + f"itself depends on, which right now is {pex_version}.", ) @classmethod @@ -92,9 +140,10 @@ def subsystem_dependencies(cls): ) @classmethod - def create(cls, builder, log=None): + def create(cls, builder, log=None, generate_ipex=False): options = cls.global_instance().get_options() setuptools_requirement = f"setuptools=={options.setuptools_version}" + pex_requirement = f"pex=={options.pex_version}" log = log or logging.getLogger(__name__) @@ -103,93 +152,243 @@ def create(cls, builder, log=None): python_repos_subsystem=PythonRepos.global_instance(), python_setup_subsystem=PythonSetup.global_instance(), setuptools_requirement=PythonRequirement(setuptools_requirement), + pex_requirement=PythonRequirement(pex_requirement), log=log, + generate_ipex=generate_ipex, ) def __init__( - self, builder, python_repos_subsystem, python_setup_subsystem, setuptools_requirement, log + self, + builder: PEXBuilder, + python_repos_subsystem: PythonRepos, + python_setup_subsystem: PythonSetup, + setuptools_requirement: PythonRequirement, + pex_requirement: PythonRequirement, + log, + generate_ipex: bool = False, ): - assert isinstance(builder, PEXBuilder) - assert isinstance(python_repos_subsystem, PythonRepos) - assert isinstance(python_setup_subsystem, PythonSetup) - assert isinstance(setuptools_requirement, PythonRequirement) assert log is not None self._builder = builder self._python_repos_subsystem = python_repos_subsystem self._python_setup_subsystem = python_setup_subsystem self._setuptools_requirement = setuptools_requirement + self._pex_requirement = pex_requirement self._log = log - self._distributions = {} + self._distributions: Dict[str, Distribution] = {} self._frozen = False + self._generate_ipex = generate_ipex + # If we generate a .ipex, we need to ensure all the code we copy into the underlying PEXBuilder + # is also added to the new PEXBuilder created in 
`._shuffle_original_build_info_into_ipex()`. + self._all_added_sources_resources: List[Path] = [] + # If we generate a dehydrated "ipex" file, we need to make sure that it is aware of any special + # find_links repos attached to any single requirement, so it can later resolve those + # requirements when it is first bootstrapped, using the same resolve options. + self._all_find_links: OrderedSet[str] = OrderedSet() + def add_requirement_libs_from(self, req_libs, platforms=None): """Multi-platform dependency resolution for PEX files. + :param builder: Dump the requirements into this builder. + :param interpreter: The :class:`PythonInterpreter` to resolve requirements for. :param req_libs: A list of :class:`PythonRequirementLibrary` targets to resolve. + :param log: Use this logger. :param platforms: A list of :class:`Platform`s to resolve requirements for. - Defaults to the platforms specified by PythonSetup. + Defaults to the platforms specified by PythonSetup. """ reqs = [req for req_lib in req_libs for req in req_lib.requirements] self.add_resolved_requirements(reqs, platforms=platforms) - def resolve_distributions(self, reqs, platforms=None): + class SingleDistExtractionError(Exception): + pass + + def extract_single_dist_for_current_platform(self, reqs, dist_key) -> Distribution: + """Resolve a specific distribution from a set of requirements matching the current platform. + + :param list reqs: A list of :class:`PythonRequirement` to resolve. + :param str dist_key: The value of `distribution.key` to match for a `distribution` from the + resolved requirements. + :return: The single :class:`pkg_resources.Distribution` matching `dist_key`. + :raises: :class:`self.SingleDistExtractionError` if no dists or multiple dists matched the + given `dist_key`. 
+ """ + distributions, _transitive_requirements = self.resolve_distributions( + reqs, platforms=["current"] + ) + try: + matched_dist = assert_single_element( + list( + dist + for _, dists in distributions.items() + for dist in dists + if dist.key == dist_key + ) + ) + except (StopIteration, ValueError) as e: + raise self.SingleDistExtractionError( + f"Exactly one dist was expected to match name {dist_key} in requirements {reqs}: {e!r}" + ) + return matched_dist + + def resolve_distributions( + self, reqs: List[PythonRequirement], platforms: Optional[List[Platform]] = None, + ) -> Tuple[Dict[str, List[Distribution]], List[PythonRequirement]]: """Multi-platform dependency resolution. :param reqs: A list of :class:`PythonRequirement` to resolve. :param platforms: A list of platform strings to resolve requirements for. Defaults to the platforms specified by PythonSetup. - :returns: List of :class:`pex.resolver.ResolvedDistribution` instances meeting requirements for - the given platforms. + :returns: A tuple `(map, transitive_reqs)`, where `map` is a dict mapping distribution name + to a list of resolved distributions, and `reqs` contains all transitive == + requirements + needed to resolve the initial given requirements `reqs` for the given platforms. """ deduped_reqs = OrderedSet(reqs) - find_links = OrderedSet() + find_links: OrderedSet[str] = OrderedSet() for req in deduped_reqs: + self._log.debug(f" Dumping requirement: {req}") + self._builder.add_requirement(str(req.requirement)) if req.repository: find_links.add(req.repository) - return self._resolve_multi(deduped_reqs, platforms=platforms, find_links=find_links) - - def add_resolved_requirements(self, reqs, platforms=None): + # Resolve the requirements into distributions. 
+ distributions, transitive_requirements = self._resolve_multi( + self._builder.interpreter, list(deduped_reqs), platforms, list(find_links), + ) + return (distributions, transitive_requirements) + + def add_resolved_requirements( + self, + reqs: List[PythonRequirement], + platforms: Optional[List[Platform]] = None, + override_ipex_build_do_actually_add_distribution: bool = False, + ) -> None: """Multi-platform dependency resolution for PEX files. - :param reqs: A list of :class:`PythonRequirement`s to resolve. - :param platforms: A list of platform strings to resolve requirements for. - Defaults to the platforms specified by PythonSetup. + :param builder: Dump the requirements into this builder. + :param interpreter: The :class:`PythonInterpreter` to resolve requirements for. + :param reqs: A list of :class:`PythonRequirement` to resolve. + :param log: Use this logger. + :param platforms: A list of :class:`Platform`s to resolve requirements for. + Defaults to the platforms specified by PythonSetup. + :param bool override_ipex_build_do_actually_add_distribution: When this PexBuilderWrapper is configured with + generate_ipex=True, this method won't add any distributions to + the output pex. The internal implementation of this class adds a + pex dependency to the output ipex file, and therefore needs to + override the default behavior of this method. 
""" - for resolved_dist in self.resolve_distributions(reqs, platforms=platforms): - requirement = resolved_dist.requirement - self._log.debug(f" Dumping requirement: {requirement}") - self._builder.add_requirement(str(requirement)) - - distribution = resolved_dist.distribution - self._log.debug( - f" Dumping distribution: .../{os.path.basename(distribution.location)}" - ) - self.add_distribution(distribution) + distributions, transitive_requirements = self.resolve_distributions( + reqs, platforms=platforms + ) + locations: Set[str] = set() + for platform, dists in distributions.items(): + for dist in dists: + if dist.location not in locations: + if self._generate_ipex and not override_ipex_build_do_actually_add_distribution: + self._log.debug( + f" *AVOIDING* dumping distribution into ipex: .../{os.path.basename(dist.location)}" + ) + else: + self._log.debug( + f" Dumping distribution: .../{os.path.basename(dist.location)}" + ) + self.add_distribution(dist) + locations.add(dist.location) + # In addition to the top-level requirements, we add all the requirements matching the resolved + # distributions to the resulting pex. If `generate_ipex=True` is set, we need to have all the + # transitive requirements resolved in order to hydrate the .ipex with an intransitive resolve. + if self._generate_ipex and not override_ipex_build_do_actually_add_distribution: + self.add_direct_requirements(transitive_requirements) + + def _resolve_multi( + self, + interpreter: PythonInterpreter, + requirements: List[PythonRequirement], + platforms: Optional[List[Platform]], + find_links: Optional[List[str]], + ) -> Tuple[Dict[str, List[Distribution]], List[PythonRequirement]]: + """Multi-platform dependency resolution for PEX files. - def _resolve_multi(self, requirements, platforms=None, find_links=None): + Returns a tuple containing a list of distributions that must be included in order to satisfy a + set of requirements, and the transitive == requirements for thosee distributions. 
This may + involve distributions for multiple platforms. + + :param interpreter: The :class:`PythonInterpreter` to resolve for. + :param requirements: A list of :class:`PythonRequirement` objects to resolve. + :param platforms: A list of :class:`Platform`s to resolve for. + :param find_links: Additional paths to search for source packages during resolution. + :return: Map of platform name -> list of :class:`pkg_resources.Distribution` instances needed + to satisfy the requirements on that platform. + """ python_setup = self._python_setup_subsystem python_repos = self._python_repos_subsystem platforms = platforms or python_setup.platforms + find_links = list(find_links) if find_links else [] find_links.extend(python_repos.repos) - return resolve_multi( - requirements=[str(req.requirement) for req in requirements], - interpreters=[self._builder.interpreter], - indexes=python_repos.indexes, - find_links=find_links, - platforms=platforms, - cache=python_setup.resolver_cache_dir, - allow_prereleases=python_setup.resolver_allow_prereleases, - manylinux=python_setup.manylinux, - max_parallel_jobs=python_setup.resolver_jobs, - ) + # Individual requirements from pants may have a `repository` link attached to them, which is + # extracted in `self.resolve_distributions()`. When generating a .ipex file with + # `generate_ipex=True`, we want to ensure these repos are known to the ipex launcher when it + # tries to resolve all the requirements from BOOTSTRAP-PEX-INFO. 
+ self._all_find_links.update(OrderedSet(find_links)) + + distributions: Dict[str, List[Distribution]] = defaultdict(list) + transitive_requirements: List[PythonRequirement] = [] + + all_find_links = [*python_repos.repos, *find_links] + + for platform in platforms: + requirements_cache_dir = os.path.join( + python_setup.resolver_cache_dir, str(interpreter.identity) + ) + resolved_dists = resolve( + requirements=[str(req.requirement) for req in requirements], + interpreter=interpreter, + platform=platform, + indexes=python_repos.indexes, + find_links=all_find_links, + cache=requirements_cache_dir, + allow_prereleases=python_setup.resolver_allow_prereleases, + manylinux=python_setup.manylinux, + ) + for resolved_dist in resolved_dists: + dist = resolved_dist.distribution + transitive_requirements.append(dist.as_requirement()) + distributions[platform].append(dist) + + return (distributions, transitive_requirements) + + def _create_source_dumper(self, tgt: Target) -> Callable[[str], None]: + buildroot = get_buildroot() + + def get_chroot_path(relpath: str) -> str: + if type(tgt) == Files: + # Loose `Files`, as opposed to `Resources` or `PythonTarget`s, have no (implied) package + # structure and so we chroot them relative to the build root so that they can be accessed + # via the normal Python filesystem APIs just as they would be accessed outside the + # chrooted environment. NB: This requires we mark the pex as not zip safe so + # these `Files` can still be accessed in the context of a built pex distribution. 
+ self._builder.info.zip_safe = False + return relpath + return str(Path(relpath).relative_to(tgt.target_base)) + + def dump_source(relpath: str) -> None: + source_path = str(Path(buildroot, relpath)) + dest_path = get_chroot_path(relpath) + + self._all_added_sources_resources.append(Path(dest_path)) + if has_resources(tgt): + self._builder.add_resource(filename=source_path, env_filename=dest_path) + else: + self._builder.add_source(filename=source_path, env_filename=dest_path) + + return dump_source def add_sources_from(self, tgt: Target) -> None: - dump_source = _create_source_dumper(self._builder, tgt) + dump_source = self._create_source_dumper(tgt) self._log.debug(f" Dumping sources: {tgt}") for relpath in tgt.sources_relative_to_buildroot(): try: @@ -219,6 +418,7 @@ def _prepare_inits(self) -> Set[str]: ) ns_package.flush() for missing_init_file in missing_init_files: + self._all_added_sources_resources.append(Path(missing_init_file)) self._builder.add_source( filename=ns_package.name, env_filename=missing_init_file ) @@ -227,13 +427,114 @@ def _prepare_inits(self) -> Set[str]: def set_emit_warnings(self, emit_warnings): self._builder.info.emit_warnings = emit_warnings + def _shuffle_underlying_pex_builder(self) -> Tuple[PexInfo, Path]: + """Replace the original builder with a new one, and just pull files from the old chroot.""" + # Ensure that (the interpreter selected to resolve requirements when the ipex is first run) is + # (the exact same interpreter we used to resolve those requirements here). This is the only (?) + # way to ensure that the ipex bootstrap uses the *exact* same interpreter version. + self._builder.info = ipex_launcher.modify_pex_info( + self._builder.info, + interpreter_constraints=[str(self._builder.interpreter.identity.requirement)], + ) + + orig_info = self._builder.info.copy() + + orig_chroot = self._builder.chroot() + + # Mutate the PexBuilder object which is manipulated by this subsystem. 
+ self._builder = PEXBuilder(interpreter=self._builder.interpreter) + + return (orig_info, Path(orig_chroot.path())) + + def _shuffle_original_build_info_into_ipex(self): + """Create a "dehydrated" ipex file without any of its requirements, and specify that in two. + + *-INFO files. + + See ipex_launcher.py for details of how these files are used. + """ + orig_pex_info, orig_chroot = self._shuffle_underlying_pex_builder() + + # Gather information needed to create IPEX-INFO. + all_code = [str(src) for src in self._all_added_sources_resources] + prefixed_code_paths = [os.path.join(ipex_launcher.APP_CODE_PREFIX, src) for src in all_code] + for src, prefixed in zip(all_code, prefixed_code_paths): + # NB: Need to add under 'source' label for `self._prepare_inits()` to pick it up! + self._builder.chroot().copy( + os.path.join(str(orig_chroot), src), prefixed, label="source" + ) + + python_repos = self._python_repos_subsystem + python_setup = self._python_setup_subsystem + + # NB: self._all_find_links is updated on every call to self._resolve_multi(), and therefore + # includes all of the links from python_repos.repos, as well as any links added within any + # individual requirements from that resolve. 
+ + resolver_settings = dict( + indexes=list(python_repos.indexes), + find_links=list(self._all_find_links), + allow_prereleases=UnsetBool.coerce_bool( + python_setup.resolver_allow_prereleases, default=True + ), + manylinux=python_setup.manylinux, + ) + + # IPEX-INFO: A json mapping interpreted in ipex_launcher.py: + # { + # "code": [], + # "resolver_settings": {}, + # } + ipex_info = dict(code=prefixed_code_paths, resolver_settings=resolver_settings,) + with temporary_file(permissions=0o644) as ipex_info_file: + ipex_info_file.write(json.dumps(ipex_info).encode()) + ipex_info_file.flush() + self._builder.add_resource(filename=ipex_info_file.name, env_filename="IPEX-INFO") + + # BOOTSTRAP-PEX-INFO: The original PEX-INFO, which should be the PEX-INFO in the hydrated .pex + # file that is generated when the .ipex is first executed. + with temporary_file(permissions=0o644) as bootstrap_pex_info_file: + bootstrap_pex_info_file.write(orig_pex_info.dump().encode()) + bootstrap_pex_info_file.flush() + self._builder.add_resource( + filename=bootstrap_pex_info_file.name, env_filename="BOOTSTRAP-PEX-INFO" + ) + + # ipex.py: The special bootstrap script to hydrate the .ipex with the fully resolved + # requirements when it is first executed. + # Extract the file contents of our custom app launcher script from the pants package. + parent_module = module_dirname(module_dirname(ipex_launcher.__name__)) + ipex_launcher_provider = get_provider(parent_module) + ipex_launcher_script = ipex_launcher_provider.get_resource_string( + parent_module, "ipex/ipex_launcher.py" + ) + with temporary_file(permissions=0o644) as ipex_launcher_file: + ipex_launcher_file.write(ipex_launcher_script) + ipex_launcher_file.flush() + # Our .ipex file will use our custom app launcher! 
+ self._builder.set_executable(ipex_launcher_file.name, env_filename="ipex.py") + + # The PEX-INFO we generate shouldn't have any requirements (except pex itself), or they will + # fail to bootstrap because they were unable to find those distributions. Instead, the .pex file + # produced when the .ipex is first executed will read and resolve all those requirements from + # the BOOTSTRAP-PEX-INFO. + self.add_resolved_requirements( + [self._pex_requirement, self._setuptools_requirement,], + override_ipex_build_do_actually_add_distribution=True, + ) + def freeze(self) -> None: if self._frozen: return + if self._prepare_inits(): dist = self._distributions.get("setuptools") if not dist: self.add_resolved_requirements([self._setuptools_requirement]) + + if self._generate_ipex: + self._shuffle_original_build_info_into_ipex() + self._builder.freeze(bytecode_compile=False) self._frozen = True diff --git a/src/python/pants/util/strutil.py b/src/python/pants/util/strutil.py index 91dbbd29d90..64888925c65 100644 --- a/src/python/pants/util/strutil.py +++ b/src/python/pants/util/strutil.py @@ -89,6 +89,11 @@ def create_path_env_var( return delimiter.join(path_dirs) +def module_dirname(module_path: str) -> str: + """Return the import path for the parent module of `module_path`.""" + return ".".join(module_path.split(".")[:-1]) + + def camelcase(string: str) -> str: """Convert snake casing (containing - or _ characters) to camel casing.""" return "".join(word.capitalize() for word in re.split("[-_]", string)) diff --git a/tests/python/pants_test/backend/python/tasks/BUILD b/tests/python/pants_test/backend/python/tasks/BUILD index 9ef801c597a..46372ac57be 100644 --- a/tests/python/pants_test/backend/python/tasks/BUILD +++ b/tests/python/pants_test/backend/python/tasks/BUILD @@ -153,6 +153,7 @@ python_tests( sources = ['test_python_binary_integration.py'], dependencies = [ '3rdparty/python:pex', + 'examples/src/python/example/tensorflow_custom_op:show-tf-version-files', 
'src/python/pants/util:contextutil', 'src/python/pants/testutil:int-test', 'testprojects/src/python:python_distribution_directory', diff --git a/tests/python/pants_test/backend/python/tasks/test_python_binary_create.py b/tests/python/pants_test/backend/python/tasks/test_python_binary_create.py index d60d7a11c98..6188ded3ffa 100644 --- a/tests/python/pants_test/backend/python/tasks/test_python_binary_create.py +++ b/tests/python/pants_test/backend/python/tasks/test_python_binary_create.py @@ -5,6 +5,8 @@ import subprocess from textwrap import dedent +from colors import blue + from pants.backend.python.tasks.gather_sources import GatherSources from pants.backend.python.tasks.python_binary_create import PythonBinaryCreate from pants.backend.python.tasks.select_interpreter import SelectInterpreter @@ -41,11 +43,17 @@ def _assert_pex(self, binary, expected_output=None, expected_shebang=None): test_task.execute() self._check_products( - task_context, binary, expected_output=expected_output, expected_shebang=expected_shebang + test_task, + task_context, + binary, + expected_output=expected_output, + expected_shebang=expected_shebang, ) - def _check_products(self, context, binary, expected_output=None, expected_shebang=None): - pex_name = f"{binary.address.target_name}.pex" + def _check_products( + self, test_task, context, binary, expected_output=None, expected_shebang=None + ): + pex_name = test_task._get_output_pex_filename(binary.address.target_name) products = context.products.get("deployable_archives") self.assertIsNotNone(products) product_data = products.get(binary) @@ -75,10 +83,10 @@ def test_deployable_archive_products_simple(self): "lib.py": dedent( """ import os - - + + def main(): - os.getcwd() + os.getcwd() """ ) }, @@ -101,13 +109,13 @@ def test_deployable_archive_products_files_deps(self): import io import os import sys - - + + def main(): - here = os.path.dirname(__file__) - loose_file = os.path.join(here, '../src/things/loose_file') - with 
io.open(os.path.realpath(loose_file), 'r') as fp: - sys.stdout.write(fp.read()) + here = os.path.dirname(__file__) + loose_file = os.path.join(here, '../src/things/loose_file') + with io.open(os.path.realpath(loose_file), 'r') as fp: + sys.stdout.write(fp.read()) """ ) }, @@ -125,9 +133,9 @@ def test_shebang_modified(self): { "lib.py": dedent( """ - def main(): - print('Hello World!') - """ + def main(): + print('Hello World!') + """ ) }, ) @@ -143,3 +151,36 @@ def main(): self._assert_pex( binary, expected_output="Hello World!\n", expected_shebang=b"#!/usr/bin/env python2\n" ) + + def test_generate_ipex_ansicolors(self): + self.create_python_requirement_library( + "3rdparty/ipex", "ansicolors", requirements=["ansicolors"] + ) + self.create_python_library( + "src/ipex", + "lib", + { + "main.py": dedent( + """\ + from colors import blue + + print(blue('i just lazy-loaded the ansicolors dependency!')) + """ + ) + }, + ) + binary = self.create_python_binary( + "src/ipex", "bin", "main", dependencies=["3rdparty/ipex:ansicolors", ":lib",] + ) + + self.set_options(generate_ipex=True) + dist_dir = os.path.join(self.build_root, "dist") + + self._assert_pex( + binary, expected_output=blue("i just lazy-loaded the ansicolors dependency!") + "\n" + ) + + dehydrated_ipex_file = os.path.join(dist_dir, "bin.ipex") + assert os.path.isfile(dehydrated_ipex_file) + hydrated_pex_output_file = os.path.join(dist_dir, "bin.pex") + assert os.path.isfile(hydrated_pex_output_file) diff --git a/tests/python/pants_test/backend/python/tasks/test_python_binary_integration.py b/tests/python/pants_test/backend/python/tasks/test_python_binary_integration.py index 59554a780d3..f11835dc5e1 100644 --- a/tests/python/pants_test/backend/python/tasks/test_python_binary_integration.py +++ b/tests/python/pants_test/backend/python/tasks/test_python_binary_integration.py @@ -2,13 +2,16 @@ # Licensed under the Apache License, Version 2.0 (see LICENSE). 
import functools +import glob import os +import subprocess from contextlib import contextmanager from textwrap import dedent from pex.pex_info import PexInfo from pants.testutil.pants_run_integration_test import PantsRunIntegrationTest +from pants.util.collections import assert_single_element from pants.util.contextutil import open_zip, temporary_dir _LINUX_PLATFORM = "linux-x86_64-cp-36-m" @@ -158,7 +161,7 @@ def assertNotInAny(substring, collection): python_requirement('p537==1.0.4') ] ) - + """.format( target_platforms="platforms = [{}],".format( ", ".join(["'{}'".format(p) for p in target_platforms]) @@ -212,3 +215,24 @@ def test_platforms_with_native_deps(self): self.assertNotIn( "testprojects/src/python/python_distribution/ctypes:bin", result.stderr_data ) + + def test_generate_ipex_tensorflow(self): + with temporary_dir() as tmp_distdir: + with self.pants_results( + [ + f"--pants-distdir={tmp_distdir}", + # tensorflow==1.14.0 has a setuptools>=41.0.0 requirement, so the .ipex resolve fails + # without this override. + f"--pex-builder-wrapper-setuptools-version=41.0.0", + "--binary-py-generate-ipex", + "binary", + "examples/src/python/example/tensorflow_custom_op:show-tf-version", + ] + ) as pants_run: + self.assert_success(pants_run) + output_ipex = assert_single_element(glob.glob(os.path.join(tmp_distdir, "*"))) + ipex_basename = os.path.basename(output_ipex) + self.assertEqual(ipex_basename, "show-tf-version.ipex") + + pex_execution_output = subprocess.check_output([output_ipex]) + assert "tf version: 1.14.0" in pex_execution_output.decode()