# Copyright 2014 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).
from __future__ import absolute_import
import importlib
import itertools
import os
import site
import sys
import zipfile
from collections import OrderedDict, defaultdict
from pex import dist_metadata, pex_builder, pex_warnings
from pex.bootstrap import Bootstrap
from pex.common import atomic_directory, die, open_zip
from pex.inherit_path import InheritPath
from pex.interpreter import PythonInterpreter
from pex.orderedset import OrderedSet
from pex.pex_info import PexInfo
from pex.third_party.packaging import tags
from pex.third_party.pkg_resources import DistributionNotFound, Environment, Requirement, WorkingSet
from pex.tracer import TRACER
from pex.typing import TYPE_CHECKING
from pex.util import CacheHelper, DistributionHelper
if TYPE_CHECKING:
from typing import Optional
def _import_pkg_resources():
    """Import `pkg_resources`, falling back to the `setuptools` that pex vendors.

    :return: A 2-tuple of the imported `pkg_resources` module and a flag that is `True` only when
             the plain import failed and the module was loaded after exposing pex's vendored
             `setuptools`.
    """
    vendored = False
    try:
        import pkg_resources  # vendor:skip
    except ImportError:
        from pex import third_party

        # Expose the vendored `setuptools` so that the retried import below can succeed.
        third_party.install(expose=["setuptools"])
        import pkg_resources  # vendor:skip

        vendored = True
    return pkg_resources, vendored
[docs]class PEXEnvironment(Environment):
class _CachingZipImporter(object):
    """A path importer wrapping `zipimport.zipimporter` whose loaders re-use `sys.modules`.

    Loaders handed out by `find_module` return a module already present in `sys.modules`
    instead of loading it again from the zip.
    """

    class _CachingLoader(object):
        """Wraps a delegate loader and only consults it for modules not yet in `sys.modules`."""

        def __init__(self, delegate):
            self._delegate = delegate

        def load_module(self, fullname):
            # Technically a PEP-302 loader should re-load the existing module object here -
            # notably re-exec'ing the code found in the zip against the existing module
            # __dict__. We don't do this since the zip is assumed immutable during our run and
            # this is enough to work around the issue.
            existing = sys.modules.get(fullname)
            if existing:
                return existing

            module = self._delegate.load_module(fullname)
            module.__loader__ = self
            return module

    # Flipped to True once the namespace handler has been registered for this importer type.
    _REGISTERED = False

    @classmethod
    def _ensure_namespace_handler_registered(cls):
        """Register `pkg_resources.file_ns_handler` for this importer type, at most once."""
        if cls._REGISTERED:
            return
        pkg_resources, _ = _import_pkg_resources()
        pkg_resources.register_namespace_handler(cls, pkg_resources.file_ns_handler)
        cls._REGISTERED = True

    def __init__(self, path):
        import zipimport

        self._delegate = zipimport.zipimporter(path)

    def find_module(self, fullname, path=None):
        """Return a caching loader for `fullname`, or `None` if the zip importer has no loader."""
        delegate_loader = self._delegate.find_module(fullname, path)
        if delegate_loader is None:
            return None
        self._ensure_namespace_handler_registered()
        return self._CachingLoader(delegate_loader)
@classmethod
def _install_pypy_zipimporter_workaround(cls, pex_file):
    """Install a path hook that serves paths under `pex_file` with `_CachingZipImporter`.

    Cached importers for paths starting with `pex_file` are evicted from
    `sys.path_importer_cache` and the hook is placed at the front of `sys.path_hooks`.
    """
    # The pypy zipimporter implementation always freshly loads a module instead of re-importing
    # when the module already exists in sys.modules. This breaks the PEP-302 importer protocol
    # and violates pkg_resources assumptions based on that protocol in its handling of namespace
    # packages. See: https://bitbucket.org/pypy/pypy/issues/1686

    def pypy_zipimporter_workaround(path):
        import os

        # We only need to claim the pex zipfile root modules.
        claimed = path.startswith(pex_file) and "." not in os.path.relpath(path, pex_file)
        if not claimed:
            # The protocol is to raise if we don't want to hook the given path.
            # See: https://www.python.org/dev/peps/pep-0302/#specification-part-2-registering-hooks
            raise ImportError()
        return cls._CachingZipImporter(path)

    # Collect the keys up front since the cache is mutated while evicting.
    stale_paths = [path for path in sys.path_importer_cache if path.startswith(pex_file)]
    for stale_path in stale_paths:
        sys.path_importer_cache.pop(stale_path)

    sys.path_hooks.insert(0, pypy_zipimporter_workaround)
@classmethod
def _force_local(cls, pex_file, pex_info):
    """Explode the zipped `pex_file` to a directory named by its code hash.

    Zip entries under the bootstrap dir or the internal cache are not extracted.

    :return: The explode directory under `pex_info.zip_unsafe_cache`, or `pex_file` itself when
             `pex_info` carries no code hash.
    """
    code_hash = pex_info.code_hash
    if code_hash is None:
        # Do not support force_local if code_hash is not set. (It should always be set.)
        return pex_file

    explode_dir = os.path.join(pex_info.zip_unsafe_cache, code_hash)
    TRACER.log("PEX is not zip safe, exploding to %s" % explode_dir)
    with atomic_directory(explode_dir, exclusive=True) as explode_tmp:
        # NOTE(review): presumably `explode_tmp` is falsy when the directory has already been
        # populated, in which case there is nothing to extract - confirm against
        # `atomic_directory`.
        if explode_tmp:
            with TRACER.timed("Unzipping %s" % pex_file):
                with open_zip(pex_file) as pex_zip:
                    excluded_prefixes = (pex_builder.BOOTSTRAP_DIR, pex_info.internal_cache)
                    members = [
                        name
                        for name in pex_zip.namelist()
                        if not name.startswith(excluded_prefixes)
                    ]
                    pex_zip.extractall(explode_tmp, members)
    return explode_dir
@classmethod
def _update_module_paths(cls, pex_file):
    """Re-import modules that were loaded from `pex_file` so they come from the exploded pex.

    Modules imported from the bootstrap are left alone, as is `__main__`. Re-imported packages
    have the entries of their previous `__path__` appended to their new `__path__`.
    """
    bootstrap = Bootstrap.locate()

    def loaded_from_pex(location):
        return os.path.realpath(location).startswith(pex_file)

    # Un-import any modules already loaded from within the .pex file.
    to_reimport = []
    for name, module in reversed(sorted(sys.modules.items())):
        if bootstrap.imported_from_bootstrap(module):
            TRACER.log("Not re-importing module %s from bootstrap." % module, V=3)
            continue

        pkg_path = getattr(module, "__path__", None)
        if pkg_path and any(loaded_from_pex(path_item) for path_item in pkg_path):
            sys.modules.pop(name)
            to_reimport.append((name, pkg_path, True))
            continue

        if name == "__main__":
            # The __main__ module is special in python and is not re-importable.
            continue

        mod_file = getattr(module, "__file__", None)
        if mod_file and loaded_from_pex(mod_file):
            sys.modules.pop(name)
            to_reimport.append((name, mod_file, False))

    # And re-import them from the exploded pex.
    for name, existing_path, is_pkg in to_reimport:
        kind = "package" if is_pkg else "module"
        TRACER.log(
            "Re-importing %s %s loaded via %r from exploded pex." % (kind, name, existing_path)
        )
        reimported_module = importlib.import_module(name)
        if not is_pkg:
            continue
        # NB: It is not guaranteed that __path__ is a list, it may be a PEP-420 namespace
        # package object which supports a limited mutation API; so we append each item
        # individually.
        for path_item in existing_path:
            reimported_module.__path__.append(path_item)
@classmethod
def _write_zipped_internal_cache(cls, zf, pex_info):
    """Cache each distribution listed in `pex_info` from the open pex zip `zf`.

    Every distribution is passed to `CacheHelper.cache_distribution` along with its path inside
    the zip's internal cache and its target location under `pex_info.install_cache`.

    :return: The list of results of `CacheHelper.cache_distribution`, one per distribution.
    """
    return [
        CacheHelper.cache_distribution(
            zf,
            "/".join([pex_info.internal_cache, distribution_name]),
            os.path.join(pex_info.install_cache, dist_digest, distribution_name),
        )
        for distribution_name, dist_digest in pex_info.distributions.items()
    ]
@classmethod
def _load_internal_cache(cls, pex, pex_info):
    """Yield the distributions bundled with `pex`, possibly caching them out first.

    Nothing is yielded when `pex_info` lists no distributions. When `pex` is a directory, every
    entry of its internal cache directory is searched for distributions; otherwise `pex` is
    opened as a zip and the distributions come from `_write_zipped_internal_cache`.
    """
    internal_cache = os.path.join(pex, pex_info.internal_cache)
    with TRACER.timed("Searching dependency cache: %s" % internal_cache, V=2):
        if not pex_info.distributions:
            # We have no .deps to load.
            return

        if not os.path.isdir(pex):
            with open_zip(pex) as zf:
                for dist in cls._write_zipped_internal_cache(zf, pex_info):
                    yield dist
            return

        dist_chroots = [
            os.path.join(internal_cache, entry) for entry in os.listdir(internal_cache)
        ]
        internal_env = Environment(search_path=dist_chroots)
        for dist_name in internal_env:
            for dist in internal_env[dist_name]:
                yield dist
def __init__(self, pex, pex_info, interpreter=None):
    # type: (str, PexInfo, Optional[PythonInterpreter]) -> None
    """Set up an environment for `pex` targeting `interpreter`.

    :param pex: The path of the pex file or directory.
    :param pex_info: The pex's metadata.
    :param interpreter: The interpreter to target; the result of `PythonInterpreter.get()`
                        is used when none is given.
    """
    self._internal_cache = os.path.join(pex, pex_info.internal_cache)
    self._pex = pex
    self._pex_info = pex_info
    self._activated = False
    self._working_set = None
    self._interpreter = interpreter or PythonInterpreter.get()
    self._inherit_path = pex_info.inherit_path

    identity = self._interpreter.identity
    self._supported_tags = frozenset(identity.supported_tags)
    self._target_interpreter_env = identity.env_markers

    # For the bug this works around, see: https://bitbucket.org/pypy/pypy/issues/1686
    # NB: This must be installed early before the underlying pex is loaded in any way.
    if identity.python_tag.startswith("pp") and zipfile.is_zipfile(self._pex):
        self._install_pypy_zipimporter_workaround(self._pex)

    # `sys.path` is only searched when the pex inherits its path in some form.
    if pex_info.inherit_path == InheritPath.FALSE:
        search_path = []
    else:
        search_path = sys.path
    super(PEXEnvironment, self).__init__(
        search_path=search_path, platform=identity.platform_tag
    )

    TRACER.log(
        "E: tags for %r x %r -> %s" % (self.platform, self._interpreter, self._supported_tags),
        V=9,
    )
def _update_candidate_distributions(self, distribution_iter):
    """Add every distribution from `distribution_iter` that passes `can_add`."""
    for dist in distribution_iter:
        if not self.can_add(dist):
            continue
        with TRACER.timed("Adding %s" % dist, V=2):
            self.add(dist)
def can_add(self, dist):
    """Return whether `dist` may be added to this environment.

    Distributions whose location does not end in `.whl` are always accepted. A wheel is accepted
    when its tags intersect the target interpreter's supported tags and, if it declares a
    python requirement, the interpreter's version satisfies that requirement.
    """
    filename, ext = os.path.splitext(os.path.basename(dist.location))
    if ext.lower() != ".whl":
        # This supports resolving pex's own vendored distributions which are vendored in a
        # directory with the project name (`pip/` for pip) and not the corresponding wheel name
        # (`pip-19.3.1-py2.py3-none-any.whl/` for pip). Pex only vendors universal wheels for
        # all platforms it supports at buildtime and runtime so this is always safe.
        return True

    # Wheel filename format: https://www.python.org/dev/peps/pep-0427/#file-name-convention
    # `{distribution}-{version}(-{build tag})?-{python tag}-{abi tag}-{platform tag}.whl`
    wheel_components = filename.split("-")
    if len(wheel_components) < 3:
        return False

    # The trailing three components are `{python tag}-{abi tag}-{platform tag}`.
    wheel_tags = "-".join(wheel_components[-3:])
    if self._supported_tags.isdisjoint(tags.parse_tag(wheel_tags)):
        return False

    python_requires = dist_metadata.requires_python(dist)
    return not python_requires or self._interpreter.identity.version_str in python_requires
def activate(self):
    """Activate this environment on first use and return its working set.

    Later calls return the working set built by the first call.
    """
    if self._activated:
        return self._working_set

    with TRACER.timed("Activating PEX virtual environment from %s" % self._pex):
        self._working_set = self._activate()
    self._activated = True
    return self._working_set
def _resolve(self, working_set, reqs):
    """Resolve the distributions that satisfy `reqs` against this environment.

    Requirements whose environment marker evaluates false for the target interpreter are
    skipped. The remaining requirements are grouped by key and each key is resolved on its own.

    :param working_set: The working set used to perform each resolve.
    :param reqs: The requirements to resolve.
    :return: The ordered set of resolved distributions.

    If any requirement cannot be resolved, the unresolved requirements and the distributions
    listed in the pex are logged and, unless the pex is configured to ignore errors, a summary
    of the failures is passed to `die`.
    """
    environment = self._target_interpreter_env.copy()
    environment["extra"] = list(set(itertools.chain(*(req.extras for req in reqs))))

    reqs_by_key = OrderedDict()
    for req in reqs:
        if req.marker and not req.marker.evaluate(environment=environment):
            TRACER.log(
                "Skipping activation of `%s` due to environment marker de-selection" % req
            )
            continue
        reqs_by_key.setdefault(req.key, []).append(req)

    unresolved_reqs = OrderedDict()
    resolveds = OrderedSet()

    # Resolve them one at a time so that we can figure out which ones we need to elide should
    # there be an interpreter incompatibility.
    for key, keyed_reqs in reqs_by_key.items():
        with TRACER.timed("Resolving {} from {}".format(key, keyed_reqs), V=2):
            # N.B.: We resolve the bare requirement with no version specifiers since the resolve
            # process used to build this pex already did so. There may be multiple distributions
            # satisfying any particular key (e.g.: a Python 2 specific version and a Python 3
            # specific version for a multi-python PEX) and we want the working set to pick the
            # most appropriate one.
            bare_req = Requirement.parse(key)
            try:
                resolveds.update(working_set.resolve([bare_req], env=self))
            except DistributionNotFound as e:
                TRACER.log("Failed to resolve a requirement: %s" % e)
                requirers = unresolved_reqs.setdefault(e.req, OrderedSet())
                for requirer in e.requirers or ():
                    # A requirer need not be one of the keys recorded in `reqs_by_key` (e.g.:
                    # when a transitive dependency required the missing distribution). Fall
                    # back to the bare requirer name in that case instead of raising a
                    # `KeyError` that would mask the real resolve failure.
                    requirers.update(reqs_by_key.get(requirer, [requirer]))

    if unresolved_reqs:
        TRACER.log("Unresolved requirements:")
        for unresolved_req in unresolved_reqs:
            TRACER.log(" - %s" % unresolved_req)

        TRACER.log("Distributions contained within this pex:")
        distributions_by_key = defaultdict(list)
        if not self._pex_info.distributions:
            TRACER.log(" None")
        else:
            for dist_name, dist_digest in self._pex_info.distributions.items():
                TRACER.log(" - %s" % dist_name)
                distribution = DistributionHelper.distribution_from_path(
                    path=os.path.join(self._pex_info.install_cache, dist_digest, dist_name)
                )
                distributions_by_key[distribution.as_requirement().key].append(distribution)

        if not self._pex_info.ignore_errors:
            items = []
            for index, (requirement, requirers) in enumerate(unresolved_reqs.items()):
                rendered_requirers = ""
                if requirers:
                    rendered_requirers = ("\n Required by:" "\n {requirers}").format(
                        requirers="\n ".join(map(str, requirers))
                    )

                items.append(
                    "{index: 2d}: {requirement}"
                    "{rendered_requirers}"
                    "\n But this pex only contains:"
                    "\n {distributions}".format(
                        index=index + 1,
                        requirement=requirement,
                        rendered_requirers=rendered_requirers,
                        distributions="\n ".join(
                            os.path.basename(d.location)
                            for d in distributions_by_key[requirement.key]
                        ),
                    )
                )

            die(
                "Failed to execute PEX file. Needed {platform} compatible dependencies for:\n{items}".format(
                    platform=self._interpreter.platform, items="\n".join(items)
                )
            )

    return resolveds
# The name of the metadata resource a distribution uses to list its namespace packages.
_NAMESPACE_PACKAGE_METADATA_RESOURCE = "namespace_packages.txt"

@classmethod
def _get_namespace_packages(cls, dist):
    """Return the lines of `dist`'s namespace packages metadata, or `[]` if it has none."""
    resource = cls._NAMESPACE_PACKAGE_METADATA_RESOURCE
    if not dist.has_metadata(resource):
        return []
    return list(dist.get_metadata_lines(resource))
@classmethod
def declare_namespace_packages(cls, resolved_dists):
    """Declare the namespace packages of `resolved_dists` that are already imported.

    A warning is issued when `pkg_resources` had to be loaded from pex's vendored `setuptools`
    to do so. Nothing happens when no distribution lists any namespace packages.
    """
    namespace_packages_by_dist = OrderedDict()
    for dist in resolved_dists:
        namespace_packages = cls._get_namespace_packages(dist)
        # NB: Dists can explicitly declare empty namespace packages lists to indicate they have
        # none. We only care about dists with one or more namespace packages though; thus, the
        # guard.
        if not namespace_packages:
            continue
        namespace_packages_by_dist[dist] = namespace_packages

    if not namespace_packages_by_dist:
        return  # Nothing to do here.

    # When declaring namespace packages, we need to do so with the `setuptools` distribution
    # that will be active in the pex environment at runtime and, as such, care must be taken.
    #
    # Properly behaved distributions will declare a dependency on `setuptools`, in which case we
    # use that (non-vendored) distribution. A side-effect of importing `pkg_resources` from that
    # distribution is that a global `pkg_resources.working_set` will be populated. For various
    # `pkg_resources` distribution discovery functions to work, that global
    # `pkg_resources.working_set` must be built with the `sys.path` fully settled. Since all
    # dists in the dependency set (`resolved_dists`) have already been resolved and added to the
    # `sys.path` we're safe to proceed here.
    #
    # Other distributions (notably `twitter.common.*`) in the wild declare `setuptools`-specific
    # `namespace_packages` but do not properly declare a dependency on `setuptools` which they
    # must use to:
    # 1. Declare `namespace_packages` metadata which we just verified they have with the check
    #    above.
    # 2. Declare namespace packages at runtime via the canonical:
    #    `__import__('pkg_resources').declare_namespace(__name__)`
    #
    # For such distributions we fall back to our vendored version of `setuptools`. This is safe,
    # since we'll only introduce our shaded version when no other standard version is present
    # and even then tear it all down when we hand off from the bootstrap to user code.
    pkg_resources, vendored = _import_pkg_resources()
    if vendored:
        rendered_dists = []
        for index, (dist, ns_packages) in enumerate(namespace_packages_by_dist.items(), start=1):
            rendered_dists.append(
                "\n{index}. {dist} namespace packages:\n {ns_packages}".format(
                    index=index,
                    dist=dist.as_requirement(),
                    ns_packages="\n ".join(ns_packages),
                )
            )
        dists = "\n".join(rendered_dists)
        pex_warnings.warn(
            "The `pkg_resources` package was loaded from a pex vendored version when "
            "declaring namespace packages defined by:\n{dists}\n\nThese distributions "
            "should fix their `install_requires` to include `setuptools`".format(dists=dists)
        )

    # Only namespace packages that have already been imported are declared.
    for ns_packages in namespace_packages_by_dist.values():
        for pkg in ns_packages:
            if pkg in sys.modules:
                pkg_resources.declare_namespace(pkg)
def _activate(self):
    # type: () -> WorkingSet
    """Put the pex and its resolved distributions on `sys.path` and return their working set.

    A zipped pex that is not zip safe is exploded to disk first and takes the place of the pex
    file on `sys.path`; otherwise the pex is placed at the head of `sys.path` unless already
    present. Each resolved distribution is added to the working set, to `sys.path` and as a
    site dir.
    """
    pex_file = os.path.realpath(self._pex)

    candidates = self._load_internal_cache(pex_file, self._pex_info)
    self._update_candidate_distributions(candidates)

    is_zipped_pex = os.path.isfile(pex_file)
    if not self._pex_info.zip_safe and is_zipped_pex:
        explode_dir = self._force_local(pex_file=pex_file, pex_info=self._pex_info)
        # Force subsequent imports to come from the exploded .pex directory rather than the
        # .pex file.
        TRACER.log("Adding exploded non zip-safe pex to the head of sys.path: %s" % explode_dir)
        other_paths = [path for path in sys.path if pex_file != os.path.realpath(path)]
        sys.path[:] = [explode_dir] + other_paths
        self._update_module_paths(pex_file=pex_file)
    elif pex_file not in (os.path.realpath(path) for path in sys.path):
        pex_kind = "file" if is_zipped_pex else "dir"
        TRACER.log("Adding pex %s to the head of sys.path: %s" % (pex_kind, pex_file))
        sys.path.insert(0, pex_file)

    all_reqs = [Requirement.parse(req) for req in self._pex_info.requirements]

    working_set = WorkingSet([])
    for dist in self._resolve(working_set, all_reqs):
        with TRACER.timed("Activating %s" % dist, V=2):
            working_set.add(dist)

            location = dist.location
            if self._inherit_path == InheritPath.FALLBACK:
                # Prepend location to sys.path.
                #
                # This ensures that bundled versions of libraries will be used before
                # system-installed versions, in case something is installed in both, helping to
                # favor hermeticity in the case of non-hermetic PEX files (i.e. those with
                # inherit_path=True).
                #
                # If the path is not already in sys.path, site.addsitedir will append (not
                # prepend) the path to sys.path. But if the path is already in sys.path,
                # site.addsitedir will leave sys.path unmodified, but will do everything else it
                # would do. This is not part of its advertised contract (which is very vague),
                # but has been verified to be the case by inspecting its source for both cpython
                # 2.7 and cpython 3.7.
                sys.path.insert(0, location)
            else:
                sys.path.append(location)

            with TRACER.timed("Adding sitedir", V=2):
                site.addsitedir(location)

    return working_set