Skip to content

Commit

Permalink
Harden PythonInterpreter against pyenv shims. (#860)
Browse files Browse the repository at this point in the history
Previously, Pex encounered hard to diagnose errors when selected
interpreters were pyenv shims. These shim files can represent a
different interpreter on different executions which broke Pex's
assumption that a given binary always corresponded to a given python
version.

Since we already shell out to an interpreter to gather its
`PythonIdentity`, we now include collecting the real final interpeter
path from `sys.executable` there. This allows pex to see through a shim
to the underlying `$PYENV_ROOT/versions/<version>/bin/python*`
interpreter which conforms to the expectation of a given binary
corresponding to a given python version.

See: pyenv/pyenv#1112

Debugged working #856 which hit otherwise un-explainable errors in
`tests/test_pex_bootstrapper.py`.

Work towards #782.
  • Loading branch information
jsirois authored Jan 23, 2020
1 parent aa6a843 commit e02ea4a
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 18 deletions.
63 changes: 47 additions & 16 deletions pex/interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def get(cls):
supported_tags = tuple(tags.sys_tags())
preferred_tag = supported_tags[0]
return cls(
binary=sys.executable,
python_tag=preferred_tag.interpreter,
abi_tag=preferred_tag.abi,
platform_tag=preferred_tag.platform,
Expand All @@ -58,8 +59,8 @@ def get(cls):
def decode(cls, encoded):
TRACER.log('creating PythonIdentity from encoded: %s' % encoded, V=9)
values = json.loads(encoded)
if len(values) != 6:
raise cls.InvalidError("Invalid id string: %s" % encoded)
if len(values) != 7:
raise cls.InvalidError("Invalid interpreter identity: %s" % encoded)

supported_tags = values.pop('supported_tags')

Expand All @@ -80,11 +81,21 @@ def _find_interpreter_name(cls, python_tag):
return interpreter
raise ValueError('Unknown interpreter: {}'.format(python_tag))

def __init__(self, python_tag, abi_tag, platform_tag, version, supported_tags, env_markers):
def __init__(
self,
binary,
python_tag,
abi_tag,
platform_tag,
version,
supported_tags,
env_markers
):
# N.B.: We keep this mapping to support historical values for `distribution` and `requirement`
# properties.
self._interpreter_name = self._find_interpreter_name(python_tag)

self._binary = binary
self._python_tag = python_tag
self._abi_tag = abi_tag
self._platform_tag = platform_tag
Expand All @@ -94,6 +105,7 @@ def __init__(self, python_tag, abi_tag, platform_tag, version, supported_tags, e

def encode(self):
values = dict(
binary=self._binary,
python_tag=self._python_tag,
abi_tag=self._abi_tag,
platform_tag=self._platform_tag,
Expand All @@ -106,6 +118,10 @@ def encode(self):
)
return json.dumps(values)

@property
def binary(self):
return self._binary

@property
def python_tag(self):
return self._python_tag
Expand Down Expand Up @@ -185,15 +201,27 @@ def python(self):
return '%d.%d' % (self.version[0:2])

def __str__(self):
return '%s-%s.%s.%s' % (
self._interpreter_name,
self._version[0],
self._version[1],
self._version[2]
# N.B.: Kept as distinct from __repr__ to support legacy str(identity) used by Pants v1 when
# forming cache locations.
return '{interpreter_name}-{major}.{minor}.{patch}'.format(
interpreter_name=self._interpreter_name,
major=self._version[0],
minor=self._version[1],
patch=self._version[2]
)

def __repr__(self):
return '{type}({binary!r}, {python_tag!r}, {abi_tag!r}, {platform_tag!r}, {version!r})'.format(
type=self.__class__.__name__,
binary=self._binary,
python_tag=self._python_tag,
abi_tag=self._abi_tag,
platform_tag=self._platform_tag,
version=self._version
)

def _tup(self):
return self._python_tag, self._abi_tag, self._platform_tag, self._version
return self._binary, self._python_tag, self._abi_tag, self._platform_tag, self._version

def __eq__(self, other):
if type(other) is not type(self):
Expand Down Expand Up @@ -313,7 +341,7 @@ def create_interpreter(stdout):
identity = stdout.decode('utf-8').strip()
if not identity:
raise cls.IdentificationError('Could not establish identity of %s' % binary)
return cls(binary, PythonIdentity.decode(identity))
return cls(PythonIdentity.decode(identity))

return SpawnedJob.stdout(job, result_func=create_interpreter)

Expand All @@ -322,7 +350,7 @@ def _expand_path(cls, path):
if os.path.isfile(path):
return [path]
elif os.path.isdir(path):
return [os.path.join(path, fn) for fn in os.listdir(path)]
return sorted(os.path.join(path, fn) for fn in os.listdir(path))
return []

@classmethod
Expand All @@ -349,7 +377,7 @@ def _spawn_from_binary(cls, binary):
if cached_interpreter is not None:
return SpawnedJob.completed(cached_interpreter)
if normalized_binary == cls._normalize_path(sys.executable):
current_interpreter = cls(sys.executable, PythonIdentity.get())
current_interpreter = cls(PythonIdentity.get())
return SpawnedJob.completed(current_interpreter)
return cls._spawn_from_binary_external(normalized_binary)

Expand Down Expand Up @@ -412,16 +440,15 @@ def _sanitized_environment(cls, env=None):
env_copy.pop('MACOSX_DEPLOYMENT_TARGET', None)
return env_copy

def __init__(self, binary, identity):
def __init__(self, identity):
"""Construct a PythonInterpreter.
You should probably use `PythonInterpreter.from_binary` instead.
:param binary: The full path of the python binary.
:param identity: The :class:`PythonIdentity` of the PythonInterpreter.
"""
self._binary = self._normalize_path(binary)
self._identity = identity
self._binary = self._normalize_path(self.identity.binary)

self._PYTHON_INTERPRETER_BY_NORMALIZED_PATH[self._binary] = self

Expand Down Expand Up @@ -475,7 +502,11 @@ def __lt__(self, other):
return self.version < other.version

def __repr__(self):
return '%s(%r, %r)' % (self.__class__.__name__, self._binary, self._identity)
return '{type}({binary!r}, {identity!r})'.format(
type=self.__class__.__name__,
binary=self._binary,
identity=self._identity
)


def spawn_python_job(args, env=None, interpreter=None, expose=None, **subprocess_kwargs):
Expand Down
15 changes: 13 additions & 2 deletions tests/test_pex_bootstrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import os
import sys
from textwrap import dedent

from pex.interpreter import PythonInterpreter
from pex.pex_bootstrapper import iter_compatible_interpreters
Expand Down Expand Up @@ -37,8 +38,18 @@ def test_find_compatible_interpreters():
all_known_interpreters = set(PythonInterpreter.all())
all_known_interpreters.add(PythonInterpreter.get())

interpreters = iter_compatible_interpreters(compatibility_constraints=['<3'])
assert set(interpreters).issubset(all_known_interpreters)
interpreters = set(iter_compatible_interpreters(compatibility_constraints=['<3']))
i_rendered = '\n '.join(sorted(map(repr, interpreters)))
aki_rendered = '\n '.join(sorted(map(repr, all_known_interpreters)))
assert interpreters.issubset(all_known_interpreters), dedent(
"""
interpreters '<3':
{interpreters}
all known interpreters:
{all_known_interpreters}
""".format(interpreters=i_rendered, all_known_interpreters=aki_rendered)
)


@skip_for_pyenv_use_under_pypy
Expand Down

0 comments on commit e02ea4a

Please sign in to comment.