From e02ea4a527d5a6fd81d709bf4a4db301f384195f Mon Sep 17 00:00:00 2001 From: John Sirois Date: Thu, 23 Jan 2020 07:16:36 -0800 Subject: [PATCH] Harden PythonInterpreter against pyenv shims. (#860) Previously, Pex encounered hard to diagnose errors when selected interpreters were pyenv shims. These shim files can represent a different interpreter on different executions which broke Pex's assumption that a given binary always corresponded to a given python version. Since we already shell out to an interpreter to gather its `PythonIdentity`, we now include collecting the real final interpeter path from `sys.executable` there. This allows pex to see through a shim to the underlying `$PYENV_ROOT/versions//bin/python*` interpreter which conforms to the expectation of a given binary corresponding to a given python version. See: https://github.com/pyenv/pyenv/issues/1112 Debugged working #856 which hit otherwise un-explainable errors in `tests/test_pex_bootstrapper.py`. Work towards #782. --- pex/interpreter.py | 63 +++++++++++++++++++++++++--------- tests/test_pex_bootstrapper.py | 15 ++++++-- 2 files changed, 60 insertions(+), 18 deletions(-) diff --git a/pex/interpreter.py b/pex/interpreter.py index 192ff5277..8f950f48c 100644 --- a/pex/interpreter.py +++ b/pex/interpreter.py @@ -46,6 +46,7 @@ def get(cls): supported_tags = tuple(tags.sys_tags()) preferred_tag = supported_tags[0] return cls( + binary=sys.executable, python_tag=preferred_tag.interpreter, abi_tag=preferred_tag.abi, platform_tag=preferred_tag.platform, @@ -58,8 +59,8 @@ def get(cls): def decode(cls, encoded): TRACER.log('creating PythonIdentity from encoded: %s' % encoded, V=9) values = json.loads(encoded) - if len(values) != 6: - raise cls.InvalidError("Invalid id string: %s" % encoded) + if len(values) != 7: + raise cls.InvalidError("Invalid interpreter identity: %s" % encoded) supported_tags = values.pop('supported_tags') @@ -80,11 +81,21 @@ def _find_interpreter_name(cls, python_tag): return interpreter raise ValueError('Unknown interpreter: {}'.format(python_tag)) - def __init__(self, python_tag, abi_tag, platform_tag, version, supported_tags, env_markers): + def __init__( + self, + binary, + python_tag, + abi_tag, + platform_tag, + version, + supported_tags, + env_markers + ): # N.B.: We keep this mapping to support historical values for `distribution` and `requirement` # properties. self._interpreter_name = self._find_interpreter_name(python_tag) + self._binary = binary self._python_tag = python_tag self._abi_tag = abi_tag self._platform_tag = platform_tag @@ -94,6 +105,7 @@ def __init__(self, python_tag, abi_tag, platform_tag, version, supported_tags, e def encode(self): values = dict( + binary=self._binary, python_tag=self._python_tag, abi_tag=self._abi_tag, platform_tag=self._platform_tag, @@ -106,6 +118,10 @@ def encode(self): ) return json.dumps(values) + @property + def binary(self): + return self._binary + @property def python_tag(self): return self._python_tag @@ -185,15 +201,27 @@ def python(self): return '%d.%d' % (self.version[0:2]) def __str__(self): - return '%s-%s.%s.%s' % ( - self._interpreter_name, - self._version[0], - self._version[1], - self._version[2] + # N.B.: Kept as distinct from __repr__ to support legacy str(identity) used by Pants v1 when + # forming cache locations. + return '{interpreter_name}-{major}.{minor}.{patch}'.format( + interpreter_name=self._interpreter_name, + major=self._version[0], + minor=self._version[1], + patch=self._version[2] + ) + + def __repr__(self): + return '{type}({binary!r}, {python_tag!r}, {abi_tag!r}, {platform_tag!r}, {version!r})'.format( + type=self.__class__.__name__, + binary=self._binary, + python_tag=self._python_tag, + abi_tag=self._abi_tag, + platform_tag=self._platform_tag, + version=self._version ) def _tup(self): - return self._python_tag, self._abi_tag, self._platform_tag, self._version + return self._binary, self._python_tag, self._abi_tag, self._platform_tag, self._version def __eq__(self, other): if type(other) is not type(self): @@ -313,7 +341,7 @@ def create_interpreter(stdout): identity = stdout.decode('utf-8').strip() if not identity: raise cls.IdentificationError('Could not establish identity of %s' % binary) - return cls(binary, PythonIdentity.decode(identity)) + return cls(PythonIdentity.decode(identity)) return SpawnedJob.stdout(job, result_func=create_interpreter) @@ -322,7 +350,7 @@ def _expand_path(cls, path): if os.path.isfile(path): return [path] elif os.path.isdir(path): - return [os.path.join(path, fn) for fn in os.listdir(path)] + return sorted(os.path.join(path, fn) for fn in os.listdir(path)) return [] @classmethod @@ -349,7 +377,7 @@ def _spawn_from_binary(cls, binary): if cached_interpreter is not None: return SpawnedJob.completed(cached_interpreter) if normalized_binary == cls._normalize_path(sys.executable): - current_interpreter = cls(sys.executable, PythonIdentity.get()) + current_interpreter = cls(PythonIdentity.get()) return SpawnedJob.completed(current_interpreter) return cls._spawn_from_binary_external(normalized_binary) @@ -412,16 +440,15 @@ def _sanitized_environment(cls, env=None): env_copy.pop('MACOSX_DEPLOYMENT_TARGET', None) return env_copy - def __init__(self, binary, identity): + def __init__(self, identity): """Construct a PythonInterpreter. You should probably use `PythonInterpreter.from_binary` instead. - :param binary: The full path of the python binary. :param identity: The :class:`PythonIdentity` of the PythonInterpreter. """ - self._binary = self._normalize_path(binary) self._identity = identity + self._binary = self._normalize_path(self.identity.binary) self._PYTHON_INTERPRETER_BY_NORMALIZED_PATH[self._binary] = self @@ -475,7 +502,11 @@ def __lt__(self, other): return self.version < other.version def __repr__(self): - return '%s(%r, %r)' % (self.__class__.__name__, self._binary, self._identity) + return '{type}({binary!r}, {identity!r})'.format( + type=self.__class__.__name__, + binary=self._binary, + identity=self._identity + ) def spawn_python_job(args, env=None, interpreter=None, expose=None, **subprocess_kwargs): diff --git a/tests/test_pex_bootstrapper.py b/tests/test_pex_bootstrapper.py index f872c23f6..6613a7b31 100644 --- a/tests/test_pex_bootstrapper.py +++ b/tests/test_pex_bootstrapper.py @@ -3,6 +3,7 @@ import os import sys +from textwrap import dedent from pex.interpreter import PythonInterpreter from pex.pex_bootstrapper import iter_compatible_interpreters @@ -37,8 +38,18 @@ def test_find_compatible_interpreters(): all_known_interpreters = set(PythonInterpreter.all()) all_known_interpreters.add(PythonInterpreter.get()) - interpreters = iter_compatible_interpreters(compatibility_constraints=['<3']) - assert set(interpreters).issubset(all_known_interpreters) + interpreters = set(iter_compatible_interpreters(compatibility_constraints=['<3'])) + i_rendered = '\n '.join(sorted(map(repr, interpreters))) + aki_rendered = '\n '.join(sorted(map(repr, all_known_interpreters))) + assert interpreters.issubset(all_known_interpreters), dedent( + """ + interpreters '<3': + {interpreters} + + all known interpreters: + {all_known_interpreters} + """.format(interpreters=i_rendered, all_known_interpreters=aki_rendered) + ) @skip_for_pyenv_use_under_pypy