From 901bc692808515afbb13c87cb6301436117b08a9 Mon Sep 17 00:00:00 2001 From: Chris Livingston Date: Fri, 1 Feb 2019 15:01:50 -0500 Subject: [PATCH 1/5] Purge stale interpreters from Interpreter Cache --- .../pants/backend/python/interpreter_cache.py | 13 +++++++++++++ .../backend/python/tasks/select_interpreter.py | 15 +++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/src/python/pants/backend/python/interpreter_cache.py b/src/python/pants/backend/python/interpreter_cache.py index e0126f74e13..d2c4f9ed887 100644 --- a/src/python/pants/backend/python/interpreter_cache.py +++ b/src/python/pants/backend/python/interpreter_cache.py @@ -119,6 +119,8 @@ def _interpreter_from_path(self, path, filters=()): try: executable = os.readlink(os.path.join(path, 'python')) except OSError: + if os.path.dirname(path) == self._cache_dir: + self._purge_interpreter(path) return None interpreter = PythonInterpreter.from_binary(executable, include_site_extras=False) if self._matches(interpreter, filters=filters): @@ -251,3 +253,14 @@ def _resolve_and_link(self, interpreter, requirement, target_link): _safe_link(target_location, target_link) logger.debug(' installed {}'.format(target_location)) return Package.from_href(target_location) + + def _purge_interpreter(self, interpreter_dir): + try: + logger.info('Detected stale interpreter `{}` in the interpreter cache, purging.' + .format(interpreter_dir)) + shutil.rmtree(interpreter_dir, ignore_errors=True) + except Exception as e: + logger.warn( + 'Caught exception {!r} during interpreter purge. Please run `./pants clean-all`!' + .format(e) + ) diff --git a/src/python/pants/backend/python/tasks/select_interpreter.py b/src/python/pants/backend/python/tasks/select_interpreter.py index 8c0905a90e2..ce7af53212b 100644 --- a/src/python/pants/backend/python/tasks/select_interpreter.py +++ b/src/python/pants/backend/python/tasks/select_interpreter.py @@ -78,6 +78,9 @@ def execute(self): interpreter_path_file = self._interpreter_path_file(target_set_id) if not os.path.exists(interpreter_path_file): self._create_interpreter_path_file(interpreter_path_file, python_tgts) + else: + if _detect_and_purge_invalid_interpreter(interpreter_path_file): + self._create_interpreter_path_file(interpreter_path_file, python_tgts) interpreter = self._get_interpreter(interpreter_path_file) self.context.products.register_data(PythonInterpreter, interpreter) @@ -105,3 +108,15 @@ def _get_interpreter(interpreter_path_file): dist_name, dist_version, location = line.strip().split('\t') interpreter = interpreter.with_extra(dist_name, dist_version, location) return interpreter + + @staticmethod + def _detect_and_purge_invalid_interpreter(interpreter_path_file): + with open(interpreter_path_file, 'r') as infile: + lines = infile.readlines() + binary = lines[0].strip() + if not os.path.exists(binary): + self.context.log.info('Stale interpreter reference detected, removing reference and ' + 'selecting a new interpreter.') + os.remove(interpreter_path_file) + return True + return False From 9362424e257274fb536b4c1e752cc0053616edea Mon Sep 17 00:00:00 2001 From: Chris Livingston Date: Wed, 6 Feb 2019 17:00:17 -0500 Subject: [PATCH 2/5] Rename purge method to make side effects more clear --- .../pants/backend/python/interpreter_cache.py | 7 +++--- .../python/tasks/select_interpreter.py | 25 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/python/pants/backend/python/interpreter_cache.py b/src/python/pants/backend/python/interpreter_cache.py index d2c4f9ed887..a0e49ca0b04 100644 --- a/src/python/pants/backend/python/interpreter_cache.py +++ b/src/python/pants/backend/python/interpreter_cache.py @@ -97,7 +97,6 @@ def select_interpreter_for_targets(self, targets): tgts_by_compatibilities, total_filter_set = self.partition_targets_by_compatibility(targets) allowed_interpreters = set(self.setup(filters=total_filter_set)) - # Constrain allowed_interpreters based on each target's compatibility requirements. for compatibility in tgts_by_compatibilities: compatible_with_target = set(self._matching(allowed_interpreters, compatibility)) @@ -118,9 +117,11 @@ def select_interpreter_for_targets(self, targets): def _interpreter_from_path(self, path, filters=()): try: executable = os.readlink(os.path.join(path, 'python')) + if not os.path.exists(executable): + if os.path.dirname(path) == self._cache_dir: + self._purge_interpreter(path) + return None except OSError: - if os.path.dirname(path) == self._cache_dir: - self._purge_interpreter(path) return None interpreter = PythonInterpreter.from_binary(executable, include_site_extras=False) if self._matches(interpreter, filters=filters): diff --git a/src/python/pants/backend/python/tasks/select_interpreter.py b/src/python/pants/backend/python/tasks/select_interpreter.py index ce7af53212b..443275e47ce 100644 --- a/src/python/pants/backend/python/tasks/select_interpreter.py +++ b/src/python/pants/backend/python/tasks/select_interpreter.py @@ -79,7 +79,7 @@ def execute(self): if not os.path.exists(interpreter_path_file): self._create_interpreter_path_file(interpreter_path_file, python_tgts) else: - if _detect_and_purge_invalid_interpreter(interpreter_path_file): + if self._detect_and_purge_invalid_interpreter(interpreter_path_file): self._create_interpreter_path_file(interpreter_path_file, python_tgts) interpreter = self._get_interpreter(interpreter_path_file) @@ -98,6 +98,17 @@ def _create_interpreter_path_file(self, interpreter_path_file, targets): def _interpreter_path_file(self, target_set_id): return os.path.join(self.workdir, target_set_id, 'interpreter.info') + def _detect_and_purge_invalid_interpreter(self, interpreter_path_file): + with open(interpreter_path_file, 'r') as infile: + lines = infile.readlines() + binary = lines[0].strip() + if not os.path.exists(binary): + self.context.log.info('Stale interpreter reference detected: {}, removing reference and ' + 'selecting a new interpreter.'.format(binary)) + os.remove(interpreter_path_file) + return True + return False + @staticmethod def _get_interpreter(interpreter_path_file): with open(interpreter_path_file, 'r') as infile: @@ -108,15 +119,3 @@ def _get_interpreter(interpreter_path_file): dist_name, dist_version, location = line.strip().split('\t') interpreter = interpreter.with_extra(dist_name, dist_version, location) return interpreter - - @staticmethod - def _detect_and_purge_invalid_interpreter(interpreter_path_file): - with open(interpreter_path_file, 'r') as infile: - lines = infile.readlines() - binary = lines[0].strip() - if not os.path.exists(binary): - self.context.log.info('Stale interpreter reference detected, removing reference and ' - 'selecting a new interpreter.') - os.remove(interpreter_path_file) - return True - return False From 5dfd62fe87b83de0ae8144a3cac559f5a257bcb5 Mon Sep 17 00:00:00 2001 From: Chris Livingston Date: Wed, 6 Feb 2019 17:10:19 -0500 Subject: [PATCH 3/5] Do not change whitespace --- src/python/pants/backend/python/interpreter_cache.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/python/pants/backend/python/interpreter_cache.py b/src/python/pants/backend/python/interpreter_cache.py index a0e49ca0b04..3abb1d8abb0 100644 --- a/src/python/pants/backend/python/interpreter_cache.py +++ b/src/python/pants/backend/python/interpreter_cache.py @@ -97,6 +97,7 @@ def select_interpreter_for_targets(self, targets): tgts_by_compatibilities, total_filter_set = self.partition_targets_by_compatibility(targets) allowed_interpreters = set(self.setup(filters=total_filter_set)) + # Constrain allowed_interpreters based on each target's compatibility requirements. for compatibility in tgts_by_compatibilities: compatible_with_target = set(self._matching(allowed_interpreters, compatibility)) From 4e59198bfd74b48f4d52bf5b7b0b1bd9ec57c4e3 Mon Sep 17 00:00:00 2001 From: Chris Livingston Date: Thu, 7 Feb 2019 16:48:18 -0500 Subject: [PATCH 4/5] Simplify unit test and make jsirois changes --- .../pants/backend/python/interpreter_cache.py | 37 +++++++++---------- .../python/tasks/select_interpreter.py | 6 +-- .../backend/python/test_interpreter_cache.py | 31 ++++++++++++++++ 3 files changed, 51 insertions(+), 23 deletions(-) diff --git a/src/python/pants/backend/python/interpreter_cache.py b/src/python/pants/backend/python/interpreter_cache.py index 3abb1d8abb0..a6e25df498e 100644 --- a/src/python/pants/backend/python/interpreter_cache.py +++ b/src/python/pants/backend/python/interpreter_cache.py @@ -115,18 +115,19 @@ def select_interpreter_for_targets(self, targets): # Return the lowest compatible interpreter. return min(allowed_interpreters) - def _interpreter_from_path(self, path, filters=()): - try: - executable = os.readlink(os.path.join(path, 'python')) - if not os.path.exists(executable): - if os.path.dirname(path) == self._cache_dir: + def _interpreter_from_relpath(self, path, filters=()): + path = os.path.join(self._cache_dir, path) + if os.path.isdir(path): + try: + executable = os.readlink(os.path.join(path, 'python')) + if not os.path.exists(executable): self._purge_interpreter(path) + return None + except OSError: return None - except OSError: - return None - interpreter = PythonInterpreter.from_binary(executable, include_site_extras=False) - if self._matches(interpreter, filters=filters): - return self._resolve(interpreter) + interpreter = PythonInterpreter.from_binary(executable, include_site_extras=False) + if self._matches(interpreter, filters=filters): + return self._resolve(interpreter) return None def _setup_interpreter(self, interpreter, cache_target_path): @@ -138,22 +139,20 @@ def _setup_interpreter(self, interpreter, cache_target_path): def _setup_cached(self, filters=()): """Find all currently-cached interpreters.""" for interpreter_dir in os.listdir(self._cache_dir): - path = os.path.join(self._cache_dir, interpreter_dir) - if os.path.isdir(path): - pi = self._interpreter_from_path(path, filters=filters) - if pi: - logger.debug('Detected interpreter {}: {}'.format(pi.binary, str(pi.identity))) - yield pi + pi = self._interpreter_from_relpath(interpreter_dir, filters=filters) + if pi: + logger.debug('Detected interpreter {}: {}'.format(pi.binary, str(pi.identity))) + yield pi def _setup_paths(self, paths, filters=()): """Find interpreters under paths, and cache them.""" for interpreter in self._matching(PythonInterpreter.all(paths), filters=filters): identity_str = str(interpreter.identity) - cache_path = os.path.join(self._cache_dir, identity_str) - pi = self._interpreter_from_path(cache_path, filters=filters) + pi = self._interpreter_from_relpath(identity_str, filters=filters) if pi is None: + cache_path = os.path.join(self._cache_dir, identity_str) self._setup_interpreter(interpreter, cache_path) - pi = self._interpreter_from_path(cache_path, filters=filters) + pi = self._interpreter_from_relpath(identity_str, filters=filters) if pi: yield pi diff --git a/src/python/pants/backend/python/tasks/select_interpreter.py b/src/python/pants/backend/python/tasks/select_interpreter.py index 443275e47ce..37c8b9d643a 100644 --- a/src/python/pants/backend/python/tasks/select_interpreter.py +++ b/src/python/pants/backend/python/tasks/select_interpreter.py @@ -99,10 +99,8 @@ def _interpreter_path_file(self, target_set_id): return os.path.join(self.workdir, target_set_id, 'interpreter.info') def _detect_and_purge_invalid_interpreter(self, interpreter_path_file): - with open(interpreter_path_file, 'r') as infile: - lines = infile.readlines() - binary = lines[0].strip() - if not os.path.exists(binary): + interpreter = self._get_interpreter(interpreter_path_file) + if not os.path.exists(interpreter.binary): self.context.log.info('Stale interpreter reference detected: {}, removing reference and ' 'selecting a new interpreter.'.format(binary)) os.remove(interpreter_path_file) diff --git a/tests/python/pants_test/backend/python/test_interpreter_cache.py b/tests/python/pants_test/backend/python/test_interpreter_cache.py index 86196d710ed..6cfcbe41554 100644 --- a/tests/python/pants_test/backend/python/test_interpreter_cache.py +++ b/tests/python/pants_test/backend/python/test_interpreter_cache.py @@ -5,6 +5,7 @@ from __future__ import absolute_import, division, print_function, unicode_literals import os +import shutil from builtins import str from contextlib import contextmanager @@ -16,6 +17,7 @@ from pants.backend.python.interpreter_cache import PythonInterpreter, PythonInterpreterCache from pants.subsystem.subsystem import Subsystem from pants.util.contextutil import temporary_dir +from pants.util.dirutil import chmod_plus_x, safe_mkdir from pants_test.backend.python.interpreter_selection_utils import (PY_27, PY_36, python_interpreter_path, skip_unless_python27_and_python36) @@ -171,3 +173,32 @@ def test_setup_cached_warm(self): def test_setup_cached_cold(self): with self._setup_cache() as (cache, _): self.assertEqual([], list(cache._setup_cached())) + + def test_interpreter_from_relpath_purges_stale_interpreter(self): + """ + Simulates a stale interpreter cache and tests that _interpreter_from_relpath + properly detects it and removes the stale dist directory. + + See https://github.com/pantsbuild/pants/issues/3416 for more info. + """ + with temporary_dir() as temp_dir: + # Setup a interpreter distribution that we can safely mutate. + test_interpreter_binary = os.path.join(temp_dir, 'python2.7') + src = '/usr/bin/python2.7' + shutil.copyfile(src, test_interpreter_binary) + chmod_plus_x(test_interpreter_binary) + + with self._setup_cache(constraints=[]) as (cache, path): + # Setup cache for test interpreter distribution. + identity_str = str(PythonInterpreter.from_binary(test_interpreter_binary).identity) + cached_interpreter_dir = os.path.join(cache._cache_dir, identity_str) + safe_mkdir(cached_interpreter_dir) + cached_symlink = os.path.join(cached_interpreter_dir, 'python') + os.symlink(test_interpreter_binary, cached_symlink) + + # Remove the test interpreter binary from filesystem and assert that the cache is purged. + os.remove(test_interpreter_binary) + self.assertEqual(os.path.exists(test_interpreter_binary), False) + self.assertEqual(os.path.exists(cached_interpreter_dir), True) + cache._interpreter_from_relpath(identity_str) + self.assertEqual(os.path.exists(cached_interpreter_dir), False) From b8c6ebfc58380228a896c02c332f535d47b5eb1b Mon Sep 17 00:00:00 2001 From: Chris Livingston Date: Fri, 8 Feb 2019 11:54:35 -0500 Subject: [PATCH 5/5] 2nd round of jsirois feedback --- .../pants/backend/python/interpreter_cache.py | 25 ++++++----- .../python/tasks/select_interpreter.py | 2 +- .../backend/python/test_interpreter_cache.py | 43 ++++++++++--------- 3 files changed, 36 insertions(+), 34 deletions(-) diff --git a/src/python/pants/backend/python/interpreter_cache.py b/src/python/pants/backend/python/interpreter_cache.py index a6e25df498e..95908ddbd75 100644 --- a/src/python/pants/backend/python/interpreter_cache.py +++ b/src/python/pants/backend/python/interpreter_cache.py @@ -117,20 +117,20 @@ def select_interpreter_for_targets(self, targets): def _interpreter_from_relpath(self, path, filters=()): path = os.path.join(self._cache_dir, path) - if os.path.isdir(path): - try: - executable = os.readlink(os.path.join(path, 'python')) - if not os.path.exists(executable): - self._purge_interpreter(path) - return None - except OSError: + try: + executable = os.readlink(os.path.join(path, 'python')) + if not os.path.exists(executable): + self._purge_interpreter(path) return None - interpreter = PythonInterpreter.from_binary(executable, include_site_extras=False) - if self._matches(interpreter, filters=filters): - return self._resolve(interpreter) + except OSError: + return None + interpreter = PythonInterpreter.from_binary(executable, include_site_extras=False) + if self._matches(interpreter, filters=filters): + return self._resolve(interpreter) return None - def _setup_interpreter(self, interpreter, cache_target_path): + def _setup_interpreter(self, interpreter, identity_str): + cache_target_path = os.path.join(self._cache_dir, identity_str) with safe_concurrent_creation(cache_target_path) as safe_path: os.mkdir(safe_path) # Parent will already have been created by safe_concurrent_creation. os.symlink(interpreter.binary, os.path.join(safe_path, 'python')) @@ -150,8 +150,7 @@ def _setup_paths(self, paths, filters=()): identity_str = str(interpreter.identity) pi = self._interpreter_from_relpath(identity_str, filters=filters) if pi is None: - cache_path = os.path.join(self._cache_dir, identity_str) - self._setup_interpreter(interpreter, cache_path) + self._setup_interpreter(interpreter, identity_str) pi = self._interpreter_from_relpath(identity_str, filters=filters) if pi: yield pi diff --git a/src/python/pants/backend/python/tasks/select_interpreter.py b/src/python/pants/backend/python/tasks/select_interpreter.py index 37c8b9d643a..24a06809f47 100644 --- a/src/python/pants/backend/python/tasks/select_interpreter.py +++ b/src/python/pants/backend/python/tasks/select_interpreter.py @@ -102,7 +102,7 @@ def _detect_and_purge_invalid_interpreter(self, interpreter_path_file): interpreter = self._get_interpreter(interpreter_path_file) if not os.path.exists(interpreter.binary): self.context.log.info('Stale interpreter reference detected: {}, removing reference and ' - 'selecting a new interpreter.'.format(binary)) + 'selecting a new interpreter.'.format(interpreter.binary)) os.remove(interpreter_path_file) return True return False diff --git a/tests/python/pants_test/backend/python/test_interpreter_cache.py b/tests/python/pants_test/backend/python/test_interpreter_cache.py index 6cfcbe41554..84cf353cf2d 100644 --- a/tests/python/pants_test/backend/python/test_interpreter_cache.py +++ b/tests/python/pants_test/backend/python/test_interpreter_cache.py @@ -6,6 +6,7 @@ import os import shutil +import sys from builtins import str from contextlib import contextmanager @@ -16,8 +17,8 @@ from pants.backend.python.interpreter_cache import PythonInterpreter, PythonInterpreterCache from pants.subsystem.subsystem import Subsystem -from pants.util.contextutil import temporary_dir -from pants.util.dirutil import chmod_plus_x, safe_mkdir +from pants.util.contextutil import environment_as, temporary_dir +from pants.util.dirutil import safe_mkdir from pants_test.backend.python.interpreter_selection_utils import (PY_27, PY_36, python_interpreter_path, skip_unless_python27_and_python36) @@ -184,21 +185,23 @@ def test_interpreter_from_relpath_purges_stale_interpreter(self): with temporary_dir() as temp_dir: # Setup a interpreter distribution that we can safely mutate. test_interpreter_binary = os.path.join(temp_dir, 'python2.7') - src = '/usr/bin/python2.7' - shutil.copyfile(src, test_interpreter_binary) - chmod_plus_x(test_interpreter_binary) - - with self._setup_cache(constraints=[]) as (cache, path): - # Setup cache for test interpreter distribution. - identity_str = str(PythonInterpreter.from_binary(test_interpreter_binary).identity) - cached_interpreter_dir = os.path.join(cache._cache_dir, identity_str) - safe_mkdir(cached_interpreter_dir) - cached_symlink = os.path.join(cached_interpreter_dir, 'python') - os.symlink(test_interpreter_binary, cached_symlink) - - # Remove the test interpreter binary from filesystem and assert that the cache is purged. - os.remove(test_interpreter_binary) - self.assertEqual(os.path.exists(test_interpreter_binary), False) - self.assertEqual(os.path.exists(cached_interpreter_dir), True) - cache._interpreter_from_relpath(identity_str) - self.assertEqual(os.path.exists(cached_interpreter_dir), False) + src = os.path.realpath(sys.executable) + sys_exe_dist = os.path.dirname(os.path.dirname(src)) + shutil.copy2(src, test_interpreter_binary) + with environment_as( + PYTHONPATH='{}'.format(os.path.join(sys_exe_dist, 'lib/python2.7')) + ): + with self._setup_cache(constraints=[]) as (cache, path): + # Setup cache for test interpreter distribution. + identity_str = str(PythonInterpreter.from_binary(test_interpreter_binary).identity) + cached_interpreter_dir = os.path.join(cache._cache_dir, identity_str) + safe_mkdir(cached_interpreter_dir) + cached_symlink = os.path.join(cached_interpreter_dir, 'python') + os.symlink(test_interpreter_binary, cached_symlink) + + # Remove the test interpreter binary from filesystem and assert that the cache is purged. + os.remove(test_interpreter_binary) + self.assertEqual(os.path.exists(test_interpreter_binary), False) + self.assertEqual(os.path.exists(cached_interpreter_dir), True) + cache._interpreter_from_relpath(identity_str) + self.assertEqual(os.path.exists(cached_interpreter_dir), False)