Implement caching of pytest results.

This saves off both junit xml and coverage files (when generated).
pantsbuild · Jul 14, 2017 · 38845c7 · 38845c7
1 parent df55949
commit 38845c7
Show file tree

Hide file tree

Showing 3 changed files with 165 additions and 38 deletions.
diff --git a/contrib/scrooge/tests/python/pants_test/contrib/scrooge/tasks/test_scrooge_gen.py b/contrib/scrooge/tests/python/pants_test/contrib/scrooge/tasks/test_scrooge_gen.py
@@ -15,12 +15,12 @@
 from pants.base.exceptions import TargetDefinitionException, TaskError
 from pants.build_graph.build_file_aliases import BuildFileAliases
 from pants.goal.context import Context
-
 from pants_test.jvm.nailgun_task_test_base import NailgunTaskTestBase
 from twitter.common.collections import OrderedSet
 
 from pants.contrib.scrooge.tasks.scrooge_gen import ScroogeGen
 
+
 GEN_ADAPT = '--gen-adapt'
 
 

diff --git a/src/python/pants/backend/python/tasks2/BUILD b/src/python/pants/backend/python/tasks2/BUILD
@@ -11,6 +11,7 @@ python_library(
     'src/python/pants/backend/python/targets',
     'src/python/pants/base:build_environment',
     'src/python/pants/base:exceptions',
+    'src/python/pants/base:fingerprint_strategy',
     'src/python/pants/base:specs',
     'src/python/pants/build_graph',
     'src/python/pants/invalidation',
@@ -21,6 +22,7 @@ python_library(
     'src/python/pants/util:fileutil',
     'src/python/pants/util:meta',
     'src/python/pants/util:memo',
+    'src/python/pants/util:objects',
     'src/python/pants/util:process_handler',
     'src/python/pants/util:xml_parser',
   ]

diff --git a/src/python/pants/backend/python/tasks2/pytest_run.py b/src/python/pants/backend/python/tasks2/pytest_run.py
@@ -10,6 +10,7 @@
 import shutil
 import time
 import traceback
+import uuid
 from contextlib import contextmanager
 from textwrap import dedent
 
@@ -21,13 +22,17 @@
 from pants.backend.python.tasks2.pytest_prep import PytestPrep
 from pants.base.build_environment import get_buildroot
 from pants.base.exceptions import ErrorWhileTesting, TaskError
+from pants.base.fingerprint_strategy import DefaultFingerprintStrategy
 from pants.base.hash_utils import Sharder
 from pants.base.workunit import WorkUnitLabel
 from pants.build_graph.target import Target
+from pants.invalidation.cache_manager import VersionedTargetSet
 from pants.task.task import Task
 from pants.task.testrunner_task_mixin import TestRunnerTaskMixin
 from pants.util.contextutil import temporary_dir, temporary_file
-from pants.util.dirutil import safe_mkdir, safe_mkdir_for
+from pants.util.dirutil import safe_mkdir, safe_mkdir_for, safe_walk
+from pants.util.memo import memoized_method, memoized_property
+from pants.util.objects import datatype
 from pants.util.process_handler import SubprocessProcessHandler
 from pants.util.strutil import safe_shlex_split
 from pants.util.xml_parser import XmlParser
@@ -104,22 +109,34 @@ def register_options(cls, register):
              help='Run all tests in a single pytest invocation. If turned off, each test target '
                   'will run in its own pytest invocation, which will be slower, but isolates '
                   'tests from process-wide state created by tests in other targets.')
-    register('--junit-xml-dir', metavar='<DIR>', fingerprint=True,
+
+    # NB: We always produce junit xml privately, and if this option is specified, we then copy
+    # it to the user-specified directory, post any interaction with the cache to retrieve the
+    # privately generated and cached xml files. As such, this option is not part of the
+    # fingerprint.
+    register('--junit-xml-dir', metavar='<DIR>',
              help='Specifying a directory causes junit xml results files to be emitted under '
                   'that dir for each test run.')
+
     register('--profile', metavar='<FILE>', fingerprint=True,
              help="Specifying a file path causes tests to be profiled with the profiling data "
                   "emitted to that file (prefix). Note that tests may run in a different cwd, so "
                   "it's best to use an absolute path to make it easy to find the subprocess "
                   "profiles later.")
+
     register('--options', type=list, fingerprint=True, help='Pass these options to pytest.')
+
     register('--coverage', fingerprint=True,
-             help='Emit coverage information for specified packages or directories (absolute or'
+             help='Emit coverage information for specified packages or directories (absolute or '
                   'relative to the build root).  The special value "auto" indicates that Pants '
                   'should attempt to deduce which packages to emit coverage for.')
-    register('--coverage-output-dir', metavar='<DIR>', default=None, fingerprint=True,
+    # For a given --coverage specification (which is fingerprinted), we will always copy the
+    # associated generated and cached --coverage files to this directory post any interaction with
+    # the cache to retrieve the coverage files. As such, this option is not part of the fingerprint.
+    register('--coverage-output-dir', metavar='<DIR>', default=None,
              help='Directory to emit coverage reports to.'
              'If not specified, a default within dist is used.')
+
     register('--test-shard', fingerprint=True,
              help='Subset of tests to run, in the form M/N, 0 <= M < N. For example, 1/3 means '
                   'run tests number 2, 5, 8, 11, ...')
@@ -151,12 +168,6 @@ def _execute(self, all_targets):
   class InvalidShardSpecification(TaskError):
     """Indicates an invalid `--test-shard` option."""
 
-  def _get_junit_xml_path(self, targets):
-    xml_path = os.path.join(self.workdir, 'junitxml',
-                            'TEST-{}.xml'.format(Target.maybe_readable_identify(targets)))
-    safe_mkdir_for(xml_path)
-    return xml_path
-
   DEFAULT_COVERAGE_CONFIG = dedent(b"""
     [run]
     branch = True
@@ -220,12 +231,12 @@ def add_realpath(path):
     return cp
 
   @contextmanager
-  def _cov_setup(self, source_mappings, coverage_sources=None):
+  def _cov_setup(self, workdirs, source_mappings, coverage_sources=None):
     cp = self._generate_coverage_config(source_mappings=source_mappings)
     # Note that it's important to put the tmpfile under the workdir, because pytest
     # uses all arguments that look like paths to compute its rootdir, and we want
     # it to pick the buildroot.
-    with temporary_file(root_dir=self.workdir) as fp:
+    with temporary_file(root_dir=workdirs.root_dir) as fp:
       cp.write(fp)
       fp.close()
       coverage_rc = fp.name
@@ -237,7 +248,7 @@ def _cov_setup(self, source_mappings, coverage_sources=None):
       yield args, coverage_rc
 
   @contextmanager
-  def _maybe_emit_coverage_data(self, targets, pex):
+  def _maybe_emit_coverage_data(self, workdirs, targets, pex):
     coverage = self.get_options().coverage
     if coverage is None:
       yield []
@@ -297,7 +308,8 @@ def compute_coverage_sources(tgt):
           # The source is to be interpreted as a package name.
           coverage_sources.append(source)
 
-    with self._cov_setup(source_mappings,
+    with self._cov_setup(workdirs,
+                         source_mappings,
                          coverage_sources=coverage_sources) as (args, coverage_rc):
       try:
         yield args
@@ -318,15 +330,10 @@ def pex_run(arguments):
           shutil.move('.coverage', '.coverage.raw')
           pex_run(['combine', '--rcfile', coverage_rc])
           pex_run(['report', '-i', '--rcfile', coverage_rc])
-          if self.get_options().coverage_output_dir:
-            target_dir = self.get_options().coverage_output_dir
-          else:
-            relpath = Target.maybe_readable_identify(targets)
-            pants_distdir = self.context.options.for_global_scope().pants_distdir
-            target_dir = os.path.join(pants_distdir, 'coverage', relpath)
-          safe_mkdir(target_dir)
-          pex_run(['html', '-i', '--rcfile', coverage_rc, '-d', target_dir])
-          coverage_xml = os.path.join(target_dir, 'coverage.xml')
+
+          coverage_workdir = workdirs.coverage_path
+          pex_run(['html', '-i', '--rcfile', coverage_rc, '-d', coverage_workdir])
+          coverage_xml = os.path.join(coverage_workdir, 'coverage.xml')
           pex_run(['xml', '-i', '--rcfile', coverage_rc, '-o', coverage_xml])
 
   def _get_shard_conftest_content(self):
@@ -417,10 +424,10 @@ def _conftest(self, sources_map):
       yield conftest
 
   @contextmanager
-  def _test_runner(self, targets, sources_map):
+  def _test_runner(self, workdirs, targets, sources_map):
+    """Yields the pytest pex together with the extra args needed to run the given targets.
+
+    The yielded args are the generated conftest followed by any coverage args.
+
+    :param workdirs: Passed through to `_maybe_emit_coverage_data`.
+    :param targets: Passed through to `_maybe_emit_coverage_data`.
+    :param sources_map: Passed to `_conftest` to generate the conftest file.
+    """
     pex = self.context.products.get_data(PytestPrep.PYTEST_BINARY)
     with self._conftest(sources_map) as conftest:
-      with self._maybe_emit_coverage_data(targets, pex) as coverage_args:
+      with self._maybe_emit_coverage_data(workdirs, targets, pex) as coverage_args:
         yield pex, [conftest] + coverage_args
 
   def _do_run_tests_with_args(self, pex, args):
@@ -539,16 +546,137 @@ def _run_tests(self, targets):
       # A low-level test execution failure occurred before tests were run.
       raise TaskError()
 
-  def _do_run_tests(self, targets):
-    with self.invalidated(targets,
+  @staticmethod
+  def _vts_for_partition(invalidation_check):
+    """Builds a single `VersionedTargetSet` from every versioned target in the given check.
+
+    :param invalidation_check: An invalidation check, such as the one yielded by
+                               `self.invalidated(...)`; only its `all_vts` attribute is read.
+    :returns: The result of `VersionedTargetSet.from_versioned_targets` applied to `all_vts`.
+    """
+    return VersionedTargetSet.from_versioned_targets(invalidation_check.all_vts)
+
+  def check_artifact_cache_for(self, invalidation_check):
+    """Returns the versioned target sets to look up in the artifact cache.
+
+    NOTE(review): presumably this overrides a hook on the base `Task` - confirm.
+
+    :param invalidation_check: The invalidation check for the partition being tested.
+    :returns: A one-element list holding the partition-wide versioned target set.
+    """
+    # We generate artifacts, namely junit.xml and coverage reports, that cover the full target set
+    # whether that is all targets in the context (`--fast`) or each target
+    # individually (`--no-fast`).
+    return [self._vts_for_partition(invalidation_check)]
+
+  @staticmethod
+  def _copy_dir(src, dst):
+    """Recursively copies the contents of `src` into `dst`, overwriting files that already exist.
+
+    :param src: The directory to copy from.
+    :param dst: The directory to copy into; it is created if needed and may already have content.
+    """
+    # NB: shutil.copytree requires that dst does not already exist; we tolerate that case.
+    safe_mkdir(dst)
+    for src_path, dirnames, filenames in safe_walk(src, topdown=True):
+      # Mirror the position of the directory being visited underneath dst.
+      dst_path = os.path.join(dst, os.path.relpath(src_path, src))
+      for dirname in dirnames:
+        safe_mkdir(os.path.join(dst_path, dirname))
+      for filename in filenames:
+        dst_filename = os.path.join(dst_path, filename)
+        # Remove any file left over from an earlier copy before writing the new one.
+        if os.path.exists(dst_filename):
+          os.unlink(dst_filename)
+        shutil.copy2(os.path.join(src_path, filename), dst_filename)
+
+  class Workdirs(datatype('Workdirs', ['root_dir'])):
+    """The private working directories that hold the results of one test run.
+
+    Everything lives under `root_dir`: the junit xml files in a `junitxml` subdirectory and the
+    coverage reports in a `coverage` subdirectory.
+    """
+
+    @classmethod
+    def for_targets(cls, work_dir, targets):
+      """Creates the workdirs for the given targets beneath `work_dir`.
+
+      :param work_dir: The parent directory to create the root dir under.
+      :param targets: The targets whose combined identity names the root dir.
+      :returns: A `Workdirs` rooted at the (possibly pre-existing) directory.
+      """
+      root_dir = os.path.join(work_dir, Target.maybe_readable_identify(targets))
+      # NB: The directory is deliberately not cleaned; presumably results staged there by an
+      # earlier run or by the artifact cache must survive - confirm before changing.
+      safe_mkdir(root_dir, clean=False)
+      return cls(root_dir=root_dir)
+
+    @memoized_method
+    def junitxml_path(self, *targets):
+      """Returns the path of the junit xml file for the given targets.
+
+      The parent directory of the returned path is created; the file itself is not.
+      """
+      xml_path = os.path.join(self.root_dir, 'junitxml',
+                              'TEST-{}.xml'.format(Target.maybe_readable_identify(targets)))
+      safe_mkdir_for(xml_path)
+      return xml_path
+
+    @memoized_property
+    def coverage_path(self):
+      """The directory that coverage reports are written to; it is created on first access."""
+      coverage_workdir = os.path.join(self.root_dir, 'coverage')
+      safe_mkdir(coverage_workdir)
+      return coverage_workdir
+
+    def files(self):
+      """Returns a list of the paths of all files currently found under `root_dir`."""
+      return [os.path.join(dir_path, filename)
+              for dir_path, _, file_names in os.walk(self.root_dir)
+              for filename in file_names]
+
+  # TODO(John Sirois): It's probably worth generalizing a means to mark certain options or target
+  # attributes as making results un-cacheable.
+  class NeverCacheFingerprintStrategy(DefaultFingerprintStrategy):
+    """A fingerprint strategy that produces a fresh random fingerprint on every computation."""
+
+    def compute_fingerprint(self, target):
+      """Returns a new random fingerprint string, ignoring the content of `target`."""
+      # NB: Return the text form rather than the `uuid.UUID` object itself; a UUID object cannot
+      # be fed to a hasher (presumably fingerprints are hashed downstream - confirm).
+      return str(uuid.uuid4())
+
+  def _fingerprint_strategy(self):
+    """Selects the fingerprint strategy to use when invalidating test targets.
+
+    :returns: A `NeverCacheFingerprintStrategy` when the `--profile` option is set, otherwise
+              `None`.
+    """
+    if self.get_options().profile:
+      # A profile is machine-specific and we assume anyone wanting a profile wants to run it here
+      # and now and not accept some old result, even if on the same inputs.
+      return self.NeverCacheFingerprintStrategy()
+    else:
+      return None  # Accept the default fingerprint strategy.
+
+  def _do_run_tests(self, partition):
+    """Runs pytest for the invalid targets of `partition` and exposes the generated results.
+
+    Results are first written to a private per-partition workdir, which is what gets cached, and
+    are then copied to the `--junit-xml-dir` and coverage output locations.
+
+    :param partition: The targets to test together as one unit.
+    :returns: The checked pytest result for the invalid targets of the partition.
+    """
+    with self.invalidated(partition,
+                          fingerprint_strategy=self._fingerprint_strategy(),
                           # Re-run tests when the code they test (and depend on) changes.
                           invalidate_dependents=True) as invalidation_check:
 
-      invalid_tgts = [tgt for vts in invalidation_check.invalid_vts for tgt in vts.targets]
-      result = self._run_pytest(invalid_tgts)
-      return result.checked()
+      invalid_tgts = [invalid_tgt
+                      for vts in invalidation_check.invalid_vts
+                      for invalid_tgt in vts.targets]
+
+      # need to go:
+      # 1.) output -> workdir
+      # 2.) [iff all == invalid] workdir -> cache
+      # 3.) [iff invalid == 0 and all > 0] cache -> workdir
+      # 4.) [iff finals] workdir -> finals
+
+      # 1.) Write all results that will be potentially cached to workdir.
+      workdirs = self.Workdirs.for_targets(self.workdir, partition)
+      # NOTE(review): if `checked()` raises for a failed run (presumably it does - confirm), none
+      # of the steps below execute and no junit xml or coverage is copied out for that run.
+      result = self._run_pytest(workdirs, invalid_tgts).checked()
+
+      cache_vts = self._vts_for_partition(invalidation_check)
+      if invalidation_check.all_vts == invalidation_check.invalid_vts:
+        # 2.) The full partition was invalid, cache results.
+        if self.artifact_cache_writes_enabled():
+          self.update_artifact_cache([(cache_vts, workdirs.files())])
+      elif not invalidation_check.invalid_vts:
+        # 3.) The full partition was valid, our results will have been staged for/by caching if not
+        # already local.
+        pass
+      else:
+        # The partition was partially invalid.
+
+        # We don't cache results; so others will need to re-run this partition.
+        # NB: We will presumably commit this change now though and so others will get this
+        # partition in a state that executes successfully; so when the 1st of the others
+        # executes against this partition; they will hit `all_vts == invalid_vts` and
+        # cache the results. That 1st of others is hopefully CI!
+        cache_vts.force_invalidate()
+
+      # 4.) Pluck any results that an end user might need to interact with from the workdir to the
+      # locations they expect.
 
-  def _run_pytest(self, targets):
+      external_junit_xml_dir = self.get_options().junit_xml_dir
+      if external_junit_xml_dir:
+        # Either we just ran pytest for a set of invalid targets and generated a junit xml file
+        # specific to that (sub)set or else we hit the cache for the whole partition and skipped
+        # running pytest, simply retrieving the partition's full junit xml file.
+        junitxml_path = workdirs.junitxml_path(*(invalid_tgts or partition))
+
+        # NB: There is no junit xml when pytest never ran (e.g.: no test sources were found), so
+        # only copy a file that exists instead of failing on a missing one.
+        if os.path.exists(junitxml_path):
+          safe_mkdir(external_junit_xml_dir)
+          shutil.copy2(junitxml_path, external_junit_xml_dir)
+
+      if self.get_options().coverage:
+        coverage_output_dir = self.get_options().coverage_output_dir
+        if coverage_output_dir:
+          target_dir = coverage_output_dir
+        else:
+          relpath = Target.maybe_readable_identify(partition)
+          pants_distdir = self.context.options.for_global_scope().pants_distdir
+          target_dir = os.path.join(pants_distdir, 'coverage', relpath)
+        self._copy_dir(workdirs.coverage_path, target_dir)
+
+      return result
+
+  def _run_pytest(self, workdirs, targets):
     if not targets:
       return PytestResult.rc(0)
 
@@ -563,14 +691,15 @@ def _run_pytest(self, targets):
     if not sources_map:
       return PytestResult.rc(0)
 
-    with self._test_runner(targets, sources_map) as (pex, test_args):
+    with self._test_runner(workdirs, targets, sources_map) as (pex, test_args):
       # Validate that the user didn't provide any passthru args that conflict
       # with those we must set ourselves.
       for arg in self.get_passthru_args():
         if arg.startswith('--junitxml') or arg.startswith('--confcutdir'):
           raise TaskError('Cannot pass this arg through to pytest: {}'.format(arg))
 
-      junitxml_path = self._get_junit_xml_path(targets)
+      junitxml_path = workdirs.junitxml_path(*targets)
+
       # N.B. the `--confcutdir` here instructs pytest to stop scanning for conftest.py files at the
       # top of the buildroot. This prevents conftest.py files from outside (e.g. in users home dirs)
       # from leaking into pants test runs. See: https://github.com/pantsbuild/pants/issues/2726
@@ -599,10 +728,6 @@ def _run_pytest(self, targets):
       if not os.path.exists(junitxml_path):
         return result
 
-      external_junit_xml_dir = self.get_options().junit_xml_dir
-      if external_junit_xml_dir:
-        safe_mkdir(external_junit_xml_dir)
-        shutil.copy(junitxml_path, external_junit_xml_dir)
       failed_targets = self._get_failed_targets_from_junitxml(junitxml_path, targets)
 
       def parse_error_handler(parse_error):