Skip to content

Commit

Permalink
Implement caching of pytest results.
Browse files Browse the repository at this point in the history
This saves off both junit xml and coverage files (when generated).
  • Loading branch information
jsirois committed Jul 14, 2017
1 parent df55949 commit 38845c7
Show file tree
Hide file tree
Showing 3 changed files with 165 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@
from pants.base.exceptions import TargetDefinitionException, TaskError
from pants.build_graph.build_file_aliases import BuildFileAliases
from pants.goal.context import Context

from pants_test.jvm.nailgun_task_test_base import NailgunTaskTestBase
from twitter.common.collections import OrderedSet

from pants.contrib.scrooge.tasks.scrooge_gen import ScroogeGen


GEN_ADAPT = '--gen-adapt'


Expand Down
2 changes: 2 additions & 0 deletions src/python/pants/backend/python/tasks2/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ python_library(
'src/python/pants/backend/python/targets',
'src/python/pants/base:build_environment',
'src/python/pants/base:exceptions',
'src/python/pants/base:fingerprint_strategy',
'src/python/pants/base:specs',
'src/python/pants/build_graph',
'src/python/pants/invalidation',
Expand All @@ -21,6 +22,7 @@ python_library(
'src/python/pants/util:fileutil',
'src/python/pants/util:meta',
'src/python/pants/util:memo',
'src/python/pants/util:objects',
'src/python/pants/util:process_handler',
'src/python/pants/util:xml_parser',
]
Expand Down
199 changes: 162 additions & 37 deletions src/python/pants/backend/python/tasks2/pytest_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import shutil
import time
import traceback
import uuid
from contextlib import contextmanager
from textwrap import dedent

Expand All @@ -21,13 +22,17 @@
from pants.backend.python.tasks2.pytest_prep import PytestPrep
from pants.base.build_environment import get_buildroot
from pants.base.exceptions import ErrorWhileTesting, TaskError
from pants.base.fingerprint_strategy import DefaultFingerprintStrategy
from pants.base.hash_utils import Sharder
from pants.base.workunit import WorkUnitLabel
from pants.build_graph.target import Target
from pants.invalidation.cache_manager import VersionedTargetSet
from pants.task.task import Task
from pants.task.testrunner_task_mixin import TestRunnerTaskMixin
from pants.util.contextutil import temporary_dir, temporary_file
from pants.util.dirutil import safe_mkdir, safe_mkdir_for
from pants.util.dirutil import safe_mkdir, safe_mkdir_for, safe_walk
from pants.util.memo import memoized_method, memoized_property
from pants.util.objects import datatype
from pants.util.process_handler import SubprocessProcessHandler
from pants.util.strutil import safe_shlex_split
from pants.util.xml_parser import XmlParser
Expand Down Expand Up @@ -104,22 +109,34 @@ def register_options(cls, register):
help='Run all tests in a single pytest invocation. If turned off, each test target '
'will run in its own pytest invocation, which will be slower, but isolates '
'tests from process-wide state created by tests in other targets.')
register('--junit-xml-dir', metavar='<DIR>', fingerprint=True,

# NB: We always produce junit xml privately, and if this option is specified, we then copy
# it to the user-specified directory, post any interaction with the cache to retrieve the
# privately generated and cached xml files. As such, this option is not part of the
# fingerprint.
register('--junit-xml-dir', metavar='<DIR>',
help='Specifying a directory causes junit xml results files to be emitted under '
'that dir for each test run.')

register('--profile', metavar='<FILE>', fingerprint=True,
help="Specifying a file path causes tests to be profiled with the profiling data "
"emitted to that file (prefix). Note that tests may run in a different cwd, so "
"it's best to use an absolute path to make it easy to find the subprocess "
"profiles later.")

register('--options', type=list, fingerprint=True, help='Pass these options to pytest.')

register('--coverage', fingerprint=True,
help='Emit coverage information for specified packages or directories (absolute or'
help='Emit coverage information for specified packages or directories (absolute or '
'relative to the build root). The special value "auto" indicates that Pants '
'should attempt to deduce which packages to emit coverage for.')
register('--coverage-output-dir', metavar='<DIR>', default=None, fingerprint=True,
# For a given --coverage specification (which is fingerprinted), we will always copy the
# associated generated and cached --coverage files to this directory post any interaction with
# the cache to retrieve the coverage files. As such, this option is not part of the fingerprint.
register('--coverage-output-dir', metavar='<DIR>', default=None,
help='Directory to emit coverage reports to.'
'If not specified, a default within dist is used.')

register('--test-shard', fingerprint=True,
help='Subset of tests to run, in the form M/N, 0 <= M < N. For example, 1/3 means '
'run tests number 2, 5, 8, 11, ...')
Expand Down Expand Up @@ -151,12 +168,6 @@ def _execute(self, all_targets):
class InvalidShardSpecification(TaskError):
"""Indicates an invalid `--test-shard` option."""

def _get_junit_xml_path(self, targets):
xml_path = os.path.join(self.workdir, 'junitxml',
'TEST-{}.xml'.format(Target.maybe_readable_identify(targets)))
safe_mkdir_for(xml_path)
return xml_path

DEFAULT_COVERAGE_CONFIG = dedent(b"""
[run]
branch = True
Expand Down Expand Up @@ -220,12 +231,12 @@ def add_realpath(path):
return cp

@contextmanager
def _cov_setup(self, source_mappings, coverage_sources=None):
def _cov_setup(self, workdirs, source_mappings, coverage_sources=None):
cp = self._generate_coverage_config(source_mappings=source_mappings)
# Note that it's important to put the tmpfile under the workdir, because pytest
# uses all arguments that look like paths to compute its rootdir, and we want
# it to pick the buildroot.
with temporary_file(root_dir=self.workdir) as fp:
with temporary_file(root_dir=workdirs.root_dir) as fp:
cp.write(fp)
fp.close()
coverage_rc = fp.name
Expand All @@ -237,7 +248,7 @@ def _cov_setup(self, source_mappings, coverage_sources=None):
yield args, coverage_rc

@contextmanager
def _maybe_emit_coverage_data(self, targets, pex):
def _maybe_emit_coverage_data(self, workdirs, targets, pex):
coverage = self.get_options().coverage
if coverage is None:
yield []
Expand Down Expand Up @@ -297,7 +308,8 @@ def compute_coverage_sources(tgt):
# The source is to be interpreted as a package name.
coverage_sources.append(source)

with self._cov_setup(source_mappings,
with self._cov_setup(workdirs,
source_mappings,
coverage_sources=coverage_sources) as (args, coverage_rc):
try:
yield args
Expand All @@ -318,15 +330,10 @@ def pex_run(arguments):
shutil.move('.coverage', '.coverage.raw')
pex_run(['combine', '--rcfile', coverage_rc])
pex_run(['report', '-i', '--rcfile', coverage_rc])
if self.get_options().coverage_output_dir:
target_dir = self.get_options().coverage_output_dir
else:
relpath = Target.maybe_readable_identify(targets)
pants_distdir = self.context.options.for_global_scope().pants_distdir
target_dir = os.path.join(pants_distdir, 'coverage', relpath)
safe_mkdir(target_dir)
pex_run(['html', '-i', '--rcfile', coverage_rc, '-d', target_dir])
coverage_xml = os.path.join(target_dir, 'coverage.xml')

coverage_workdir = workdirs.coverage_path
pex_run(['html', '-i', '--rcfile', coverage_rc, '-d', coverage_workdir])
coverage_xml = os.path.join(coverage_workdir, 'coverage.xml')
pex_run(['xml', '-i', '--rcfile', coverage_rc, '-o', coverage_xml])

def _get_shard_conftest_content(self):
Expand Down Expand Up @@ -417,10 +424,10 @@ def _conftest(self, sources_map):
yield conftest

@contextmanager
def _test_runner(self, targets, sources_map):
def _test_runner(self, workdirs, targets, sources_map):
pex = self.context.products.get_data(PytestPrep.PYTEST_BINARY)
with self._conftest(sources_map) as conftest:
with self._maybe_emit_coverage_data(targets, pex) as coverage_args:
with self._maybe_emit_coverage_data(workdirs, targets, pex) as coverage_args:
yield pex, [conftest] + coverage_args

def _do_run_tests_with_args(self, pex, args):
Expand Down Expand Up @@ -539,16 +546,137 @@ def _run_tests(self, targets):
# A low-level test execution failure occurred before tests were run.
raise TaskError()

def _do_run_tests(self, targets):
with self.invalidated(targets,
@staticmethod
def _vts_for_partition(invalidation_check):
  """Build a single VersionedTargetSet out of every versioned target in the check.

  :param invalidation_check: An object exposing the versioned targets of a partition via its
                             `all_vts` attribute.
  :returns: Whatever `VersionedTargetSet.from_versioned_targets` produces for `all_vts`.
  """
  every_vts = invalidation_check.all_vts
  return VersionedTargetSet.from_versioned_targets(every_vts)

def check_artifact_cache_for(self, invalidation_check):
  """Return the units to check the artifact cache for: the partition as one combined entry.

  The artifacts produced here (junit.xml and coverage reports) describe the full target set that
  was run together; that is every target in the context under `--fast`, or a single target under
  `--no-fast`. The lookup is therefore keyed on the partition as a whole.

  :param invalidation_check: The invalidation check for the partition.
  :returns: A one-element list holding the combined versioned target set for the partition.
  """
  partition_vts = self._vts_for_partition(invalidation_check)
  return [partition_vts]

@staticmethod
def _copy_dir(src, dst):
  """Mirror the directory tree rooted at `src` into `dst`, replacing files already present.

  `shutil.copytree` is not used because it requires that `dst` not exist yet, whereas an existing
  `dst` is tolerated here.

  :param src: Path of the directory tree to copy from.
  :param dst: Path of the directory to copy into.
  """
  safe_mkdir(dst)
  for walk_root, subdirs, names in safe_walk(src, topdown=True):
    # The directory under `dst` that corresponds to the directory currently being walked.
    mirror_root = os.path.join(dst, os.path.relpath(walk_root, src))
    for subdir in subdirs:
      safe_mkdir(os.path.join(mirror_root, subdir))
    for name in names:
      copy_to = os.path.join(mirror_root, name)
      # Remove any file already at the destination path before copying over it.
      if os.path.exists(copy_to):
        os.unlink(copy_to)
      shutil.copy2(os.path.join(walk_root, name), copy_to)

class Workdirs(datatype('Workdirs', ['root_dir'])):
  """The locations under a single `root_dir` where a set of targets' results are written."""

  @classmethod
  def for_targets(cls, work_dir, targets):
    """Create the Workdirs for `targets`, rooted beneath `work_dir`.

    The root directory is named by `Target.maybe_readable_identify(targets)` and is ensured via
    `safe_mkdir` with `clean=False`.
    """
    partition_dir = os.path.join(work_dir, Target.maybe_readable_identify(targets))
    safe_mkdir(partition_dir, clean=False)
    return cls(root_dir=partition_dir)

  @memoized_method
  def junitxml_path(self, *targets):
    """Return the junit xml file path for `targets`, ensuring its parent directory exists."""
    report_name = 'TEST-{}.xml'.format(Target.maybe_readable_identify(targets))
    report_path = os.path.join(self.root_dir, 'junitxml', report_name)
    safe_mkdir_for(report_path)
    return report_path

  @memoized_property
  def coverage_path(self):
    """The `coverage` directory under `root_dir`; it is ensured to exist when first accessed."""
    coverage_dir = os.path.join(self.root_dir, 'coverage')
    safe_mkdir(coverage_dir)
    return coverage_dir

  def files(self):
    """Return a list with the path of every file found by walking `root_dir`."""
    return [os.path.join(parent, name)
            for parent, _, names in os.walk(self.root_dir)
            for name in names]

# TODO(John Sirois): It's probably worth generalizing a means to mark certain options or target
# attributes as making results un-cacheable.
class NeverCacheFingerprintStrategy(DefaultFingerprintStrategy):
  """A fingerprint strategy that hands out a freshly generated random UUID on every call."""

  def compute_fingerprint(self, target):
    """Return a new `uuid.uuid4()` value; `target` itself is not consulted."""
    fresh_fingerprint = uuid.uuid4()
    return fresh_fingerprint

def _fingerprint_strategy(self):
if self.get_options().profile:
# A profile is machine-specific and we assume anyone wanting a profile wants to run it here
# and now and not accept some old result, even if on the same inputs.
return self.NeverCacheFingerprintStrategy()
else:
return None # Accept the default fingerprint strategy.

def _do_run_tests(self, partition):
with self.invalidated(partition,
fingerprint_strategy=self._fingerprint_strategy(),
# Re-run tests when the code they test (and depend on) changes.
invalidate_dependents=True) as invalidation_check:

invalid_tgts = [tgt for vts in invalidation_check.invalid_vts for tgt in vts.targets]
result = self._run_pytest(invalid_tgts)
return result.checked()
invalid_tgts = [invalid_tgt
for vts in invalidation_check.invalid_vts
for invalid_tgt in vts.targets]

# need to go:
# 1.) output -> workdir
# 2.) [iff all == invalid] workdir -> cache
# 3.) [iff invalid == 0 and all > 0] cache -> workdir
# 4.) [iff finals] workdir -> finals

# 1.) Write all results that will be potentially cached to workdir.
workdirs = self.Workdirs.for_targets(self.workdir, partition)
result = self._run_pytest(workdirs, invalid_tgts).checked()

cache_vts = self._vts_for_partition(invalidation_check)
if invalidation_check.all_vts == invalidation_check.invalid_vts:
# 2.) The full partition was invalid, cache results.
if self.artifact_cache_writes_enabled():
self.update_artifact_cache([(cache_vts, workdirs.files())])
elif not invalidation_check.invalid_vts:
# 3.) The full partition was valid, our results will have been staged for/by caching if not
# already local.
pass
else:
# The partition was partially invalid.

# We don't cache results; so others will need to re-run this partition.
# NB: We will presumably commit this change now though and so others will get this
# partition in a state that executes successfully; so when the 1st of the others
# executes against this partition; they will hit `all_vts == invalid_vts` and
# cache the results. That 1st of others is hopefully CI!
cache_vts.force_invalidate()

# 4.) Pluck any results that an end user might need to interact with from the workdir to the
# locations they expect.

def _run_pytest(self, targets):
external_junit_xml_dir = self.get_options().junit_xml_dir
if external_junit_xml_dir:
# Either we just ran pytest for a set of invalid targets and generated a junit xml file
# specific to that (sub)set or else we hit the cache for the whole partition and skipped
# running pytest, simply retrieving the partition's full junit xml file.
junitxml_path = workdirs.junitxml_path(*(invalid_tgts or partition))

safe_mkdir(external_junit_xml_dir)
shutil.copy2(junitxml_path, external_junit_xml_dir)

if self.get_options().coverage:
coverage_output_dir = self.get_options().coverage_output_dir
if coverage_output_dir:
target_dir = coverage_output_dir
else:
relpath = Target.maybe_readable_identify(partition)
pants_distdir = self.context.options.for_global_scope().pants_distdir
target_dir = os.path.join(pants_distdir, 'coverage', relpath)
self._copy_dir(workdirs.coverage_path, target_dir)

return result

def _run_pytest(self, workdirs, targets):
if not targets:
return PytestResult.rc(0)

Expand All @@ -563,14 +691,15 @@ def _run_pytest(self, targets):
if not sources_map:
return PytestResult.rc(0)

with self._test_runner(targets, sources_map) as (pex, test_args):
with self._test_runner(workdirs, targets, sources_map) as (pex, test_args):
# Validate that the user didn't provide any passthru args that conflict
# with those we must set ourselves.
for arg in self.get_passthru_args():
if arg.startswith('--junitxml') or arg.startswith('--confcutdir'):
raise TaskError('Cannot pass this arg through to pytest: {}'.format(arg))

junitxml_path = self._get_junit_xml_path(targets)
junitxml_path = workdirs.junitxml_path(*targets)

# N.B. the `--confcutdir` here instructs pytest to stop scanning for conftest.py files at the
# top of the buildroot. This prevents conftest.py files from outside (e.g. in users home dirs)
# from leaking into pants test runs. See: https://github.com/pantsbuild/pants/issues/2726
Expand Down Expand Up @@ -599,10 +728,6 @@ def _run_pytest(self, targets):
if not os.path.exists(junitxml_path):
return result

external_junit_xml_dir = self.get_options().junit_xml_dir
if external_junit_xml_dir:
safe_mkdir(external_junit_xml_dir)
shutil.copy(junitxml_path, external_junit_xml_dir)
failed_targets = self._get_failed_targets_from_junitxml(junitxml_path, targets)

def parse_error_handler(parse_error):
Expand Down

0 comments on commit 38845c7

Please sign in to comment.