Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow nailgun execution for RscCompile by bundling together the tool classpaths #7092

Merged
merged 7 commits into from
Jan 18, 2019
76 changes: 76 additions & 0 deletions src/python/pants/backend/jvm/subsystems/jvm_tool_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@
from collections import namedtuple
from textwrap import dedent

from future.utils import text_type

from pants.base.exceptions import TaskError
from pants.java.distribution.distribution import DistributionLocator
from pants.option.custom_types import target_option
from pants.util.memo import memoized_property
from pants.util.objects import datatype


class JvmToolMixin(object):
Expand Down Expand Up @@ -145,6 +149,78 @@ def formulate_help():
jvm_tool = cls.JvmTool(register.scope, key, classpath, main, custom_rules)
JvmToolMixin._jvm_tools.append(jvm_tool)

# TODO: Deprecate .register_jvm_tool()?
class JvmToolDeclaration(datatype([
('tool_name', text_type),
'main',
('classpath', tuple),
('custom_rules', tuple),
])):
"""A specification for a JVM tool."""

def __new__(cls, tool_name, main=None, classpath=None, custom_rules=None):
return super(JvmToolMixin.JvmToolDeclaration, cls).__new__(
cls, text_type(tool_name), main, tuple(classpath or []), tuple(custom_rules or []))

class JvmToolDeclError(Exception): pass

@classmethod
def register_jvm_tool_decl(cls, register, decl, **kwargs):
"""Register a `JvmToolDeclaration` with `.register_jvm_tool()`."""
if not isinstance(decl, cls.JvmToolDeclaration):
raise cls.JvmToolDeclError(
'decl {} must be an instance of {}.{}'
.format(decl, cls.__name__, cls.JvmToolDeclaration.__name__))
kwargs = dict(
classpath=list(decl.classpath),
custom_rules=list(decl.custom_rules)
)
if decl.main:
kwargs['main'] = decl.main
cls.register_jvm_tool(register,
decl.tool_name,
**kwargs)

class CombinedJvmToolsError(JvmToolDeclError): pass

_combined_tools = []

@classmethod
def register_combined_jvm_tools(cls, register, tool_decls):
"""Register `JvmToolDeclaration`s as JVM tools, which can be invoked as a single jar.

This allows tools to be invoked one at a time, but with the combined classpath of all of them
using ensure_combined_jvm_tool_classpath(). This allows creating nailgun instances for the
tools which have the same fingerprint, and allows a task to invoke multiple different JVM tools
from the same nailgun instances.
"""
for decl in tool_decls:
cls.register_jvm_tool_decl(register, decl)
cls._combined_tools.append(decl)

@memoized_property
def _combined_jvm_tool_classpath(self):
cp = []
for decl in self._combined_tools:
cp.extend(self.tool_classpath(decl.tool_name))
return cp

@memoized_property
def _registered_combined_tool_keys(self):
return frozenset(decl.tool_name for decl in self._combined_tools)

def ensure_combined_jvm_tool_classpath(self, tool_name):
"""Get a single classpath for all tools registered with `register_combined_jvm_tools()`.

Also check to ensure the tool was registered for consumption as a combined JVM tool.
"""
if tool_name not in self._registered_combined_tool_keys:
raise self.CombinedJvmToolsError(
"tool with name '{}' must be registered with register_combined_jvm_tools() "
"(known keys are: {})"
.format(tool_name, type(self).__name__, self._registered_combined_tool_keys))
return self._combined_jvm_tool_classpath

@classmethod
def prepare_tools(cls, round_manager):
"""Subclasses must call this method to ensure jvm tool products are available."""
Expand Down
48 changes: 27 additions & 21 deletions src/python/pants/backend/jvm/subsystems/zinc.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,19 @@
from pants.util.memo import memoized_method, memoized_property


_ZINC_SHADER_RULES = [
# The compiler-interface and compiler-bridge tool jars carry xsbt and
# xsbti interfaces that are used across the shaded tool jar boundary so
# we preserve these root packages wholesale along with the core scala
# APIs.
Shader.exclude_package('scala', recursive=True),
Shader.exclude_package('xsbt', recursive=True),
Shader.exclude_package('xsbti', recursive=True),
# Unfortunately, is loaded reflectively by the compiler.
Shader.exclude_package('org.apache.logging.log4j', recursive=True),
]


class Zinc(object):
"""Configuration for Pants' zinc wrapper tool."""

Expand Down Expand Up @@ -61,34 +74,17 @@ def register_options(cls, register):

zinc_rev = '1.0.3'

shader_rules = [
# The compiler-interface and compiler-bridge tool jars carry xsbt and
# xsbti interfaces that are used across the shaded tool jar boundary so
# we preserve these root packages wholesale along with the core scala
# APIs.
Shader.exclude_package('scala', recursive=True),
Shader.exclude_package('xsbt', recursive=True),
Shader.exclude_package('xsbti', recursive=True),
# Unfortunately, is loaded reflectively by the compiler.
Shader.exclude_package('org.apache.logging.log4j', recursive=True),
]

cls.register_jvm_tool(register,
Zinc.ZINC_BOOTSTRAPPER_TOOL_NAME,
classpath=[
JarDependency('org.pantsbuild', 'zinc-bootstrapper_2.11', '0.0.4'),
],
main=Zinc.ZINC_BOOTSTRAPER_MAIN,
custom_rules=shader_rules,
custom_rules=_ZINC_SHADER_RULES,
)

cls.register_jvm_tool(register,
Zinc.ZINC_COMPILER_TOOL_NAME,
classpath=[
JarDependency('org.pantsbuild', 'zinc-compiler_2.11', '0.0.9'),
],
main=Zinc.ZINC_COMPILE_MAIN,
custom_rules=shader_rules)
# Defined at the bottom of this file so it can be consumed elsewhere.
cls.register_jvm_tool_decl(register, ZINC_COMPILER_DECL)

cls.register_jvm_tool(register,
'compiler-bridge',
Expand All @@ -111,7 +107,7 @@ def register_options(cls, register):
# broken up into multiple jars, but zinc does not yet support a sequence
# of jars for the interface.
main='no.such.main.Main',
custom_rules=shader_rules)
custom_rules=_ZINC_SHADER_RULES)

cls.register_jvm_tool(register,
Zinc.ZINC_EXTRACTOR_TOOL_NAME,
Expand Down Expand Up @@ -435,3 +431,13 @@ def compile_classpath(self, classpath_product_key, target, extra_cp_entries=None
"Classpath entry does not start with buildroot: {}".format(entry)

return classpath_entries


ZINC_COMPILER_DECL = JvmToolMixin.JvmToolDeclaration(
Zinc.ZINC_COMPILER_TOOL_NAME,
main=Zinc.ZINC_COMPILE_MAIN,
classpath=[
JarDependency('org.pantsbuild', 'zinc-compiler_2.11', '0.0.9'),
],
custom_rules=_ZINC_SHADER_RULES,
)
54 changes: 20 additions & 34 deletions src/python/pants/backend/jvm/tasks/jvm_compile/rsc/rsc_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@

from pants.backend.jvm.subsystems.dependency_context import DependencyContext # noqa
from pants.backend.jvm.subsystems.jvm_platform import JvmPlatform
from pants.backend.jvm.subsystems.jvm_tool_mixin import JvmToolMixin
from pants.backend.jvm.subsystems.shader import Shader
from pants.backend.jvm.subsystems.zinc import ZINC_COMPILER_DECL
from pants.backend.jvm.targets.jar_library import JarLibrary
from pants.backend.jvm.targets.java_library import JavaLibrary
from pants.backend.jvm.targets.jvm_target import JvmTarget
Expand All @@ -35,7 +37,6 @@
from pants.util.contextutil import Timer
from pants.util.dirutil import (fast_relpath, fast_relpath_optional, maybe_read_file,
safe_file_dump, safe_mkdir)
from pants.util.memo import memoized_property


#
Expand Down Expand Up @@ -132,20 +133,14 @@ def __init__(self, *args, **kwargs):
def implementation_version(cls):
return super(RscCompile, cls).implementation_version() + [('RscCompile', 171)]

# Bundle all the jvm tool classpaths together so that we can persist a single nailgun instance for
# all of them.
# TODO: move this in JvmToolMixin or NailgunTask as a register_nailgunnable_jvm_tools() method!
_joined_jvm_tool_keys = ['rsc', 'metacp', 'metai']

@classmethod
def register_options(cls, register):
super(RscCompile, cls).register_options(register)

rsc_toolchain_version = '0.0.0-446-c64e6937'
scalameta_toolchain_version = '4.0.0'

cls.register_jvm_tool(
register,
rsc_decl = JvmToolMixin.JvmToolDeclaration(
'rsc',
classpath=[
JarDependency(
Expand All @@ -156,9 +151,9 @@ def register_options(cls, register):
],
custom_rules=[
Shader.exclude_package('rsc', recursive=True),
])
cls.register_jvm_tool(
register,
]
)
metacp_decl = JvmToolMixin.JvmToolDeclaration(
'metacp',
classpath=[
JarDependency(
Expand All @@ -169,9 +164,9 @@ def register_options(cls, register):
],
custom_rules=[
Shader.exclude_package('scala', recursive=True),
])
cls.register_jvm_tool(
register,
]
)
metai_decl = JvmToolMixin.JvmToolDeclaration(
'metai',
classpath=[
JarDependency(
Expand All @@ -182,7 +177,14 @@ def register_options(cls, register):
],
custom_rules=[
Shader.exclude_package('scala', recursive=True),
])
]
)

# Register all of these as "combined" JVM tools so that we can invoke their combined classpath
# in a single nailgun instance. We still invoke their classpaths separately when not using
# nailgun, however.
cls.register_combined_jvm_tools(
register, [rsc_decl, metacp_decl, metai_decl, ZINC_COMPILER_DECL])

def register_extra_products_from_contexts(self, targets, compile_contexts):
super(RscCompile, self).register_extra_products_from_contexts(targets, compile_contexts)
Expand Down Expand Up @@ -830,30 +832,13 @@ def _runtool_nonhermetic(self, parent_workunit, classpath, main, tool_name, args
if c.name is tool_name:
runjava_workunit = c
break
# TODO: when would this happen?
# TODO: figure out and document when would this happen.
if runjava_workunit is None:
raise Exception('couldnt find work unit for underlying execution')
return runjava_workunit

class UnregisteredCombinedJvmTool(Exception): pass

@memoized_property
def _combined_tool_classpath(self):
cp = []
for k in self._joined_jvm_tool_keys:
cp.extend(self.tool_classpath(k))
return cp

def _ensure_combined_tool_classpath(self, tool_name):
if tool_name not in self._joined_jvm_tool_keys:
raise self.UnregisteredCombinedJvmTool(
"tool with name '{}' must be in _joined_jvm_tool_keys in {} (known keys are: {})"
.format(tool_name, type(self).__name__, self._joined_jvm_tool_keys))
return self._combined_tool_classpath

def _runtool(self, main, tool_name, args, distribution,
tgt=None, input_files=tuple(), input_digest=None, output_dir=None):
# ???
def workunit_factory(*args, **kwargs):
return self.context.new_workunit(tool_name, *args, **kwargs)
return self.do_for_execution_strategy_variant(workunit_factory, {
Expand All @@ -863,7 +848,8 @@ def workunit_factory(*args, **kwargs):
self.SUBPROCESS: lambda wu: self._runtool_nonhermetic(
wu, self.tool_classpath(tool_name), main, tool_name, args, distribution),
self.NAILGUN: lambda wu: self._runtool_nonhermetic(
wu, self._ensure_combined_tool_classpath(tool_name), main, tool_name, args, distribution),
wu, self.ensure_combined_jvm_tool_classpath(tool_name),
main, tool_name, args, distribution),
})

def _run_metai_tool(self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,12 @@ def relative_to_exec_root(path):
# TODO: This should probably return a ClasspathEntry rather than a Digest
return res.output_directory_digest
else:
if self.runjava(classpath=[self._zinc.zinc],
# This will just be the zinc compiler tool classpath normally, but tasks which invoke zinc
# along with other JVM tools with nailgun (such as RscCompile) require zinc to be invoked with
# this method to ensure a single classpath is used for all the tools they need to invoke so
# that the nailgun instance (which is keyed by classpath and JVM options) isn't invalidated.
zinc_combined_cp = self.ensure_combined_jvm_tool_classpath(Zinc.ZINC_COMPILER_TOOL_NAME)
if self.runjava(classpath=zinc_combined_cp,
main=Zinc.ZINC_COMPILE_MAIN,
jvm_options=jvm_options,
args=zinc_args,
Expand Down
4 changes: 4 additions & 0 deletions src/python/pants/backend/jvm/tasks/nailgun_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ class InvalidExecutionStrategyMapping(Exception): pass
_all_execution_strategies = frozenset([NAILGUN, SUBPROCESS, HERMETIC])

def do_for_execution_strategy_variant(self, workunit_factory, mapping):
"""Invoke the method in `mapping` with the key corresponding to the execution strategy.

`mapping` is a dict mapping execution strategy -> method accepting a workunit argument.
"""
variants = frozenset(mapping.keys())
if variants != self._all_execution_strategies:
raise self.InvalidExecutionStrategyMapping(
Expand Down
2 changes: 1 addition & 1 deletion src/python/pants/java/nailgun_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def run(this, stdout=None, stderr=None, stdin=None, cwd=None):

def _check_nailgun_state(self, new_fingerprint):
running = self.is_alive()
updated = self.needs_restart(new_fingerprint) or self.cmd != self._distribution.java
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this changing? I don't think it should be?

Copy link
Contributor Author

@cosmicexplorer cosmicexplorer Jan 17, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree! I mentioned:

Remove the mysterious or self.cmd != self._distribution.java in nailgun_executor.py -- this is necessary for this PR to work, but an option can be plumbed in if it breaks pantsd

When adding a ton of debug logging I realized the nailguns were restarting despite matching fingerprints because of this line of code, and everything immediately started working after I removed it. I'm of course very concerned this will silently break pantsd, and an option can be plumbed through to avoid removing this check for specific nailguns, but the logic for nailgun and pailgun isn't clearly demarcated -- #6579 makes the distinction between nailgun and pailgun implementations more clear and could make this process easier.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could delve more into why the restarting is occurring, because to start at least self.cmd is a list, not a string like self._distribution.java.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be good to see whether this is still necessary.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think cmd is a string representation of command (

), but that doesn't clear up why we're comparing that string to self._distribution.java, which is the location of java being used.

I think self.cmd != self._distribution.java would only be False if the cmd was to just run java without any arguments.

Copy link
Contributor Author

@cosmicexplorer cosmicexplorer Jan 17, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is still necessary, if you run e.g. ./pants test tests/python/pants_test/backend/jvm/tasks/jvm_compile/rsc: -- -vs -k test_executing_multi_target_binary_nonhermetic it will reliably fail with the error described in the OP without this change, and that's not a fluke. Since the git log of that line only shows moving it from elsewhere (the first commit from git log -L 156,156:src/python/pants/java/nailgun_executor.py is 0d62074), I don't know how to evaluate this. It would be great if there were comments or tests about it, but in the worst case if we start seeing random pantsd issues and #6579 hasn't been merged yet (which I suspect would make all of this much more clear), we know what to fix. I also thought it was strange that it was comparing it to a bare java command and was wondering if it was necessary a long time ago and not anymore.

Copy link
Contributor Author

@cosmicexplorer cosmicexplorer Jan 17, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(I could see that being used as a testing hack to invalidate dummy java processes, for example, but I don't know where to find more context about this)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was wrong, looking at the wrong level of nesting. self.cmd in this context is from the ProcessManager, not the Runner. It's the first element in the command line.

def cmd(self):
"""The first element of the process commandline e.g. '/usr/bin/python2.7'.
:returns: The first element of the process command line or else `None` if the underlying
process has died.
"""

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find it confusing that cmd has so many meanings in that file.

Copy link
Contributor Author

@cosmicexplorer cosmicexplorer Jan 18, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep!!!! Thank you for finding that, I thought I had tracked it down to executor.py.

updated = self.needs_restart(new_fingerprint)
logging.debug('Nailgun {nailgun} state: updated={up!s} running={run!s} fingerprint={old_fp} '
'new_fingerprint={new_fp} distribution={old_dist} new_distribution={new_dist}'
.format(nailgun=self._identity, up=updated, run=running,
Expand Down