diff --git a/src/python/pants/engine/BUILD b/src/python/pants/engine/BUILD index 50f02f73a25b..ce310d34d508 100644 --- a/src/python/pants/engine/BUILD +++ b/src/python/pants/engine/BUILD @@ -181,6 +181,7 @@ python_library( name='rules', sources=['rules.py'], dependencies=[ + '3rdparty/python:dataclasses', '3rdparty/python/twitter/commons:twitter.common.collections', ':goal', ':selectors', diff --git a/src/python/pants/engine/README.md b/src/python/pants/engine/README.md index f51181829a58..1deb19976a10 100644 --- a/src/python/pants/engine/README.md +++ b/src/python/pants/engine/README.md @@ -76,52 +76,72 @@ In cases where this search detects any ambiguity (generally because there are tw that can provide the same product with the same number of parameters), rule graph compilation will fail with a useful error message. -### Datatypes +### How to Declare Engine `Param` Types In practical use, builtin types like `str` or `int` do not provide enough information to -disambiguate between various types of data in `@rule` signatures, so declaring small `datatype` -definitions to provide a unique and descriptive type is highly recommended: +disambiguate between various types of data in `@rule` signatures, so declaring small, unique classes +to encapsulate different states can help to make `@rule` sets with complex control flow between rules +more declarative and self-documenting. + +#### Requirements for an Engine `Param` + +To use an instance of some class `C` as a `Param` to the engine: +1. instances of `C` must be immutable, +2. `__hash__` and `__eq__` must be implemented such that when constructing two separate instances of `C` with the same argument list `...`, `C(...) == C(...)` and `hash(C(...)) == hash(C(...))`. + - This can be ignored for singleton `Param`s. + +#### Benefits of using `@dataclass` for a `Param` +[Python 3 `@dataclass`es](https://docs.python.org/3/library/dataclasses.html) satisfy the above requirements for engine `Param`s. Using `@dataclass` to declare engine `Param` types also provides additional benefits compared to alternatives: +1. a compact and high-performance representation which can be stably shared across FFI boundaries, +2. static type checking via [the `dataclasses` mypy plugin](https://github.com/python/mypy/blob/master/mypy/plugins/dataclasses.py), +3. a concise, standard, and Pythonic way to declare classes. + + +#### Example Usage of `@dataclass` for Engine `Param`s + +*Note that the [3rdparty `dataclasses` library](https://github.com/ericvsmith/dataclasses) must be in your BUILD file and `import`ed if you're running Pants with Python 3.6!* ```python -class FormattedInt(datatype(['content'])): pass +from dataclasses import dataclass -@rule -def int_to_str(value: int) -> FormattedInt: - return FormattedInt('{}'.format(value)) +from pants.util.meta import frozen_after_init -# Field values can be specified with positional and/or keyword arguments in the constructor: -x = FormattedInt('a string') -x = FormattedInt(content='a string') +# Pants requires that engine Params have a stable hash. This can be accomplished with the +# `frozen=True` argument set in the `@dataclass` decorator. +@dataclass(frozen=True) +class FormattedInt: + content: str -# Field values can be accessed after construction by name or index: -print(x.content) # 'a string' -print(x[0]) # 'a string' +# `@frozen_after_init` can also be used with `unsafe_hash=True` to create engine Params which can +# modify their fields within the `__init__()` method: +@frozen_after_init +@dataclass(unsafe_hash=True) +class ValidatedInt: + x: int -# datatype objects can be easily inspected: -print(x) # 'FormattedInt(content=a string)' -``` + def __init__(self, x): + self.x = x -#### Types of Fields +@rule +def int_to_str(value: ValidatedInt) -> FormattedInt: + return FormattedInt('{}'.format(value.x)) -`datatype()` accepts a list of *field declarations*, and returns a type which can be subclassed. A -*field declaration* can just be a string (e.g. `'field_name'`), which is then used as the field -name, as with `FormattedInt` above. A field can also be declared with a tuple of two elements: the -field name string, and a `TypeConstraint` for the field (e.g. `('field_name', -Exactly(FieldType))`). The bare type name (e.g. `FieldType`) can also be used as a shorthand for -`Exactly(FieldType)`. If the tuple form is used, the constructor will create your object, then raise -an error if the field value does not satisfy the type constraint. +# Instances can be constructed and accessed the same way as for normal `@dataclass`es. +x = FormattedInt('a string') +assert x == FormattedInt(content='a string') +print(x.content) # 'a string' -``` python -class TypedDatatype(datatype([('field_name', Exactly(str, int))])): +# `@dataclass` objects can be easily inspected: +print(x) # 'FormattedInt(content="a string")' + +# All mypy types, including parameterized types, may be used in field definitions. +@dataclass(frozen=True) +class TypedDatatype: """Example of a datatype with a more complex field type constraint.""" -``` + field_name: Union[str, int] -Assigning a specific type to a field can be somewhat unidiomatic in Python, and may be unexpected or -unnatural to use. However, regardless of whether the object is created directly with type-checked -fields or whether it's produced from a set of rules by the engine's dependency injection, it is -extremely useful to formalize the assumptions made about the value of an object into a specific -type, even if the type just wraps a single field. The `datatype()` function makes it simple and -efficient to apply that strategy. +print(TypedDatatype("huh")) # 'TypedDatatype(field_name=huh)' +``` ### Gets and RootRules diff --git a/src/python/pants/engine/rules.py b/src/python/pants/engine/rules.py index e7048f242568..82c24e26ff80 100644 --- a/src/python/pants/engine/rules.py +++ b/src/python/pants/engine/rules.py @@ -2,6 +2,7 @@ # Licensed under the Apache License, Version 2.0 (see LICENSE). import ast +import dataclasses import inspect import itertools import logging @@ -10,7 +11,7 @@ from abc import ABC, abstractmethod from collections import OrderedDict from dataclasses import dataclass -from typing import Any, Callable, Dict, List, Optional, Tuple, Type +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type from twitter.common.collections import OrderedSet @@ -332,6 +333,19 @@ def dependency_optionables(self): """A tuple of Optionable classes that are known to be necessary to run this rule.""" return () + @classmethod + def _validate_type_field(cls, type_obj, description): + if not isinstance(type_obj, type): + raise TypeError(f'{description} provided to @rules must be types! Was: {type_obj}.') + if dataclasses.is_dataclass(type_obj): + if not (type_obj.__dataclass_params__.frozen or + getattr(type_obj, frozen_after_init.sentinel_attr, False)): + raise TypeError( + f'{description} {type_obj} is a dataclass declared without `frozen=True`, or without ' + 'both `unsafe_hash=True` and the `@frozen_after_init` decorator! ' + 'The engine requires that fields in params are immutable for stable hashing!') + return type_obj + @frozen_after_init @dataclass(unsafe_hash=True) @@ -362,8 +376,16 @@ def __init__( cacheable: bool = True, name: Optional[str] = None, ): - self._output_type = output_type - self.input_selectors = input_selectors + self._output_type = self._validate_type_field(output_type, '@rule output type') + self.input_selectors = tuple( + self._validate_type_field(t, '@rule input selector') + for t in input_selectors + ) + for g in input_gets: + product_type = g.product + subject_type = g.subject_declared_type + self._validate_type_field(product_type, 'Get product type') + self._validate_type_field(subject_type, 'Get subject type') self.input_gets = input_gets self.func = func # type: ignore self._dependency_rules = dependency_rules or () @@ -406,7 +428,7 @@ class RootRule(Rule): _output_type: Type def __init__(self, output_type: Type) -> None: - self._output_type = output_type + self._output_type = self._validate_type_field(output_type, 'RootRule declared type') @property def output_type(self): @@ -421,13 +443,12 @@ def dependency_optionables(self): return tuple() -# TODO: add typechecking here -- use dicts for `union_rules`. @dataclass(frozen=True) class RuleIndex: """Holds a normalized index of Rules used to instantiate Nodes.""" - rules: Any - roots: Any - union_rules: Any + rules: Dict[Any, Any] + roots: Set[Any] + union_rules: Dict[Any, Any] @classmethod def create(cls, rule_entries, union_rules=None): diff --git a/src/python/pants/util/meta.py b/src/python/pants/util/meta.py index 0f3bbf8b544d..f6c986d12c30 100644 --- a/src/python/pants/util/meta.py +++ b/src/python/pants/util/meta.py @@ -137,4 +137,8 @@ def new_setattr(self, key: str, value: Any) -> None: cls.__init__ = new_init # type: ignore cls.__setattr__ = new_setattr # type: ignore + setattr(cls, frozen_after_init.sentinel_attr, True) # type: ignore + return cls + +frozen_after_init.sentinel_attr = '_frozen_after_init' # type: ignore diff --git a/tests/python/pants_test/engine/test_scheduler.py b/tests/python/pants_test/engine/test_scheduler.py index 260634dc9c62..6be869e177a0 100644 --- a/tests/python/pants_test/engine/test_scheduler.py +++ b/tests/python/pants_test/engine/test_scheduler.py @@ -15,6 +15,7 @@ from pants.engine.selectors import Get, Params from pants.testutil.engine.util import assert_equal_with_printing, remove_locations_from_traceback from pants.testutil.test_base import TestBase +from pants.util.meta import frozen_after_init @dataclass(frozen=True) @@ -62,6 +63,43 @@ async def transitive_coroutine_rule(c: C) -> D: return D(b) +@dataclass +class NonFrozenDataclass: + x: int + + +@frozen_after_init +@dataclass(unsafe_hash=True) +class FrozenAfterInit: + x: int + + def __init__(self, x): + # This is an example of how you can assign within __init__() with @frozen_after_init. This + # particular example is not intended to be super useful. + self.x = x + 1 + + +@rule +def use_frozen_after_init_object(x: FrozenAfterInit) -> int: + return x.x + + +@dataclass(frozen=True) +class FrozenFieldsDataclass: + x: int + y: str + + +@dataclass(frozen=True) +class ResultDataclass: + something: str + + +@rule +def dataclass_rule(obj: FrozenFieldsDataclass) -> ResultDataclass: + return ResultDataclass(something=f'x={obj.x}, y={obj.y}') + + @union class UnionBase: pass @@ -179,6 +217,10 @@ def rules(cls): consumes_a_and_b, transitive_b_c, transitive_coroutine_rule, + dataclass_rule, + RootRule(FrozenAfterInit), + use_frozen_after_init_object, + RootRule(FrozenFieldsDataclass), RootRule(UnionWrapper), UnionRule(UnionBase, UnionA), UnionRule(UnionWithNonMemberErrorMsg, UnionX), @@ -244,6 +286,17 @@ def test_union_rules_no_docstring(self): with self._assert_execution_error(expected_msg): self.scheduler.product_request(UnionX, [Params(UnionWrapper(UnionA()))]) + def test_dataclass_products_rule(self): + result, = self.scheduler.product_request( + ResultDataclass, + [Params(FrozenFieldsDataclass(3, "test string"))]) + self.assertEquals(result.something, 'x=3, y=test string') + + result, = self.scheduler.product_request( + int, + [Params(FrozenAfterInit(x=3))]) + self.assertEquals(result, 4) + class SchedulerWithNestedRaiseTest(TestBase): @@ -358,3 +411,23 @@ def test_trace_includes_rule_exception_traceback(self): raise Exception(f'An exception for {type(x).__name__}') Exception: An exception for B''').lstrip() + '\n\n', # Traces include two empty lines after. trace) + + +class RuleIndexingErrorTest(TestBase): + + def test_non_frozen_dataclass_error(self): + with self.assertRaisesWithMessage(TypeError, dedent("""\ + RootRule declared type is a dataclass declared without `frozen=True`, or without both `unsafe_hash=True` and the `@frozen_after_init` decorator! The engine requires that fields in params are immutable for stable hashing!""")): + RootRule(NonFrozenDataclass) + + with self.assertRaisesWithMessage(TypeError, dedent("""\ + @rule input selector is a dataclass declared without `frozen=True`, or without both `unsafe_hash=True` and the `@frozen_after_init` decorator! The engine requires that fields in params are immutable for stable hashing!""")): + @rule + def f(x: NonFrozenDataclass) -> int: + return 3 + + with self.assertRaisesWithMessage(TypeError, dedent("""\ + @rule output type is a dataclass declared without `frozen=True`, or without both `unsafe_hash=True` and the `@frozen_after_init` decorator! The engine requires that fields in params are immutable for stable hashing!""")): + @rule + def g(x: int) -> NonFrozenDataclass: + return NonFrozenDataclass(x=x)