Skip to content

Commit

Permalink
python_sources and python_tests targets no longer use dependency …
Browse files Browse the repository at this point in the history
…inference, only `python_source` and `python_test` targets (#13231)

Now our Python code operates on `python_source` and `python_test` targets. This causes some changes.

### Dependency inference

Dependency inference no longer runs on `python_sources` and `python_tests`, only `python_source` and `python_test`. That applies to import analysis, conftest.py, and `__init__.py`. 

This actually speeds up dependency inference! We no longer run `import_parser.py` twice on the same files.

Before:

```
❯ hyperfine -r 10 './pants --no-process-execution-local-cache --no-pantsd dependencies src/python::'
Benchmark #1: ./pants --no-process-execution-local-cache --no-pantsd dependencies src/python::
  Time (mean ± σ):      7.650 s ±  0.741 s    [User: 5.794 s, System: 2.141 s]
  Range (min … max):    7.250 s …  9.673 s    10 runs
```

After:

```
❯ hyperfine -r 10 './pants --no-process-execution-local-cache --no-pantsd dependencies src/python::'
Benchmark #1: ./pants --no-process-execution-local-cache --no-pantsd dependencies src/python::
  Time (mean ± σ):      5.998 s ±  0.279 s    [User: 3.524 s, System: 0.903 s]
  Range (min … max):    5.708 s …  6.688 s    10 runs
```

It means that `./pants dependencies src/py:lib` now only has explicitly declared dependencies. You have to use `--transitive` to get the results of dep inference.

Thanks to this change, we can simplify `import_parser.py` to assume there is only one file.

### Lockfile generation

When determining the interpreter constraints to use, we now look at generated targets, not target generators. We only run over the `python_source` and `python_test` targets that actually will be used. 

This makes us future-proof to an `overrides` field adding `skip_tool` to only some of the files. I think it also fixes our dependency resolution for Pylint looking at direct deps?

### `FieldSet.opt_out` no longer worries about file targets

Pytest and MyPy used to skip running on non-file targets. That can be removed now. This unblocks allowing you to explicitly define a `python_source`/`python_test` target.
  • Loading branch information
Eric-Arellano authored Oct 13, 2021
1 parent 6cd9936 commit e865418
Show file tree
Hide file tree
Showing 79 changed files with 581 additions and 427 deletions.
4 changes: 2 additions & 2 deletions src/python/pants/backend/awslambda/python/rules_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from pants.backend.awslambda.python.target_types import rules as target_rules
from pants.backend.python.subsystems.lambdex import Lambdex
from pants.backend.python.subsystems.lambdex import rules as awslambda_python_subsystem_rules
from pants.backend.python.target_types import PythonLibrary
from pants.backend.python.target_types import PythonSourcesGeneratorTarget
from pants.backend.python.target_types_rules import rules as python_target_types_rules
from pants.core.goals.package import BuiltPackage
from pants.core.target_types import FilesGeneratorTarget, RelocatedFiles, ResourcesGeneratorTarget
Expand All @@ -39,7 +39,7 @@ def rule_runner() -> RuleRunner:
],
target_types=[
PythonAWSLambda,
PythonLibrary,
PythonSourcesGeneratorTarget,
FilesGeneratorTarget,
RelocatedFiles,
ResourcesGeneratorTarget,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
ResolvePythonAwsHandlerRequest,
)
from pants.backend.awslambda.python.target_types import rules as target_type_rules
from pants.backend.python.target_types import PythonLibrary, PythonRequirementTarget
from pants.backend.python.target_types import PythonRequirementTarget, PythonSourcesGeneratorTarget
from pants.backend.python.target_types_rules import rules as python_target_types_rules
from pants.build_graph.address import Address
from pants.engine.target import InjectedDependencies, InvalidFieldException
Expand All @@ -32,7 +32,7 @@ def rule_runner() -> RuleRunner:
QueryRule(ResolvedPythonAwsHandler, [ResolvePythonAwsHandlerRequest]),
QueryRule(InjectedDependencies, [InjectPythonLambdaHandlerDependency]),
],
target_types=[PythonAWSLambda, PythonRequirementTarget, PythonLibrary],
target_types=[PythonAWSLambda, PythonRequirementTarget, PythonSourcesGeneratorTarget],
)


Expand Down
2 changes: 1 addition & 1 deletion src/python/pants/backend/codegen/protobuf/python/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ python_tests(name="python_protobuf_subsystem_test", sources=["python_protobuf_su
python_tests(
name="rules_integration_test",
sources=["rules_integration_test.py"],
timeout=240,
timeout=330,
# We want to make sure the default lockfile for MyPy Protobuf works for both macOS and Linux.
tags=["platform_specific_behavior"],
)
4 changes: 2 additions & 2 deletions src/python/pants/backend/codegen/protobuf/python/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
PythonProtobufSubsystem,
)
from pants.backend.codegen.protobuf.target_types import ProtobufGrpcToggleField, ProtobufSourceField
from pants.backend.python.target_types import PythonSources
from pants.backend.python.target_types import PythonSourceField
from pants.backend.python.util_rules import pex
from pants.backend.python.util_rules.pex import PexRequest, PexResolveInfo, VenvPex, VenvPexRequest
from pants.backend.python.util_rules.pex_environment import PexEnvironment
Expand Down Expand Up @@ -43,7 +43,7 @@

class GeneratePythonFromProtobufRequest(GenerateSourcesRequest):
input = ProtobufSourceField
output = PythonSources
output = PythonSourceField


@rule(desc="Generate Python from Protobuf", level=LogLevel.DEBUG)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from pants.backend.python.subsystems.lambdex import (
rules as python_google_cloud_function_subsystem_rules,
)
from pants.backend.python.target_types import PythonLibrary
from pants.backend.python.target_types import PythonSourcesGeneratorTarget
from pants.backend.python.target_types_rules import rules as python_target_types_rules
from pants.core.goals.package import BuiltPackage
from pants.core.target_types import FilesGeneratorTarget, RelocatedFiles, ResourcesGeneratorTarget
Expand All @@ -43,7 +43,7 @@ def rule_runner() -> RuleRunner:
],
target_types=[
PythonGoogleCloudFunction,
PythonLibrary,
PythonSourcesGeneratorTarget,
FilesGeneratorTarget,
RelocatedFiles,
ResourcesGeneratorTarget,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
ResolvePythonGoogleHandlerRequest,
)
from pants.backend.google_cloud_function.python.target_types import rules as target_type_rules
from pants.backend.python.target_types import PythonLibrary, PythonRequirementTarget
from pants.backend.python.target_types import PythonRequirementTarget, PythonSourcesGeneratorTarget
from pants.backend.python.target_types_rules import rules as python_target_types_rules
from pants.build_graph.address import Address
from pants.engine.internals.scheduler import ExecutionError
Expand All @@ -33,7 +33,11 @@ def rule_runner() -> RuleRunner:
QueryRule(ResolvedPythonGoogleHandler, [ResolvePythonGoogleHandlerRequest]),
QueryRule(InjectedDependencies, [InjectPythonCloudFunctionHandlerDependency]),
],
target_types=[PythonGoogleCloudFunction, PythonRequirementTarget, PythonLibrary],
target_types=[
PythonGoogleCloudFunction,
PythonRequirementTarget,
PythonSourcesGeneratorTarget,
],
)


Expand Down
4 changes: 2 additions & 2 deletions src/python/pants/backend/project_info/count_loc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from pants.backend.project_info import count_loc
from pants.backend.project_info.count_loc import CountLinesOfCode
from pants.backend.python.target_types import PythonLibrary
from pants.backend.python.target_types import PythonSourcesGeneratorTarget
from pants.core.util_rules import external_tool
from pants.engine.target import MultipleSourcesField, Target
from pants.testutil.rule_runner import GoalRuleResult, RuleRunner
Expand All @@ -24,7 +24,7 @@ class ElixirTarget(Target):
def rule_runner() -> RuleRunner:
return RuleRunner(
rules=[*count_loc.rules(), *external_tool.rules()],
target_types=[PythonLibrary, ElixirTarget],
target_types=[PythonSourcesGeneratorTarget, ElixirTarget],
)


Expand Down
5 changes: 3 additions & 2 deletions src/python/pants/backend/project_info/dependencies_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pytest

from pants.backend.project_info.dependencies import Dependencies, DependencyType, rules
from pants.backend.python.target_types import PythonLibrary, PythonRequirementTarget
from pants.backend.python.target_types import PythonRequirementTarget, PythonSourcesGeneratorTarget
from pants.engine.target import SpecialCasedDependencies, Target
from pants.testutil.rule_runner import RuleRunner

Expand All @@ -27,7 +27,8 @@ class SpecialDepsTarget(Target):
@pytest.fixture
def rule_runner() -> RuleRunner:
return RuleRunner(
rules=rules(), target_types=[PythonLibrary, PythonRequirementTarget, SpecialDepsTarget]
rules=rules(),
target_types=[PythonSourcesGeneratorTarget, PythonRequirementTarget, SpecialDepsTarget],
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from dataclasses import dataclass

from pants.backend.python.target_types import PythonSourceField
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
from pants.backend.python.util_rules.pex_environment import PythonExecutable
from pants.core.util_rules.source_files import SourceFilesRequest
Expand All @@ -11,7 +12,6 @@
from pants.engine.fs import CreateDigest, Digest, FileContent, MergeDigests
from pants.engine.process import Process, ProcessResult
from pants.engine.rules import Get, MultiGet, collect_rules, rule
from pants.engine.target import SourcesField
from pants.util.logging import LogLevel

# NOTE: Must call .format(min_dots=X) on this string to use it.
Expand Down Expand Up @@ -113,24 +113,23 @@ def parse_file(filename):
return None
if __name__ == "__main__":
imports = set()
for filename in sys.argv[1:]:
tree = parse_file(filename)
if not tree:
continue
def main(filename):
tree = parse_file(filename)
if not tree:
return
package_parts = os.path.dirname(filename).split(os.path.sep)
visitor = AstVisitor(package_parts)
visitor.visit(tree)
imports.update(visitor.imports)
package_parts = os.path.dirname(filename).split(os.path.sep)
visitor = AstVisitor(package_parts)
visitor.visit(tree)
# We have to be careful to set the encoding explicitly and write raw bytes ourselves.
# See below for where we explicitly decode.
buffer = sys.stdout if sys.version_info[0:2] == (2, 7) else sys.stdout.buffer
buffer.write("\\n".join(sorted(imports)).encode("utf8"))
buffer.write("\\n".join(sorted(visitor.imports)).encode("utf8"))
if __name__ == "__main__":
main(sys.argv[1])
"""


Expand All @@ -143,7 +142,7 @@ class ParsedPythonImports(DeduplicatedCollection[str]):

@dataclass(frozen=True)
class ParsePythonImportsRequest:
sources: SourcesField
sources: PythonSourceField
interpreter_constraints: InterpreterConstraints
string_imports: bool
string_imports_min_dots: int
Expand All @@ -157,6 +156,11 @@ async def parse_python_imports(request: ParsePythonImportsRequest) -> ParsedPyth
Get(Digest, CreateDigest([FileContent("__parse_python_imports.py", script)])),
Get(StrippedSourceFiles, SourceFilesRequest([request.sources])),
)

# We operate on PythonSourceField, which should be one file.
assert len(stripped_sources.snapshot.files) == 1
file = stripped_sources.snapshot.files[0]

input_digest = await Get(
Digest, MergeDigests([script_digest, stripped_sources.snapshot.digest])
)
Expand All @@ -166,7 +170,7 @@ async def parse_python_imports(request: ParsePythonImportsRequest) -> ParsedPyth
argv=[
python_interpreter.path,
"./__parse_python_imports.py",
*stripped_sources.snapshot.files,
file,
],
input_digest=input_digest,
description=f"Determine Python imports for {request.sources.address}",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
ParsedPythonImports,
ParsePythonImportsRequest,
)
from pants.backend.python.target_types import PythonLibrary, PythonSources
from pants.backend.python.target_types import PythonSourceField, PythonSourceTarget
from pants.backend.python.util_rules import pex
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
from pants.core.util_rules import stripped_source_files
Expand All @@ -34,13 +34,13 @@ def rule_runner() -> RuleRunner:
*pex.rules(),
QueryRule(ParsedPythonImports, [ParsePythonImportsRequest]),
],
target_types=[PythonLibrary],
target_types=[PythonSourceTarget],
)


def assert_imports_parsed(
rule_runner: RuleRunner,
content: str | None,
content: str,
*,
expected: list[str],
filename: str = "project/foo.py",
Expand All @@ -49,16 +49,18 @@ def assert_imports_parsed(
string_imports_min_dots: int = 2,
) -> None:
rule_runner.set_options([], env_inherit={"PATH", "PYENV_ROOT", "HOME"})
files = {"project/BUILD": "python_sources(sources=['**/*.py'])"}
if content is not None:
files[filename] = content
rule_runner.write_files(files) # type: ignore[arg-type]
tgt = rule_runner.get_target(Address("project"))
rule_runner.write_files(
{
"BUILD": f"python_sources(name='t', source={repr(filename)})",
filename: content,
}
)
tgt = rule_runner.get_target(Address("", target_name="t"))
imports = rule_runner.request(
ParsedPythonImports,
[
ParsePythonImportsRequest(
tgt[PythonSources],
tgt[PythonSourceField],
InterpreterConstraints([constraints]),
string_imports=string_imports,
string_imports_min_dots=string_imports_min_dots,
Expand Down Expand Up @@ -94,9 +96,6 @@ def test_normal_imports(rule_runner: RuleRunner) -> None:
__import__("pkg_resources")
"""
)
# We create a second file, in addition to what `assert_imports_parsed` does, to ensure we can
# handle multiple files belonging to the same target.
rule_runner.write_files({"project/f2.py": "import second_import"})
assert_imports_parsed(
rule_runner,
content,
Expand All @@ -110,7 +109,6 @@ def test_normal_imports(rule_runner: RuleRunner) -> None:
"project.demo.Demo",
"project.demo.OriginalName",
"project.circular_dep.CircularDep",
"second_import",
"subprocess",
"subprocess23",
"pkg_resources",
Expand Down Expand Up @@ -195,15 +193,11 @@ def test_imports_from_strings(rule_runner: RuleRunner, min_dots: int) -> None:


def test_gracefully_handle_syntax_errors(rule_runner: RuleRunner) -> None:
assert_imports_parsed(rule_runner, content="x =", expected=[])
assert_imports_parsed(rule_runner, "x =", expected=[])


def test_handle_unicode(rule_runner: RuleRunner) -> None:
assert_imports_parsed(rule_runner, content="x = 'äbç'", expected=[])


def test_gracefully_handle_no_sources(rule_runner: RuleRunner) -> None:
assert_imports_parsed(rule_runner, content=None, expected=[])
assert_imports_parsed(rule_runner, "x = 'äbç'", expected=[])


@skip_unless_python27_present
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
PythonRequirementModulesField,
PythonRequirementsField,
PythonRequirementTypeStubModulesField,
PythonSources,
PythonSourceField,
TypeStubsModuleMappingField,
)
from pants.base.specs import AddressSpecs, DescendantAddresses
Expand Down Expand Up @@ -174,32 +174,37 @@ async def map_first_party_python_targets_to_modules(
_: FirstPartyPythonTargetsMappingMarker,
) -> FirstPartyPythonMappingImpl:
all_expanded_targets = await Get(Targets, AddressSpecs([DescendantAddresses("")]))
python_targets = tuple(tgt for tgt in all_expanded_targets if tgt.has_field(PythonSources))
python_targets = tuple(tgt for tgt in all_expanded_targets if tgt.has_field(PythonSourceField))
stripped_sources_per_target = await MultiGet(
Get(StrippedSourceFileNames, SourcesPathsRequest(tgt[PythonSources]))
Get(StrippedSourceFileNames, SourcesPathsRequest(tgt[PythonSourceField]))
for tgt in python_targets
)

modules_to_addresses: DefaultDict[str, list[Address]] = defaultdict(list)
modules_with_multiple_implementations: DefaultDict[str, set[Address]] = defaultdict(set)
for tgt, stripped_sources in zip(python_targets, stripped_sources_per_target):
for stripped_f in stripped_sources:
module = PythonModule.create_from_stripped_path(PurePath(stripped_f)).module
if module in modules_to_addresses:
# We check if one of the targets is an implementation (.py file) and the other is
# a type stub (.pyi file), which we allow. Otherwise, we have ambiguity.
prior_is_type_stub = len(
modules_to_addresses[module]
) == 1 and modules_to_addresses[module][0].filename.endswith(".pyi")
current_is_type_stub = tgt.address.filename.endswith(".pyi")
if prior_is_type_stub ^ current_is_type_stub:
modules_to_addresses[module].append(tgt.address)
else:
modules_with_multiple_implementations[module].update(
{*modules_to_addresses[module], tgt.address}
)
else:
modules_to_addresses[module].append(tgt.address)
# `PythonSourceFile` validates that each target has exactly one file.
assert len(stripped_sources) == 1
stripped_f = stripped_sources[0]

module = PythonModule.create_from_stripped_path(PurePath(stripped_f)).module
if module not in modules_to_addresses:
modules_to_addresses[module].append(tgt.address)
continue

# Else, possible ambiguity. Check if one of the targets is an implementation
# (.py file) and the other is a type stub (.pyi file), which we allow. Otherwise, it's
# ambiguous.
prior_is_type_stub = len(modules_to_addresses[module]) == 1 and modules_to_addresses[
module
][0].filename.endswith(".pyi")
current_is_type_stub = tgt.address.filename.endswith(".pyi")
if prior_is_type_stub ^ current_is_type_stub:
modules_to_addresses[module].append(tgt.address)
else:
modules_with_multiple_implementations[module].update(
{*modules_to_addresses[module], tgt.address}
)

# Remove modules with ambiguous owners.
for module in modules_with_multiple_implementations:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
ThirdPartyPythonModuleMapping,
)
from pants.backend.python.dependency_inference.module_mapper import rules as module_mapper_rules
from pants.backend.python.target_types import PythonLibrary, PythonRequirementTarget
from pants.backend.python.target_types import PythonRequirementTarget, PythonSourcesGeneratorTarget
from pants.core.util_rules import stripped_source_files
from pants.engine.addresses import Address
from pants.testutil.rule_runner import QueryRule, RuleRunner
Expand Down Expand Up @@ -174,7 +174,11 @@ def rule_runner() -> RuleRunner:
QueryRule(ThirdPartyPythonModuleMapping, []),
QueryRule(PythonModuleOwners, [PythonModule]),
],
target_types=[PythonLibrary, PythonRequirementTarget, ProtobufSourcesGeneratorTarget],
target_types=[
PythonSourcesGeneratorTarget,
PythonRequirementTarget,
ProtobufSourcesGeneratorTarget,
],
)


Expand Down
Loading

0 comments on commit e865418

Please sign in to comment.