Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lockfile invalidation -- add metadata to generated lockfiles #12427

Merged
Merged
5 changes: 5 additions & 0 deletions src/python/pants/backend/experimental/python/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,8 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).

python_library()

python_tests(
    name = "tests",
    # NOTE(review): timeout raised above the default — presumably these tests shell out
    # to pip-tools and are slow; confirm before lowering.
    timeout = 180,
)
40 changes: 35 additions & 5 deletions src/python/pants/backend/experimental/python/lockfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
from dataclasses import dataclass
from typing import cast

from pants.backend.experimental.python.lockfile_metadata import (
invalidation_digest,
lockfile_content_with_header,
)
from pants.backend.python.subsystems.python_tool_base import (
PythonToolBase,
PythonToolRequirementsBase,
Expand All @@ -15,7 +19,14 @@
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
from pants.backend.python.util_rules.pex import PexRequest, PexRequirements, VenvPex, VenvPexProcess
from pants.engine.addresses import Addresses
from pants.engine.fs import CreateDigest, Digest, FileContent, MergeDigests, Workspace
from pants.engine.fs import (
CreateDigest,
Digest,
DigestContents,
FileContent,
MergeDigests,
Workspace,
)
from pants.engine.goal import Goal, GoalSubsystem
from pants.engine.process import ProcessResult
from pants.engine.rules import Get, MultiGet, collect_rules, goal_rule, rule
Expand Down Expand Up @@ -93,13 +104,23 @@ def from_tool(
description=f"Generate lockfile for {subsystem.options_scope}",
)

@property
def hex_digest(self) -> str:
    """Produces a hex digest of this lockfile's inputs, which should uniquely specify the
    resolution of this lockfile request.

    Inputs are defined as requirements and interpreter constraints.
    """
    return invalidation_digest(self.requirements, self.interpreter_constraints)


@rule(desc="Generate lockfile", level=LogLevel.DEBUG)
async def generate_lockfile(
req: PythonLockfileRequest, pip_tools_subsystem: PipToolsSubsystem
) -> PythonLockfile:
reqs_filename = "reqs.txt"
input_requirements = await Get(
Digest, CreateDigest([FileContent("reqs.txt", "\n".join(req.requirements).encode())])
Digest, CreateDigest([FileContent(reqs_filename, "\n".join(req.requirements).encode())])
)

pip_compile_pex = await Get(
Expand All @@ -117,7 +138,7 @@ async def generate_lockfile(
),
)

result = await Get(
generated_lockfile = await Get(
ProcessResult,
# TODO(#12314): Figure out named_caches for pip-tools. The best would be to share
# the cache between Pex and Pip. Next best is a dedicated named_cache.
Expand All @@ -126,7 +147,7 @@ async def generate_lockfile(
description=req.description,
# TODO(#12314): Wire up all the pip options like indexes.
argv=[
"reqs.txt",
reqs_filename,
"--generate-hashes",
f"--output-file={req.dest}",
# NB: This allows pinning setuptools et al, which we must do. This will become
Expand All @@ -137,7 +158,16 @@ async def generate_lockfile(
output_files=(req.dest,),
),
)
return PythonLockfile(result.output_digest, req.dest)

_lockfile_contents_iter = await Get(DigestContents, Digest, generated_lockfile.output_digest)
lockfile_contents = _lockfile_contents_iter[0]

content_with_header = lockfile_content_with_header(req.hex_digest, lockfile_contents.content)
complete_lockfile = await Get(
Digest, CreateDigest([FileContent(req.dest, content_with_header)])
)

return PythonLockfile(complete_lockfile, req.dest)


# --------------------------------------------------------------------------------------
Expand Down
77 changes: 77 additions & 0 deletions src/python/pants/backend/experimental/python/lockfile_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import annotations

import hashlib
import json
from dataclasses import dataclass

from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
from pants.util.ordered_set import FrozenOrderedSet

# Sentinel comment lines that delimit the metadata block Pants prepends to generated
# lockfiles. read_lockfile_metadata scans for these exact byte strings, so they must
# match what lockfile_metadata_header emits.
BEGIN_LOCKFILE_HEADER = b"# --- BEGIN PANTS LOCKFILE METADATA: DO NOT EDIT OR REMOVE ---"
END_LOCKFILE_HEADER = b"# --- END PANTS LOCKFILE METADATA ---"


@dataclass
class LockfileMetadata:
    """Metadata parsed from the Pants header block of a generated lockfile."""

    # Hex digest of the lockfile's inputs (requirements + interpreter constraints),
    # or None when the metadata block contained no "invalidation digest" entry.
    invalidation_digest: str | None


def invalidation_digest(
    requirements: FrozenOrderedSet[str], interpreter_constraints: InterpreterConstraints
) -> str:
    """Return a hex SHA-256 digest uniquely identifying a lockfile's inputs.

    The digest covers the requirement strings and the stringified interpreter
    constraints, so changing either produces a different digest.
    """
    fingerprint = {
        "requirements": list(requirements),
        "interpreter_constraints": [str(constraint) for constraint in interpreter_constraints],
    }
    return hashlib.sha256(json.dumps(fingerprint).encode("utf-8")).hexdigest()


def lockfile_content_with_header(invalidation_digest: str, content: bytes) -> bytes:
    """Prepend the Pants metadata header (carrying `invalidation_digest`) to `content`."""
    header = lockfile_metadata_header(invalidation_digest)
    return header + b"\n" + content


def lockfile_metadata_header(invalidation_digest: str) -> bytes:
    """Produce the metadata comment block placed at the top of a generated lockfile.

    Currently the block carries only the invalidation digest, which Pants checks when
    it consumes the lockfile during builds.
    """
    return b"\n".join(
        (
            BEGIN_LOCKFILE_HEADER,
            b"# invalidation digest: " + invalidation_digest.encode("ascii"),
            END_LOCKFILE_HEADER,
        )
    )


def read_lockfile_metadata(contents: bytes) -> LockfileMetadata:
    """Parse the Pants metadata block embedded in `contents` into a `LockfileMetadata`.

    Scans for the BEGIN/END sentinel comment lines; each line between them is expected
    to be a `# key: value` comment. Returns a `LockfileMetadata` whose
    `invalidation_digest` is None when no metadata block (or no digest entry) is found.
    """

    metadata: dict[str, str] = {}

    in_metadata_block = False
    for line in contents.splitlines():
        line = line.strip()
        if line == BEGIN_LOCKFILE_HEADER:
            in_metadata_block = True
        elif line == END_LOCKFILE_HEADER:
            break
        elif in_metadata_block:
            # Strip the leading `#`, then split on the FIRST colon only, so values that
            # themselves contain `:` are preserved intact. (An unbounded split raised
            # "too many values to unpack" for such values.)
            key, value = (i.strip().decode("ascii") for i in line[1:].split(b":", 1))
            metadata[key] = value

    return LockfileMetadata(invalidation_digest=metadata.get("invalidation digest"))
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import pytest

from pants.backend.experimental.python.lockfile_metadata import (
invalidation_digest,
lockfile_content_with_header,
lockfile_metadata_header,
read_lockfile_metadata,
)
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
from pants.util.ordered_set import FrozenOrderedSet


def test_metadata_round_trip() -> None:
    """A digest written by the header generator must be read back unchanged."""
    digest = "help_i_am_trapped_inside_a_unit_test_string"
    metadata = read_lockfile_metadata(lockfile_metadata_header(digest))
    assert metadata.invalidation_digest == digest


def test_validated_lockfile_content() -> None:
    """The header must be prepended ahead of the original lockfile body."""
    content = b"""dave==3.1.4 \\
--hash=sha256:cab0c0c0c0c0dadacafec0c0c0c0cafedadabeefc0c0c0c0feedbeeffeedbeef \\
"""

    output = b"""
# --- BEGIN PANTS LOCKFILE METADATA: DO NOT EDIT OR REMOVE ---
# invalidation digest: 000faaafcacacaca
# --- END PANTS LOCKFILE METADATA ---
dave==3.1.4 \\
--hash=sha256:cab0c0c0c0c0dadacafec0c0c0c0cafedadabeefc0c0c0c0feedbeeffeedbeef \\
"""

    # Helper to make the comparison resilient to reformatting/blank lines.
    # (Was a lambda assigned to a name, which PEP 8 E731 forbids — use `def`.)
    def line_by_line(b: bytes) -> list:
        return [i for i in (j.strip() for j in b.splitlines()) if i]

    assert line_by_line(lockfile_content_with_header("000faaafcacacaca", content)) == line_by_line(
        output
    )


# Shared fixtures for the digest parametrization below.
_interpreter_constraints = [">=3.7", "<3.10"]
_requirements = ["flake8-pantsbuild>=2.0,<3", "flake8-2020>=1.6.0,<1.7.0"]


@pytest.mark.parametrize(
    "requirements,interpreter_constraints,expected",
    [
        ([], [], "51f5289473089f1de64ab760af3f03ff55cd769f25cce7ea82dd1ac88aac5ff4"),
        (
            _interpreter_constraints,
            [],
            "821e8eef80573c7d2460185da4d436b6a8c59e134f5f0758000be3c85e9819eb",
        ),
        ([], _requirements, "604fb99ed6d6d83ba2c4eb1230184dd7f279a446cda042e9e87099448f28dddb"),
        (
            _interpreter_constraints,
            _requirements,
            "9264a3b59a592d7eeac9cb4bbb4f5b2200907694bfe92b48757c99b1f71485f0",
        ),
    ],
)
def test_hex_digest(requirements, interpreter_constraints, expected) -> None:
    # Pin exact digests so any change to the hash inputs or algorithm is caught
    # (such a change would invalidate every existing lockfile).
    assert (
        invalidation_digest(
            FrozenOrderedSet(requirements), InterpreterConstraints(interpreter_constraints)
        )
        == expected
    )


def test_hash_depends_on_requirement_source() -> None:
    """The same strings must hash differently depending on which input they came from."""
    values = ["CPython"]
    as_requirements = invalidation_digest(FrozenOrderedSet(values), InterpreterConstraints([]))
    as_constraints = invalidation_digest(FrozenOrderedSet([]), InterpreterConstraints(values))
    assert as_requirements != as_constraints