Lockfile invalidation -- add metadata to generated lockfiles (#12427)
This WIP adds the lockfile invalidation header, but does not yet consume it.

Partially addresses #12415.
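
A minimal sketch of the round trip these changes introduce, using the helpers added in lockfile_metadata.py (the requirement string and lockfile body below are illustrative placeholders, not values from this commit):

from pants.backend.experimental.python.lockfile_metadata import (
    invalidation_digest,
    lockfile_content_with_header,
    read_lockfile_metadata,
)
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
from pants.util.ordered_set import FrozenOrderedSet

# Digest of the inputs whose change should invalidate the lockfile.
digest = invalidation_digest(
    FrozenOrderedSet(["flake8-2020>=1.6.0,<1.7.0"]), InterpreterConstraints([">=3.7"])
)

# Prepend the metadata header to some (placeholder) pip-compile output.
lockfile = lockfile_content_with_header(digest, b"flake8-2020==1.6.0\n")

# Parsing the lockfile back recovers the digest for later staleness checks.
assert read_lockfile_metadata(lockfile).invalidation_digest == digest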
Christopher Neugebauer authored Jul 30, 2021
1 parent 71abd67 commit 6ee8f27
Showing 4 changed files with 193 additions and 5 deletions.
5 changes: 5 additions & 0 deletions src/python/pants/backend/experimental/python/BUILD
@@ -2,3 +2,8 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).

python_library()

python_tests(
name = "tests",
timeout = 180,
)
40 changes: 35 additions & 5 deletions src/python/pants/backend/experimental/python/lockfile.py
@@ -7,6 +7,10 @@
from dataclasses import dataclass
from typing import cast

from pants.backend.experimental.python.lockfile_metadata import (
invalidation_digest,
lockfile_content_with_header,
)
from pants.backend.python.subsystems.python_tool_base import (
PythonToolBase,
PythonToolRequirementsBase,
@@ -15,7 +19,14 @@
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
from pants.backend.python.util_rules.pex import PexRequest, PexRequirements, VenvPex, VenvPexProcess
from pants.engine.addresses import Addresses
from pants.engine.fs import CreateDigest, Digest, FileContent, MergeDigests, Workspace
from pants.engine.fs import (
CreateDigest,
Digest,
DigestContents,
FileContent,
MergeDigests,
Workspace,
)
from pants.engine.goal import Goal, GoalSubsystem
from pants.engine.process import ProcessResult
from pants.engine.rules import Get, MultiGet, collect_rules, goal_rule, rule
@@ -93,13 +104,23 @@ def from_tool(
description=f"Generate lockfile for {subsystem.options_scope}",
)

@property
def hex_digest(self) -> str:
"""Produces a hex digest of this lockfile's inputs, which should uniquely specify the
resolution of this lockfile request.
Inputs are defined as requirements and interpreter constraints.
"""
return invalidation_digest(self.requirements, self.interpreter_constraints)


@rule(desc="Generate lockfile", level=LogLevel.DEBUG)
async def generate_lockfile(
req: PythonLockfileRequest, pip_tools_subsystem: PipToolsSubsystem
) -> PythonLockfile:
reqs_filename = "reqs.txt"
input_requirements = await Get(
Digest, CreateDigest([FileContent("reqs.txt", "\n".join(req.requirements).encode())])
Digest, CreateDigest([FileContent(reqs_filename, "\n".join(req.requirements).encode())])
)

pip_compile_pex = await Get(
@@ -117,7 +138,7 @@ async def generate_lockfile(
),
)

result = await Get(
generated_lockfile = await Get(
ProcessResult,
# TODO(#12314): Figure out named_caches for pip-tools. The best would be to share
# the cache between Pex and Pip. Next best is a dedicated named_cache.
@@ -126,7 +147,7 @@
description=req.description,
# TODO(#12314): Wire up all the pip options like indexes.
argv=[
"reqs.txt",
reqs_filename,
"--generate-hashes",
f"--output-file={req.dest}",
# NB: This allows pinning setuptools et al, which we must do. This will become
@@ -137,7 +158,16 @@
output_files=(req.dest,),
),
)
return PythonLockfile(result.output_digest, req.dest)

_lockfile_contents_iter = await Get(DigestContents, Digest, generated_lockfile.output_digest)
lockfile_contents = _lockfile_contents_iter[0]

content_with_header = lockfile_content_with_header(req.hex_digest, lockfile_contents.content)
complete_lockfile = await Get(
Digest, CreateDigest([FileContent(req.dest, content_with_header)])
)

return PythonLockfile(complete_lockfile, req.dest)


# --------------------------------------------------------------------------------------
77 changes: 77 additions & 0 deletions src/python/pants/backend/experimental/python/lockfile_metadata.py
@@ -0,0 +1,77 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import annotations

import hashlib
import json
from dataclasses import dataclass

from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
from pants.util.ordered_set import FrozenOrderedSet

BEGIN_LOCKFILE_HEADER = b"# --- BEGIN PANTS LOCKFILE METADATA: DO NOT EDIT OR REMOVE ---"
END_LOCKFILE_HEADER = b"# --- END PANTS LOCKFILE METADATA ---"


@dataclass
class LockfileMetadata:
invalidation_digest: str | None


def invalidation_digest(
requirements: FrozenOrderedSet[str], interpreter_constraints: InterpreterConstraints
) -> str:
"""Returns an invalidation digest for the given requirements and interpreter constraints."""
m = hashlib.sha256()
pres = {
"requirements": list(requirements),
"interpreter_constraints": [str(i) for i in interpreter_constraints],
}
m.update(json.dumps(pres).encode("utf-8"))
return m.hexdigest()


def lockfile_content_with_header(invalidation_digest: str, content: bytes) -> bytes:
"""Returns a version of the lockfile with a pants metadata header prepended."""
return b"%b\n%b" % (lockfile_metadata_header(invalidation_digest), content)


def lockfile_metadata_header(invalidation_digest: str) -> bytes:
"""Produces a metadata bytes object for including at the top of a lockfile.
Currently, this only consists of an invalidation digest for the file, which is used when Pants
consumes the lockfile during builds.
"""
return (
b"""
%(BEGIN_LOCKFILE_HEADER)b
# invalidation digest: %(invalidation_digest)s
%(END_LOCKFILE_HEADER)b
"""
% {
b"BEGIN_LOCKFILE_HEADER": BEGIN_LOCKFILE_HEADER,
b"invalidation_digest": invalidation_digest.encode("ascii"),
b"END_LOCKFILE_HEADER": END_LOCKFILE_HEADER,
}
).strip()


def read_lockfile_metadata(contents: bytes) -> LockfileMetadata:
"""Reads through `contents`, and returns the contents of the lockfile metadata block as a
`LockfileMetadata` object."""

metadata = {}

in_metadata_block = False
for line in contents.splitlines():
line = line.strip()
if line == BEGIN_LOCKFILE_HEADER:
in_metadata_block = True
elif line == END_LOCKFILE_HEADER:
break
elif in_metadata_block:
key, value = (i.strip().decode("ascii") for i in line[1:].split(b":"))
metadata[key] = value

return LockfileMetadata(invalidation_digest=metadata.get("invalidation digest"))
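
As the commit message notes, nothing consumes this metadata yet. A hypothetical consumer (sketched below; the function name and signature are not part of this change) could compare the stored digest against a freshly computed one to decide whether the lockfile needs regenerating:

from pants.backend.experimental.python.lockfile_metadata import (
    invalidation_digest,
    read_lockfile_metadata,
)
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
from pants.util.ordered_set import FrozenOrderedSet


def lockfile_is_stale(
    lockfile_bytes: bytes,
    requirements: FrozenOrderedSet[str],
    interpreter_constraints: InterpreterConstraints,
) -> bool:
    # A lockfile without a metadata header parses to invalidation_digest=None,
    # which also reads as stale here.
    stored = read_lockfile_metadata(lockfile_bytes).invalidation_digest
    return stored != invalidation_digest(requirements, interpreter_constraints)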
76 changes: 76 additions & 0 deletions src/python/pants/backend/experimental/python/lockfile_metadata_test.py
@@ -0,0 +1,76 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import pytest

from pants.backend.experimental.python.lockfile_metadata import (
invalidation_digest,
lockfile_content_with_header,
lockfile_metadata_header,
read_lockfile_metadata,
)
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
from pants.util.ordered_set import FrozenOrderedSet


def test_metadata_round_trip() -> None:
val = "help_i_am_trapped_inside_a_unit_test_string"
output = read_lockfile_metadata(lockfile_metadata_header(val))
assert val == output.invalidation_digest


def test_validated_lockfile_content() -> None:
content = b"""dave==3.1.4 \\
--hash=sha256:cab0c0c0c0c0dadacafec0c0c0c0cafedadabeefc0c0c0c0feedbeeffeedbeef \\
"""

output = b"""
# --- BEGIN PANTS LOCKFILE METADATA: DO NOT EDIT OR REMOVE ---
# invalidation digest: 000faaafcacacaca
# --- END PANTS LOCKFILE METADATA ---
dave==3.1.4 \\
--hash=sha256:cab0c0c0c0c0dadacafec0c0c0c0cafedadabeefc0c0c0c0feedbeeffeedbeef \\
"""

# Helper function to make the test case more resilient to reformatting
line_by_line = lambda b: [i for i in (j.strip() for j in b.splitlines()) if i]
assert line_by_line(lockfile_content_with_header("000faaafcacacaca", content)) == line_by_line(
output
)


_interpreter_constraints = [">=3.7", "<3.10"]
_requirements = ["flake8-pantsbuild>=2.0,<3", "flake8-2020>=1.6.0,<1.7.0"]


@pytest.mark.parametrize(
"requirements,interpreter_constraints,expected",
[
([], [], "51f5289473089f1de64ab760af3f03ff55cd769f25cce7ea82dd1ac88aac5ff4"),
(
_interpreter_constraints,
[],
"821e8eef80573c7d2460185da4d436b6a8c59e134f5f0758000be3c85e9819eb",
),
([], _requirements, "604fb99ed6d6d83ba2c4eb1230184dd7f279a446cda042e9e87099448f28dddb"),
(
_interpreter_constraints,
_requirements,
"9264a3b59a592d7eeac9cb4bbb4f5b2200907694bfe92b48757c99b1f71485f0",
),
],
)
def test_hex_digest(requirements, interpreter_constraints, expected) -> None:
assert (
invalidation_digest(
FrozenOrderedSet(requirements), InterpreterConstraints(interpreter_constraints)
)
== expected
)


def test_hash_depends_on_requirement_source() -> None:
reqs = ["CPython"]
assert invalidation_digest(
FrozenOrderedSet(reqs), InterpreterConstraints([])
) != invalidation_digest(FrozenOrderedSet([]), InterpreterConstraints(reqs))
