Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lockfile invalidation -- add metadata to generated lockfiles #12427

Merged
Merged
5 changes: 5 additions & 0 deletions src/python/pants/backend/experimental/python/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,8 @@
# Licensed under the Apache License, Version 2.0 (see LICENSE).

python_library()

python_tests(
    name = "tests",
    # NOTE(review): timeout raised above the default — presumably these tests shell out
    # to pip-tools and are slow; confirm before lowering.
    timeout = 180,
)
40 changes: 35 additions & 5 deletions src/python/pants/backend/experimental/python/lockfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
from dataclasses import dataclass
from typing import cast

from pants.backend.experimental.python.lockfile_metadata import (
invalidation_digest,
lockfile_content_with_header,
)
from pants.backend.python.subsystems.python_tool_base import (
PythonToolBase,
PythonToolRequirementsBase,
Expand All @@ -15,7 +19,14 @@
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
from pants.backend.python.util_rules.pex import PexRequest, PexRequirements, VenvPex, VenvPexProcess
from pants.engine.addresses import Addresses
from pants.engine.fs import CreateDigest, Digest, FileContent, MergeDigests, Workspace
from pants.engine.fs import (
CreateDigest,
Digest,
DigestContents,
FileContent,
MergeDigests,
Workspace,
)
from pants.engine.goal import Goal, GoalSubsystem
from pants.engine.process import ProcessResult
from pants.engine.rules import Get, MultiGet, collect_rules, goal_rule, rule
Expand Down Expand Up @@ -93,13 +104,23 @@ def from_tool(
description=f"Generate lockfile for {subsystem.options_scope}",
)

@property
def hex_digest(self) -> str:
    """Produces a hex digest of this lockfile's inputs, which should uniquely specify the
    resolution of this lockfile request.

    Inputs are defined as requirements and interpreter constraints.
    """
    return invalidation_digest(self.requirements, self.interpreter_constraints)


@rule(desc="Generate lockfile", level=LogLevel.DEBUG)
async def generate_lockfile(
req: PythonLockfileRequest, pip_tools_subsystem: PipToolsSubsystem
) -> PythonLockfile:
reqs_filename = "reqs.txt"
input_requirements = await Get(
Digest, CreateDigest([FileContent("reqs.txt", "\n".join(req.requirements).encode())])
Digest, CreateDigest([FileContent(reqs_filename, "\n".join(req.requirements).encode())])
)

pip_compile_pex = await Get(
Expand All @@ -117,7 +138,7 @@ async def generate_lockfile(
),
)

result = await Get(
generated_lockfile = await Get(
ProcessResult,
# TODO(#12314): Figure out named_caches for pip-tools. The best would be to share
# the cache between Pex and Pip. Next best is a dedicated named_cache.
Expand All @@ -126,7 +147,7 @@ async def generate_lockfile(
description=req.description,
# TODO(#12314): Wire up all the pip options like indexes.
argv=[
"reqs.txt",
reqs_filename,
"--generate-hashes",
f"--output-file={req.dest}",
# NB: This allows pinning setuptools et al, which we must do. This will become
Expand All @@ -137,7 +158,16 @@ async def generate_lockfile(
output_files=(req.dest,),
),
)
return PythonLockfile(result.output_digest, req.dest)

_lockfile_contents_iter = await Get(DigestContents, Digest, generated_lockfile.output_digest)
lockfile_contents = _lockfile_contents_iter[0]

content_with_header = lockfile_content_with_header(req.hex_digest, lockfile_contents.content)
complete_lockfile = await Get(
Digest, CreateDigest([FileContent(req.dest, content_with_header)])
)

return PythonLockfile(complete_lockfile, req.dest)


# --------------------------------------------------------------------------------------
Expand Down
77 changes: 77 additions & 0 deletions src/python/pants/backend/experimental/python/lockfile_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import annotations

import hashlib
import json
from dataclasses import dataclass

from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
from pants.util.ordered_set import FrozenOrderedSet

# Sentinel comment lines that delimit the metadata block Pants prepends to generated
# lockfiles. read_lockfile_metadata scans for these exact byte strings, so they must
# match what lockfile_metadata_header emits.
BEGIN_LOCKFILE_HEADER = b"# --- BEGIN PANTS LOCKFILE METADATA: DO NOT EDIT OR REMOVE ---"
END_LOCKFILE_HEADER = b"# --- END PANTS LOCKFILE METADATA ---"


@dataclass
class LockfileMetadata:
    """Metadata parsed from the Pants header block of a generated lockfile."""

    # Hex digest of the lockfile's inputs (requirements + interpreter constraints),
    # or None when the metadata block contained no "invalidation digest" entry.
    invalidation_digest: str | None


def invalidation_digest(
    requirements: FrozenOrderedSet[str], interpreter_constraints: InterpreterConstraints
) -> str:
    """Return a hex SHA-256 digest uniquely identifying a lockfile's inputs.

    The digest covers the requirement strings and the stringified interpreter
    constraints, so changing either produces a different digest.
    """
    fingerprint = {
        "requirements": list(requirements),
        "interpreter_constraints": [str(constraint) for constraint in interpreter_constraints],
    }
    return hashlib.sha256(json.dumps(fingerprint).encode("utf-8")).hexdigest()


def lockfile_content_with_header(invalidation_digest: str, content: bytes) -> bytes:
    """Prepend the Pants metadata header (carrying `invalidation_digest`) to `content`."""
    header = lockfile_metadata_header(invalidation_digest)
    return header + b"\n" + content


def lockfile_metadata_header(invalidation_digest: str) -> bytes:
    """Produce the metadata comment block placed at the top of a generated lockfile.

    Currently the block carries only the invalidation digest, which Pants checks when
    it consumes the lockfile during builds.
    """
    return b"\n".join(
        (
            BEGIN_LOCKFILE_HEADER,
            b"# invalidation digest: " + invalidation_digest.encode("ascii"),
            END_LOCKFILE_HEADER,
        )
    )


def read_lockfile_metadata(contents: bytes) -> LockfileMetadata:
    """Parse the Pants metadata block embedded in `contents` into a `LockfileMetadata`.

    Scans for the BEGIN/END sentinel comment lines; each line between them is expected
    to be a `# key: value` comment. Returns a `LockfileMetadata` whose
    `invalidation_digest` is None when no metadata block (or no digest entry) is found.
    """

    metadata: dict[str, str] = {}

    in_metadata_block = False
    for line in contents.splitlines():
        line = line.strip()
        if line == BEGIN_LOCKFILE_HEADER:
            in_metadata_block = True
        elif line == END_LOCKFILE_HEADER:
            break
        elif in_metadata_block:
            # Strip the leading `#`, then split on the FIRST colon only, so values that
            # themselves contain `:` are preserved intact. (An unbounded split raised
            # "too many values to unpack" for such values.)
            key, value = (i.strip().decode("ascii") for i in line[1:].split(b":", 1))
            metadata[key] = value

    return LockfileMetadata(invalidation_digest=metadata.get("invalidation digest"))
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import pytest

from pants.backend.experimental.python.lockfile_metadata import (
invalidation_digest,
lockfile_content_with_header,
lockfile_metadata_header,
read_lockfile_metadata,
)
from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints
from pants.util.ordered_set import FrozenOrderedSet


def test_metadata_round_trip() -> None:
    """A digest written by the header generator must be read back unchanged."""
    digest = "help_i_am_trapped_inside_a_unit_test_string"
    metadata = read_lockfile_metadata(lockfile_metadata_header(digest))
    assert metadata.invalidation_digest == digest


def test_validated_lockfile_content() -> None:
    """The header must be prepended ahead of the original lockfile body."""
    content = b"""dave==3.1.4 \\
--hash=sha256:cab0c0c0c0c0dadacafec0c0c0c0cafedadabeefc0c0c0c0feedbeeffeedbeef \\
"""

    output = b"""
# --- BEGIN PANTS LOCKFILE METADATA: DO NOT EDIT OR REMOVE ---
# invalidation digest: 000faaafcacacaca
# --- END PANTS LOCKFILE METADATA ---
dave==3.1.4 \\
--hash=sha256:cab0c0c0c0c0dadacafec0c0c0c0cafedadabeefc0c0c0c0feedbeeffeedbeef \\
"""

    # Helper to make the comparison resilient to reformatting/blank lines.
    # (Was a lambda assigned to a name, which PEP 8 E731 forbids — use `def`.)
    def line_by_line(b: bytes) -> list:
        return [i for i in (j.strip() for j in b.splitlines()) if i]

    assert line_by_line(lockfile_content_with_header("000faaafcacacaca", content)) == line_by_line(
        output
    )


# Shared fixtures for the digest parametrization below.
_interpreter_constraints = [">=3.7", "<3.10"]
_requirements = ["flake8-pantsbuild>=2.0,<3", "flake8-2020>=1.6.0,<1.7.0"]


@pytest.mark.parametrize(
    "requirements,interpreter_constraints,expected",
    [
        ([], [], "51f5289473089f1de64ab760af3f03ff55cd769f25cce7ea82dd1ac88aac5ff4"),
        (
            _interpreter_constraints,
            [],
            "821e8eef80573c7d2460185da4d436b6a8c59e134f5f0758000be3c85e9819eb",
        ),
        ([], _requirements, "604fb99ed6d6d83ba2c4eb1230184dd7f279a446cda042e9e87099448f28dddb"),
        (
            _interpreter_constraints,
            _requirements,
            "9264a3b59a592d7eeac9cb4bbb4f5b2200907694bfe92b48757c99b1f71485f0",
        ),
    ],
)
def test_hex_digest(requirements, interpreter_constraints, expected) -> None:
    # Pin exact digests so any change to the hash inputs or algorithm is caught
    # (such a change would invalidate every existing lockfile).
    assert (
        invalidation_digest(
            FrozenOrderedSet(requirements), InterpreterConstraints(interpreter_constraints)
        )
        == expected
    )


def test_hash_depends_on_requirement_source() -> None:
    """The same strings must hash differently depending on which input they came from."""
    values = ["CPython"]
    as_requirements = invalidation_digest(FrozenOrderedSet(values), InterpreterConstraints([]))
    as_constraints = invalidation_digest(FrozenOrderedSet([]), InterpreterConstraints(values))
    assert as_requirements != as_constraints