diff --git a/conda_lock/conda_lock.py b/conda_lock/conda_lock.py index 0bc43fdc9..98fb1f9ff 100644 --- a/conda_lock/conda_lock.py +++ b/conda_lock/conda_lock.py @@ -38,6 +38,12 @@ from conda_lock.common import read_file, read_json, write_file from conda_lock.errors import PlatformValidationError +from conda_lock.lockfile_metadata import ( + DEFAULT_METADATA_TO_INCLUDE, + METADATA_FIELDS_LIST_AS_STRING, + make_metadata_header, + validate_metadata_to_include, +) from conda_lock.src_parser import LockSpecification from conda_lock.src_parser.environment_yaml import parse_environment_file from conda_lock.src_parser.meta_yaml import parse_meta_yaml_file @@ -386,6 +392,8 @@ def make_lock_files( channel_overrides: Optional[Sequence[str]] = None, filename_template: Optional[str] = None, check_spec_hash: bool = False, + metadata_to_include: List[str] = DEFAULT_METADATA_TO_INCLUDE, + comment: Optional[str] = None, ): """Generate the lock files for the given platforms from the src file provided @@ -405,7 +413,10 @@ def make_lock_files( Format for the lock file names. Must include {platform}. check_spec_hash : Validate that the existing spec hash has not already been generated for. - + metadata_to_include : + List of metadata fields to be added to the lockfiles. + comment : + Text to be added to the lockfile metadata. 
""" if filename_template: if "{platform}" not in filename_template and len(platforms) > 1: @@ -468,6 +479,10 @@ def make_lock_files( filename += KIND_FILE_EXT[kind] with open(filename, "w") as fo: + metadata_header = make_metadata_header( + lock_spec, metadata_to_include, comment + ) + lockfile_contents = metadata_header.splitlines() + lockfile_contents fo.write("\n".join(lockfile_contents) + "\n") print( @@ -508,11 +523,7 @@ def create_lockfile_from_spec( ) logging.debug("dry_run_install:\n%s", dry_run_install) - lockfile_contents = [ - "# Generated by conda-lock.", - f"# platform: {spec.platform}", - f"# input_hash: {spec.input_hash()}\n", - ] + lockfile_contents = [] if kind == "env": link_actions = dry_run_install["actions"]["LINK"] @@ -751,6 +762,8 @@ def run_lock( filename_template: Optional[str] = None, kinds: Optional[List[str]] = None, check_input_hash: bool = False, + metadata_to_include: List[str] = DEFAULT_METADATA_TO_INCLUDE, + comment: Optional[str] = None, ) -> None: if environment_files == DEFAULT_FILES: long_ext_file = pathlib.Path("environment.yaml") @@ -769,6 +782,8 @@ def run_lock( filename_template=filename_template, kinds=kinds or DEFAULT_KINDS, check_spec_hash=check_input_hash, + metadata_to_include=metadata_to_include, + comment=comment, ) @@ -849,6 +864,15 @@ def main(): default="INFO", type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]), ) +@click.option( + "--metadata", + default=",".join(DEFAULT_METADATA_TO_INCLUDE), + help="List of fields to include as a header to the lockfile. 
" + "Can be 'all', 'none', 'previous', or a comma-separated subset of " + f"{METADATA_FIELDS_LIST_AS_STRING}.", + callback=validate_metadata_to_include, +) +@click.option("--comment", help="Add a comment to the lockfile metadata.") # @click.option( # "-m", # "--mode", @@ -872,6 +896,8 @@ def lock( strip_auth, check_input_hash: bool, log_level, + metadata: List[str], + comment: Optional[str], ): """Generate fully reproducible lock files for conda environments. @@ -897,6 +923,8 @@ def lock( include_dev_dependencies=dev_dependencies, channel_overrides=channel_overrides, kinds=kind, + metadata_to_include=metadata, + comment=comment, ) if strip_auth: with tempfile.TemporaryDirectory() as tempdir: diff --git a/conda_lock/lockfile_metadata.py b/conda_lock/lockfile_metadata.py new file mode 100644 index 000000000..2f9fbe7bd --- /dev/null +++ b/conda_lock/lockfile_metadata.py @@ -0,0 +1,167 @@ +"""Machinery for preparing the conda-lock metadata headers. + +The main function to build the header is 'make_metadata_header()'. + +The actual functions which define the header fields are in lockfile_metadata_fields.py. + +Also included is some Click stuff for the "--metadata=..." command-line option, and +some stuff to configure the PyYAML output. +""" + +import inspect +import sys + +from functools import lru_cache +from typing import List, Optional + +import click +import yaml + +from conda_lock.lockfile_metadata_fields import ( + METADATA_FIELDS_LIST, + METADATA_FIELDS_LIST_AS_STRING, + LiteralStr, + MetadataFields, +) +from conda_lock.src_parser import LockSpecification + + +STR_TAG = "tag:yaml.org,2002:str" +"""Tag corresponding to the YAML string type.""" + +# Click stuff +# ----------- + +# This value of "PREVIOUS" is a special value. At some point we probably want +# to switch default to the new structured header. For that, uncomment the stuff +# directly below. 
+DEFAULT_METADATA_TO_INCLUDE = ["previous"] + + +# DEFAULT_METADATA_TO_INCLUDE = ["about", "platform", "input_hash"] +# """Default fields when '--metadata=...' is not present.""" + +# # Validate DEFAULT_METADATA_TO_INCLUDE. +# _invalid_fields = set(DEFAULT_METADATA_TO_INCLUDE) - set(METADATA_FIELDS_LIST) +# if _invalid_fields: +# raise ValueError(f"Default metadata values {_invalid_fields} are invalid.") + + +def validate_metadata_to_include(ctx, param, metadata_to_include) -> List[str]: + """Convert the comma-separated string of metadata fields into a list. + + For use as a callback function with Click. + """ + if metadata_to_include == "none": + return [] + elif metadata_to_include == "all": + return list(METADATA_FIELDS_LIST) # copy: never hand out the shared list + elif metadata_to_include == "previous": + return ["PREVIOUS"] + else: + # Parse and validate that metadata_to_include is a comma-separated list of + # metadata keys. + selected = metadata_to_include.split(",") + for key in selected: + # Validate the keys. + if key not in METADATA_FIELDS_LIST: + raise click.BadParameter( + f"'{key}' does not correspond to a valid field. It must be one of " + f"{METADATA_FIELDS_LIST_AS_STRING}." + ) + return selected + + +# Header generation +# ----------------- + + +def make_metadata_header( + spec: LockSpecification, + metadata_to_include: List[str] = DEFAULT_METADATA_TO_INCLUDE, + comment: Optional[str] = None, +): + """Constructs a string of commented YAML for inclusion as a header in lockfiles.""" + + if metadata_to_include == []: + return "" + + if metadata_to_include in (["PREVIOUS"], ["previous"]): # CLI or API default + return _previous_metadata_header(spec) + + if comment and "comment" not in metadata_to_include: + metadata_to_include = [*metadata_to_include, "comment"] # don't mutate caller + + fields = MetadataFields(spec, comment) + + # Create a dictionary with the selected metadata evaluated.
+ metadata_as_dict = {key: getattr(fields, key)() for key in metadata_to_include} + + warn_on_old_pyyaml() + metadata_as_yaml = ( + "---\n" + + yaml.dump(data={"conda-lock-metadata": metadata_as_dict}, Dumper=Dumper) + + "..." + ) + + metadata_as_commented_yaml = "\n".join( + [f"# {line}" for line in metadata_as_yaml.splitlines()] + ) + return metadata_as_commented_yaml + "\n" + + +def _previous_metadata_header(spec: LockSpecification) -> str: + """We should get rid of this soon, if possible.""" + return "\n".join( + [ + "# Generated by conda-lock.", + f"# platform: {spec.platform}", + f"# input_hash: {spec.input_hash()}\n", + ] + ) + + +# PyYAML stuff +# ------------ + + +@lru_cache() # The following function should run at most once. +def warn_on_old_pyyaml(): + """Versions of PyYAML less than 5.1 sort keys alphabetically.""" + yaml_dumper_params = inspect.signature(yaml.Dumper).parameters + if "sort_keys" not in yaml_dumper_params: + print( + f"WARNING: The currently-installed version of PyYAML (v{yaml.__version__}) " + "is very old, and the metadata keys will be sorted in alphabetical order " + "instead of the given order. Please upgrade PyYAML to v5.1 or greater.", + file=sys.stderr, + ) + + +def literal_representer(dumper: yaml.Dumper, data: str) -> yaml.ScalarNode: + """This tells PyYAML to format a given string as a literal block. + + This means that the '|' delimiter is used, and the text is indented, but + otherwise unformatted. + """ + literal_scalar_node = dumper.represent_scalar(STR_TAG, data, style="|") + return literal_scalar_node + + +class Dumper(yaml.Dumper): + """Dumper class for changing PyYAML output defaults.""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + # Prevent alphabetical sorting. + self.sort_keys = False + + # Don't escape unicode characters. + self.allow_unicode = True + + # Don't wrap long lines.
+ self.width = self.best_width = float("inf") + + # Register all instances of LiteralStr to be formatted as literal blocks. + self.add_representer(LiteralStr, literal_representer) diff --git a/conda_lock/lockfile_metadata_fields.py b/conda_lock/lockfile_metadata_fields.py new file mode 100644 index 000000000..bdf2225c2 --- /dev/null +++ b/conda_lock/lockfile_metadata_fields.py @@ -0,0 +1,131 @@ +"""Functions which define the fields in the lockfile's metadata header. + +New fields can be added to the MetadataFields class. +""" + +import os +import shlex +import sys + +from datetime import datetime +from getpass import getuser +from typing import List, Optional + +from pkg_resources import get_distribution + +from conda_lock.src_parser import LockSpecification + + +class MetadataFields: + """All fields which can be selected to appear in the lockfile's metadata header. + + To add a new field, simply define a method of the desired field name with no + arguments other than self. It will be automatically detected as a valid field, + so no further registration steps are necessary. + + Any helper methods or (static) class attributes should be prefixed with a '_' + character so that they won't be detected as fields. + """ + + def __init__(self, spec: LockSpecification, comment: Optional[str]): + """Here we pass in data to be accessed, and set corresponding instance + attributes. + """ + self.spec = spec + self.comment_str = comment + + def about(self) -> str: + """A message explaining that this file was created with conda-lock.""" + return "This lockfile was generated by conda-lock to ensure reproducibility." + + def platform(self) -> str: + """The platform of a given lockfile, e.g.
'linux-64'.""" + return self.spec.platform + + def created_by(self) -> Optional[str]: + """The username at the time of running the command.""" + try: + return getuser() + except (KeyError, AttributeError): + return None + + def timestamp(self) -> datetime: + """The time at which the lock was created.""" + return datetime.now().replace(microsecond=0).astimezone() + + def command_with_path(self) -> str: + """The command used to invoke the lock, after being processed by the shell.""" + return argv_to_string(sys.argv) + + def command(self) -> str: + """Same as 'command_with_path' but with the path to the executable stripped.""" + argv = sys.argv.copy() + argv[0] = os.path.basename(argv[0]) + return argv_to_string(argv) + + def conda_lock_version(self) -> str: + """The conda-lock version used to make this lockfile.""" + return get_distribution("conda_lock").version + + def metadata_version(self) -> int: + """The version of this metadata. Format is semver, major-only.""" + return 1 + + def input_hash(self) -> str: + """A SHA-256 hash of the lock file input specification.""" + return self.spec.input_hash() + + def comment(self) -> Optional[str]: + """A textual comment passed as a command-line argument.""" + if self.comment_str is None: + return None + return LiteralStr(self.comment_str) + + +METADATA_FIELDS_LIST = [ + field for field in dir(MetadataFields) if not field.startswith("_") +] +"""List of the public methods defined in MetadataFields.""" + +METADATA_FIELDS_LIST_AS_STRING = "'" + "', '".join(METADATA_FIELDS_LIST) + "'" +"""Looks something like: + 'about', 'command', ..., 'timestamp' +""" + + +def argv_to_string(argv: List[str]) -> str: + """Take a list of command-line arguments and escape them to a shell command. + + This is a helper function for 'command' and 'command_with_path'.
+ """ + return _escape_newlines_for_bash(" ".join(shlex.quote(arg) for arg in argv)) + + +def _escape_newlines_for_bash(s: str) -> str: + r"""This is a helper function for 'argv_to_string()'. + + We want to replace newline characters with: + $'\n' + But a newline character triggers shlex's single-quote mode, and thus can only + occur therein. Therefore we actually need surrounding single quotes around the + above string in order to exit and reenter single-quote mode. The actual substitution + rule we need is thus: + '$'\n'' + """ + if "\n" in s: + print( + r"WARNING: There are newlines appearing in the 'command' metadata field. " + r"They will be escaped for Bash as $'\n' but since you're doing something " + r"complicated, consider instead explaining your command with something " + r'like --comment="$(cat README.txt)"', + file=sys.stderr, + ) + return s.replace("\n", r"'$'\n''") + + +class LiteralStr(str): + """This is a trivial extension of the 'str' class which indicates to our PyYAML + Dumper class that the corresponding string should be formatted as a block literal. + """ + + pass diff --git a/setup.cfg b/setup.cfg index 427549f0e..3f010acb7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -56,7 +56,6 @@ include_trailing_comma=true lines_after_imports=2 lines_between_types=1 multi_line_output=3 -not_skip=__init__.py use_parentheses=true known_first_party=attr