From 64024b12f84dae02808556e6530d301f2a5fe28a Mon Sep 17 00:00:00 2001 From: John Sirois Date: Thu, 28 Sep 2023 17:27:07 -0700 Subject: [PATCH] Offer suggestions for lift manifest typos. Also make the array indexing basis clear when array indexes are used. --- CHANGES.md | 5 ++ RELEASE.md | 2 +- science/__init__.py | 2 +- science/config.py | 99 ++++++++++++++++++++-- science/data.py | 56 ++++++++++-- tests/data/unrecognized-config-fields.toml | 2 +- tests/test_config.py | 16 ++-- 7 files changed, 157 insertions(+), 25 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 2868ee6..b5de8c5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,10 @@ # Release Notes +## 0.3.0 + +The unrecognized lift manifest configuration data error message now includes suggestions when the +unrecognized configuration is likely a typo. + ## 0.2.2 Unrecognized lift manifest configuration data now generates an informative error instead of diff --git a/RELEASE.md b/RELEASE.md index a411475..9596005 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -5,7 +5,7 @@ ### Version Bump and Changelog 1. Bump the version in at [`science/__init__.py`](science/__init__.py). -2. Run `nox && nox -epackage` as a sanity check on the state of the project. +2. Run `nox && nox -e linkcheck package` as a sanity check on the state of the project. 3. Update [`CHANGES.md`](CHANGES.md) with any changes that are likely to be useful to consumers. 4. Open a PR with these changes and land it on https://github.com/a-scie/lift main. diff --git a/science/__init__.py b/science/__init__.py index eb2b358..e22357c 100644 --- a/science/__init__.py +++ b/science/__init__.py @@ -3,6 +3,6 @@ from packaging.version import Version -__version__ = "0.2.2" +__version__ = "0.3.0" VERSION = Version(__version__) diff --git a/science/config.py b/science/config.py index c394cc4..3c2eb32 100644 --- a/science/config.py +++ b/science/config.py @@ -3,18 +3,22 @@ from __future__ import annotations +import dataclasses +import difflib import tomllib -from collections import OrderedDict +from collections import OrderedDict, defaultdict from dataclasses import dataclass +from functools import cache from io import BytesIO from pathlib import Path from textwrap import dedent -from typing import BinaryIO +from typing import BinaryIO, DefaultDict, Generic, Iterator, TypeVar from science.build_info import BuildInfo -from science.data import Data +from science.data import Accessor, Data from science.dataclass import Dataclass from science.dataclass.deserializer import parse as parse_dataclass +from science.dataclass.reflect import FieldInfo, dataclass_info from science.doc import DOC_SITE_URL from science.errors import InputError from science.frozendict import FrozenDict @@ -74,6 +78,70 @@ class InterpreterGroupFields(Dataclass): members: tuple[str, ...] +def _iter_field_info(datatype: type[Dataclass]) -> Iterator[FieldInfo]: + for field_info in dataclass_info(datatype).field_info: + if not field_info.hidden and not field_info.inline: + yield field_info + elif field_info.inline and (dt := field_info.type.dataclass): + yield from _iter_field_info(dt) + + +_D = TypeVar("_D", bound=Dataclass) + + +@dataclass(frozen=True) +class ValidConfig(Generic[_D]): + @classmethod + @cache + def gather(cls, datatype: type[_D]) -> ValidConfig: + return cls( + datatype=datatype, + fields=FrozenDict( + {field_info.name: field_info for field_info in _iter_field_info(datatype)} + ), + ) + + datatype: type[_D] + fields: FrozenDict[str, FieldInfo] + + def access(self, field_name: str) -> ValidConfig | None: + field_info = self.fields.get(field_name) + if not field_info: + return None + + if datatype := field_info.type.dataclass: + return ValidConfig.gather(datatype) + + if field_info.type.has_item_type and dataclasses.is_dataclass( + item_type := field_info.type.item_type + ): + return ValidConfig.gather(item_type) + + return None + + +@dataclass(frozen=True) +class UnrecognizedApplicationConfig: + @classmethod + def gather(cls, lift: Data, index_start: int) -> UnrecognizedApplicationConfig | None: + unused_items = list(lift.iter_unused_items(index_start=index_start)) + if not unused_items: + return None + + valid_config_by_unused_accessor: dict[Accessor, ValidConfig | None] = {} + for unused_accessor, _ in unused_items: + valid_config: ValidConfig | None = ValidConfig.gather(Application) + for accessor in unused_accessor.iter_lineage(): + if not valid_config: + break + valid_config = valid_config.access(accessor.key) + valid_config_by_unused_accessor[unused_accessor] = valid_config + + return cls(unrecognized=FrozenDict(valid_config_by_unused_accessor)) + + unrecognized: FrozenDict[Accessor, ValidConfig | None] + + def parse_config_data(data: Data) -> Application: lift = data.get_data("lift") @@ -111,12 +179,28 @@ def parse_interpreter_group(ig_data: Data) -> InterpreterGroup: custom_parsers={BuildInfo: parse_build_info, InterpreterGroup: parse_interpreter_group}, ) - unused_items = list(lift.iter_unused_items()) - if unused_items: + unrecognized_config = UnrecognizedApplicationConfig.gather(lift, index_start=1) + if unrecognized_config: + unrecognized_field_info: DefaultDict[str, list[str]] = defaultdict(list) + index_used = False + for accessor, valid_config in unrecognized_config.unrecognized.items(): + index_used |= accessor.path_includes_index() + suggestions = unrecognized_field_info[accessor.render()] + if valid_config: + suggestions.extend(difflib.get_close_matches(accessor.key, valid_config.fields)) + + field_column_width = max(map(len, unrecognized_field_info)) + unrecognized_fields = [] + for name, suggestions in unrecognized_field_info.items(): + line = name.rjust(field_column_width) + if suggestions: + line += f": Did you mean {' or '.join(suggestions)}?" + unrecognized_fields.append(line) + raise InputError( dedent( """\ - The following `lift` manifest entries in {manifest_source} were not recognized: + The following `lift` manifest entries in {manifest_source} were not recognized{index_parenthetical}: {unrecognized_fields} Refer to the lift manifest format specification at {doc_url} or by running `science doc open manifest`. @@ -124,7 +208,8 @@ def parse_interpreter_group(ig_data: Data) -> InterpreterGroup: ) .format( manifest_source=data.provenance.source, - unrecognized_fields="\n".join(key for key, _ in unused_items), + index_parenthetical=" (indexes are 1-based)" if index_used else "", + unrecognized_fields="\n".join(unrecognized_fields), doc_url=f"{DOC_SITE_URL}/manifest.html", ) .strip() diff --git a/science/data.py b/science/data.py index befb508..cea5f49 100644 --- a/science/data.py +++ b/science/data.py @@ -13,6 +13,40 @@ from science.frozendict import FrozenDict from science.hashing import Provenance + +@dataclass(frozen=True) +class Accessor: + key: str + parent: Accessor | None = None + _index: int | None = None + + def index(self, index: int) -> Accessor: + return dataclasses.replace(self, _index=index) + + def render(self) -> str: + if self.parent: + rendered = f"{self.parent.render()}.{self.key}" + else: + rendered = self.key + + if self._index is not None: + rendered += f"[{self._index}]" + return rendered + + def iter_lineage(self, include_self: bool = False) -> Iterator[Accessor]: + if self.parent: + yield from self.parent.iter_lineage(include_self=True) + if include_self: + yield self + + def path_includes_index(self) -> bool: + if self._index is not None: + return True + if self.parent and self.parent.path_includes_index(): + return True + return False + + _T = TypeVar("_T") @@ -150,17 +184,25 @@ def get_value( self._unused_data.pop(key, None) return value - def iter_unused_items(self) -> Iterator[tuple[str, Any]]: + def iter_unused_items( + self, parent: Accessor | None = None, index_start: int = 0 + ) -> Iterator[tuple[Accessor, Any]]: for key, value in self._unused_data.items(): + accessor = Accessor(key, parent=parent) if isinstance(value, list) and all(isinstance(item, Data) for item in value): - for index, item in enumerate(value, start=1): - for sub_key, sub_value in item.iter_unused_items(): - yield f"{key}[{index}].{sub_key}", sub_value + for index, item in enumerate(value, start=index_start): + accessor = accessor.index(index) + for sub_accessor, sub_value in item.iter_unused_items( + parent=accessor, index_start=index_start + ): + yield sub_accessor, sub_value elif isinstance(value, Data): - for sub_key, sub_value in value.iter_unused_items(): - yield f"{key}.{sub_key}", sub_value + for sub_accessor, sub_value in value.iter_unused_items( + parent=accessor, index_start=index_start + ): + yield sub_accessor, sub_value else: - yield key, value + yield accessor, value def __bool__(self): return bool(self.data) diff --git a/tests/data/unrecognized-config-fields.toml b/tests/data/unrecognized-config-fields.toml index af4841e..1f33f51 100644 --- a/tests/data/unrecognized-config-fields.toml +++ b/tests/data/unrecognized-config-fields.toml @@ -33,7 +33,7 @@ members = [ [[lift.commands]] # N.B: `environ` is invalid. -environ = { key = "value" } +just_wrong = { key = "value" } exe = "#{cpython:python}" args = [ "-c", diff --git a/tests/test_config.py b/tests/test_config.py index 56ef952..6b559f2 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -172,14 +172,14 @@ def test_unrecognized_config_fields(tmp_path: Path, science_pyz: Path) -> None: assert ( dedent( f"""\ - The following `lift` manifest entries in {config} were not recognized: - scie-jump - scie_jump.version2 - interpreters[2].lizzy - commands[1].environ - commands[1].env.remove_re2 - commands[1].env.replace2 - app-info + The following `lift` manifest entries in {config} were not recognized (indexes are 1-based): + scie-jump: Did you mean scie_jump? + scie_jump.version2: Did you mean version? + interpreters[2].lizzy: Did you mean lazy? + commands[1].just_wrong + commands[1].env.remove_re2: Did you mean remove_re or remove_exact? + commands[1].env.replace2: Did you mean replace? + app-info: Did you mean app_info? Refer to the lift manifest format specification at https://science.scie.app/manifest.html or by running `science doc open manifest`. """