diff --git a/build-support/bin/generate_docs.py b/build-support/bin/generate_docs.py index 10e301bd983..61ae5ce4660 100644 --- a/build-support/bin/generate_docs.py +++ b/build-support/bin/generate_docs.py @@ -33,6 +33,7 @@ from common import die from pants.help.help_info_extracter import to_help_str +from pants.util.docutil import DocUrlMatcher, DocUrlRewriter, get_titles from pants.version import MAJOR_MINOR logger = logging.getLogger(__name__) @@ -40,10 +41,20 @@ def main() -> None: logging.basicConfig(format="[%(levelname)s]: %(message)s", level=logging.INFO) - version = determine_pants_version() args = create_parser().parse_args() + version = determine_pants_version() help_info = run_pants_help_all() - generator = ReferenceGenerator(args, version, help_info) + doc_urls = DocUrlMatcher().find_doc_urls(value_strs_iter(help_info)) + logger.info("Found the following docsite URLs:") + for url in sorted(doc_urls): + logger.info(f" {url}") + logger.info("Fetching titles...") + slug_to_title = get_titles(doc_urls) + logger.info("Found the following titles:") + for slug, title in sorted(slug_to_title.items()): + logger.info(f" {slug}: {title}") + rewritten_help_info = rewrite_value_strs(help_info, slug_to_title) + generator = ReferenceGenerator(args, version, rewritten_help_info) if args.sync: generator.sync() else: @@ -142,8 +153,41 @@ def run_pants_help_all() -> Dict: return cast(Dict, json.loads(run.stdout)) +def value_strs_iter(help_info: dict) -> Iterable[str]: + def _recurse(val: Any) -> Iterable[str]: + if isinstance(val, str): + yield val + if isinstance(val, dict): + for v in val.values(): + for x in _recurse(v): + yield x + if isinstance(val, list): + for v in val: + for x in _recurse(v): + yield x + + for x in _recurse(help_info): + yield x + + +def rewrite_value_strs(help_info: dict, slug_to_title: dict[str, str]) -> dict: + """Return a copy of the argument with rewritten docsite URLs.""" + rewriter = DocUrlRewriter(slug_to_title) + + def _recurse(val: Any) 
-> Any: + if isinstance(val, str): + return rewriter.rewrite(val) + if isinstance(val, dict): + return {k: _recurse(v) for k, v in val.items()} + if isinstance(val, list): + return [_recurse(x) for x in val] + return val + + return cast(dict, _recurse(help_info)) + + class ReferenceGenerator: - def __init__(self, args: argparse.Namespace, version: str, help_info: Dict) -> None: + def __init__(self, args: argparse.Namespace, version: str, help_info: dict) -> None: self._args = args self._version = version @@ -177,7 +221,7 @@ def _link(scope: str, *, sync: bool) -> str: return f"reference-{url_safe_scope}" if sync else f"{url_safe_scope}.md" @classmethod - def process_options_input(cls, help_info: Dict, *, sync: bool) -> Dict: + def process_options_input(cls, help_info: dict, *, sync: bool) -> Dict: scope_to_help_info = help_info["scope_to_help_info"] # Process the list of consumed_scopes into a comma-separated list, and add it to the option diff --git a/build-support/bin/generate_docs_test.py b/build-support/bin/generate_docs_test.py index 44c9b0f2572..94cfe720b27 100644 --- a/build-support/bin/generate_docs_test.py +++ b/build-support/bin/generate_docs_test.py @@ -1,7 +1,7 @@ # Copyright 2021 Pants project contributors (see CONTRIBUTORS.md). # Licensed under the Apache License, Version 2.0 (see LICENSE). 
-from generate_docs import html_safe, markdown_safe +from generate_docs import html_safe, markdown_safe, value_strs_iter def test_markdown_safe(): @@ -10,3 +10,14 @@ def test_markdown_safe(): def test_html_safe(): assert "foo bar=='baz' qux" == html_safe("foo `bar=='baz'` qux") + + +def test_gather_value_strs(): + help_info = { + "a": "foo", + "b": ["bar", 5, "baz"], + "c": 42, + "d": True, + "e": {"f": 5, "g": "qux", "h": {"i": "quux"}}, + } + assert set(value_strs_iter(help_info)) == {"foo", "bar", "baz", "qux", "quux"} diff --git a/src/python/pants/backend/awslambda/python/rules.py b/src/python/pants/backend/awslambda/python/rules.py index 9de7c479281..f3a3e5c0da9 100644 --- a/src/python/pants/backend/awslambda/python/rules.py +++ b/src/python/pants/backend/awslambda/python/rules.py @@ -40,7 +40,7 @@ targets_with_sources_types, ) from pants.engine.unions import UnionMembership, UnionRule -from pants.util.docutil import bracketed_docs_url +from pants.util.docutil import doc_url from pants.util.logging import LogLevel logger = logging.getLogger(__name__) @@ -119,7 +119,7 @@ async def package_python_awslambda( "files targets, but Pants will not include them in the built Lambda. Filesystem APIs " "like `open()` are not able to load files within the binary itself; instead, they " "read from the current working directory." - f"\n\nInstead, use `resources` targets. See {bracketed_docs_url('resources')}." + f"\n\nInstead, use `resources` targets. See {doc_url('resources')}." 
f"\n\nFiles targets dependencies: {files_addresses}" ) diff --git a/src/python/pants/backend/awslambda/python/target_types.py b/src/python/pants/backend/awslambda/python/target_types.py index 833a40662e3..1cf3d6910ec 100644 --- a/src/python/pants/backend/awslambda/python/target_types.py +++ b/src/python/pants/backend/awslambda/python/target_types.py @@ -30,7 +30,7 @@ from pants.engine.unions import UnionRule from pants.source.filespec import Filespec from pants.source.source_root import SourceRoot, SourceRootRequest -from pants.util.docutil import bracketed_docs_url +from pants.util.docutil import doc_url class DeprecatedAwsLambdaInterpreterConstraints(InterpreterConstraintsField): @@ -205,8 +205,8 @@ class PythonAWSLambda(Target): PythonAwsLambdaRuntime, ) help = ( - "A self-contained Python function suitable for uploading to AWS Lambda.\n\nSee " - f"{bracketed_docs_url('awslambda-python')}." + "A self-contained Python function suitable for uploading to AWS Lambda.\n\n" + f"See {doc_url('awslambda-python')}." ) diff --git a/src/python/pants/backend/codegen/protobuf/python/python_protobuf_subsystem.py b/src/python/pants/backend/codegen/protobuf/python/python_protobuf_subsystem.py index 1e1754f455d..6ea1debeb14 100644 --- a/src/python/pants/backend/codegen/protobuf/python/python_protobuf_subsystem.py +++ b/src/python/pants/backend/codegen/protobuf/python/python_protobuf_subsystem.py @@ -11,14 +11,12 @@ from pants.engine.unions import UnionRule from pants.option.custom_types import target_option from pants.option.subsystem import Subsystem -from pants.util.docutil import bracketed_docs_url +from pants.util.docutil import doc_url class PythonProtobufSubsystem(Subsystem): options_scope = "python-protobuf" - help = ( - f"Options related to the Protobuf Python backend.\n\nSee {bracketed_docs_url('protobuf')}." - ) + help = f"Options related to the Protobuf Python backend.\n\nSee {doc_url('protobuf')}." 
@classmethod def register_options(cls, register): diff --git a/src/python/pants/backend/codegen/protobuf/target_types.py b/src/python/pants/backend/codegen/protobuf/target_types.py index 8b4f47b3b1a..e3d5edaf5f9 100644 --- a/src/python/pants/backend/codegen/protobuf/target_types.py +++ b/src/python/pants/backend/codegen/protobuf/target_types.py @@ -2,11 +2,12 @@ # Licensed under the Apache License, Version 2.0 (see LICENSE). from pants.engine.target import COMMON_TARGET_FIELDS, BoolField, Dependencies, Sources, Target -from pants.util.docutil import bracketed_docs_url - # NB: We subclass Dependencies so that specific backends can add dependency injection rules to # Protobuf targets. +from pants.util.docutil import doc_url + + class ProtobufDependencies(Dependencies): pass @@ -25,4 +26,4 @@ class ProtobufGrpcToggle(BoolField): class ProtobufLibrary(Target): alias = "protobuf_library" core_fields = (*COMMON_TARGET_FIELDS, ProtobufDependencies, ProtobufSources, ProtobufGrpcToggle) - help = f"Protobuf files used to generate various languages.\n\nSee {bracketed_docs_url('protobuf')}." + help = f"Protobuf files used to generate various languages.\n\nSee {doc_url('protobuf')}." diff --git a/src/python/pants/backend/python/goals/package_pex_binary.py b/src/python/pants/backend/python/goals/package_pex_binary.py index be53099e0d2..231919df49b 100644 --- a/src/python/pants/backend/python/goals/package_pex_binary.py +++ b/src/python/pants/backend/python/goals/package_pex_binary.py @@ -44,7 +44,7 @@ targets_with_sources_types, ) from pants.engine.unions import UnionMembership, UnionRule -from pants.util.docutil import bracketed_docs_url +from pants.util.docutil import doc_url from pants.util.logging import LogLevel logger = logging.getLogger(__name__) @@ -120,8 +120,8 @@ async def package_pex_binary( "targets, but Pants will not include them in the PEX. 
Filesystem APIs like `open()` " "are not able to load files within the binary itself; instead, they read from the " "current working directory." - "\n\nInstead, use `resources` targets or wrap this `pex_binary` in an `archive`. See " - f"{bracketed_docs_url('resources')}." + "\n\nInstead, use `resources` targets or wrap this `pex_binary` in an `archive`. " + f"See {doc_url('resources')}." f"\n\nFiles targets dependencies: {files_addresses}" ) diff --git a/src/python/pants/backend/python/goals/setup_py.py b/src/python/pants/backend/python/goals/setup_py.py index 570e0e88961..164b517d492 100644 --- a/src/python/pants/backend/python/goals/setup_py.py +++ b/src/python/pants/backend/python/goals/setup_py.py @@ -61,7 +61,7 @@ from pants.engine.unions import UnionMembership, UnionRule, union from pants.option.subsystem import Subsystem from pants.python.python_setup import PythonSetup -from pants.util.docutil import bracketed_docs_url +from pants.util.docutil import doc_url from pants.util.logging import LogLevel from pants.util.memo import memoized_property from pants.util.meta import frozen_after_init @@ -88,7 +88,7 @@ class OwnershipError(Exception): def __init__(self, msg: str): super().__init__( - f"{msg} See {bracketed_docs_url('python-distributions')} for " + f"{msg} See {doc_url('python-distributions')} for " f"how python_library targets are mapped to distributions." 
) diff --git a/src/python/pants/backend/python/lint/pylint/subsystem.py b/src/python/pants/backend/python/lint/pylint/subsystem.py index 2237935624e..c2d7b1ee17d 100644 --- a/src/python/pants/backend/python/lint/pylint/subsystem.py +++ b/src/python/pants/backend/python/lint/pylint/subsystem.py @@ -11,7 +11,7 @@ from pants.core.util_rules.config_files import ConfigFilesRequest from pants.engine.addresses import UnparsedAddressInputs from pants.option.custom_types import file_option, shell_str, target_option -from pants.util.docutil import bracketed_docs_url +from pants.util.docutil import doc_url class Pylint(PythonToolBase): @@ -74,7 +74,7 @@ def register_options(cls, register): "example, if your plugin is at `build-support/pylint/custom_plugin.py`, add " "'build-support/pylint' to `[source].root_patterns` in `pants.toml`. This is " "necessary for Pants to know how to tell Pylint to discover your plugin. See " - f"{bracketed_docs_url('source-roots')}\n\nYou must also set `load-plugins=$module_name` in " + f"{doc_url('source-roots')}\n\nYou must also set `load-plugins=$module_name` in " "your Pylint config file, and set the `[pylint].config` option in `pants.toml`." 
"\n\nWhile your plugin's code can depend on other first-party code and third-party " "requirements, all first-party dependencies of the plugin must live in the same " diff --git a/src/python/pants/backend/python/target_types.py b/src/python/pants/backend/python/target_types.py index 0e454e49910..db03ce437ab 100644 --- a/src/python/pants/backend/python/target_types.py +++ b/src/python/pants/backend/python/target_types.py @@ -46,7 +46,7 @@ from pants.option.subsystem import Subsystem from pants.python.python_setup import PythonSetup from pants.source.filespec import Filespec -from pants.util.docutil import bracketed_docs_url +from pants.util.docutil import doc_url from pants.util.frozendict import FrozenDict logger = logging.getLogger(__name__) @@ -69,9 +69,9 @@ class InterpreterConstraintsField(StringSequenceField): "`CPython` as a shorthand, e.g. '>=2.7' will be expanded to 'CPython>=2.7'.\n\nSpecify " "more than one element to OR the constraints, e.g. `['PyPy==3.7.*', 'CPython==3.7.*']` " "means either PyPy 3.7 _or_ CPython 3.7.\n\nIf the field is not set, it will default to " - "the option `[python-setup].interpreter_constraints`.\n\nSee " - f"{bracketed_docs_url('python-interpreter-compatibility')} for how these interpreter " - f"constraints are merged with the constraints of dependencies." + "the option `[python-setup].interpreter_constraints`.\n\n" + f"See {doc_url('python-interpreter-compatibility')} for how these interpreter " + "constraints are merged with the constraints of dependencies." ) def value_or_global_default(self, python_setup: PythonSetup) -> Tuple[str, ...]: @@ -390,7 +390,7 @@ class PexBinary(Target): help = ( "A Python target that can be converted into an executable PEX file.\n\nPEX files are " "self-contained executable files that contain a complete Python environment capable of " - f"running the target. For more information, see {bracketed_docs_url('pex-files')}." + f"running the target. For more information, see {doc_url('pex-files')}." 
) @@ -471,7 +471,7 @@ class PythonTests(Target): help = ( "Python tests, written in either Pytest style or unittest style.\n\nAll test util code, " "other than `conftest.py`, should go into a dedicated `python_library()` target and then " - f"be included in the `dependencies` field.\n\nSee {bracketed_docs_url('python-test-goal')}." + f"be included in the `dependencies` field.\n\nSee {doc_url('python-test-goal')}." ) @@ -650,7 +650,7 @@ class PythonRequirementLibrary(Target): "Python requirements inline in a BUILD file. If you have a `requirements.txt` file " "already, you can instead use the macro `python_requirements()` to convert each " "requirement into a `python_requirement_library()` target automatically.\n\nSee " - f"{bracketed_docs_url('python-third-party-dependencies')}." + f"{doc_url('python-third-party-dependencies')}." ) @@ -710,8 +710,8 @@ class PythonProvidesField(ScalarField, ProvidesField): "`name`. You can also set almost any keyword argument accepted by setup.py in the " "`setup()` function: " "(https://packaging.python.org/guides/distributing-packages-using-setuptools/#setup-args)." - f"\n\nSee {bracketed_docs_url('plugins-setup-py')} for how to write a plugin to " - f"dynamically generate kwargs." + f"\n\nSee {doc_url('plugins-setup-py')} for how to write a plugin to " + "dynamically generate kwargs." ) @classmethod @@ -742,5 +742,5 @@ class PythonDistribution(Target): ) help = ( "A publishable Python setuptools distribution (e.g. an sdist or wheel).\n\nSee " - f"{bracketed_docs_url('python-distributions')}." + f"{doc_url('python-distributions')}." 
) diff --git a/src/python/pants/backend/python/typecheck/mypy/rules.py b/src/python/pants/backend/python/typecheck/mypy/rules.py index 0bc12db39be..fd1436b3402 100644 --- a/src/python/pants/backend/python/typecheck/mypy/rules.py +++ b/src/python/pants/backend/python/typecheck/mypy/rules.py @@ -34,7 +34,7 @@ from pants.engine.target import FieldSet, Target, TransitiveTargets, TransitiveTargetsRequest from pants.engine.unions import UnionRule from pants.python.python_setup import PythonSetup -from pants.util.docutil import bracketed_docs_url +from pants.util.docutil import doc_url from pants.util.logging import LogLevel from pants.util.ordered_set import FrozenOrderedSet, OrderedSet from pants.util.strutil import pluralize @@ -99,7 +99,7 @@ def check_and_warn_if_python_version_configured( logger.warning( f"You set {formatted_configured}. Normally, Pants would automatically set this for you " "based on your code's interpreter constraints " - f"({bracketed_docs_url('python-interpreter-compatibility')}). Instead, it will " + f"({doc_url('python-interpreter-compatibility')}). Instead, it will " "use what you set.\n\n(Automatically setting the option allows Pants to partition your " "targets by their constraints, so that, for example, you can run MyPy on Python 2-only " "code and Python 3-only code at the same time. This feature may no longer work.)" diff --git a/src/python/pants/core/goals/tailor.py b/src/python/pants/core/goals/tailor.py index fd8b051c9b3..c7465128232 100644 --- a/src/python/pants/core/goals/tailor.py +++ b/src/python/pants/core/goals/tailor.py @@ -35,6 +35,7 @@ UnexpandedTargets, ) from pants.engine.unions import UnionMembership, union +from pants.util.docutil import doc_url from pants.util.frozendict import FrozenDict from pants.util.logging import LogLevel from pants.util.memo import memoized @@ -227,7 +228,7 @@ def register_options(cls, register): type=dict, help="A mapping from standard target type to custom type to use instead. 
The custom " "type can be a custom target type or a macro that offers compatible functionality " - "to the one it replaces (see https://www.pantsbuild.org/docs/macros).", + f"to the one it replaces (see {doc_url('macros')}).", ) @property diff --git a/src/python/pants/engine/internals/graph.py b/src/python/pants/engine/internals/graph.py index efa7e6fae76..0da0f34900c 100644 --- a/src/python/pants/engine/internals/graph.py +++ b/src/python/pants/engine/internals/graph.py @@ -76,7 +76,7 @@ from pants.engine.unions import UnionMembership from pants.option.global_options import GlobalOptions, OwnersNotFoundBehavior from pants.source.filespec import matches_filespec -from pants.util.docutil import bracketed_docs_url +from pants.util.docutil import doc_url from pants.util.logging import LogLevel from pants.util.ordered_set import FrozenOrderedSet, OrderedSet @@ -418,9 +418,9 @@ def _log_or_raise_unmatched_owners( f"target whose `sources` field includes the file." ) msg = ( - f"{prefix} See {bracketed_docs_url('targets')} for more information on target definitions." + f"{prefix} See {doc_url('targets')} for more information on target definitions." f"\n\nYou may want to run `./pants tailor` to autogenerate your BUILD files. 
See " - f"{bracketed_docs_url('create-initial-build-files')}.{option_msg}" + f"{doc_url('create-initial-build-files')}.{option_msg}" ) if owners_not_found_behavior == OwnersNotFoundBehavior.warn: diff --git a/src/python/pants/engine/internals/parser.py b/src/python/pants/engine/internals/parser.py index 9c3ce07bdb2..37bec51c8f5 100644 --- a/src/python/pants/engine/internals/parser.py +++ b/src/python/pants/engine/internals/parser.py @@ -12,7 +12,7 @@ from pants.base.parse_context import ParseContext from pants.build_graph.build_file_aliases import BuildFileAliases from pants.engine.internals.target_adaptor import TargetAdaptor -from pants.util.docutil import bracketed_docs_url +from pants.util.docutil import doc_url from pants.util.frozendict import FrozenDict @@ -105,7 +105,7 @@ def parse( original = e.args[0].capitalize() help_str = ( "If you expect to see more symbols activated in the below list," - f" refer to {bracketed_docs_url('enabling-backends')} for all available" + f" refer to {doc_url('enabling-backends')} for all available" " backends to activate." ) @@ -142,6 +142,6 @@ def error_on_imports(build_file_content: str, filepath: str) -> None: raise ParseError( f"Import used in {filepath} at line {lineno}. Import statements are banned in " "BUILD files because they can easily break Pants caching and lead to stale results. " - f"\n\nInstead, consider writing a macro ({bracketed_docs_url('macros')}) or " - f"writing a plugin ({bracketed_docs_url('plugins-overview')}." + f"\n\nInstead, consider writing a macro ({doc_url('macros')}) or " + f"writing a plugin ({doc_url('plugins-overview')})." 
) diff --git a/src/python/pants/engine/internals/parser_test.py b/src/python/pants/engine/internals/parser_test.py index be2ace3bf01..06cecbc6da9 100644 --- a/src/python/pants/engine/internals/parser_test.py +++ b/src/python/pants/engine/internals/parser_test.py @@ -7,7 +7,7 @@ from pants.build_graph.build_file_aliases import BuildFileAliases from pants.engine.internals.parser import BuildFilePreludeSymbols, ParseError, Parser -from pants.util.docutil import bracketed_docs_url +from pants.util.docutil import doc_url from pants.util.frozendict import FrozenDict @@ -37,7 +37,7 @@ def perform_test(extra_targets: list[str], dym: str) -> None: assert str(exc.value) == ( f"Name 'fake' is not defined.\n\n{dym}" "If you expect to see more symbols activated in the below list," - f" refer to {bracketed_docs_url('enabling-backends')} for all available" + f" refer to {doc_url('enabling-backends')} for all available" " backends to activate.\n\n" f"All registered symbols: ['caof', {fmt_extra_sym}'obj', 'prelude', 'tgt']" ) diff --git a/src/python/pants/engine/target.py b/src/python/pants/engine/target.py index e36a00d2da8..5cb6c6d54ee 100644 --- a/src/python/pants/engine/target.py +++ b/src/python/pants/engine/target.py @@ -46,7 +46,7 @@ from pants.option.global_options import FilesNotFoundBehavior from pants.source.filespec import Filespec, matches_filespec from pants.util.collections import ensure_list, ensure_str_list -from pants.util.docutil import bracketed_docs_url +from pants.util.docutil import doc_url from pants.util.frozendict import FrozenDict from pants.util.memo import memoized_classproperty, memoized_method, memoized_property from pants.util.meta import frozen_after_init @@ -1663,7 +1663,7 @@ def maybe_warn_of_ambiguous_dependency_inference( f"with `!` or `!!` so that one or no targets are left." f"\n\nAlternatively, you can remove the ambiguity by deleting/changing some of the " f"targets so that only 1 target owns this {import_reference}. 
Refer to " - f"{bracketed_docs_url('troubleshooting#import-errors-and-missing-dependencies')}." + f"{doc_url('troubleshooting#import-errors-and-missing-dependencies')}." ) def disambiguated_via_ignores(self, ambiguous_addresses: tuple[Address, ...]) -> Address | None: diff --git a/src/python/pants/goal/anonymous_telemetry.py b/src/python/pants/goal/anonymous_telemetry.py index d907c54548c..96a8bd744b1 100644 --- a/src/python/pants/goal/anonymous_telemetry.py +++ b/src/python/pants/goal/anonymous_telemetry.py @@ -22,15 +22,14 @@ ) from pants.engine.unions import UnionRule from pants.option.subsystem import Subsystem -from pants.util.docutil import bracketed_docs_url +from pants.util.docutil import doc_url logger = logging.getLogger(__name__) _bugout_access_token = "3ae76900-9a68-4a87-a127-7c9f179d7272" _bugout_journal_id = "801e9b3c-6b03-40a7-870f-5b25d326da66" -_telemetry_docs_url = bracketed_docs_url("anonymous-telemetry") -_telemetry_docs_referral = f"See {_telemetry_docs_url} for details" +_telemetry_docs_referral = f"See {doc_url('anonymous-telemetry')} for details" class AnonymousTelemetry(Subsystem): diff --git a/src/python/pants/init/logging.py b/src/python/pants/init/logging.py index 9e9114885ed..50788a40ab9 100644 --- a/src/python/pants/init/logging.py +++ b/src/python/pants/init/logging.py @@ -15,7 +15,7 @@ from pants.engine.internals import native_engine from pants.option.option_value_container import OptionValueContainer from pants.util.dirutil import safe_mkdir_for -from pants.util.docutil import unbracketed_docs_url +from pants.util.docutil import doc_url from pants.util.logging import LogLevel from pants.util.strutil import strip_prefix @@ -61,8 +61,8 @@ def formatException(self, exc_info): ) return ( - f"{stacktrace}\n\n({debug_instructions}See {unbracketed_docs_url('troubleshooting')} for common issues. 
" - f"Consider reaching out for help: {unbracketed_docs_url('getting-help')}.)" + f"{stacktrace}\n\n({debug_instructions}See {doc_url('troubleshooting')} for common issues. " + f"Consider reaching out for help: {doc_url('getting-help')}.)" ) diff --git a/src/python/pants/option/global_options.py b/src/python/pants/option/global_options.py index cec1e47989f..21798b985ba 100644 --- a/src/python/pants/option/global_options.py +++ b/src/python/pants/option/global_options.py @@ -32,7 +32,7 @@ from pants.option.scope import GLOBAL_SCOPE from pants.option.subsystem import Subsystem from pants.util.dirutil import fast_relpath_optional -from pants.util.docutil import bracketed_docs_url +from pants.util.docutil import doc_url from pants.util.logging import LogLevel from pants.util.ordered_set import OrderedSet from pants.util.osutil import CPU_COUNT @@ -573,7 +573,7 @@ def register_bootstrap_options(cls, register): help="Use this Pants version. Note that Pants only uses this to verify that you are " "using the requested version, as Pants cannot dynamically change the version it " "is using once the program is already running.\n\nIf you use the `./pants` script from " - f"{bracketed_docs_url('installation')}, however, changing the value in your " + f"{doc_url('installation')}, however, changing the value in your " "`pants.toml` will cause the new version to be installed and run automatically.\n\n" "Run `./pants --version` to check what is being used.", ) @@ -1254,7 +1254,7 @@ def register_options(cls, register): metavar="[+-]tag1,tag2,...", help=( "Include only targets with these tags (optional '+' prefix) or without these " - f"tags ('-' prefix). See {bracketed_docs_url('advanced-target-selection')}." + f"tags ('-' prefix). See {doc_url('advanced-target-selection')}." ), ) register( @@ -1314,7 +1314,7 @@ def register_options(cls, register): default=[], help=( "Python files to evaluate and whose symbols should be exposed to all BUILD files. 
" - f"See {bracketed_docs_url('macros')}." + f"See {doc_url('macros')}." ), ) register( diff --git a/src/python/pants/source/source_root.py b/src/python/pants/source/source_root.py index 55154d916dc..45db67eb498 100644 --- a/src/python/pants/source/source_root.py +++ b/src/python/pants/source/source_root.py @@ -17,7 +17,7 @@ from pants.engine.rules import Get, MultiGet, collect_rules, rule from pants.engine.target import Target from pants.option.subsystem import Subsystem -from pants.util.docutil import bracketed_docs_url +from pants.util.docutil import doc_url from pants.util.frozendict import FrozenDict from pants.util.logging import LogLevel from pants.util.memo import memoized_method @@ -42,9 +42,7 @@ class SourceRootError(Exception): """An error related to SourceRoot computation.""" def __init__(self, msg: str): - super().__init__( - f"{msg}See {bracketed_docs_url('source-roots')} for how to define source roots." - ) + super().__init__(f"{msg}See {doc_url('source-roots')} for how to define source roots.") class InvalidSourceRootPatternError(SourceRootError): @@ -113,7 +111,7 @@ def register_options(cls, register): "`/project1/src/python`. A `*` wildcard will match a single path segment, " "e.g., `src/*` will match `/src/python` and `/src/rust`. " "Use `/` to signify that the buildroot itself is a source root. " - f"See {bracketed_docs_url('source-roots')}", + f"See {doc_url('source-roots')}.", ) register( "--marker-filenames", diff --git a/src/python/pants/util/docutil.py b/src/python/pants/util/docutil.py index b0efa822d73..96f3eaa8502 100644 --- a/src/python/pants/util/docutil.py +++ b/src/python/pants/util/docutil.py @@ -1,7 +1,14 @@ # Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). # Licensed under the Apache License, Version 2.0 (see LICENSE). 
+from __future__ import annotations + +import re import shutil +from html.parser import HTMLParser +from typing import Iterable, cast + +import requests from pants.version import MAJOR_MINOR @@ -12,23 +19,92 @@ def terminal_width(*, fallback: int = 96, padding: int = 2) -> int: return shutil.get_terminal_size(fallback=(fallback, 24)).columns - padding -def bracketed_docs_url(slug: str) -> str: - """Link to the Pants docs using the current version of Pants. +def doc_url(slug: str) -> str: + return f"https://www.pantsbuild.org/v{MAJOR_MINOR}/docs/{slug}" - Returned URL is surrounded by parentheses, to prevent linkifiers from considering any - adjacent punctuation (such as a period at the end of a sentence) as part of the URL. - Note that this function used to use square brackets, hence the name, but it turns out - that those prevent any linkification at all from happening on readme.com, so we switched - to parens, but didn't update the name to prevent churn. - """ - return f"({unbracketed_docs_url(slug)})" +# Code to replace doc urls with appropriate markdown, for rendering on the docsite. +_doc_url_pattern = r"https://www.pantsbuild.org/v(\d+\.[^/]+)/docs/(?P<slug>[a-zA-Z0-9_-]+)" -def unbracketed_docs_url(slug: str) -> str: - """Link to the Pants docs using the current version of Pants. - Returned URL is _not_ surrounded by parentheses. This should only be used in error messages. Use - `bracketed_docs_url` for help messages so that linkifiers work correctly. 
- """ - return f"https://www.pantsbuild.org/v{MAJOR_MINOR}/docs/{slug}" +class DocUrlMatcher: + """Utilities for regex matching docsite URLs.""" + + def __init__(self): + self._doc_url_re = re.compile(_doc_url_pattern) + + def slug_for_url(self, url: str) -> str: + mo = self._doc_url_re.match(url) + if not mo: + raise ValueError(f"Not a docsite URL: {url}") + return cast(str, mo.group("slug")) + + def find_doc_urls(self, strs: Iterable[str]) -> set[str]: + """Find all the docsite urls in the given strings.""" + return {mo.group(0) for s in strs for mo in self._doc_url_re.finditer(s)} + + +class DocUrlRewriter: + def __init__(self, slug_to_title: dict[str, str]): + self._doc_url_re = re.compile(_doc_url_pattern) + self._slug_to_title = slug_to_title + + def _rewrite_url(self, mo: re.Match) -> str: + # The docsite injects the version automatically at markdown rendering time, so we + # must not also do so, or it will be doubled, and the resulting links will be broken. + slug = mo.group("slug") + title = self._slug_to_title.get(slug) + if not title: + raise ValueError(f"Found empty or no title for {mo.group(0)}") + return f"[{title}](doc:{slug})" + + def rewrite(self, s: str) -> str: + return self._doc_url_re.sub(self._rewrite_url, s) + + +class TitleFinder(HTMLParser): + """Grabs the page title out of a docsite page.""" + + def __init__(self): + super().__init__() + self._in_title: bool = False + self._title: str | None = None + + def handle_starttag(self, tag, attrs): + if tag == "title": + self._in_title = True + + def handle_endtag(self, tag): + if tag == "title": + self._in_title = False + + def handle_data(self, data): + if self._in_title: + self._title = data.strip() + + @property + def title(self) -> str | None: + return self._title + + +def get_title_from_page_content(page_content: str) -> str: + title_finder = TitleFinder() + title_finder.feed(page_content) + return title_finder.title or "" + + +def get_title(url: str) -> str: + return 
get_title_from_page_content(requests.get(url).text) + + +def get_titles(urls: set[str]) -> dict[str, str]: + """Return map from slug->title for each given docsite URL.""" + + matcher = DocUrlMatcher() + # TODO: Parallelize the http requests. + # E.g., by turning generate_docs.py into a plugin goal and using the engine. + ret = {} + for url in urls: + ret[matcher.slug_for_url(url)] = get_title(url) + return ret diff --git a/src/python/pants/util/docutil_test.py b/src/python/pants/util/docutil_test.py new file mode 100644 index 00000000000..c6b2e819187 --- /dev/null +++ b/src/python/pants/util/docutil_test.py @@ -0,0 +1,56 @@ +# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +import textwrap + +import pytest + +from pants.util.docutil import DocUrlMatcher, DocUrlRewriter, doc_url, get_title_from_page_content + + +@pytest.mark.parametrize("slug", ["foo-bar", "baz3", "qux"]) +def test_slug_for_url(slug: str) -> None: + assert DocUrlMatcher().slug_for_url(doc_url(slug)) == slug + + +def test_slug_for_url_error() -> None: + with pytest.raises(ValueError) as excinfo: + DocUrlMatcher().slug_for_url("https://notthedocsite.com/v2.6/foobar") + assert "Not a docsite URL" in str(excinfo.value) + + +def test_find_doc_urls() -> None: + matcher = DocUrlMatcher() + strs = [ + f"See {doc_url('foo-bar')} for details.", + f"See {doc_url('qux')}.", # Don't capture trailing dot. + f"See {doc_url('foo-bar')} and {doc_url('baz3')}", # Multiple urls in string. + ] + assert matcher.find_doc_urls(strs) == {doc_url(slug) for slug in ["foo-bar", "baz3", "qux"]} + + +def test_get_title_from_page_content(): + page_content = textwrap.dedent( + """ + + + + + <title>Welcome to Pants!</title> + + + Welcome to Pants, the ergonomic build system! + """ + ) + assert get_title_from_page_content(page_content) == "Welcome to Pants!" 
+ + +def test_doc_url_rewriter(): + dur = DocUrlRewriter( + { + "foo": "Foo", + "bar": "Welcome to Bar!", + } + ) + assert dur.rewrite(f"See {doc_url('foo')} for details.") == "See [Foo](doc:foo) for details." + assert dur.rewrite(f"Check out {doc_url('bar')}.") == "Check out [Welcome to Bar!](doc:bar)." diff --git a/src/python/pants/vcs/changed.py b/src/python/pants/vcs/changed.py index df6bd1309b9..99af4058b83 100644 --- a/src/python/pants/vcs/changed.py +++ b/src/python/pants/vcs/changed.py @@ -16,7 +16,7 @@ from pants.engine.rules import Get, collect_rules, rule from pants.option.option_value_container import OptionValueContainer from pants.option.subsystem import Subsystem -from pants.util.docutil import bracketed_docs_url +from pants.util.docutil import doc_url from pants.vcs.git import Git @@ -89,8 +89,8 @@ def changed_files(self, git: Git) -> List[str]: class Changed(Subsystem): options_scope = "changed" help = ( - "Tell Pants to detect what files and targets have changed from Git.\n\nSee " - f"{bracketed_docs_url('advanced-target-selection')}." + "Tell Pants to detect what files and targets have changed from Git.\n\n" + f"See {doc_url('advanced-target-selection')}." ) @classmethod diff --git a/src/rust/engine/src/externs/mod.rs b/src/rust/engine/src/externs/mod.rs index 5d07be1fe91..f2be13cebf6 100644 --- a/src/rust/engine/src/externs/mod.rs +++ b/src/rust/engine/src/externs/mod.rs @@ -256,12 +256,12 @@ pub fn val_to_log_level(obj: &PyObject) -> Result { } /// Link to the Pants docs using the current version of Pants. 
-pub fn bracketed_docs_url(slug: &str) -> String { +pub fn doc_url(slug: &str) -> String { let gil = Python::acquire_gil(); let py = gil.python(); let docutil = py.import("pants.util.docutil").unwrap(); docutil - .call(py, "bracketed_docs_url", (slug,), None) + .call(py, "doc_url", (slug,), None) .unwrap() .extract(py) .unwrap() diff --git a/src/rust/engine/src/nodes.rs b/src/rust/engine/src/nodes.rs index 4b008211b8d..5f21bbf44af 100644 --- a/src/rust/engine/src/nodes.rs +++ b/src/rust/engine/src/nodes.rs @@ -499,7 +499,7 @@ fn unmatched_globs_additional_context() -> Option { "\n\nDo the file(s) exist? If so, check if the file(s) are in your `.gitignore` or the global \ `pants_ignore` option, which may result in Pants not being able to see the file(s) even though \ they exist on disk. Refer to {}.", - externs::bracketed_docs_url("troubleshooting#pants-cannot-find-a-file-in-your-project") + externs::doc_url("troubleshooting#pants-cannot-find-a-file-in-your-project") )) }