Skip to content

Commit

Permalink
feat: Add option to resolve autorefs to closest URLs when multiple on…
Browse files Browse the repository at this point in the history
…es are found

Issue-52: #52
  • Loading branch information
pawamoy committed Sep 1, 2024
1 parent a927bab commit 2916eb2
Show file tree
Hide file tree
Showing 3 changed files with 141 additions and 3 deletions.
44 changes: 42 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,38 @@ We can [link to that heading][hello-world] from another page too.
This works the same as [a normal link to that heading](../doc1.md#hello-world).
```

Linking to a heading without needing to know the destination page can be useful if specifying that path is cumbersome, e.g. when the pages have deeply nested paths, are far apart, or are moved around frequently. And the issue is somewhat exacerbated by the fact that [MkDocs supports only *relative* links between pages](https://github.com/mkdocs/mkdocs/issues/1592).
Linking to a heading without needing to know the destination page can be useful if specifying that path is cumbersome, e.g. when the pages have deeply nested paths, are far apart, or are moved around frequently.

Note that this plugin's behavior is undefined when trying to link to a heading title that appears several times throughout the site. Currently it arbitrarily chooses one of the pages. In such cases, use [Markdown anchors](#markdown-anchors) to add unique aliases to your headings.
### Non-unique headings

When linking to a heading that appears several times throughout the site, this plugin will log a warning message stating that multiple URLs were found and that headings should be made unique, and will resolve the link using the first URL found.

To prevent getting warnings, use [Markdown anchors](#markdown-anchors) to add unique aliases to your headings, and use these aliases when referencing the headings.

If you cannot use Markdown anchors, for example because you inject the same generated contents in multiple locations (for example mkdocstrings' API documentation), then you can try to alleviate the warnings by enabling the `resolve_closest` option:

```yaml
plugins:
- autorefs:
    resolve_closest: true
```

When `resolve_closest` is enabled, and multiple URLs are found for the same identifier, the plugin will try to resolve to the one that is "closest" to the current page (the page containing the link). By closest, we mean:

- URLs that are relative to the current page's URL or, when there are none, relative to the nearest of its parents (climbing up one parent at a time);
- if multiple URLs are relative to that same base, the one at the shortest distance from it.

If multiple relative URLs are at the same distance, the first of these URLs will be used. If no URL is relative to the current page's URL, the first URL of all found URLs will be used.

Examples:

Current page | Candidate URLs | Relative URLs | Winner
------------ | -------------- | ------------- | ------
` ` | `x/#b`, `#b` | `x/#b`, `#b` (every URL is relative to the root page) | `#b` (shortest distance)
`a/` | `b/c/#d`, `c/#d` | none | `b/c/#d` (no relative, use first one, even if longer distance)
`a/b/` | `x/#e`, `a/c/#e`, `a/d/#e` | `a/c/#e`, `a/d/#e` (relative to parent `a/`) | `a/c/#e` (same distance, use first one)
`a/b/` | `x/#e`, `a/c/d/#e`, `a/c/#e` | `a/c/d/#e`, `a/c/#e` (relative to parent `a/`) | `a/c/#e` (shortest distance)
`a/b/c/` | `x/#e`, `a/#e`, `a/b/#e`, `a/b/c/d/#e`, `a/b/c/#e` | `a/b/c/d/#e`, `a/b/c/#e` | `a/b/c/#e` (shortest distance)

### Markdown anchors

Expand Down Expand Up @@ -143,3 +172,14 @@ You don't want to change headings and make them redundant, like `## Arch: Instal
```

...replacing `arch` with `debian`, `gentoo`, etc. in the other pages.

---

You can also change the actual identifier of a heading, thanks again to the `attr_list` Markdown extension:

```md
## Install from sources { #arch-install-src }
...
```

...though note that this will impact the URL anchor too (and therefore the permalink to the heading).
76 changes: 75 additions & 1 deletion src/mkdocs_autorefs/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,12 @@
import contextlib
import functools
import logging
import sys
from typing import TYPE_CHECKING, Any, Callable, Sequence
from urllib.parse import urlsplit

from mkdocs.config.base import Config
from mkdocs.config.config_options import Type
from mkdocs.plugins import BasePlugin
from mkdocs.structure.pages import Page

Expand All @@ -37,6 +40,41 @@
log = logging.getLogger(f"mkdocs.plugins.{__name__}") # type: ignore[assignment]


# YORE: EOL 3.8: Remove block.
if sys.version_info < (3, 9):
    from pathlib import PurePosixPath

    class URL(PurePosixPath):
        """POSIX-style path used to compare URLs, with `is_relative_to` backported."""

        def is_relative_to(self, *args: Any) -> bool:
            """Tell whether this path is located under the path built from `args`.

            The check is delegated to `relative_to`, which raises `ValueError`
            when this path is not relative to the other one.

            Arguments:
                *args: The path (or path segments) to compare against.

            Returns:
                True if this path is relative to the given path, False otherwise.
            """
            try:
                self.relative_to(*args)
            except ValueError:
                return False
            return True
else:
    # From Python 3.9 on, the standard class is used as is.
    from pathlib import PurePosixPath as URL  # noqa: N814


class AutorefsConfig(Config):
    """Options accepted by the `autorefs` plugin in the MkDocs configuration."""

    resolve_closest = Type(bool, default=False)
    """Whether to pick the closest URL when an identifier maps to multiple URLs.

    "Closest" combines two criteria: being relative to the current page,
    and being at the shortest distance from the current page.

    For example, when linking to identifier `hello` from page `foo/bar/`,
    with the identifier found in pages `foo/`, `foo/baz/` and `foo/bar/baz/qux/`,
    autorefs resolves to `foo/bar/baz/qux/`, the only URL relative to `foo/bar/`.

    When several URLs are equally close, the first of them is used.
    When no URL is close to the current page, autorefs logs a warning
    and resolves to the first URL found.

    When this option is false and multiple URLs are found for an identifier,
    autorefs logs a warning and resolves to the first URL.
    """


class AutorefsPlugin(BasePlugin[AutorefsConfig]):
"""The `autorefs` plugin for `mkdocs`.
This plugin defines the following event hooks:
Expand Down Expand Up @@ -83,10 +121,44 @@ def register_url(self, identifier: str, url: str) -> None:
"""
self._abs_url_map[identifier] = url

@staticmethod
def _get_closest_url(from_url: str, urls: list[str]) -> str:
    """Return the closest URL to the current page.

    A URL is considered close when it is relative to the current page's URL or,
    when no URL is, to the nearest parent of the current page that has relative URLs.
    Climbing stops once the parent has an empty name, so that last parent
    is never used as a base.

    Arguments:
        from_url: The URL of the base page, from which we link towards the targeted pages.
        urls: A list of URLs to choose from. It must not be empty.

    Returns:
        The closest URL to the current page. When no URL is close,
        a warning is logged and the first URL of `urls` is returned.
    """
    # Parse every candidate once: only the base changes while climbing up the parents.
    parsed_urls = [(url, URL(url)) for url in urls]
    base_url = URL(from_url)

    while True:
        if candidates := [url for url, path in parsed_urls if path.is_relative_to(base_url)]:
            break
        base_url = base_url.parent
        # An empty name means there is no named parent left to try.
        if not base_url.name:
            break

    if not candidates:
        log.warning(
            "Could not find closest URL (from %s, candidates: %s). "
            "Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
            from_url,
            urls,
        )
        return urls[0]

    # All candidates are relative to the same base, so fewer slashes means a shorter distance.
    # `min` returns the first minimal item: ties go to the first candidate,
    # and a single candidate is returned unchanged.
    winner = min(candidates, key=lambda candidate: candidate.count("/"))
    log.debug("Closest URL found: %s (from %s, candidates: %s)", winner, from_url, urls)
    return winner

def _get_item_url(
self,
identifier: str,
fallback: Callable[[str], Sequence[str]] | None = None,
from_url: str | None = None,
) -> str:
try:
urls = self._url_map[identifier]
Expand All @@ -103,6 +175,8 @@ def _get_item_url(
raise

if len(urls) > 1:
if self.config.resolve_closest and from_url is not None:
return self._get_closest_url(from_url, urls)
log.warning(
"Multiple URLs found for '%s': %s. "
"Make sure to use unique headings, identifiers, or Markdown anchors (see our docs).",
Expand All @@ -127,7 +201,7 @@ def get_item_url(
Returns:
A site-relative URL.
"""
url = self._get_item_url(identifier, fallback)
url = self._get_item_url(identifier, fallback, from_url)
if from_url is not None:
parsed = urlsplit(url)
if not parsed.scheme and not parsed.netloc:
Expand Down
24 changes: 24 additions & 0 deletions tests/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,27 @@ def test_dont_make_relative_urls_relative_again() -> None:
plugin.get_item_url("hello", from_url="baz/bar/foo.html", fallback=lambda _: ("foo.bar.baz",))
== "../../foo/bar/baz.html#foo.bar.baz"
)


@pytest.mark.parametrize(
    ("base", "urls", "expected"),
    [
        # A single URL is the closest one.
        ("", ["x/#b", "#b"], "#b"),
        # Equally close URLs: the first one wins.
        ("a/b", ["x/#e", "a/c/#e", "a/d/#e"], "a/c/#e"),
        ("a/b/", ["x/#e", "a/d/#e", "a/c/#e"], "a/d/#e"),
        # Two close URLs: the shorter (closer) one wins.
        ("a/b", ["x/#e", "a/c/#e", "a/c/d/#e"], "a/c/#e"),
        ("a/b/", ["x/#e", "a/c/d/#e", "a/c/#e"], "a/c/#e"),
        # URLs nested deeper than the current page.
        ("a/b/c", ["x/#e", "a/#e", "a/b/#e", "a/b/c/#e", "a/b/c/d/#e"], "a/b/c/#e"),
        ("a/b/c/", ["x/#e", "a/#e", "a/b/#e", "a/b/c/d/#e", "a/b/c/#e"], "a/b/c/#e"),
        # No close URL: fall back to the first one, even if it is further away.
        ("a", ["b/c/#d", "c/#d"], "b/c/#d"),
        ("a/", ["c/#d", "b/c/#d"], "c/#d"),
    ],
)
def test_find_closest_url(base: str, urls: list[str], expected: str) -> None:
    """Check which URL is picked as the closest one to a base page.

    Parameters:
        base: The URL of the page containing the link.
        urls: The candidate URLs for the identifier.
        expected: The URL that must be selected.
    """
    closest = AutorefsPlugin._get_closest_url(base, urls)
    assert closest == expected

0 comments on commit 2916eb2

Please sign in to comment.