Skip to content

Commit

Permalink
Fix naive handling of relative links when proofing
Browse files Browse the repository at this point in the history
This commit fixes naive handling of relative links when proofing. The
problem here was the assumption that a relative link of foo.html that
was coming from a file at bar/fubar.md in the filesystem would always
appear at bar/foo.html.

That is not always true. If directory_urls was on, then it wasn't true.
Thus, #46.

It was also not true when using relative links between blog posts when
using the mkdocs material blog plugin.

This commit changes the relative url lookup logic to be more robust and
in the process make the proofer work with both the mkdocs material blog
plugin and with directory_urls on.

It does this by storing additional information in the lookup files that
we use to find information about pages. We can now look up a file by its
src_uri entry, for example blog/posts/index.md and then use that to get
the dest_uri like blog/12/10/some-post/ and then use that dest_uri value
to robustly handle figuring out where a relative link is pointing.
  • Loading branch information
SeanTAllen committed Feb 26, 2024
1 parent 65ae40f commit 6dfa86f
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 17 deletions.
12 changes: 9 additions & 3 deletions htmlproofer/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def on_post_page(self, output_content: str, page: Page, config: Config) -> None:
# in incorrect values appearing as the key.
opt_files = {}
opt_files.update({os.path.normpath(file.url): file for file in self.files})
opt_files.update({os.path.normpath(file.src_uri): file for file in self.files})

# Optimization: only parse links and headings
# li, sup are used for footnotes
Expand Down Expand Up @@ -225,9 +226,14 @@ def find_source_file(url: str, src_path: str, files: Dict[str, File]) -> Optiona
# Convert root/site paths
search_path = os.path.normpath(url[1:])
else:
# Handle relative links by concatenating the source dir with the destination path
src_dir = urllib.parse.quote(str(pathlib.Path(src_path).parent), safe='/\\')
search_path = os.path.normpath(str(pathlib.Path(src_dir) / pathlib.Path(url)))
# Handle relative links by looking up the destination url for the
# src_path and getting the parent directory.
try:
dest_uri = files[src_path].dest_uri
src_dir = urllib.parse.quote(str(pathlib.Path(dest_uri).parent), safe='/\\')
search_path = os.path.normpath(str(pathlib.Path(src_dir) / pathlib.Path(url)))
except KeyError:
return None

try:
return files[search_path]
Expand Down
55 changes: 41 additions & 14 deletions tests/unit/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,12 +241,17 @@ def test_get_url_status__local_page(plugin):
index_page = Mock(spec=Page, markdown='# Heading\nContent')
page1_page = Mock(spec=Page, markdown='# Page One\n## Sub Heading\nContent')
special_char_page = Mock(spec=Page, markdown='# Heading éèà\n## Sub Heading éèà\nContent')
files = {os.path.normpath(file.url): file for file in Files([
Mock(spec=File, src_path='index.md', dest_path='index.html', url='index.html', page=index_page),
Mock(spec=File, src_path='page1.md', dest_path='page1.html', url='page1.html', page=page1_page),
mock_files = Files([
Mock(spec=File, src_path='index.md', dest_path='index.html', dest_uri='index.html', url='index.html', src_uri='index.md', page=index_page),
Mock(spec=File, src_path='page1.md', dest_path='page1.html', dest_uri='page1.html', url='page1.html', src_uri='page1.md', page=page1_page),
Mock(spec=File, src_path='Dir éèà/éèà.md', dest_path='Dir éèà/éèà.html',
url='Dir%20%C3%A9%C3%A8%C3%A0/%C3%A9%C3%A8%C3%A0.html', page=special_char_page),
])}
dest_uri='Dir éèà/éèà.html', url='Dir%20%C3%A9%C3%A8%C3%A0/%C3%A9%C3%A8%C3%A0.html', src_uri='Dir éèà/éèà.md', page=special_char_page),
Mock(spec=File, src_path='Dir éèà/page1.md', dest_path='Dir éèà/page1.html',
dest_uri='Dir éèà/page1.html', url='Dir%20%C3%A9%C3%A8%C3%A0/page1.html', src_uri='Dir%20%C3%A9%C3%A8%C3%A0/page1.md', page=special_char_page),
])
files = {}
files.update({os.path.normpath(file.url): file for file in mock_files})
files.update({os.path.normpath(file.src_uri): file for file in mock_files})

assert plugin.get_url_status('index.html', 'page1.md', set(), files, False) == 0
assert plugin.get_url_status('index.html#heading', 'page1.md', set(), files, False) == 0
Expand All @@ -261,15 +266,18 @@ def test_get_url_status__local_page(plugin):

assert plugin.get_url_status('Dir%20%C3%A9%C3%A8%C3%A0/%C3%A9%C3%A8%C3%A0.html#sub-heading-eea',
'page1.md', set(), files, False) == 0
assert plugin.get_url_status('%C3%A9%C3%A8%C3%A0.html#sub-heading-eea', 'Dir éèà/page3.md', set(), files, False) == 0
assert plugin.get_url_status('%C3%A9%C3%A8%C3%A0.html#sub-heading-eea', 'Dir%20%C3%A9%C3%A8%C3%A0/page1.md', set(), files, False) == 0


def test_get_url_status__non_markdown_page(plugin):
index_page = Mock(spec=Page, markdown='# Heading\nContent')
files = {os.path.normpath(file.url): file for file in Files([
Mock(spec=File, src_path='index.md', dest_path='index.html', url='index.html', page=index_page),
Mock(spec=File, src_path='drawing.svg', dest_path='drawing.svg', url='drawing.svg', page=None),
])}
mock_files = Files([
Mock(spec=File, src_path='index.md', dest_path='index.html', dest_uri='index.html', url='index.html', src_uri='index.md', page=index_page),
Mock(spec=File, src_path='drawing.svg', dest_path='drawing.svg', dest_uri='index.html', url='drawing.svg', src_uri='drawing.svg', page=None),
])
files = {}
files.update({os.path.normpath(file.url): file for file in mock_files})
files.update({os.path.normpath(file.src_uri): file for file in mock_files})

assert plugin.get_url_status('drawing.svg', 'index.md', set(), files, False) == 0
assert plugin.get_url_status('/drawing.svg', 'index.md', set(), files, False) == 0
Expand All @@ -282,37 +290,56 @@ def test_get_url_status__local_page_nested(plugin):
nested1_sibling_page = Mock(spec=Page, markdown='# Nested Sibling')
nested2_page = Mock(spec=Page, markdown='# Nested\n## Nested Two\nContent')
nested2_sibling_page = Mock(spec=Page, markdown='# Nested Sibling')
files = {os.path.normpath(file.url): file for file in Files([
Mock(spec=File, src_path='index.md', dest_path='index.html', url='index.html', page=index_page),
mock_files = Files([
Mock(
spec=File,
src_path='index.md',
dest_path='index.html',
dest_uri='index.html',
url='index.html',
src_uri='index.md',
page=index_page),
Mock(
spec=File,
src_path='foo/bar/nested.md',
dest_path='foo/bar/nested.html',
dest_uri='foo/bar/nested.html',
url='foo/bar/nested.html',
src_uri='foo/bar/nested.md',
page=nested1_page
),
Mock(
spec=File,
src_path='foo/bar/sibling.md',
dest_path='foo/bar/sibling.html',
dest_uri='foo/bar/sibling.html',
url='foo/bar/sibling.html',
src_uri='foo/bar/sibling.md',
page=nested1_sibling_page
),
Mock(
spec=File,
src_path='foo/baz/nested.md',
dest_path='foo/baz/nested.html',
dest_uri='foo/baz/nested.html',
url='foo/baz/nested.html',
src_uri='foo/baz/nested.md',
page=nested2_page
),
Mock(
spec=File,
src_path='foo/baz/sibling.md',
dest_path='foo/baz/sibling.html',
dest_uri='foo/baz/sibling.html',
url='foo/baz/sibling.html',
src_uri='foo/baz/sibling.md',
page=nested2_sibling_page
),
])}
])

files = {}
files.update({os.path.normpath(file.url): file for file in mock_files})
files.update({os.path.normpath(file.src_uri): file for file in mock_files})

assert plugin.get_url_status('nested.html#nested-one', 'foo/bar/sibling.md', set(), files, False) == 0
assert plugin.get_url_status('nested.html#nested-two', 'foo/bar/sibling.md', set(), files, False) == 404
Expand Down Expand Up @@ -349,7 +376,7 @@ def test_get_url_status__excluded_existing_relative_url__no_warning(log_warning_
existing_page = Mock(spec=Page, markdown='')
files = {
os.path.normpath(file.url): file for file in Files([
Mock(spec=File, src_path=src_path, dest_path=url, url=url, page=existing_page)
Mock(spec=File, src_path=src_path, dest_path=url, dest_uri=url, url=url, src_uri=src_path, page=existing_page)
])
}
plugin.config['raise_error_excludes'][url_status] = [url]
Expand Down

0 comments on commit 6dfa86f

Please sign in to comment.