diff --git a/README.md b/README.md
index 0eb0eb4..3ee32b6 100644
--- a/README.md
+++ b/README.md
@@ -28,12 +28,6 @@ plugins:
- htmlproofer
```
-To enable cross-page anchor validation, you must set `use_directory_urls = False` in `mkdocs.yml`:
-
-```yaml
-use_directory_urls: False
-```
-
## Configuring
### `enabled`
@@ -147,11 +141,11 @@ plugins:
## Compatibility with `attr_list` extension
-If you need to manually specify anchors make use of the `attr_list` [extension](https://python-markdown.github.io/extensions/attr_list) in the markdown.
+If you need to manually specify anchors make use of the `attr_list` [extension](https://python-markdown.github.io/extensions/attr_list) in the markdown.
This can be useful for multilingual documentation to keep anchors as language neutral permalinks in all languages.
* A sample for a heading `# Grüße {#greetings}` (the slugified generated anchor `Gre` is overwritten with `greetings`).
-* This also works for images `this is a nice image [](foo-bar.png){#nice-image}`
+* This also works for images `this is a nice image [](foo-bar.png){#nice-image}`
* And generall for paragraphs:
```markdown
Listing: This is noteworthy.
@@ -164,4 +158,4 @@ More information about plugins in the [MkDocs documentation](http://www.mkdocs.o
## Acknowledgement
-This work is based on the [mkdocs-markdownextradata-plugin](https://github.com/rosscdh/mkdocs-markdownextradata-plugin) project and the [Finding and Fixing Website Link Rot with Python, BeautifulSoup and Requests](https://www.twilio.com/en-us/blog/find-fix-website-link-rot-python-beautifulsoup-requests-html) article.
+This work is based on the [mkdocs-markdownextradata-plugin](https://github.com/rosscdh/mkdocs-markdownextradata-plugin) project and the [Finding and Fixing Website Link Rot with Python, BeautifulSoup and Requests](https://www.twilio.com/en-us/blog/find-fix-website-link-rot-python-beautifulsoup-requests-html) article.
diff --git a/htmlproofer/plugin.py b/htmlproofer/plugin.py
index 77fbbd2..59d62fa 100644
--- a/htmlproofer/plugin.py
+++ b/htmlproofer/plugin.py
@@ -98,14 +98,13 @@ def on_post_page(self, output_content: str, page: Page, config: Config) -> None:
if not self.config['enabled']:
return
- use_directory_urls = config.data["use_directory_urls"]
-
# Optimization: At this point, we have all the files, so we can create
# a dictionary for faster lookups. Prior to this point, files are
# still being updated so creating a dictionary before now would result
# in incorrect values appearing as the key.
opt_files = {}
opt_files.update({os.path.normpath(file.url): file for file in self.files})
+ opt_files.update({os.path.normpath(file.src_uri): file for file in self.files})
# Optimization: only parse links and headings
# li, sup are used for footnotes
@@ -123,7 +122,7 @@ def on_post_page(self, output_content: str, page: Page, config: Config) -> None:
if self.config['warn_on_ignored_urls']:
log_warning(f"ignoring URL {url} from {page.file.src_path}")
else:
- url_status = self.get_url_status(url, page.file.src_path, all_element_ids, opt_files, use_directory_urls)
+ url_status = self.get_url_status(url, page.file.src_path, all_element_ids, opt_files)
if self.bad_url(url_status) and self.is_error(self.config, url, url_status):
self.report_invalid_url(url, url_status, page.file.src_path)
@@ -161,8 +160,7 @@ def get_url_status(
url: str,
src_path: str,
all_element_ids: Set[str],
- files: Dict[str, File],
- use_directory_urls: bool
+ files: Dict[str, File]
) -> int:
if any(pat.match(url) for pat in LOCAL_PATTERNS):
return 0
@@ -174,18 +172,13 @@ def get_url_status(
return 0
if fragment and not path:
return 0 if url[1:] in all_element_ids else 404
- elif not use_directory_urls:
- # use_directory_urls = True injects too many challenges for locating the correct target
- # Markdown file, so disable target anchor validation in this case. Examples include:
- # ../..#BAD_ANCHOR style links to index.html and extra ../ inserted into relative
- # links.
+ else:
is_valid = self.is_url_target_valid(url, src_path, files)
url_status = 404
if not is_valid and self.is_error(self.config, url, url_status):
log_warning(f"Unable to locate source file for: {url}")
return url_status
return 0
- return 0
@staticmethod
def is_url_target_valid(url: str, src_path: str, files: Dict[str, File]) -> bool:
@@ -225,9 +218,14 @@ def find_source_file(url: str, src_path: str, files: Dict[str, File]) -> Optiona
# Convert root/site paths
search_path = os.path.normpath(url[1:])
else:
- # Handle relative links by concatenating the source dir with the destination path
- src_dir = urllib.parse.quote(str(pathlib.Path(src_path).parent), safe='/\\')
- search_path = os.path.normpath(str(pathlib.Path(src_dir) / pathlib.Path(url)))
+ # Handle relative links by looking up the destination url for the
+ # src_path and getting the parent directory.
+ try:
+ dest_uri = files[src_path].dest_uri
+ src_dir = urllib.parse.quote(str(pathlib.Path(dest_uri).parent), safe='/\\')
+ search_path = os.path.normpath(str(pathlib.Path(src_dir) / pathlib.Path(url)))
+ except KeyError:
+ return None
try:
return files[search_path]
diff --git a/tests/integration/docs/nested/page1.md b/tests/integration/docs/nested/page1.md
index 338df37..b29e14e 100644
--- a/tests/integration/docs/nested/page1.md
+++ b/tests/integration/docs/nested/page1.md
@@ -8,7 +8,7 @@ you can either link to a local file without or with an anchor.
* [Main Page](../index.md)
* [Sub-Page](./page2.md)
*
@@ -27,6 +27,6 @@ But allows valid anchors such as
## Image Link absolute/relative
-![test](../assets/hello-world.drawio.svg)
+![test](../assets/hello-world.drawio.svg)
![test](/assets/hello-world.drawio.svg)
diff --git a/tests/unit/test_plugin.py b/tests/unit/test_plugin.py
index a17f2a6..2645779 100644
--- a/tests/unit/test_plugin.py
+++ b/tests/unit/test_plugin.py
@@ -115,7 +115,7 @@ def test_on_post_page__plugin_disabled():
),
)
def test_get_url_status__ignore_local_servers(plugin, empty_files, url):
- assert plugin.get_url_status(url, 'src/path.md', set(), empty_files, False) == 0
+ assert plugin.get_url_status(url, 'src/path.md', set(), empty_files) == 0
@pytest.mark.parametrize(
@@ -126,7 +126,7 @@ def test_get_url_status(validate_external: bool):
plugin.load_config({'validate_external_urls': validate_external})
def get_url():
- return plugin.get_url_status('https://google.com', 'src/path.md', set(), empty_files, False)
+ return plugin.get_url_status('https://google.com', 'src/path.md', set(), empty_files)
if validate_external:
with pytest.raises(Exception):
@@ -172,9 +172,9 @@ def test_contains_anchor(plugin, markdown, anchor, expected):
def test_get_url_status__same_page_anchor(plugin, empty_files):
- assert plugin.get_url_status('#ref', 'src/path.md', {'ref'}, empty_files, False) == 0
- assert plugin.get_url_status('##ref', 'src/path.md', {'ref'}, empty_files, False) == 404
- assert plugin.get_url_status('#ref', 'src/path.md', set(), empty_files, False) == 404
+ assert plugin.get_url_status('#ref', 'src/path.md', {'ref'}, empty_files) == 0
+ assert plugin.get_url_status('##ref', 'src/path.md', {'ref'}, empty_files) == 404
+ assert plugin.get_url_status('#ref', 'src/path.md', set(), empty_files) == 404
@pytest.mark.parametrize(
@@ -195,7 +195,7 @@ def test_get_url_status__external(plugin, empty_files, url):
with patch.object(HtmlProoferPlugin, "get_external_url") as mock_get_ext_url:
mock_get_ext_url.return_value = expected_status
- status = plugin.get_url_status(url, src_path, set(), empty_files, False)
+ status = plugin.get_url_status(url, src_path, set(), empty_files)
mock_get_ext_url.assert_called_once_with(url, scheme, src_path)
assert status == expected_status
@@ -241,39 +241,64 @@ def test_get_url_status__local_page(plugin):
index_page = Mock(spec=Page, markdown='# Heading\nContent')
page1_page = Mock(spec=Page, markdown='# Page One\n## Sub Heading\nContent')
special_char_page = Mock(spec=Page, markdown='# Heading éèà\n## Sub Heading éèà\nContent')
- files = {os.path.normpath(file.url): file for file in Files([
- Mock(spec=File, src_path='index.md', dest_path='index.html', url='index.html', page=index_page),
- Mock(spec=File, src_path='page1.md', dest_path='page1.html', url='page1.html', page=page1_page),
+ mock_files = Files([
+ Mock(spec=File, src_path='index.md', dest_path='index.html',
+ dest_uri='index.html', url='index.html', src_uri='index.md',
+ page=index_page),
+ Mock(spec=File, src_path='page1.md', dest_path='page1.html',
+ dest_uri='page1.html', url='page1.html', src_uri='page1.md',
+ page=page1_page),
Mock(spec=File, src_path='Dir éèà/éèà.md', dest_path='Dir éèà/éèà.html',
- url='Dir%20%C3%A9%C3%A8%C3%A0/%C3%A9%C3%A8%C3%A0.html', page=special_char_page),
- ])}
+ dest_uri='Dir éèà/éèà.html',
+ url='Dir%20%C3%A9%C3%A8%C3%A0/%C3%A9%C3%A8%C3%A0.html',
+ src_uri='Dir éèà/éèà.md', page=special_char_page),
+ Mock(spec=File, src_path='Dir éèà/page1.md', dest_path='Dir éèà/page1.html',
+ dest_uri='Dir éèà/page1.html',
+ url='Dir%20%C3%A9%C3%A8%C3%A0/page1.html',
+ src_uri='Dir%20%C3%A9%C3%A8%C3%A0/page1.md',
+ page=special_char_page),
+ ])
+ files = {}
+ files.update({os.path.normpath(file.url): file for file in mock_files})
+ files.update({file.src_uri: file for file in mock_files})
- assert plugin.get_url_status('index.html', 'page1.md', set(), files, False) == 0
- assert plugin.get_url_status('index.html#heading', 'page1.md', set(), files, False) == 0
- assert plugin.get_url_status('index.html#bad-heading', 'page1.md', set(), files, False) == 404
+ assert plugin.get_url_status('index.html', 'page1.md', set(), files) == 0
+ assert plugin.get_url_status('index.html#heading', 'page1.md', set(), files) == 0
+ assert plugin.get_url_status('index.html#bad-heading', 'page1.md', set(), files) == 404
- assert plugin.get_url_status('page1.html', 'page1.md', set(), files, False) == 0
- assert plugin.get_url_status('page1.html#sub-heading', 'page1.md', set(), files, False) == 0
- assert plugin.get_url_status('page1.html#heading', 'page1.md', set(), files, False) == 404
+ assert plugin.get_url_status('page1.html', 'page1.md', set(), files) == 0
+ assert plugin.get_url_status('page1.html#sub-heading', 'page1.md', set(), files) == 0
+ assert plugin.get_url_status('page1.html#heading', 'page1.md', set(), files) == 404
- assert plugin.get_url_status('page2.html', 'page1.md', set(), files, False) == 404
- assert plugin.get_url_status('page2.html#heading', 'page1.md', set(), files, False) == 404
+ assert plugin.get_url_status('page2.html', 'page1.md', set(), files) == 404
+ assert plugin.get_url_status('page2.html#heading', 'page1.md', set(), files) == 404
- assert plugin.get_url_status('Dir%20%C3%A9%C3%A8%C3%A0/%C3%A9%C3%A8%C3%A0.html#sub-heading-eea',
- 'page1.md', set(), files, False) == 0
- assert plugin.get_url_status('%C3%A9%C3%A8%C3%A0.html#sub-heading-eea', 'Dir éèà/page3.md', set(), files, False) == 0
+ assert plugin.get_url_status(
+ 'Dir%20%C3%A9%C3%A8%C3%A0/%C3%A9%C3%A8%C3%A0.html#sub-heading-eea',
+ 'page1.md', set(), files) == 0
+ assert plugin.get_url_status(
+ '%C3%A9%C3%A8%C3%A0.html#sub-heading-eea',
+ 'Dir%20%C3%A9%C3%A8%C3%A0/page1.md',
+ set(), files) == 0
def test_get_url_status__non_markdown_page(plugin):
index_page = Mock(spec=Page, markdown='# Heading\nContent')
- files = {os.path.normpath(file.url): file for file in Files([
- Mock(spec=File, src_path='index.md', dest_path='index.html', url='index.html', page=index_page),
- Mock(spec=File, src_path='drawing.svg', dest_path='drawing.svg', url='drawing.svg', page=None),
- ])}
+ mock_files = Files([
+ Mock(spec=File, src_path='index.md', dest_path='index.html',
+ dest_uri='index.html', url='index.html', src_uri='index.md',
+ page=index_page),
+ Mock(spec=File, src_path='drawing.svg', dest_path='drawing.svg',
+ dest_uri='index.html', url='drawing.svg', src_uri='drawing.svg',
+ page=None),
+ ])
+ files = {}
+ files.update({os.path.normpath(file.url): file for file in mock_files})
+ files.update({file.src_uri: file for file in mock_files})
- assert plugin.get_url_status('drawing.svg', 'index.md', set(), files, False) == 0
- assert plugin.get_url_status('/drawing.svg', 'index.md', set(), files, False) == 0
- assert plugin.get_url_status('not-existing.svg', 'index.md', set(), files, False) == 404
+ assert plugin.get_url_status('drawing.svg', 'index.md', set(), files) == 0
+ assert plugin.get_url_status('/drawing.svg', 'index.md', set(), files) == 0
+ assert plugin.get_url_status('not-existing.svg', 'index.md', set(), files) == 404
def test_get_url_status__local_page_nested(plugin):
@@ -282,48 +307,67 @@ def test_get_url_status__local_page_nested(plugin):
nested1_sibling_page = Mock(spec=Page, markdown='# Nested Sibling')
nested2_page = Mock(spec=Page, markdown='# Nested\n## Nested Two\nContent')
nested2_sibling_page = Mock(spec=Page, markdown='# Nested Sibling')
- files = {os.path.normpath(file.url): file for file in Files([
- Mock(spec=File, src_path='index.md', dest_path='index.html', url='index.html', page=index_page),
+ mock_files = Files([
+ Mock(
+ spec=File,
+ src_path='index.md',
+ dest_path='index.html',
+ dest_uri='index.html',
+ url='index.html',
+ src_uri='index.md',
+ page=index_page),
Mock(
spec=File,
src_path='foo/bar/nested.md',
dest_path='foo/bar/nested.html',
+ dest_uri='foo/bar/nested.html',
url='foo/bar/nested.html',
+ src_uri='foo/bar/nested.md',
page=nested1_page
),
Mock(
spec=File,
src_path='foo/bar/sibling.md',
dest_path='foo/bar/sibling.html',
+ dest_uri='foo/bar/sibling.html',
url='foo/bar/sibling.html',
+ src_uri='foo/bar/sibling.md',
page=nested1_sibling_page
),
Mock(
spec=File,
src_path='foo/baz/nested.md',
dest_path='foo/baz/nested.html',
+ dest_uri='foo/baz/nested.html',
url='foo/baz/nested.html',
+ src_uri='foo/baz/nested.md',
page=nested2_page
),
Mock(
spec=File,
src_path='foo/baz/sibling.md',
dest_path='foo/baz/sibling.html',
+ dest_uri='foo/baz/sibling.html',
url='foo/baz/sibling.html',
+ src_uri='foo/baz/sibling.md',
page=nested2_sibling_page
),
- ])}
+ ])
+
+ files = {}
+ files.update({os.path.normpath(file.url): file for file in mock_files})
+ files.update({file.src_uri: file for file in mock_files})
- assert plugin.get_url_status('nested.html#nested-one', 'foo/bar/sibling.md', set(), files, False) == 0
- assert plugin.get_url_status('nested.html#nested-two', 'foo/bar/sibling.md', set(), files, False) == 404
+ assert plugin.get_url_status('nested.html#nested-one', 'foo/bar/sibling.md', set(), files) == 0
+ assert plugin.get_url_status('nested.html#nested-two', 'foo/bar/sibling.md', set(), files) == 404
- assert plugin.get_url_status('nested.html#nested-two', 'foo/baz/sibling.md', set(), files, False) == 0
- assert plugin.get_url_status('nested.html#nested-one', 'foo/baz/sibling.md', set(), files, False) == 404
+ assert plugin.get_url_status('nested.html#nested-two', 'foo/baz/sibling.md', set(), files) == 0
+ assert plugin.get_url_status('nested.html#nested-one', 'foo/baz/sibling.md', set(), files) == 404
- assert plugin.get_url_status('foo/bar/nested.html#nested-one', 'index.md', set(), files, False) == 0
- assert plugin.get_url_status('foo/baz/nested.html#nested-two', 'index.md', set(), files, False) == 0
+ assert plugin.get_url_status('foo/bar/nested.html#nested-one', 'index.md', set(), files) == 0
+ assert plugin.get_url_status('foo/baz/nested.html#nested-two', 'index.md', set(), files) == 0
- assert plugin.get_url_status('/index.html', 'foo/baz/sibling.md', set(), files, False) == 0
+ assert plugin.get_url_status('/index.html', 'foo/baz/sibling.md', set(), files) == 0
@patch.object(htmlproofer.plugin, "log_warning", autospec=True)
@@ -334,7 +378,7 @@ def test_get_url_status__excluded_non_existing_relative_url__no_warning(log_warn
files = {}
plugin.config['raise_error_excludes'][url_status] = [url]
- status = plugin.get_url_status(url, src_path, set(), files, False)
+ status = plugin.get_url_status(url, src_path, set(), files)
log_warning_mock.assert_not_called()
assert 0 == status
@@ -349,12 +393,12 @@ def test_get_url_status__excluded_existing_relative_url__no_warning(log_warning_
existing_page = Mock(spec=Page, markdown='')
files = {
os.path.normpath(file.url): file for file in Files([
- Mock(spec=File, src_path=src_path, dest_path=url, url=url, page=existing_page)
+ Mock(spec=File, src_path=src_path, dest_path=url, dest_uri=url, url=url, src_uri=src_path, page=existing_page)
])
}
plugin.config['raise_error_excludes'][url_status] = [url]
- status = plugin.get_url_status(url, src_path, set(), files, False)
+ status = plugin.get_url_status(url, src_path, set(), files)
log_warning_mock.assert_not_called()
assert 0 == status
@@ -367,7 +411,7 @@ def test_get_url_status__non_existing_relative_url__warning_and_404(log_warning_
src_path = "index.md"
files = {}
- status = plugin.get_url_status(url, src_path, set(), files, False)
+ status = plugin.get_url_status(url, src_path, set(), files)
log_warning_mock.assert_called_once()
assert expected_url_status == status