Skip to content

Rework use_directory_url=True validation to better catch incorrect links #80

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 13 additions & 11 deletions htmlproofer/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,18 +194,20 @@ def is_url_target_valid(url: str, src_path: str, files: Dict[str, File]) -> bool
return True

url_target, _, optional_anchor = match.groups()
_, extension = os.path.splitext(url_target)
if extension == ".html":
# URL is a link to another local Markdown file that may include an anchor.
target_markdown = HtmlProoferPlugin.find_target_markdown(url_target, src_path, files)
if target_markdown is None:
# The corresponding Markdown page was not found.
return False
if optional_anchor and not HtmlProoferPlugin.contains_anchor(target_markdown, optional_anchor):
# The corresponding Markdown header for this anchor was not found.
return False
elif HtmlProoferPlugin.find_source_file(url_target, src_path, files) is None:
source_file = HtmlProoferPlugin.find_source_file(url_target, src_path, files)
if source_file is None:
return False

# If there's an anchor (fragment) on the link, we try to find it in the source_file
if optional_anchor:
_, extension = os.path.splitext(source_file.src_uri)
# Currently only Markdown-based pages are supported, but conceptually others could be added below
if extension == ".md":
if source_file.page is None or source_file.page.markdown is None:
return False
if not HtmlProoferPlugin.contains_anchor(source_file.page.markdown, optional_anchor):
return False

return True

@staticmethod
Expand Down
7 changes: 5 additions & 2 deletions tests/integration/docs/nested/page1.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,14 @@ This plugin can detect invalid anchor links to another page, such as
[Acknowledgement](../index.md#BAD_ANCHOR)
or to a nested page
[Invalid Anchor](./page2.md#BAD_ANCHOR).
It should also detect links to pages in the same directory without './'
[Invalid Anchor](page2.md#BAD_ANCHOR).

But allows valid anchors such as
[Main Page](../index.md#mkdocs-htmlproofer-plugin),
[Table of Contents](../index.md#table-of-contents), and
[Emoji Anchor](./page2.md#title-with-emojis).
[Table of Contents](../index.md#table-of-contents),
[Emoji Anchor](./page2.md#title-with-emojis), and
[Good Heading](page2.md#good-heading).

## Image Link absolute/relative

Expand Down
2 changes: 2 additions & 0 deletions tests/integration/docs/nested/page2.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Second Nested Test Page

## :smile_cat: Title with Emojis :material-star: :octicons-apps-16:

## Good Heading
2 changes: 2 additions & 0 deletions tests/integration/mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ plugins:
'#acknowledge',
'../index.html#BAD_ANCHOR',
'page2.html#BAD_ANCHOR',
'../../#BAD_ANCHOR', # if use_directory_urls=True
'../page2/#BAD_ANCHOR', # if use_directory_urls=True
'../../../tests',
]
skip_downloads: True
51 changes: 51 additions & 0 deletions tests/unit/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,51 @@ def test_get_url_status__local_page(plugin):
set(), files) == 0


def test_get_url_status__local_page_with_directory_urls(plugin):
index_page = Mock(spec=Page, markdown='# Heading\nContent')
page1_page = Mock(spec=Page, markdown='# Page One\n## Sub Heading\nContent')
special_char_page = Mock(spec=Page, markdown='# Heading éèà\n## Sub Heading éèà\nContent')
mock_files = Files([
Mock(spec=File, src_path='index.md', dest_path='index/index.html',
dest_uri='index/index.html', url='index/', src_uri='index.md',
page=index_page),
Mock(spec=File, src_path='page1.md', dest_path='page1/index.html',
dest_uri='page1/index.html', url='page1/', src_uri='page1.md',
page=page1_page),
Mock(spec=File, src_path='Dir éèà/éèà.md', dest_path='Dir éèà/éèà/index.html',
dest_uri='Dir éèà/éèà/index.html',
url='Dir%20%C3%A9%C3%A8%C3%A0/%C3%A9%C3%A8%C3%A0/',
src_uri='Dir éèà/éèà.md', page=special_char_page),
Mock(spec=File, src_path='Dir éèà/page1.md', dest_path='Dir éèà/page1/index.html',
dest_uri='Dir éèà/page1/index.html',
url='Dir%20%C3%A9%C3%A8%C3%A0/page1/',
src_uri='Dir%20%C3%A9%C3%A8%C3%A0/page1.md',
page=special_char_page),
])
files = {}
files.update({os.path.normpath(file.url): file for file in mock_files})
files.update({file.src_uri: file for file in mock_files})

assert plugin.get_url_status('../index/', 'page1.md', set(), files) == 0
assert plugin.get_url_status('../index/#heading', 'page1.md', set(), files) == 0
assert plugin.get_url_status('../index/#bad-heading', 'page1.md', set(), files) == 404

assert plugin.get_url_status('../page1/', 'page1.md', set(), files) == 0
assert plugin.get_url_status('../page1/#sub-heading', 'page1.md', set(), files) == 0
assert plugin.get_url_status('../page1/#heading', 'page1.md', set(), files) == 404

assert plugin.get_url_status('../page2/', 'page1.md', set(), files) == 404
assert plugin.get_url_status('../page2/#heading', 'page1.md', set(), files) == 404

assert plugin.get_url_status(
'../Dir%20%C3%A9%C3%A8%C3%A0/%C3%A9%C3%A8%C3%A0/#sub-heading-eea',
'page1.md', set(), files) == 0
assert plugin.get_url_status(
'../%C3%A9%C3%A8%C3%A0/#sub-heading-eea',
'Dir%20%C3%A9%C3%A8%C3%A0/page1.md',
set(), files) == 0


def test_get_url_status__non_markdown_page(plugin):
index_page = Mock(spec=Page, markdown='# Heading\nContent')
mock_files = Files([
Expand All @@ -293,6 +338,9 @@ def test_get_url_status__non_markdown_page(plugin):
Mock(spec=File, src_path='drawing.svg', dest_path='drawing.svg',
dest_uri='index.html', url='drawing.svg', src_uri='drawing.svg',
page=None),
Mock(spec=File, src_path='page.html', dest_path='page.html',
dest_uri='page.html', url='page.html', src_uri='page.html',
page=None),
])
files = {}
files.update({os.path.normpath(file.url): file for file in mock_files})
Expand All @@ -302,6 +350,9 @@ def test_get_url_status__non_markdown_page(plugin):
assert plugin.get_url_status('/drawing.svg', 'index.md', set(), files) == 0
assert plugin.get_url_status('not-existing.svg', 'index.md', set(), files) == 404

assert plugin.get_url_status('page.html', 'index.md', set(), files) == 0
assert plugin.get_url_status('page.html#heading', 'index.md', set(), files) == 0 # no validation for non-markdown pages


def test_get_url_status__local_page_nested(plugin):
index_page = Mock(spec=Page, markdown='# Heading\nContent')
Expand Down