Skip to content

Commit e0dc894

Browse files
committed
wip
1 parent dcc7868 commit e0dc894

File tree

5 files changed

+243
-93
lines changed

5 files changed

+243
-93
lines changed

mkdocs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ theme:
4040
- content.tooltips
4141
- navigation.footer
4242
- navigation.indexes
43+
- navigation.instant.preview
4344
- navigation.sections
4445
- navigation.tabs
4546
- navigation.tabs.sticky

src/mkdocs_autorefs/plugin.py

Lines changed: 107 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,21 @@
1515
import contextlib
1616
import functools
1717
import logging
18+
from collections import defaultdict
1819
from pathlib import PurePosixPath as URL # noqa: N814
1920
from typing import TYPE_CHECKING, Any, Callable
2021
from urllib.parse import urlsplit
2122
from warnings import warn
2223

2324
from mkdocs.config.base import Config
2425
from mkdocs.config.config_options import Type
25-
from mkdocs.plugins import BasePlugin
26+
from mkdocs.plugins import BasePlugin, event_priority
2627
from mkdocs.structure.pages import Page
28+
from mkdocs.structure.files import Files
29+
from mkdocs.structure.nav import Section
30+
from jinja2.environment import Environment
2731

28-
from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url
32+
from mkdocs_autorefs.references import AutorefsExtension, URLAndTitle, _find_backlinks, fix_refs, relative_url
2933

3034
if TYPE_CHECKING:
3135
from collections.abc import Sequence
@@ -43,6 +47,15 @@
4347
log = logging.getLogger(f"mkdocs.plugins.{__name__}") # type: ignore[assignment]
4448

4549

50+
# TODO: BACKLINKS: Record URLs directly. It's wrong to record ids and use them later
51+
# to fetch all associated URLs: not all these URLs link to the cross-ref'd object.
52+
# Also, don't store URLs + titles, only store URLs in maps, and store titles in a separate dict.
53+
# Also also, backlinks should be fetched for all aliases of a given identifier,
54+
# not just for this specific identifier. For example, mkdocstrings-python will create
55+
# an autoref for a parameter default value with `used-by` type and `object.canonical.path` as id,
56+
# but if we render the object under `object.path` instead of this canonical path,
57+
# then we won't find the backlinks for it.
58+
4659
class AutorefsConfig(Config):
4760
"""Configuration options for the `autorefs` plugin."""
4861

@@ -76,7 +89,7 @@ class AutorefsPlugin(BasePlugin[AutorefsConfig]):
7689
"""
7790

7891
scan_toc: bool = True
79-
current_page: str | None = None
92+
current_page: Page | None = None
8093
# YORE: Bump 2: Remove line.
8194
legacy_refs: bool = True
8295

@@ -111,7 +124,9 @@ def __init__(self) -> None:
111124
# This logic unfolds in `_get_item_url`.
112125
self._primary_url_map: dict[str, list[str]] = {}
113126
self._secondary_url_map: dict[str, list[str]] = {}
127+
self._title_map: dict[str, str] = {}
114128
self._abs_url_map: dict[str, str] = {}
129+
self._backlinks: dict[str, dict[str, set[str]]] = defaultdict(lambda: defaultdict(set))
115130
# YORE: Bump 2: Remove line.
116131
self._get_fallback_anchor: Callable[[str], tuple[str, ...]] | None = None
117132

@@ -133,22 +148,69 @@ def get_fallback_anchor(self, value: Callable[[str], tuple[str, ...]] | None) ->
133148
stacklevel=2,
134149
)
135150

136-
def register_anchor(self, page: str, identifier: str, anchor: str | None = None, *, primary: bool = True) -> None:
151+
def _record_backlink(self, identifier: str, backlink_type: str, backlink_anchor: str, page_url: str) -> None:
152+
"""Record a backlink.
153+
154+
Arguments:
155+
identifier: The target identifier.
156+
backlink_type: The type of backlink.
157+
backlink_anchor: The backlink target anchor.
158+
page_url: The URL of the page containing the backlink.
159+
"""
160+
if identifier in self._primary_url_map or identifier in self._secondary_url_map:
161+
self._backlinks[identifier][backlink_type].add(f"{page_url}#{backlink_anchor}")
162+
163+
def get_backlinks(self, *identifiers: str, from_url: str) -> dict[str, set[URLAndTitle]]:
    """Return the backlinks to the given identifiers, relative to the given URL.

    Arguments:
        *identifiers: The identifiers to get backlinks for.
        from_url: The URL of the page where backlinks are rendered.

    Returns:
        A dictionary with the type of reference as key and a set of
        `(url, title)` pairs as value. Each URL is passed through
        `relative_url(from_url, ...)`. The title is `None` when no title
        was registered for the backlink URL.
    """
    relative_backlinks: dict[str, set[URLAndTitle]] = defaultdict(set)
    for identifier in identifiers:
        # `.get()` rather than indexing: `_backlinks` is a nested defaultdict,
        # and indexing would insert an empty entry for every unknown identifier.
        backlinks = self._backlinks.get(identifier, {})
        for backlink_type, backlink_urls in backlinks.items():
            for backlink_url in backlink_urls:
                # Titles are only stored for anchors registered with a title, so a
                # backlink URL may have none. Mirror `get_item_url`, which also
                # tolerates a missing title, instead of raising `KeyError`.
                title = self._title_map.get(backlink_url) or None
                relative_backlinks[backlink_type].add((relative_url(from_url, backlink_url), title))
    return relative_backlinks
180+
181+
def _breadcrumbs(self, page: Page | Section, title: str) -> str:
182+
breadcrumbs = [title, page.title]
183+
while page.parent:
184+
page = page.parent
185+
breadcrumbs.append(page.title)
186+
return " ❭ ".join(reversed(breadcrumbs))
187+
188+
def register_anchor(
    self,
    identifier: str,
    anchor: str | None = None,
    *,
    title: str | None = None,
    primary: bool = True,
) -> None:
    """Register that an anchor corresponding to an identifier was encountered when rendering the page.

    The anchor is recorded against the URL of `self.current_page`.

    Arguments:
        identifier: The identifier to register.
        anchor: The anchor on the page, without `#`. If not provided, defaults to the identifier.
        title: The title of the anchor (optional). It is stored prefixed with the page breadcrumbs,
            and only the first title registered for a given page anchor is kept.
        primary: Whether this anchor is the primary one for the identifier.

    Raises:
        RuntimeError: If no current page is set, since the anchor URL cannot be built.
    """
    page = self.current_page
    if page is None:
        # Fail with an explicit message instead of an opaque `AttributeError` on `None.url`.
        raise RuntimeError(f"Cannot register anchor for '{identifier}': no current page is set")

    page_anchor = f"{page.url}#{anchor or identifier}"
    url_map = self._primary_url_map if primary else self._secondary_url_map
    anchors = url_map.setdefault(identifier, [])
    if page_anchor not in anchors:
        anchors.append(page_anchor)
    if title and page_anchor not in self._title_map:
        self._title_map[page_anchor] = self._breadcrumbs(page, title)
152214

153215
def register_url(self, identifier: str, url: str) -> None:
154216
"""Register that the identifier should be turned into a link to this URL.
@@ -240,7 +302,7 @@ def get_item_url(
240302
from_url: str | None = None,
241303
# YORE: Bump 2: Remove line.
242304
fallback: Callable[[str], Sequence[str]] | None = None,
243-
) -> str:
305+
) -> URLAndTitle:
244306
"""Return a site-relative URL with anchor to the identifier, if it's present anywhere.
245307
246308
Arguments:
@@ -252,11 +314,12 @@ def get_item_url(
252314
"""
253315
# YORE: Bump 2: Replace `, fallback` with `` within line.
254316
url = self._get_item_url(identifier, from_url, fallback)
317+
title = self._title_map.get(url) or None
255318
if from_url is not None:
256319
parsed = urlsplit(url)
257320
if not parsed.scheme and not parsed.netloc:
258-
return relative_url(from_url, url)
259-
return url
321+
url = relative_url(from_url, url)
322+
return url, title
260323

261324
def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
262325
"""Instantiate our Markdown extension.
@@ -287,7 +350,7 @@ def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str: #
287350
The same Markdown. We only use this hook to keep a reference to the current page URL,
288351
used during Markdown conversion by the anchor scanner tree processor.
289352
"""
290-
self.current_page = page.url
353+
self.current_page = page
291354
return markdown
292355

293356
def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str: # noqa: ARG002
@@ -306,56 +369,61 @@ def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str: # noqa:
306369
Returns:
307370
The same HTML. We only use this hook to map anchors to URLs.
308371
"""
372+
self.current_page = page
373+
# Collect `std`-domain URLs.
309374
if self.scan_toc:
310375
log.debug("Mapping identifiers to URLs for page %s", page.file.src_path)
311376
for item in page.toc.items:
312-
self.map_urls(page.url, item)
377+
self.map_urls(item)
313378
return html
314379

315-
def map_urls(self, base_url: str, anchor: AnchorLink) -> None:
380+
def map_urls(self, anchor: AnchorLink) -> None:
    """Register an anchor and, recursively, all of its descendants.

    Each anchor is registered as primary through `register_anchor`,
    using the anchor's ID as identifier and passing its title along.
    A parent is always registered before its children.

    Arguments:
        anchor: The anchor to process and to recurse on.
    """
    self.register_anchor(anchor.id, title=anchor.title, primary=True)
    for sub_anchor in anchor.children:
        self.map_urls(sub_anchor)
327391

328-
def on_post_page(self, output: str, page: Page, **kwargs: Any) -> str: # noqa: ARG002
329-
"""Fix cross-references.
392+
@event_priority(-50)  # Late, after mkdocstrings has finished loading inventories.
def on_env(self, env: Environment, /, *, config: MkDocsConfig, files: Files) -> Environment:
    """Apply cross-references and collect backlinks.

    Hook for the [`on_env` event](https://www.mkdocs.org/user-guide/plugins/#on_env).
    For every file that has a page with content, unresolved references of the form
    `[title][identifier]` or `[identifier][]` are fixed in the page content,
    which lets users of `autorefs` cross-reference objects in their documentation
    strings with the native Markdown syntax.

    A warning is logged for each reference that could not be mapped to a URL.

    Backlinks are collected in the same pass as the cross-reference fixing,
    for performance reasons (the regular expression is not run twice on each page).

    Arguments:
        env: The MkDocs environment.
        config: The MkDocs config object.
        files: The list of files in the MkDocs project.

    Returns:
        The unmodified environment.
    """
    for file in files:
        page = file.page
        if not (page and page.content):
            # Nothing to fix for files without a page or without content.
            continue

        log.debug("Applying cross-refs in page %s", page.file.src_path)

        # YORE: Bump 2: Replace `, fallback=self.get_fallback_anchor` with `` within line.
        url_mapper = functools.partial(self.get_item_url, from_url=page.url, fallback=self.get_fallback_anchor)
        backlink_recorder = functools.partial(self._record_backlink, page_url=page.url)
        # YORE: Bump 2: Replace `, _legacy_refs=self.legacy_refs` with `` within line.
        page.content, unmapped = fix_refs(page.content, url_mapper, record_backlink=backlink_recorder, _legacy_refs=self.legacy_refs)

        if not unmapped or not log.isEnabledFor(logging.WARNING):
            continue
        for ref, context in unmapped:
            message = f"from {context.filepath}:{context.lineno}: ({context.origin}) " if context else ""
            log.warning(f"{page.file.src_path}: {message}Could not find cross-reference target '{ref}'")

    return env

0 commit comments

Comments
 (0)