15
15
import contextlib
16
16
import functools
17
17
import logging
18
+ from collections import defaultdict
18
19
from pathlib import PurePosixPath as URL # noqa: N814
19
20
from typing import TYPE_CHECKING , Any , Callable
20
21
from urllib .parse import urlsplit
21
22
from warnings import warn
22
23
23
24
from mkdocs .config .base import Config
24
25
from mkdocs .config .config_options import Type
25
- from mkdocs .plugins import BasePlugin
26
+ from mkdocs .plugins import BasePlugin , event_priority
26
27
from mkdocs .structure .pages import Page
28
+ from mkdocs .structure .files import Files
29
+ from mkdocs .structure .nav import Section
30
+ from jinja2 .environment import Environment
27
31
28
- from mkdocs_autorefs .references import AutorefsExtension , fix_refs , relative_url
32
+ from mkdocs_autorefs .references import AutorefsExtension , URLAndTitle , _find_backlinks , fix_refs , relative_url
29
33
30
34
if TYPE_CHECKING :
31
35
from collections .abc import Sequence
43
47
log = logging .getLogger (f"mkdocs.plugins.{ __name__ } " ) # type: ignore[assignment]
44
48
45
49
50
+ # TODO: BACKLINKS: Record URLs directly. It's wrong to record ids and use them later
51
+ # to fetch all associated URLs: not all these URLs link to the cross-ref'd object.
52
+ # Also, don't store URLs + titles, only store URLs in maps, and store titles in a separate dict.
53
+ # Additionally, backlinks should be fetched for all aliases of a given identifier,
54
+ # not just for this specific identifier. For example, mkdocstrings-python will create
55
+ # an autoref for a parameter default value with `used-by` type and `object.canonical.path` as id,
56
+ # but if we render the object with `object.path` instead of this canonical path,
57
+ # then we won't find the backlinks for it.
58
+
46
59
class AutorefsConfig (Config ):
47
60
"""Configuration options for the `autorefs` plugin."""
48
61
@@ -76,7 +89,7 @@ class AutorefsPlugin(BasePlugin[AutorefsConfig]):
76
89
"""
77
90
78
91
scan_toc : bool = True
79
- current_page : str | None = None
92
+ current_page : Page | None = None
80
93
# YORE: Bump 2: Remove line.
81
94
legacy_refs : bool = True
82
95
@@ -111,7 +124,9 @@ def __init__(self) -> None:
111
124
# This logic unfolds in `_get_item_url`.
112
125
self ._primary_url_map : dict [str , list [str ]] = {}
113
126
self ._secondary_url_map : dict [str , list [str ]] = {}
127
+ self ._title_map : dict [str , str ] = {}
114
128
self ._abs_url_map : dict [str , str ] = {}
129
+ self ._backlinks : dict [str , dict [str , set [str ]]] = defaultdict (lambda : defaultdict (set ))
115
130
# YORE: Bump 2: Remove line.
116
131
self ._get_fallback_anchor : Callable [[str ], tuple [str , ...]] | None = None
117
132
@@ -133,22 +148,69 @@ def get_fallback_anchor(self, value: Callable[[str], tuple[str, ...]] | None) ->
133
148
stacklevel = 2 ,
134
149
)
135
150
136
- def register_anchor (self , page : str , identifier : str , anchor : str | None = None , * , primary : bool = True ) -> None :
151
+ def _record_backlink (self , identifier : str , backlink_type : str , backlink_anchor : str , page_url : str ) -> None :
152
+ """Record a backlink.
153
+
154
+ Arguments:
155
+ identifier: The target identifier.
156
+ backlink_type: The type of backlink.
157
+ backlink_anchor: The backlink target anchor.
158
+ page_url: The URL of the page containing the backlink.
159
+ """
160
+ if identifier in self ._primary_url_map or identifier in self ._secondary_url_map :
161
+ self ._backlinks [identifier ][backlink_type ].add (f"{ page_url } #{ backlink_anchor } " )
162
+
163
def get_backlinks(self, *identifiers: str, from_url: str) -> dict[str, set[URLAndTitle]]:
    """Return the backlinks to the given identifiers, relative to the given URL.

    Arguments:
        *identifiers: The identifiers to get backlinks for.
        from_url: The URL of the page where backlinks are rendered.

    Returns:
        A dictionary mapping each type of reference to a set of `(relative URL, title)` pairs.
        The title is `None` when no title was stored for the backlink URL.
    """
    relative_backlinks: dict[str, set[URLAndTitle]] = defaultdict(set)
    for identifier in identifiers:
        # `.get` avoids inserting empty entries for unknown identifiers into the `defaultdict`.
        backlinks = self._backlinks.get(identifier, {})
        for backlink_type, backlink_urls in backlinks.items():
            for backlink_url in backlink_urls:
                # Titles are only stored for anchors registered with one,
                # so a plain subscript could raise `KeyError` here.
                title = self._title_map.get(backlink_url)
                relative_backlinks[backlink_type].add((relative_url(from_url, backlink_url), title))
    return relative_backlinks
180
+
181
+ def _breadcrumbs (self , page : Page | Section , title : str ) -> str :
182
+ breadcrumbs = [title , page .title ]
183
+ while page .parent :
184
+ page = page .parent
185
+ breadcrumbs .append (page .title )
186
+ return " ❭ " .join (reversed (breadcrumbs ))
187
+
188
def register_anchor(
    self,
    identifier: str,
    anchor: str | None = None,
    *,
    title: str | None = None,
    primary: bool = True,
) -> None:
    """Register that an anchor corresponding to an identifier was encountered when rendering the page.

    The anchor is recorded against the page referenced by `self.current_page`.

    Arguments:
        identifier: The identifier to register.
        anchor: The anchor on the page, without `#`. If not provided, defaults to the identifier.
        title: The title of the anchor (optional). When provided, it is stored as breadcrumbs
            (see `_breadcrumbs`), unless a title is already stored for this anchor.
        primary: Whether this anchor is the primary one for the identifier.

    Raises:
        RuntimeError: If no current page is set.
    """
    page = self.current_page
    if page is None:
        # Fail with an explicit message rather than an `AttributeError` on `None.url`.
        raise RuntimeError(f"Cannot register anchor for '{identifier}': no current page is set")

    page_anchor = f"{page.url}#{anchor or identifier}"
    url_map = self._primary_url_map if primary else self._secondary_url_map
    anchors = url_map.setdefault(identifier, [])
    if page_anchor not in anchors:
        anchors.append(page_anchor)

    # The first title registered for a given anchor wins.
    if title and page_anchor not in self._title_map:
        self._title_map[page_anchor] = self._breadcrumbs(page, title)
152
214
153
215
def register_url (self , identifier : str , url : str ) -> None :
154
216
"""Register that the identifier should be turned into a link to this URL.
@@ -240,7 +302,7 @@ def get_item_url(
240
302
from_url : str | None = None ,
241
303
# YORE: Bump 2: Remove line.
242
304
fallback : Callable [[str ], Sequence [str ]] | None = None ,
243
- ) -> str :
305
+ ) -> URLAndTitle :
244
306
"""Return a site-relative URL with anchor to the identifier, if it's present anywhere.
245
307
246
308
Arguments:
@@ -252,11 +314,12 @@ def get_item_url(
252
314
"""
253
315
# YORE: Bump 2: Replace `, fallback` with `` within line.
254
316
url = self ._get_item_url (identifier , from_url , fallback )
317
+ title = self ._title_map .get (url ) or None
255
318
if from_url is not None :
256
319
parsed = urlsplit (url )
257
320
if not parsed .scheme and not parsed .netloc :
258
- return relative_url (from_url , url )
259
- return url
321
+ url = relative_url (from_url , url )
322
+ return url , title
260
323
261
324
def on_config (self , config : MkDocsConfig ) -> MkDocsConfig | None :
262
325
"""Instantiate our Markdown extension.
@@ -287,7 +350,7 @@ def on_page_markdown(self, markdown: str, page: Page, **kwargs: Any) -> str: #
287
350
The same Markdown. We only use this hook to keep a reference to the current page URL,
288
351
used during Markdown conversion by the anchor scanner tree processor.
289
352
"""
290
- self .current_page = page . url
353
+ self .current_page = page
291
354
return markdown
292
355
293
356
def on_page_content (self , html : str , page : Page , ** kwargs : Any ) -> str : # noqa: ARG002
@@ -306,56 +369,61 @@ def on_page_content(self, html: str, page: Page, **kwargs: Any) -> str: # noqa:
306
369
Returns:
307
370
The same HTML. We only use this hook to map anchors to URLs.
308
371
"""
372
+ self .current_page = page
373
+ # Collect `std`-domain URLs.
309
374
if self .scan_toc :
310
375
log .debug ("Mapping identifiers to URLs for page %s" , page .file .src_path )
311
376
for item in page .toc .items :
312
- self .map_urls (page . url , item )
377
+ self .map_urls (item )
313
378
return html
314
379
315
- def map_urls (self , base_url : str , anchor : AnchorLink ) -> None :
380
def map_urls(self, anchor: AnchorLink) -> None:
    """Register an anchor and all of its descendants as primary anchors.

    Anchors are visited depth-first, parents before children. Each one is passed
    to `register_anchor` with its ID and title, which populates
    `self._primary_url_map` by side-effect.

    Arguments:
        anchor: The anchor to process, along with its children.
    """
    pending = [anchor]
    while pending:
        current = pending.pop()
        self.register_anchor(current.id, title=current.title, primary=True)
        # Push children reversed so that they are popped in their original order.
        pending.extend(reversed(current.children))
327
391
328
- def on_post_page (self , output : str , page : Page , ** kwargs : Any ) -> str : # noqa: ARG002
329
- """Fix cross-references.
392
@event_priority(-50)  # Late, after mkdocstrings has finished loading inventories.
def on_env(self, env: Environment, /, *, config: MkDocsConfig, files: Files) -> Environment:
    """Apply cross-references and collect backlinks.

    Hook for the [`on_env` event](https://www.mkdocs.org/user-guide/plugins/#on_env).
    For every file that has a page with content, unresolved references of the form
    `[title][identifier]` or `[identifier][]` are fixed in the page content.
    This lets users of `autorefs` cross-reference objects in their documentation strings
    with the native Markdown syntax.

    A warning is logged for each reference that could not be mapped to a URL.

    Backlinks are collected during the same pass, for performance reasons
    (we don't want to run the regular expression on each page twice).

    Arguments:
        env: The MkDocs environment.
        config: The MkDocs config object.
        files: The list of files in the MkDocs project.

    Returns:
        The unmodified environment.
    """
    for file in files:
        page = file.page
        # Skip files without a page, or whose page has no content.
        if not page or not page.content:
            continue

        log.debug("Applying cross-refs in page %s", page.file.src_path)

        # YORE: Bump 2: Replace `, fallback=self.get_fallback_anchor` with `` within line.
        url_mapper = functools.partial(self.get_item_url, from_url=page.url, fallback=self.get_fallback_anchor)
        backlink_recorder = functools.partial(self._record_backlink, page_url=page.url)
        # YORE: Bump 2: Replace `, _legacy_refs=self.legacy_refs` with `` within line.
        page.content, unmapped = fix_refs(page.content, url_mapper, record_backlink=backlink_recorder, _legacy_refs=self.legacy_refs)

        # Only build warning messages when they will actually be emitted.
        if not unmapped or not log.isEnabledFor(logging.WARNING):
            continue
        for ref, context in unmapped:
            message = f"from {context.filepath}:{context.lineno}: ({context.origin}) " if context else ""
            log.warning(f"{page.file.src_path}: {message}Could not find cross-reference target '{ref}'")

    return env
0 commit comments