Skip to content

Commit dcd276d

Browse files
committed
Extract _feed_visit_nodes
1 parent c87b758 commit dcd276d

File tree

1 file changed

+48
-39
lines changed

1 file changed

+48
-39
lines changed

sphinx/search/__init__.py

Lines changed: 48 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from sphinx.util.index_entries import split_index_msg
2020

2121
if TYPE_CHECKING:
22-
from collections.abc import Iterable
22+
from collections.abc import Callable, Iterable
2323

2424
from sphinx.environment import BuildEnvironment
2525

@@ -525,47 +525,12 @@ def stem(word_to_stem: str) -> str:
525525
self._index_entries[docname] = sorted(_index_entries)
526526

527527
def _word_collector(self, doctree: nodes.document) -> WordStore:
528-
def _visit_nodes(node: nodes.Node) -> None:
529-
if isinstance(node, nodes.comment):
530-
return
531-
elif isinstance(node, nodes.raw):
532-
if 'html' in node.get('format', '').split():
533-
# Some people might put content in raw HTML that should be searched,
534-
# so we just amateurishly strip HTML tags and index the remaining
535-
# content
536-
nodetext = re.sub(
537-
r'<style.*?</style>',
538-
'',
539-
node.astext(),
540-
flags=re.IGNORECASE | re.DOTALL,
541-
)
542-
nodetext = re.sub(
543-
r'<script.*?</script>',
544-
'',
545-
nodetext,
546-
flags=re.IGNORECASE | re.DOTALL,
547-
)
548-
nodetext = re.sub(r'<[^<]+?>', '', nodetext)
549-
word_store.words.extend(split(nodetext))
550-
return
551-
elif isinstance(node, nodes.meta) and _is_meta_keywords(node, language):
552-
keywords = [keyword.strip() for keyword in node['content'].split(',')]
553-
word_store.words.extend(keywords)
554-
elif isinstance(node, nodes.Text):
555-
word_store.words.extend(split(node.astext()))
556-
elif isinstance(node, nodes.title):
557-
title, is_main_title = node.astext(), len(word_store.titles) == 0
558-
ids = node.parent['ids']
559-
title_node_id = None if is_main_title else ids[0] if ids else None
560-
word_store.titles.append((title, title_node_id))
561-
word_store.title_words.extend(split(title))
562-
for child in node.children:
563-
_visit_nodes(child)
564-
565528
word_store = WordStore()
566529
split = self.lang.split
567530
language = self.lang.lang
568-
_visit_nodes(doctree)
531+
_feed_visit_nodes(
532+
doctree, word_store=word_store, split=split, language=language
533+
)
569534
return word_store
570535

571536
def context_for_searchtool(self) -> dict[str, Any]:
@@ -611,3 +576,47 @@ def get_js_stemmer_code(self) -> str:
611576
)
612577
else:
613578
return self.lang.js_stemmer_code
579+
580+
581+
def _feed_visit_nodes(
    node: nodes.Node,
    *,
    word_store: WordStore,
    split: Callable[[str], list[str]],
    language: str,
) -> None:
    """Recursively feed the text found under *node* into *word_store*.

    The node itself is handled first, then each of its children in order:

    * comment nodes are skipped entirely;
    * raw nodes whose ``format`` includes ``html`` have their markup stripped
      and the remaining text split into words;
    * meta nodes accepted by ``_is_meta_keywords`` contribute the
      comma-separated entries of their ``content`` attribute as words;
    * text nodes are split into words;
    * title nodes are recorded in ``word_store.titles`` and their text is
      split into ``word_store.title_words``.

    :param node: The node to start from.
    :param word_store: Accumulator that is updated in place.
    :param split: Callable that breaks a string into a list of words.
    :param language: Language code handed to ``_is_meta_keywords``.
    """
    # Comments contribute nothing, and neither do their children.
    if isinstance(node, nodes.comment):
        return

    if isinstance(node, nodes.raw):
        formats = node.get('format', '').split()
        if 'html' in formats:
            # Raw HTML may still hold text worth searching, so crudely drop
            # whole <style> and <script> elements, then any remaining tags,
            # and index what is left.
            stripped = node.astext()
            for element_pattern in (r'<style.*?</style>', r'<script.*?</script>'):
                stripped = re.sub(
                    element_pattern,
                    '',
                    stripped,
                    flags=re.IGNORECASE | re.DOTALL,
                )
            stripped = re.sub(r'<[^<]+?>', '', stripped)
            word_store.words.extend(split(stripped))
        # Raw nodes are not descended into, whatever their format.
        # NOTE(review): confirm this return sits outside the 'html' check.
        return

    if isinstance(node, nodes.meta) and _is_meta_keywords(node, language):
        # Keywords are stored as given (whitespace-trimmed), not run
        # through ``split``.
        word_store.words.extend(
            keyword.strip() for keyword in node['content'].split(',')
        )
    elif isinstance(node, nodes.Text):
        word_store.words.extend(split(node.astext()))
    elif isinstance(node, nodes.title):
        heading = node.astext()
        parent_ids = node.parent['ids']
        # The first title recorded is treated as the main title and gets no
        # anchor; later titles use the first id of their parent, if any.
        if word_store.titles and parent_ids:
            anchor = parent_ids[0]
        else:
            anchor = None
        word_store.titles.append((heading, anchor))
        word_store.title_words.extend(split(heading))

    for child_node in node.children:
        _feed_visit_nodes(
            child_node,
            word_store=word_store,
            split=split,
            language=language,
        )

0 commit comments

Comments
 (0)