| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113 |
- from __future__ import annotations
- import html
- import string
- from typing import TYPE_CHECKING
- from docutils import nodes
- if TYPE_CHECKING:
- from collections.abc import Set
def get_description(
    doctree: nodes.document,
    description_length: int,
    known_titles: Set[str] = frozenset(),
) -> str:
    """Build a plain-text description string from *doctree*.

    A ``DescriptionParser`` visitor is run over the whole tree with
    ``walkabout`` and the text it accumulated is returned.

    :param doctree: The document tree to summarise.
    :param description_length: Length limit handed to the parser as
        ``desc_len``.
    :param known_titles: Title strings handed to the parser; the parser
        skips the first title it meets when its text is in this set.
    :returns: The description collected by the parser.
    """
    parser = DescriptionParser(
        doctree,
        known_titles=known_titles,
        desc_len=description_length,
    )
    doctree.walkabout(parser)
    return parser.description
class DescriptionParser(nodes.NodeVisitor):
    """Finds the title and creates a description from a doctree.

    The visitor concatenates the text of leaf nodes into ``description``,
    inserting light punctuation (``:`` after titles, ``,`` between list
    parts, ``.`` at the end of a list) and stops once the text exceeds
    ``desc_len`` characters.
    """

    def __init__(
        self,
        document: nodes.document,
        *,
        desc_len: int,
        known_titles: Set[str] = frozenset(),
    ) -> None:
        """Initialise the visitor state.

        :param document: The document that will be traversed.
        :param desc_len: Maximum length of the generated description.
        :param known_titles: Title strings; the first title encountered is
            skipped when its text is in this set.
        """
        super().__init__(document)
        # Text accumulated so far.
        self.description = ''
        self.desc_len = desc_len
        # Nesting depth of the ``nodes.Sequential`` nodes currently open.
        self.list_level = 0
        self.known_titles = known_titles
        # Set once the first title node has been seen (skipped or not).
        self.first_title_found = False

        # Exceptions can't be raised from dispatch_departure()
        # This is used to loop the stop call back to the next dispatch_visit()
        self.stop = False

    def dispatch_visit(self, node: nodes.Element) -> None:
        """Handle entry into *node*, appending leaf text to the description.

        :raises nodes.StopTraversal: If a previous departure flagged that
            the length limit was exceeded.
        :raises nodes.SkipNode: For admonitions, invisible nodes, the first
            title when it is a known title, raw nodes, and children of
            literal blocks.
        """
        if self.stop:
            raise nodes.StopTraversal

        # Skip comments & all admonitions
        if isinstance(node, (nodes.Admonition, nodes.Invisible)):
            raise nodes.SkipNode

        # Mark start of nested lists
        if isinstance(node, nodes.Sequential):
            self.list_level += 1
            if self.list_level > 1:
                self.description += '-'

        # Skip the first title if it's the title of the page
        if not self.first_title_found and isinstance(node, nodes.title):
            self.first_title_found = True
            if node.astext() in self.known_titles:
                raise nodes.SkipNode

        if isinstance(node, nodes.raw) or isinstance(node.parent, nodes.literal_block):
            raise nodes.SkipNode

        # Only include leaf nodes in the description
        if not node.children:
            text = node.astext().replace('\r', '').replace('\n', ' ').strip()

            # Ensure string contains HTML-safe characters
            text = html.escape(text, quote=True)

            # Remove double spaces. The search and the replacement must
            # differ (two spaces -> one), otherwise the loop never ends.
            while '  ' in text:
                text = text.replace('  ', ' ')

            # Put a space between elements if one does not already exist.
            if (
                self.description
                and text
                and self.description[-1] not in string.whitespace
                and text[0] not in string.whitespace + string.punctuation
            ):
                self.description += ' '

            self.description += text

    def dispatch_departure(self, node: nodes.Element) -> None:
        """Handle exit from *node*: add separators and enforce the length limit.

        When the description exceeds ``desc_len`` it is truncated (ending in
        ``...`` if ``desc_len`` is at least 3) and ``stop`` is set so that the
        next ``dispatch_visit`` call ends the traversal.
        """
        # Separate title from text
        if isinstance(node, nodes.title):
            self.description += ':'

        # Separate list elements
        if isinstance(node, nodes.Part):
            self.description += ','

        # Separate end of list from text
        if isinstance(node, nodes.Sequential):
            # Replace the separator left by the last list element with '.'
            if self.description and self.description[-1] == ',':
                self.description = self.description[:-1]
            self.description += '.'
            self.list_level -= 1

        # Check for length
        if len(self.description) > self.desc_len:
            self.description = self.description[: self.desc_len]
            if self.desc_len >= 3:
                # Swap the last three characters for an ellipsis so the
                # result still fits within desc_len.
                self.description = self.description[:-3] + '...'
            self.stop = True
|