cpp
/
crown-engine
mirror of https://github.com/crownengine/crown.git


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
							from __future__ import annotations

import html
import string
from typing import TYPE_CHECKING

from docutils import nodes

if TYPE_CHECKING:
    from collections.abc import Set


def get_description(
    doctree: nodes.document,
    description_length: int,
    known_titles: Set[str] = frozenset(),
) -> str:
    """Return a textual description extracted from *doctree*.

    A ``DescriptionParser`` is created with *description_length* as its
    length limit and *known_titles* as the set of titles it may skip, the
    doctree is traversed with it, and the text it accumulated is returned.
    """
    parser = DescriptionParser(
        doctree,
        known_titles=known_titles,
        desc_len=description_length,
    )
    doctree.walkabout(parser)
    return parser.description


class DescriptionParser(nodes.NodeVisitor):
    """Finds the title and creates a description from a doctree.

    The visitor concatenates the HTML-escaped text of childless nodes into
    ``self.description``, inserting ``:`` after titles, ``,`` after
    ``nodes.Part`` nodes, ``.`` after ``nodes.Sequential`` nodes and ``-``
    in front of nested ones. Once the text grows past ``desc_len`` it is
    truncated (ending in ``...`` when ``desc_len >= 3``) and the traversal
    is stopped.
    """

    # Characters in front of which no separating space is inserted.
    _NO_SPACE_BEFORE = string.whitespace + string.punctuation

    def __init__(
        self,
        document: nodes.document,
        *,
        desc_len: int,
        known_titles: Set[str] = frozenset(),
    ) -> None:
        """Set up an empty description limited to *desc_len* characters.

        *known_titles* holds title texts; if the first title encountered
        matches one of them it is left out of the description.
        """
        super().__init__(document)
        self.description = ''
        self.desc_len = desc_len
        # Current nesting depth of ``nodes.Sequential`` nodes.
        self.list_level = 0
        self.known_titles = known_titles
        self.first_title_found = False

        # Exceptions can't be raised from dispatch_departure()
        # This is used to loop the stop call back to the next dispatch_visit()
        self.stop = False

    def dispatch_visit(self, node: nodes.Node) -> None:
        """Append the text of *node* to the description if it is a leaf.

        Raises ``nodes.StopTraversal`` once the description has been
        truncated, and ``nodes.SkipNode`` for admonitions, invisible nodes,
        raw nodes, children of literal blocks and a first title that is
        listed in ``known_titles``.
        """
        if self.stop:
            raise nodes.StopTraversal

        # Skip comments & all admonitions
        if isinstance(node, (nodes.Admonition, nodes.Invisible)):
            raise nodes.SkipNode

        # Mark start of nested lists
        if isinstance(node, nodes.Sequential):
            self.list_level += 1
            if self.list_level > 1:
                self.description += '-'

        # Skip the first title if it's the title of the page
        if not self.first_title_found and isinstance(node, nodes.title):
            self.first_title_found = True
            if node.astext() in self.known_titles:
                raise nodes.SkipNode

        if isinstance(node, nodes.raw) or isinstance(node.parent, nodes.literal_block):
            raise nodes.SkipNode

        # Only include leaf nodes in the description
        if node.children:
            return

        text = node.astext().replace('\r', '').replace('\n', ' ').strip()

        # Ensure string contains HTML-safe characters
        text = html.escape(text, quote=True)

        # Remove double spaces
        while '  ' in text:
            text = text.replace('  ', ' ')

        # Put a space between elements if one does not already exist.
        if (
            self.description
            and text
            and self.description[-1] not in string.whitespace
            and text[0] not in self._NO_SPACE_BEFORE
        ):
            self.description += ' '

        self.description += text

    def dispatch_departure(self, node: nodes.Node) -> None:
        """Append the separator for *node* and enforce the length limit."""
        # The description is final once it has been truncated; anything
        # appended now would end up after the ellipsis.
        if self.stop:
            return

        # Separate title from text
        if isinstance(node, nodes.title):
            self.description += ':'

        # Separate list elements
        if isinstance(node, nodes.Part):
            self.description += ','

        # Separate end of list from text
        if isinstance(node, nodes.Sequential):
            if self.description.endswith(','):
                self.description = self.description[:-1]
            self.description += '.'
            self.list_level -= 1

        # Check for length
        if len(self.description) > self.desc_len:
            self._truncate()
            self.stop = True

    def _truncate(self) -> None:
        """Cut the description down to at most ``desc_len`` characters.

        When ``desc_len >= 3`` the last three characters of the budget are
        used for a ``...`` marker. An HTML entity split by the cut is
        removed entirely, so the result may be slightly shorter than
        ``desc_len``.
        """
        limit = self.desc_len
        if limit >= 3:
            kept, suffix = self.description[: limit - 3], '...'
        else:
            kept, suffix = self.description[:limit], ''

        # All text was passed through html.escape(), so every '&' starts an
        # entity terminated by ';'. A last '&' with no ';' after it means
        # the cut landed inside that entity.
        amp = kept.rfind('&')
        if amp != -1 and ';' not in kept[amp:]:
            kept = kept[:amp]

        self.description = kept + suffix