_description_parser.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. from __future__ import annotations
  2. import html
  3. import string
  4. from typing import TYPE_CHECKING
  5. from docutils import nodes
  6. if TYPE_CHECKING:
  7. from collections.abc import Set
  8. def get_description(
  9. doctree: nodes.document,
  10. description_length: int,
  11. known_titles: Set[str] = frozenset(),
  12. ) -> str:
  13. mcv = DescriptionParser(
  14. doctree, desc_len=description_length, known_titles=known_titles
  15. )
  16. doctree.walkabout(mcv)
  17. return mcv.description
  18. class DescriptionParser(nodes.NodeVisitor):
  19. """Finds the title and creates a description from a doctree."""
  20. def __init__(
  21. self,
  22. document: nodes.document,
  23. *,
  24. desc_len: int,
  25. known_titles: Set[str] = frozenset(),
  26. ) -> None:
  27. super().__init__(document)
  28. self.description = ''
  29. self.desc_len = desc_len
  30. self.list_level = 0
  31. self.known_titles = known_titles
  32. self.first_title_found = False
  33. # Exceptions can't be raised from dispatch_departure()
  34. # This is used to loop the stop call back to the next dispatch_visit()
  35. self.stop = False
  36. def dispatch_visit(self, node: nodes.Element) -> None:
  37. if self.stop:
  38. raise nodes.StopTraversal
  39. # Skip comments & all admonitions
  40. if isinstance(node, (nodes.Admonition, nodes.Invisible)):
  41. raise nodes.SkipNode
  42. # Mark start of nested lists
  43. if isinstance(node, nodes.Sequential):
  44. self.list_level += 1
  45. if self.list_level > 1:
  46. self.description += '-'
  47. # Skip the first title if it's the title of the page
  48. if not self.first_title_found and isinstance(node, nodes.title):
  49. self.first_title_found = True
  50. if node.astext() in self.known_titles:
  51. raise nodes.SkipNode
  52. if isinstance(node, nodes.raw) or isinstance(node.parent, nodes.literal_block):
  53. raise nodes.SkipNode
  54. # Only include leaf nodes in the description
  55. if len(node.children) == 0:
  56. text = node.astext().replace('\r', '').replace('\n', ' ').strip()
  57. # Ensure string contains HTML-safe characters
  58. text = html.escape(text, quote=True)
  59. # Remove double spaces
  60. while text.find(' ') != -1:
  61. text = text.replace(' ', ' ')
  62. # Put a space between elements if one does not already exist.
  63. if (
  64. len(self.description) > 0
  65. and len(text) > 0
  66. and self.description[-1] not in string.whitespace
  67. and text[0] not in string.whitespace + string.punctuation
  68. ):
  69. self.description += ' '
  70. self.description += text
  71. def dispatch_departure(self, node: nodes.Element) -> None:
  72. # Separate title from text
  73. if isinstance(node, nodes.title):
  74. self.description += ':'
  75. # Separate list elements
  76. if isinstance(node, nodes.Part):
  77. self.description += ','
  78. # Separate end of list from text
  79. if isinstance(node, nodes.Sequential):
  80. if self.description and self.description[-1] == ',':
  81. self.description = self.description[:-1]
  82. self.description += '.'
  83. self.list_level -= 1
  84. # Check for length
  85. if len(self.description) > self.desc_len:
  86. self.description = self.description[: self.desc_len]
  87. if self.desc_len >= 3:
  88. self.description = self.description[:-3] + '...'
  89. self.stop = True