entities.py 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. #!/usr/bin/env python3
  2. """entities.h generator"""
  3. import re
  4. import sys
  5. import textwrap
  6. from pathlib import Path
  7. from typing import Dict
  8. # get names for html-4.0 characters from:
  9. # http://www.w3.org/TR/REC-html40/sgml/entities.html
  10. entity_name_length_max: int = 0
  11. entity: Dict[str, str] = {}
  12. with open(Path(__file__).parent / "entities.html", "rb") as f:
  13. for rec in f:
  14. if m := re.match(
  15. rb'&lt;!ENTITY\s+(?P<name>[^\s]*)\s+CDATA\s+"&amp;#(?P<val>\d+);"\s+--', rec
  16. ):
  17. name = m.group("name").decode("utf-8")
  18. val = m.group("val").decode("utf-8")
  19. entity[name] = val
  20. entity_name_length_max = max(entity_name_length_max, len(name))
  21. with open(sys.argv[1], "wt", encoding="utf-8") as f:
  22. f.write(
  23. textwrap.dedent(
  24. f"""\
  25. /// @file
  26. /// @ingroup common_utils
  27. /*
  28. * Generated file - do not edit directly.
  29. *
  30. * This file was generated from:
  31. * http://www.w3.org/TR/REC-html40/sgml/entities.html
  32. * by means of the script:
  33. * {Path(__file__).name}
  34. */
  35. #ifdef __cplusplus
  36. extern "C" {{
  37. #endif
  38. static const struct entities_s {{
  39. char *name;
  40. int value;
  41. }} entities[] = {{
  42. """
  43. )
  44. )
  45. for name, val in sorted(list(entity.items())):
  46. f.write(f' {{"{name}", {val}}},\n')
  47. f.write(
  48. textwrap.dedent(
  49. f"""\
  50. }};
  51. #define ENTITY_NAME_LENGTH_MAX {entity_name_length_max}
  52. #define NR_OF_ENTITIES (sizeof(entities) / sizeof(entities[0]))
  53. #ifdef __cplusplus
  54. }}
  55. #endif
  56. """
  57. )
  58. )