# pathological_tests.py (4.7 KB)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import platform
import re
import sys
from timeit import default_timer as timer

from cmark import CMark
  9. if __name__ == "__main__":
  10. parser = argparse.ArgumentParser(description='Run cmark tests.')
  11. parser.add_argument('-p', '--program', dest='program', nargs='?', default=None,
  12. help='program to test')
  13. parser.add_argument('--library-dir', dest='library_dir', nargs='?',
  14. default=None, help='directory containing dynamic library')
  15. args = parser.parse_args(sys.argv[1:])
  16. cmark = CMark(prog=args.program, library_dir=args.library_dir)
  17. # list of pairs consisting of input and a regex that must match the output.
  18. pathological = {
  19. # note - some pythons have limit of 65535 for {num-matches} in re.
  20. "U+0000":
  21. ("abc\u0000de\u0000",
  22. re.compile("abc\ufffd?de\ufffd?")),
  23. "U+FEFF (Unicode BOM)":
  24. ("\ufefffoo",
  25. re.compile("<p>foo</p>")),
  26. "nested strong emph":
  27. (("*a **a " * 65000) + "b" + (" a** a*" * 65000),
  28. re.compile("(<em>a <strong>a ){65000}b( a</strong> a</em>){65000}")),
  29. "many emph closers with no openers":
  30. (("a_ " * 65000),
  31. re.compile("(a[_] ){64999}a_")),
  32. "many emph openers with no closers":
  33. (("_a " * 65000),
  34. re.compile("(_a ){64999}_a")),
  35. "many 3-emph openers with no closers":
  36. (("a***" * 65000),
  37. re.compile("(a<em><strong>a</strong></em>){32500}")),
  38. "many link closers with no openers":
  39. (("a]" * 65000),
  40. re.compile("(a\]){65000}")),
  41. "many link openers with no closers":
  42. (("[a" * 65000),
  43. re.compile("(\[a){65000}")),
  44. "mismatched openers and closers":
  45. (("*a_ " * 50000),
  46. re.compile("([*]a[_] ){49999}[*]a_")),
  47. "openers and closers multiple of 3":
  48. (("a**b" + ("c* " * 50000)),
  49. re.compile("a[*][*]b(c[*] ){49999}c[*]")),
  50. "link openers and emph closers":
  51. (("[ a_" * 50000),
  52. re.compile("(\[ a_){50000}")),
  53. "hard link/emph case":
  54. ("**x [a*b**c*](d)",
  55. re.compile("\\*\\*x <a href=\"d\">a<em>b\\*\\*c</em></a>")),
  56. "nested brackets":
  57. (("[" * 50000) + "a" + ("]" * 50000),
  58. re.compile("\[{50000}a\]{50000}")),
  59. "nested block quotes":
  60. ((("> " * 50000) + "a"),
  61. re.compile("(<blockquote>\r?\n){50000}")),
  62. "backticks":
  63. ("".join(map(lambda x: ("e" + "`" * x), range(1,1000))),
  64. re.compile("^<p>[e`]*</p>\r?\n$")),
  65. "many links":
  66. ("[t](/u) " * 50000,
  67. re.compile("(<a href=\"/u\">t</a> ?){50000}")),
  68. "many references":
  69. ("".join(map(lambda x: ("[" + str(x) + "]: u\n"), range(1,20000 * 16))) + "[0] " * 20000,
  70. re.compile("(\[0\] ){19999}")),
  71. "deeply nested lists":
  72. ("".join(map(lambda x: (" " * x + "* a\n"), range(0,1000))),
  73. re.compile("<ul>\r?\n(<li>a<ul>\r?\n){999}<li>a</li>\r?\n</ul>\r?\n(</li>\r?\n</ul>\r?\n){999}")),
  74. "many html openers and closers":
  75. (("<>" * 50000),
  76. re.compile("(&lt;&gt;){50000}")),
  77. "many html proc. inst. openers":
  78. (("x" + "<?" * 50000),
  79. re.compile("x(&lt;\\?){50000}")),
  80. "many html CDATA openers":
  81. (("x" + "<![CDATA[" * 50000),
  82. re.compile("x(&lt;!\\[CDATA\\[){50000}")),
  83. "many backticks and escapes":
  84. (("\\``" * 50000),
  85. re.compile("(``){50000}")),
  86. "many broken link titles":
  87. (("[ (](" * 50000),
  88. re.compile("(\[ \(\]\(){50000}")),
  89. "broken thematic break":
  90. (("* " * 50000 + "a"),
  91. re.compile("<ul>\r?\n(<li><ul>\r?\n){49999}<li>a</li>\r?\n</ul>\r?\n(</li>\r?\n</ul>\r?\n){49999}")),
  92. "nested invalid link references":
  93. (("[" * 50000 + "]" * 50000 + "\n\n[a]: /b"),
  94. re.compile("\[{50000}\]{50000}"))
  95. }
  96. whitespace_re = re.compile('/s+/')
  97. passed = 0
  98. errored = 0
  99. failed = 0
  100. #print("Testing pathological cases:")
  101. for description in pathological:
  102. (inp, regex) = pathological[description]
  103. start = timer()
  104. [rc, actual, err] = cmark.to_html(inp)
  105. end = timer()
  106. if rc != 0:
  107. errored += 1
  108. print('{:35} [ERRORED (return code %d)]'.format(description, rc))
  109. print(err)
  110. elif regex.search(actual):
  111. print('{:35} [PASSED] {:.3f} secs'.format(description, end-start))
  112. passed += 1
  113. else:
  114. print('{:35} [FAILED]'.format(description))
  115. print(repr(actual))
  116. failed += 1
  117. print("%d passed, %d failed, %d errored" % (passed, failed, errored))
  118. if (failed == 0 and errored == 0):
  119. exit(0)
  120. else:
  121. exit(1)