utest.markdown.utils.pas 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. {
  2. This file is part of the Free Component Library (FCL)
  3. Copyright (c) 2025 by Michael Van Canneyt
  4. Markdown utils tests
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. unit UTest.Markdown.Utils;
  12. {$mode objfpc}
  13. {$H+}
  14. interface
  15. uses
  16. Classes, SysUtils, fpcunit, testregistry, contnrs,
  17. markdown.utils;
  type

    { TTestMarkdownUtils

      fpcunit test case exercising the string helper routines this unit pulls
      in through its uses clause (presumably all from markdown.utils - confirm).
      Each published method is one test. }
    TTestMarkdownUtils = class(TTestCase)
    private
      // Entity lookup table; SetUp fills it with the 'amp' and 'lt' entries.
      FEntities: TFPStringHashTable;
      // Builder created in SetUp and released in TearDown.
      FBuilder: TStringBuilder;
    protected
      // Fixture management, run around every test method.
      procedure SetUp; override;
      procedure TearDown; override;
    published
      // Character / string classification
      procedure TestIsWhitespaceChar;
      procedure TestIsWhitespace;
      procedure TestMustEscape;
      procedure TestIsStringOfChar;
      // Prefix copying
      procedure TestCopyUpTo;
      procedure TestCopySkipped;
      procedure TestCopyMatching;
      // Leading whitespace and tab handling
      procedure TestStartsWithWS;
      procedure TestLeadingWhitespace;
      procedure TestLengthWhiteSpaceCorrected;
      procedure TestRemoveWS;
      procedure TestStripWhitespace;
      // Escaping and entities
      procedure TestHtmlEscape;
      procedure TestUrlEscape;
      procedure TestParseEntityString;
      procedure TestCheckForEntity;
      // Miscellaneous helpers
      procedure TestIsRegexMatch;
      procedure TestIsUnicodePunctuation;
      procedure TestCountStartChars;
      procedure TestToUnicodeChars;
      procedure TestTransformTabs;
    end;
  50. implementation
  { Builds the fixture used by the tests: a string builder and an entity
    table holding the two entries 'amp' -> '&' and 'lt' -> '<'. }
  procedure TTestMarkdownUtils.SetUp;
  begin
    // Allocate both fixture objects first ...
    FEntities := TFPStringHashTable.Create;
    FBuilder := TStringBuilder.Create;

    // ... then seed the entity table.
    FEntities.Add('amp', '&');
    FEntities.Add('lt', '<');
  end;
  { Releases the fixture objects created by SetUp.

    FreeAndNil is used instead of a plain Free so the fields never keep a
    dangling reference after the objects are destroyed; a repeated TearDown
    then becomes a harmless no-op. Objects are released in reverse order of
    creation. }
  procedure TTestMarkdownUtils.TearDown;
  begin
    FreeAndNil(FBuilder);
    FreeAndNil(FEntities);
  end;
  { Checks isWhitespaceChar on single characters: space, tab and line feed are
    expected to count as whitespace; a letter and carriage return are not. }
  procedure TTestMarkdownUtils.TestIsWhitespaceChar;
  const
    cTab = #9;
    cLineFeed = #10;
    cCarriageReturn = #13;
  begin
    // Characters expected to be reported as whitespace
    AssertTrue('Space should be whitespace', isWhitespaceChar(' '));
    AssertTrue('Tab should be whitespace', isWhitespaceChar(cTab));
    AssertTrue('Line Feed should be whitespace', isWhitespaceChar(cLineFeed));

    // Characters expected to be rejected
    AssertFalse('Letter "a" should not be whitespace', isWhitespaceChar('a'));
    AssertFalse('Carriage Return is not considered whitespace by this implementation',
      isWhitespaceChar(cCarriageReturn));
  end;
  { Checks isWhitespace on whole strings: the empty string and strings made
    only of space/tab/line feed are expected to pass, anything containing a
    letter is expected to fail. }
  procedure TTestMarkdownUtils.TestIsWhitespace;
  begin
    // Accepted inputs
    AssertTrue('Empty string is considered whitespace',
      isWhitespace(''));
    AssertTrue('String with only spaces is whitespace',
      isWhitespace(' '));
    AssertTrue('String with mixed whitespace is whitespace',
      isWhitespace(#9#10' '));

    // Rejected input
    AssertFalse('String with non-whitespace characters is not whitespace',
      isWhitespace(' a '));
  end;
  { Checks MustEscape: '!', '&', '\' and '`' are expected to require escaping,
    while a letter, a digit and a space are not. }
  procedure TTestMarkdownUtils.TestMustEscape;
  const
    // Checked in this order; the message is built as '<char> must be escaped'.
    Escaped: array[0..3] of Char = ('!', '&', '\', '`');
  var
    C: Char;
  begin
    for C in Escaped do
      AssertTrue(C + ' must be escaped', MustEscape(C));

    AssertFalse('a must not be escaped', MustEscape('a'));
    AssertFalse('1 must not be escaped', MustEscape('1'));
    AssertFalse('space must not be escaped', MustEscape(' '));
  end;
  { Checks IsStringOfChar: empty, single-character and uniform strings are
    expected to pass; a string with a differing character is expected to fail. }
  procedure TTestMarkdownUtils.TestIsStringOfChar;
  begin
    // Degenerate inputs
    AssertTrue('Empty string',
      IsStringOfChar(''));
    AssertTrue('Single character string',
      IsStringOfChar('a'));

    // Uniform versus mixed content
    AssertTrue('String of identical chars',
      IsStringOfChar('---'));
    AssertFalse('String of non-identical chars',
      IsStringOfChar('--a'));
  end;
  { Checks CopyUpTo: the result is expected to be the part of the input that
    precedes the first character contained in the given set. }
  procedure TTestMarkdownUtils.TestCopyUpTo;
  const
    // Characters at which copying is expected to stop.
    Stops = ['#', ';'];
  begin
    AssertEquals('Stop at first excluded char', 'abc',
      CopyUpTo('abc#def', Stops));
    AssertEquals('No excluded chars present', 'abcdef',
      CopyUpTo('abcdef', Stops));
    AssertEquals('Excluded char at start', '',
      CopyUpTo('#abcdef', Stops));
    AssertEquals('Empty string', '',
      CopyUpTo('', Stops));
  end;
  { Checks CopySkipped: leading characters contained in the given set are
    expected to be dropped and the remainder returned. }
  procedure TTestMarkdownUtils.TestCopySkipped;
  const
    OnlySpace = [' '];
    OnlyTab = [#9];
    SpaceOrTab = [' ', #9];
  begin
    // Inputs with something to skip
    AssertEquals('Skip leading spaces', 'abc',
      CopySkipped(' abc', OnlySpace));
    AssertEquals('Skip leading tabs', 'abc',
      CopySkipped(#9#9'abc', OnlyTab));
    AssertEquals('Skip mixed leading whitespace', 'abc',
      CopySkipped(#9' abc', SpaceOrTab));

    // Boundary inputs
    AssertEquals('No chars to skip', 'abc',
      CopySkipped('abc', OnlySpace));
    AssertEquals('String with only skippable chars', '',
      CopySkipped(' ', OnlySpace));
    AssertEquals('Empty string', '',
      CopySkipped('', OnlySpace));
  end;
  { Checks CopyMatching: the result is expected to be the leading run of
    characters that belong to the given set. }
  procedure TTestMarkdownUtils.TestCopyMatching;
  const
    Digits = ['0'..'9'];
  begin
    AssertEquals('Match leading digits', '123',
      CopyMatching('123abc', Digits));
    AssertEquals('No matching chars at start', '',
      CopyMatching('abc123', Digits));
    AssertEquals('String with only matching chars', '123',
      CopyMatching('123', Digits));
    AssertEquals('Empty string', '',
      CopyMatching('', Digits));
  end;
  { Checks StartsWithWhitespace for both a character marker and a string
    marker: the marker is expected to be found after up to three leading
    spaces, with the number of spaces returned in Len.

    The input literals carry exactly the number of spaces named in each
    assertion message. Previously the one-, three- and four-space cases all
    used the same single-space literal, so the expectations contradicted each
    other (Len = 1 and Len = 3 for identical input, and an AssertFalse on an
    input that had just been asserted True). }
  procedure TTestMarkdownUtils.TestStartsWithWS;
  var
    Len: Integer;
  begin
    // Character marker
    AssertTrue('Char: one space', StartsWithWhitespace(' >', '>', Len));
    AssertEquals('Char: Length for one space', 1, Len);
    AssertTrue('Char: three spaces', StartsWithWhitespace('   >', '>', Len));
    AssertEquals('Char: Length for three spaces', 3, Len);
    AssertFalse('Char: four spaces (more than default wsLen=3)', StartsWithWhitespace('    >', '>', Len));
    AssertTrue('Char: no space', StartsWithWhitespace('>', '>', Len));
    AssertEquals('Char: Length for no space', 0, Len);
    AssertFalse('Char: wrong prefix', StartsWithWhitespace('x>', '>', Len));

    // String marker
    AssertTrue('String: one space', StartsWithWhitespace(' item', 'item', Len));
    AssertEquals('String: Length for one space', 1, Len);
    AssertTrue('String: three spaces', StartsWithWhitespace('   item', 'item', Len));
    AssertEquals('String: Length for three spaces', 3, Len);

    // Todo: take into account tabs
    // AssertTrue('String: one tab', StartsWithWhitespace(#9'item', 'item', Len));
    // AssertEquals('String: Length for one tab', 1, Len);
  end;
  { Checks LeadingWhitespace: the result is expected to be the width of the
    leading whitespace, with a tab advancing to the next multiple of four
    columns (as the expected values 4 and 8 below imply). The three-argument
    form is additionally expected to return the number of tabs and the number
    of whitespace characters.

    Each input literal contains exactly the number of spaces named in its
    message. Previously every case used a single space, which cannot satisfy
    the "2 for two spaces" and "8 for 4space-tab" expectations. }
  procedure TTestMarkdownUtils.TestLeadingWhitespace;
  var
    Tabs, Chars: Integer;
  begin
    // Spaces only
    AssertEquals('Should be 0 for "abc"', 0, LeadingWhitespace('abc'));
    AssertEquals('Should be 2 for "  abc"', 2, LeadingWhitespace('  abc'));

    // Tabs, alone and preceded by 1..4 spaces
    AssertEquals('Should be 4 for tab', 4, LeadingWhitespace(#9'abc'));
    AssertEquals('Should be 4 for space-tab', 4, LeadingWhitespace(' '#9'abc'));
    AssertEquals('Should be 4 for 2space-tab', 4, LeadingWhitespace('  '#9'abc'));
    AssertEquals('Should be 4 for 3space-tab', 4, LeadingWhitespace('   '#9'abc'));
    AssertEquals('Should be 8 for 4space-tab', 8, LeadingWhitespace('    '#9'abc'));
    AssertEquals('Should be 8 for 2 tabs', 8, LeadingWhitespace(#9#9'abc'));

    // Out parameters: one space + one tab = 1 tab, 2 whitespace characters
    AssertEquals('Check returned spaces', 4, LeadingWhitespace(' '#9'abc', Tabs, Chars));
    AssertEquals('Check returned tabs', 1, Tabs);
    AssertEquals('Check returned whitespace chars', 2, Chars);
  end;
  { Checks LengthWhiteSpaceCorrected: the expected values correspond to the
    string length with each tab counted as the distance to the next 4-column
    tab stop. }
  procedure TTestMarkdownUtils.TestLengthWhiteSpaceCorrected;
  begin
    // No tabs: plain length
    AssertEquals('Length of "abc"', 3,
      LengthWhiteSpaceCorrected('abc'));

    // A lone tab fills a whole tab stop
    AssertEquals('Length of tab', 4,
      LengthWhiteSpaceCorrected(#9));

    // 'a' (1) + tab starting at column 2 (3) + 'b' (1)
    AssertEquals('Length of "a<tab>b"', 5,
      LengthWhiteSpaceCorrected('a'#9'b'));

    // 'abcd' (4) + tab starting at column 5 (4)
    AssertEquals('Length of "abcd<tab>"', 8,
      LengthWhiteSpaceCorrected('abcd'#9));
  end;
  { Checks RemoveLeadingWhiteSpace: the requested number of whitespace columns
    is expected to be removed from the start of the string. The expectations
    assume a tab is 4 columns wide, as the assertion messages state.

    The literals carry the number of spaces named in each message. Previously
    the "2 of 4 spaces" case passed a single space, and the partial-removal
    cases expected a single remaining space instead of two. }
  procedure TTestMarkdownUtils.TestRemoveWS;
  begin
    // Spaces only
    AssertEquals('Remove 2 spaces', 'abc', RemoveLeadingWhiteSpace('  abc', 2));
    AssertEquals('Remove 2 of 4 spaces', '  abc', RemoveLeadingWhiteSpace('    abc', 2));

    // Tabs: removing a whole tab, and removing half of one (2 columns remain)
    AssertEquals('Remove 1 tab (width 4)', 'abc', RemoveLeadingWhiteSpace(#9'abc', 4));
    AssertEquals('Remove 2 spaces from tab (width 4)', '  abc', RemoveLeadingWhiteSpace(#9'abc', 2));

    // Space followed by tab spans 4 columns in total
    AssertEquals('Remove space and tab (total width 4)', 'abc', RemoveLeadingWhiteSpace(' '#9'abc', 4));
  end;
  { Checks StripWhitespace: spaces, tabs and line feeds are expected to be
    removed everywhere in the string, not just at the ends. }
  procedure TTestMarkdownUtils.TestStripWhitespace;
  const
    Stripped = 'abc';
  begin
    AssertEquals('Strip from " a b c "', Stripped,
      StripWhitespace(' a b c '));
    AssertEquals('Strip with tabs and newlines', Stripped,
      StripWhitespace(#9'a'#10'b c'));
    AssertEquals('Strip from "abc"', Stripped,
      StripWhitespace('abc'));

    // Nothing but whitespace collapses to the empty string
    AssertEquals('Strip only whitespace', '',
      StripWhitespace(' '#9#10));
  end;
  { Checks HtmlEscape: '<', '>', '"' and '&' are expected to become their
    named entities, while ordinary characters pass through unchanged. }
  procedure TTestMarkdownUtils.TestHtmlEscape;
  const
    RawMarkup = '<a href="url"> & b';
    EscapedMarkup = '&lt;a href=&quot;url&quot;&gt; &amp; b';
  begin
    // One special character at a time
    AssertEquals('Escape <', '&lt;', HtmlEscape('<'));
    AssertEquals('Escape >', '&gt;', HtmlEscape('>'));
    AssertEquals('Escape "', '&quot;', HtmlEscape('"'));
    AssertEquals('Escape &', '&amp;', HtmlEscape('&'));

    // A plain character is left alone
    AssertEquals('Escape a', 'a', HtmlEscape('a'));

    // All special characters combined in one string
    AssertEquals('Escape full string', EscapedMarkup, HtmlEscape(RawMarkup));
  end;
  { Checks UrlEscape: plain characters are expected to pass through, '&' to be
    HTML-escaped, and brackets, backticks and non-ASCII characters to be
    percent-encoded byte by byte. }
  procedure TTestMarkdownUtils.TestUrlEscape;
  begin
    // Unchanged
    AssertEquals('URL Escape "a"', 'a',
      UrlEscape('a'));

    // The ampersand becomes an HTML entity rather than a percent escape
    AssertEquals('URL Escape "&"', '&amp;',
      UrlEscape('&'));

    // ASCII characters expected to be percent-encoded
    AssertEquals('URL Escape "["', '%5B',
      UrlEscape('['));
    AssertEquals('URL Escape "`"', '%60',
      UrlEscape('`'));

    // Non-ASCII characters: one escape per UTF-8 byte (C3 A9 and E2 82 AC)
    AssertEquals('URL Escape "é"', '%C3%A9',
      UrlEscape('é'));
    AssertEquals('URL Escape "€"', '%E2%82%AC',
      UrlEscape('€'));

    // Mixed content; spaces are expected to stay as they are
    AssertEquals('URL Escape full string', 'a path with &amp; %E2%82%AC',
      UrlEscape('a path with & €'));
  end;
  { Checks ParseEntityString against the entity table built in SetUp: named
    entities present in the table are expected to resolve, unknown names to
    yield an empty string, and decimal numeric references to yield the
    corresponding character. }
  procedure TTestMarkdownUtils.TestParseEntityString;
  begin
    // Named entities registered in FEntities
    AssertEquals('Parse &amp;', '&',
      ParseEntityString(FEntities, '&amp;'));
    AssertEquals('Parse &lt;', '<',
      ParseEntityString(FEntities, '&lt;'));

    // A name that is not in the table
    AssertEquals('Parse unknown', '',
      ParseEntityString(FEntities, '&unknown;'));

    // Decimal numeric character references
    AssertEquals('Parse numeric &#60;', '<',
      ParseEntityString(FEntities, '&#60;'));
    AssertEquals('Parse numeric &#8364;', '€',
      ParseEntityString(FEntities, '&#8364;'));

    // Code point 0 is expected to come back as U+FFFD once decoded from UTF-8
    AssertEquals('Parse invalid numeric &#0;', #$FFFD,
      UTF8Decode(ParseEntityString(FEntities, '&#0;')));
  end;
  { Checks CheckForTrailingEntity: every input below ends in '&amp;' and the
    expected result is 5 in each case, which matches the length of '&amp;'.

    NOTE(review): two of the assertion messages say "without ;" although the
    inputs do include the semicolon - confirm which was intended. }
  procedure TTestMarkdownUtils.TestCheckForEntity;
  const
    EntityLen = 5;
  begin
    // Entity alone, and directly after other text
    AssertEquals('Check for &amp without ;', EntityLen,
      CheckForTrailingEntity('&amp;'));
    AssertEquals('Check for test&amp without ;', EntityLen,
      CheckForTrailingEntity('test&amp;'));

    // Same input as the first case
    AssertEquals('Check for &amp with ;', EntityLen,
      CheckForTrailingEntity('&amp;'));

    // Entity separated from the preceding text by a space
    AssertEquals('Check with space', EntityLen,
      CheckForTrailingEntity('test &amp;'));
  end;
  { Checks IsRegexMatch: an unanchored pattern is expected to match anywhere
    in the content, a '^' pattern only at the start, and empty content is
    expected never to match. }
  procedure TTestMarkdownUtils.TestIsRegexMatch;
  const
    Subject = 'content';
  begin
    // Unanchored pattern
    AssertTrue('Substring match', IsRegexMatch(Subject, 'ont'));

    // Anchored at the start of the content
    AssertTrue('Start anchor match', IsRegexMatch(Subject, '^con'));
    AssertFalse('Start anchor fail', IsRegexMatch(Subject, '^ont'));

    // Empty content
    AssertFalse('Empty content never matches', IsRegexMatch('', 'a'));
  end;
  { Checks IsUnicodePunctuation: '.' and '!' are expected to be punctuation,
    a letter and a digit are not. }
  procedure TTestMarkdownUtils.TestIsUnicodePunctuation;
  begin
    // Punctuation
    AssertTrue('. should be punctuation',
      IsUnicodePunctuation('.'));
    AssertTrue('! should be punctuation',
      IsUnicodePunctuation('!'));

    // Not punctuation
    AssertFalse('"a" should not be punctuation',
      IsUnicodePunctuation('a'));
    AssertFalse('"7" should not be punctuation',
      IsUnicodePunctuation('7'));
  end;
  { Checks CountStartChars: the result is expected to be the number of times
    the given character repeats at the very start of the string. }
  procedure TTestMarkdownUtils.TestCountStartChars;
  const
    Dash = '-';
  begin
    AssertEquals('Count 3 chars', 3,
      CountStartChars('---abc', Dash));
    // Dashes that are not at the start do not count
    AssertEquals('Count 0 chars', 0,
      CountStartChars('abc---', Dash));
    AssertEquals('Count 4 chars', 4,
      CountStartChars('----', Dash));
    AssertEquals('Count in empty string', 0,
      CountStartChars('', Dash));
  end;
  { Checks ToUnicodeChars: 'a€b' is expected to yield three elements, one per
    character, and the empty string an empty array. }
  procedure TTestMarkdownUtils.TestToUnicodeChars;
  var
    Converted: TUnicodeCharDynArray;
  begin
    Converted := ToUnicodeChars('a€b');
    AssertEquals('Array length for "a€b"', 3, Length(Converted));

    // Guard the element checks so they never index past the end of the array.
    if Length(Converted) = 3 then
    begin
      AssertEquals('First char should be a', 'a', Converted[0]);
      AssertEquals('Second char should be €','€', Converted[1]);
      AssertEquals('Third char should be b', 'b', Converted[2]);
    end;

    AssertEquals('Array length for empty string', 0, Length(ToUnicodeChars('')));
  end;
  { Checks TransformTabs: leading tabs are expected to be replaced by spaces,
    while input without tabs and tabs that follow other text are returned
    unchanged.

    The expected values for the two leading-tab cases are four spaces. They
    assume 4-column tab stops, matching the LeadingWhitespace expectations in
    this unit (tab = 4, space + tab = 4) - TODO confirm against TransformTabs.
    Previously both expected a single space. }
  procedure TTestMarkdownUtils.TestTransformTabs;
  begin
    // Leading tabs are expanded up to the next tab stop
    AssertEquals('Transform leading tab', '    abc', TransformTabs(#9'abc'));
    AssertEquals('Transform leading space and tab', '    abc', TransformTabs(' '#9'abc'));

    // Inputs expected to come back untouched
    AssertEquals('No change for no tabs', 'abc', TransformTabs('abc'));
    AssertEquals('No change for tab in middle', 'abc'#9'def', TransformTabs('abc'#9'def'));
  end;
  initialization
    // Hand the test case class to RegisterTest when the unit is initialized.
    RegisterTest(TTestMarkdownUtils);

  end.