tests.pp 25 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243
  1. { %norun }
  2. unit tests;
  3. {$IFDEF FPC}
  4. {$mode objfpc}{$H+}
  5. {$ENDIF}
  6. { $DEFINE DUMPTESTS} //define this to dump results to console
  7. {$IFDEF VER130} {$DEFINE D5} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D5
  8. {$IFDEF VER140} {$DEFINE D6} {$DEFINE D5} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D6
  9. {$IFDEF VER150} {$DEFINE D7} {$DEFINE D6} {$DEFINE D5} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D7
  10. {$IFDEF D5} {$DEFINE OverMeth} {$ENDIF}
  11. {$IFDEF FPC} {$DEFINE OverMeth} {$ENDIF}
  12. {$DEFINE Unicode}
  13. interface
  14. uses
  15. {$IFDEF FPC}
  16. fpcunit, testregistry,
  17. {$IFDEF VER3}
  18. fpwidestring, //required in FPC to use WideChar uppercase/lowercase
  19. {$ENDIF}
  20. {$ELSE}
  21. TestFramework,
  22. {$ENDIF}
  23. {$IFDEF UNICODE}
  24. uregexpr,
  25. {$ELSE}
  26. regexpr,
  27. {$ENDIF}
  28. Classes, SysUtils;
  29. type
  30. { TTestRegexpr }
  31. TTestRegexpr= class(TTestCase)
  32. private
  33. RE: TRegExpr;
  34. protected
  35. procedure RunRETest(aIndex: Integer);
  36. procedure CompileRE(const AExpression: RegExprString);
  37. procedure IsNotNull(AErrorMessage: string; AObjectToCheck: TObject);
  38. procedure IsTrue(AErrorMessage: string; AConditionToCheck: boolean);
  39. procedure IsFalse(AErrorMessage: string; AConditionToCheck: boolean);
  40. procedure AreEqual(AErrorMessage: string; s1, s2: string); overload;
  41. procedure AreEqual(AErrorMessage: string; i1, i2: integer); overload;
  42. procedure TestBadRegex(const AErrorMessage: string; const AExpression: RegExprString);
  43. published
  44. procedure TestEmpty;
  45. procedure TestNotFound;
  46. procedure TestBads;
  47. {$IFDEF OverMeth}
  48. procedure TestReplaceOverload;
  49. {$ENDIF}
  50. procedure RunTest1;
  51. procedure RunTest2;
  52. procedure RunTest3;
  53. procedure RunTest4;
  54. procedure RunTest5;
  55. procedure RunTest6;
  56. procedure RunTest7;
  57. procedure RunTest8;
  58. procedure RunTest9;
  59. procedure RunTest10;
  60. procedure RunTest11;
  61. procedure RunTest12;
  62. procedure RunTest13;
  63. procedure RunTest14;
  64. procedure RunTest15;
  65. procedure RunTest16;
  66. procedure RunTest17;
  67. procedure RunTest18;
  68. procedure RunTest19;
  69. procedure RunTest20;
  70. procedure RunTest21;
  71. procedure RunTest22;
  72. procedure RunTest23;
  73. procedure RunTest24;
  74. procedure RunTest25;
  75. procedure RunTest26;
  76. procedure RunTest27;
  77. procedure RunTest28;
  78. procedure RunTest29;
  79. procedure RunTest30;
  80. procedure RunTest31;
  81. procedure RunTest32;
  82. procedure RunTest33;
  83. procedure RunTest34;
  84. procedure RunTest35;
  85. procedure RunTest36;
  86. procedure RunTest37;
  87. procedure RunTest38;
  88. procedure RunTest39;
  89. procedure RunTest40;
  90. procedure RunTest41;
  91. procedure RunTest42;
  92. procedure RunTest43;
  93. procedure RunTest44;
  94. procedure RunTest45;
  95. procedure RunTest46;
  96. procedure RunTest47;
  97. procedure RunTest48;
  98. procedure RunTest49;
  99. procedure RunTest50;
  100. procedure TestGroups;
  101. {$IFDEF Unicode}
  102. {$IFDEF FastUniCodeData}
  103. procedure RunTest51unicode;
  104. procedure RunTest52unicode;
  105. {$ENDIF}
  106. procedure RunTest70russian;
  107. {$ENDIF}
  108. procedure RunTest53;
  109. procedure RunTest54;
  110. procedure RunTest55;
  111. procedure RunTest56;
  112. procedure RunTest57;
  113. procedure RunTest58;
  114. procedure RunTest59;
  115. procedure RunTest60;
  116. procedure RunTest61;
  117. procedure RunTest62;
  118. procedure RunTest63;
  119. procedure RunTest64;
  120. procedure RunTest65;
  121. procedure RunTest66;
  122. procedure RunTest67;
  123. procedure RunTest68;
  124. procedure RunTest69;
  125. end;
  126. implementation
  127. Type
  128. TRegExTest = record
  129. Expression: RegExprString;
  130. InputText: RegExprString;
  131. SubstitutionText: RegExprString;
  132. ExpectedResult: RegExprString;
  133. MatchStart: integer;
  134. end;
  135. function PrintableString(const S: RegExprString): string;
  136. var
  137. buf: string;
  138. ch: char;
  139. i: integer;
  140. begin
  141. Result := '';
  142. buf := UTF8Encode(S);
  143. for i := 1 to Length(buf) do
  144. begin
  145. ch := buf[i];
  146. if Ord(ch) < 31 then
  147. Result := Result + '#' + IntToStr(Ord(ch))
  148. else
  149. Result := Result + ch;
  150. end;
  151. end;
  152. const
  153. testCases: array [1 .. 69] of TRegExTest = (
  154. // 1
  155. (
  156. expression: '\nd';
  157. inputText: 'abc'#13#10'def';
  158. substitutionText: '\n\x{10}\r\\';
  159. expectedResult: 'abc'#13#10#16#13'\ef';
  160. MatchStart: 0
  161. ),
  162. // 2
  163. (
  164. expression: '(\w*)';
  165. inputText: 'name.ext';
  166. substitutionText: '$1.new';
  167. expectedResult: 'name.new.new.ext.new.new';
  168. MatchStart: 0
  169. ),
  170. // 3
  171. (
  172. expression: #$d'('#$a')';
  173. inputText: 'word'#$d#$a;
  174. substitutionText: '${1}';
  175. expectedResult: 'word'#$a;
  176. MatchStart: 0
  177. ),
  178. // 4
  179. (
  180. expression: '(word)';
  181. inputText: 'word';
  182. substitutionText: '\U$1\\r';
  183. expectedResult: 'WORD\r';
  184. MatchStart: 0
  185. ),
  186. // 5
  187. (
  188. expression: '(word)';
  189. inputText: 'word';
  190. substitutionText: '$1\n';
  191. expectedResult: 'word'#$a;
  192. MatchStart: 0
  193. ),
  194. // 6
  195. (
  196. expression: '[A-Z]';
  197. inputText: '234578923457823659GHJK38';
  198. substitutionText: '';
  199. expectedResult: 'G';
  200. matchStart: 19;
  201. ),
  202. // 7
  203. (
  204. expression: '[A-Z]*?';
  205. inputText: '234578923457823659ARTZU38';
  206. substitutionText: '';
  207. expectedResult: '';
  208. matchStart: 1
  209. ),
  210. // 8
  211. (
  212. expression: '[A-Z]+';
  213. inputText: '234578923457823659ARTZU38';
  214. substitutionText: '';
  215. expectedResult: 'ARTZU';
  216. matchStart: 19
  217. ),
  218. // 9
  219. (
  220. expression: '[A-Z][A-Z]*';
  221. inputText: '234578923457823659ARTZU38';
  222. substitutionText: '';
  223. expectedResult: 'ARTZU';
  224. matchStart: 19
  225. ),
  226. // 10
  227. (
  228. expression: '[A-Z][A-Z]?';
  229. inputText: '234578923457823659ARTZU38';
  230. substitutionText: '';
  231. expectedResult: 'AR';
  232. matchStart: 19
  233. ),
  234. // 11
  235. (
  236. expression: '[^\d]+';
  237. inputText: '234578923457823659ARTZU38';
  238. substitutionText: '';
  239. expectedResult: 'ARTZU';
  240. matchStart: 19
  241. ),
  242. // 12
  243. (
  244. expression: '[A-Z][A-Z]?[A-Z]';
  245. inputText: '234578923457823659ARTZU38';
  246. substitutionText: '';
  247. expectedResult: 'ART';
  248. matchStart: 19
  249. ),
  250. // 13
  251. (
  252. expression: '[A-Z][A-Z]*[0-9]';
  253. inputText: '234578923457823659ARTZU38';
  254. substitutionText: '';
  255. expectedResult: 'ARTZU3';
  256. matchStart: 19
  257. ),
  258. // 14
  259. (
  260. expression: '[A-Z]+[0-9]';
  261. inputText: '234578923457823659ARTZU38';
  262. substitutionText: '';
  263. expectedResult: 'ARTZU3';
  264. matchStart: 19
  265. ),
  266. // 15
  267. (
  268. expression: '(?i)[A-Z]';
  269. inputText: '234578923457823659a38';
  270. substitutionText: '';
  271. expectedResult: 'a';
  272. matchStart: 19
  273. ),
  274. // 16
  275. (
  276. expression: '(?i)[a-z]';
  277. inputText: '234578923457823659A38';
  278. substitutionText: '';
  279. expectedResult: 'A';
  280. matchStart: 19
  281. ),
  282. // 17
  283. (
  284. expression: '(foo)1234';
  285. inputText: '1234 foo1234XXXX';
  286. substitutionText: '';
  287. expectedResult: 'foo1234';
  288. matchStart: 8
  289. ),
  290. // 18
  291. (
  292. expression: '(((foo)))1234';
  293. inputText: '1234 foo1234XXXX';
  294. substitutionText: '';
  295. expectedResult: 'foo1234';
  296. matchStart: 8
  297. ),
  298. // 19
  299. (
  300. expression: '(foo)(1234)';
  301. inputText: '1234 foo1234XXXX';
  302. substitutionText: '';
  303. expectedResult: 'foo1234';
  304. matchStart: 8
  305. ),
  306. // 20
  307. (
  308. expression: 'nofoo|foo';
  309. inputText: '1234 foo1234XXXX';
  310. substitutionText: '';
  311. expectedResult: 'foo';
  312. matchStart: 8
  313. ),
  314. // 21
  315. (
  316. expression: '(nofoo|foo)1234';
  317. inputText: '1234 nofoo1234XXXX';
  318. substitutionText: '';
  319. expectedResult: 'nofoo1234';
  320. matchStart: 8
  321. ),
  322. // 22
  323. (
  324. expression: '(nofoo|foo|anotherfoo)1234';
  325. inputText: '1234 nofoo1234XXXX';
  326. substitutionText: '';
  327. expectedResult: 'nofoo1234';
  328. matchStart: 8
  329. ),
  330. // 23
  331. (
  332. expression: 'nofoo1234|foo1234';
  333. inputText: '1234 foo1234XXXX';
  334. substitutionText: '';
  335. expectedResult: 'foo1234';
  336. matchStart: 8
  337. ),
  338. // 24
  339. (
  340. expression: '(\w*)';
  341. inputText: 'name.ext';
  342. substitutionText: '';
  343. expectedResult: 'name';
  344. matchStart: 1
  345. ),
  346. // 25
  347. (
  348. expression: '\r(\n)';
  349. inputText: #$d#$a;
  350. substitutionText: '';
  351. expectedResult: #$d#$a;
  352. matchStart: 1
  353. ),
  354. // 26
  355. (
  356. expression: '\r(\n)';
  357. inputText: #$d#$a;
  358. substitutionText: '\n';
  359. expectedResult: #$a;
  360. matchStart: 1
  361. ),
  362. // 27
  363. (
  364. expression: '(?m)Test:\s*(.*?)\s;';
  365. inputText: 'Test: hel'#$d#$a'lo ;';
  366. substitutionText: '';
  367. expectedResult: 'Test: hel'#$d#$a'lo ;';
  368. matchStart: 1
  369. ),
  370. // 28
  371. (
  372. expression: '(?:\w+)=\w+;(\w+)=\w+;(?:\w+)=\w+;(\w+)=\w+;';
  373. inputText: 'skip1=11;needed1=22;skip2=33;needed2=44;';
  374. substitutionText: '$1 $2';
  375. expectedResult: 'needed1 needed2';
  376. matchStart: 0
  377. ),
  378. // 29
  379. (
  380. expression: '.*?\b(https?|ftp)\b://(?:\w+)\.(?:\w+)\.(\w\B\w\B\w)';
  381. inputText: '>>ftp://www.name.com';
  382. substitutionText: '$1 $2';
  383. expectedResult: 'ftp com';
  384. matchStart: 0
  385. ),
  386. // 30
  387. (
  388. expression: '\v';
  389. inputText: 'aaa'#10'bbb'#13'ccc'#$c'ddd'#$b'eee';
  390. substitutionText: '-';
  391. expectedResult: 'aaa-bbb-ccc-ddd-eee';
  392. matchStart: 0
  393. ),
  394. // 31
  395. (
  396. expression: '\h+';
  397. inputText: #9'aaa bbb '#9' ccc '#$A0#9;
  398. substitutionText: '-';
  399. expectedResult: '-aaa-bbb-ccc-';
  400. matchStart: 0
  401. ),
  402. // 32
  403. (
  404. expression: '\w+';
  405. inputText: 'abc XY 12.,';
  406. substitutionText: '\L$0';
  407. expectedResult: 'abc xy 12.,';
  408. matchStart: 0
  409. ),
  410. // 33
  411. (
  412. expression: '\w+';
  413. inputText: 'abc XY 12.,';
  414. substitutionText: '\U$0';
  415. expectedResult: 'ABC XY 12.,';
  416. matchStart: 0
  417. ),
  418. // 34
  419. ( // NULL chars in InputString
  420. expression: #0+'?[2-5]+(\s+)([xyz\$\#]{3,})\1'+#0+'+.+';
  421. inputText: '.:'+#0+'ab'+#0+'_34 z$x '+#0+'end';
  422. substitutionText: '';
  423. expectedResult: '34 z$x '+#0+'end';
  424. matchStart: 8
  425. ),
  426. // 35
  427. (
  428. expression: '\w\cA\cz\cb\w';
  429. inputText: '..abc'#1#26#2'test';
  430. substitutionText: '';
  431. expectedResult: 'c'#1#26#2't';
  432. matchStart: 5
  433. ),
  434. // 36
  435. (
  436. expression: '\V+';
  437. inputText: '.,,'#10'aB2'#13'cc()'#$c'$%'#$b'[]';
  438. substitutionText: '-';
  439. expectedResult: '-'#10'-'#13'-'#$c'-'#$b'-';
  440. matchStart: 0
  441. ),
  442. // 37
  443. (
  444. expression: '\H+';
  445. inputText: #9'.,; aB2 '#9' ^&() '#$A0#9;
  446. substitutionText: '-';
  447. expectedResult: #9'- - '#9' - '#$A0#9;
  448. matchStart: 0
  449. ),
  450. // 38
  451. ( // brackets just after [
  452. expression: '[[\w]+ []\w]+';
  453. inputText: ' ww[ww w]www';
  454. substitutionText: '';
  455. expectedResult: 'ww[ww w]www';
  456. matchStart: 3
  457. ),
  458. // 39
  459. ( // NULL in expression, negative \W \S \D in []
  460. expression: '([\x00\d]+ )+ [\W]+ [\S\x00-\x10]+ [\D]+';
  461. inputText: ' 22'#0'33 '#0'33 .& w#'#5#0' w#';
  462. substitutionText: '';
  463. expectedResult: '22'#0'33 '#0'33 .& w#'#5#0' w#';
  464. matchStart: 3
  465. ),
  466. // 40
  467. ( // find 1+ simple chars
  468. expression: 'd+';
  469. inputText: ' ddddee ';
  470. substitutionText: '';
  471. expectedResult: 'dddd';
  472. matchStart: 3
  473. ),
  474. // 41
  475. ( // find {N,M} spaces
  476. expression: ' {4,}';
  477. inputText: 'dd dd';
  478. substitutionText: '';
  479. expectedResult: ' ';
  480. matchStart: 3
  481. ),
  482. // 42
  483. ( // valid regex set [.-]
  484. expression: '\w+([.-])\d+([.-])\w+([.-])\w+';
  485. inputText: 'Pictures-2018-Spain.Madrid';
  486. substitutionText: '$1 $2 $3';
  487. expectedResult: '- - .';
  488. matchStart: 0
  489. ),
  490. // 43
  491. ( // valid regex set combinaton if escaping
  492. expression: '\w+([.\-])\d+([\.-])\w+([\.\-])\w+';
  493. inputText: 'Pictures-2018.Spain-Madrid';
  494. substitutionText: '$1 $2 $3';
  495. expectedResult: '- . -';
  496. matchStart: 0
  497. ),
  498. // 44
  499. ( // valid regex set
  500. expression: '.*?([.-]Test[.-])';
  501. inputText: 'This.Is.A_Test_1234.Test.abc';
  502. substitutionText: '$1';
  503. expectedResult: '.Test.abc';
  504. matchStart: 0
  505. ),
  506. // 45
  507. ( // comments and modifier-strings
  508. expression: '(?#zzz)(?i)aA(?#zz).*(?-i)aA(?#zzz)';
  509. inputText: '_a_aaaAAAaaaAAAaaa__';
  510. substitutionText: '';
  511. expectedResult: 'aaaAAAaaaA';
  512. matchStart: 4
  513. ),
  514. // 46
  515. ( // named groups
  516. expression: '(?P<quote>[''"])\w+(?P=quote).*(?:\w+).*(?P<q>")\w+(?P=q)';
  517. inputText: 'aa "bb? "ok" a ''b "ok" eeee';
  518. substitutionText: '';
  519. expectedResult: '"ok" a ''b "ok"';
  520. matchStart: 9
  521. ),
  522. // 47
  523. ( // lookbehind. it also has group refs \1 \2.
  524. expression: '(?<=foo)(=)(\w)\w+\2\1';
  525. inputText: '..=tat=..=tat=..foo=tabt=..';
  526. substitutionText: '';
  527. expectedResult: '=tabt=';
  528. matchStart: 20
  529. ),
  530. // 48
  531. ( // lookahead
  532. expression: '(=)\w+\1(?=bar)';
  533. inputText: '..=taat=..=tddt=bar..';
  534. substitutionText: '';
  535. expectedResult: '=tddt=';
  536. matchStart: 11
  537. ),
  538. // 49
  539. ( // lookahead+lookbehind
  540. expression: '(?<=[a-z]+)(\d+)[a-z]+\1(?=[a-z]+)';
  541. inputText: '..2tt2..foo23test23bar..';
  542. substitutionText: '';
  543. expectedResult: '23test23';
  544. matchStart: 12
  545. ),
  546. // 50
  547. ( // replace with named groups
  548. expression: '\s+(?P<aa>[f-h]+)\s+(?P<bb>[o-r]+)\s+';
  549. inputText: '< fg oppo >';
  550. substitutionText: '{${bb},${aa}}';
  551. expectedResult: '<{oppo,fg}>';
  552. matchStart: 1
  553. ),
  554. // 51, unicode!
  555. (
  556. expression: '\pL \p{Lu}{3,} \PL+ \P{Lu}+';
  557. inputText: ',,wew ABDEF 345 weUPend';
  558. substitutionText: '';
  559. expectedResult: 'w ABDEF 345 we';
  560. matchStart: 5
  561. ),
  562. // 52, unicode!
  563. (
  564. expression: '[\p{Ll}\p{N}%]{5,} [\P{L}]+';
  565. inputText: ',,NOPE%400 @_ ok%200 @_end';
  566. substitutionText: '';
  567. expectedResult: 'ok%200 @_';
  568. matchStart: 15
  569. ),
  570. // 53, lookahead aa(?!bb)
  571. (
  572. expression: 'a+(?!\w)';
  573. inputText: 'aabaaddaaazaaa=aau';
  574. substitutionText: '';
  575. expectedResult: 'aaa';
  576. matchStart: 12
  577. ),
  578. // 54, lookahead aa(?!bb)
  579. (
  580. expression: '(?:\s+)\w{2,}\.(?!com|org|net)';
  581. inputText: ' www.com www.org www.ok www.net';
  582. substitutionText: '';
  583. expectedResult: ' www.';
  584. matchStart: 19
  585. ),
  586. // 55, atomic groups
  587. (
  588. expression: 'a(?>bc|b)c';
  589. inputText: ' abc abcc abc abcc ';
  590. substitutionText: '_';
  591. expectedResult: ' abc _ abc _ ';
  592. matchStart: 1
  593. ),
  594. // 56, a++
  595. (
  596. expression: '\d++e\d++';
  597. inputText: ' 20ed2 100e20 2e34 ';
  598. substitutionText: '_';
  599. expectedResult: ' 20ed2 _ _ ';
  600. matchStart: 1
  601. ),
  602. // 57, a*+, must fail
  603. (
  604. expression: '".*+"';
  605. inputText: 'dd "abc" ee';
  606. substitutionText: '';
  607. expectedResult: '';
  608. matchStart: -1
  609. ),
  610. // 58, recursion
  611. (
  612. expression: 'a(?R)?b';
  613. inputText: '__aaaabbbbbbbb__';
  614. substitutionText: '';
  615. expectedResult: 'aaaabbbb';
  616. matchStart: 3
  617. ),
  618. // 59, recursion, generic regex 1 - https://regular-expressions.mobi/recurse.html?wlr=1
  619. (
  620. expression: 'b(?:m|(?R))*e';
  621. inputText: '_bbfee_bbbmeee__';
  622. substitutionText: '';
  623. expectedResult: 'bbbmeee';
  624. matchStart: 8
  625. ),
  626. // 60, recursion, generic regex 2 - https://regular-expressions.mobi/recurse.html?wlr=1
  627. (
  628. expression: 'b(?R)*e|m';
  629. inputText: '__bbbmeee__bme__m__';
  630. substitutionText: '@';
  631. expectedResult: '__@__@__@__';
  632. matchStart: 1
  633. ),
  634. // 61, recursion, balanced set of parentheses - https://regular-expressions.mobi/recurse.html?wlr=1
  635. (
  636. expression: '\((?>[^()]|(?0))*\)';
  637. inputText: '__(((dd)dd))__(dd)__(((dd)f)f)__';
  638. substitutionText: '@';
  639. expectedResult: '__@__@__@__';
  640. matchStart: 1
  641. ),
  642. // 62, subroutine call (?3) + non-capturing groups + atomic group
  643. (
  644. expression: '(rr)(qq)(?:t)(?:t)(\[(?>m|(?3))*\])';
  645. inputText: '__rrqqtt[[[mmm]mm]m]m]m]m]m]__';
  646. substitutionText: '';
  647. expectedResult: 'rrqqtt[[[mmm]mm]m]';
  648. matchStart: 3
  649. ),
  650. // 63, subroutine call (?P>name)
  651. (
  652. expression: '(?P<name>[abc])(?1)(?P>name)';
  653. inputText: '__bcabcadef__';
  654. substitutionText: '';
  655. expectedResult: 'bca';
  656. matchStart: 3
  657. ),
  658. // 64
  659. ( // named groups with Perl syntax
  660. expression: '(?''quote''[''"])\w+(?&quote).*(?:\w+).*(?''q''")\w+(?&q)';
  661. inputText: 'aa "bb? "ok" a ''b "ok" eeee';
  662. substitutionText: '';
  663. expectedResult: '"ok" a ''b "ok"';
  664. matchStart: 9
  665. ),
  666. // 65
  667. ( // \A and \z
  668. expression: '(?s)\A.+\z';
  669. inputText: 'some'#10'text'#10;
  670. substitutionText: '-';
  671. expectedResult: '-';
  672. matchStart: 1
  673. ),
  674. // 66
  675. ( // \A and \Z
  676. expression: '(?s)\A.+\w\Z';
  677. inputText: 'some'#13#10'text'#13#10;
  678. substitutionText: '-';
  679. expectedResult: '-'#13#10;
  680. matchStart: 1
  681. ),
  682. // 67
  683. ( // (?<!foo)bar
  684. expression: '(?<!foo)bar';
  685. inputText: 'foobar foobar zzbar';
  686. substitutionText: '';
  687. expectedResult: 'bar';
  688. matchStart: 17
  689. ),
  690. // 68
  691. ( // (?<!foo)bar
  692. expression: '(?<![a-o]\d)bar';
  693. inputText: 'a2bar o3bar __bar';
  694. substitutionText: '';
  695. expectedResult: 'bar';
  696. matchStart: 15
  697. ),
  698. // 69
  699. ( // empty str
  700. expression: '^ *$';
  701. inputText: '';
  702. substitutionText: '';
  703. expectedResult: '';
  704. matchStart: 1
  705. )
  706. );
  707. procedure TTestRegexpr.IsFalse(AErrorMessage: string; AConditionToCheck: boolean);
  708. begin
  709. IsTrue(AErrorMessage, not AConditionToCheck)
  710. end;
  711. procedure TTestRegexpr.IsTrue(AErrorMessage: string; AConditionToCheck: boolean);
  712. begin
  713. {$IFDEF FPC}
  714. AssertTrue(AErrorMessage, AConditionToCheck);
  715. {$ELSE}
  716. CheckTrue(AConditionToCheck, AErrorMessage)
  717. {$ENDIF}
  718. end;
  719. procedure TTestRegexpr.IsNotNull(AErrorMessage: string; AObjectToCheck: TObject
  720. );
  721. begin
  722. {$IFDEF FPC}
  723. AssertNotNull(AErrorMessage, AObjectToCheck);
  724. {$ELSE}
  725. CheckNotNull(AObjectToCheck, AErrorMessage)
  726. {$ENDIF}
  727. end;
  728. procedure TTestRegexpr.AreEqual(AErrorMessage: string; s1, s2: string);
  729. begin
  730. {$IFDEF FPC}
  731. AssertEquals(AErrorMessage, s1,s2);
  732. {$ELSE}
  733. CheckEquals(s1,s2, AErrorMessage)
  734. {$ENDIF}
  735. end;
  736. procedure TTestRegexpr.AreEqual(AErrorMessage: string; i1, i2: integer);
  737. begin
  738. {$IFDEF FPC}
  739. AssertEquals(AErrorMessage, i1,i2);
  740. {$ELSE}
  741. CheckEquals(i1,i2, AErrorMessage)
  742. {$ENDIF}
  743. end;
  744. procedure TTestRegexpr.TestBadRegex(const AErrorMessage: string;
  745. const AExpression: RegExprString);
  746. var
  747. ok: boolean;
  748. begin
  749. try
  750. CompileRE(AExpression);
  751. ok := False;
  752. except
  753. ok := True;
  754. end;
  755. IsTrue(AErrorMessage, ok);
  756. end;
  757. procedure TTestRegexpr.TestEmpty;
  758. begin
  759. CompileRE('1'); // just to create RE object
  760. end;
  761. procedure TTestRegexpr.TestNotFound;
  762. begin
  763. CompileRE('w{2,}');
  764. RE.InputString:= 'tst';
  765. IsFalse('Exec must give False', RE.Exec(1));
  766. AreEqual('MatchPos[0] must fail', -1, RE.MatchPos[0]);
  767. AreEqual('MatchLen[0] must fail', -1, RE.MatchLen[0]);
  768. AreEqual('SubExprCount must be -1', -1, RE.SubExprMatchCount);
  769. end;
  770. {$IFDEF OverMeth}
  771. procedure TTestRegexpr.TestReplaceOverload;
  772. var
  773. act: string;
  774. begin
  775. CompileRE('A\r(\n)'); // just to print compiled re - it will be recompiled below
  776. act:=ReplaceRegExpr('A\r(\n)', 'a'#$d#$a, '\n', [rroModifierI, rroUseSubstitution]);
  777. AssertEquals('Replace failed', PrintableString(#$a), PrintableString(Act))
  778. end;
  779. {$ENDIF}
  780. procedure TTestRegexpr.TestBads;
  781. begin
  782. //TestBadRegex('No Error for bad braces', 'd{');
  783. //TestBadRegex('No Error for bad braces', 'd{22');
  784. //TestBadRegex('No Error for bad braces', 'd{}');
  785. end;
  786. procedure TTestRegexpr.RunTest1;
  787. begin
  788. RunRETest(1);
  789. end;
  790. procedure TTestRegexpr.RunTest2;
  791. begin
  792. RunRETest(2);
  793. end;
  794. procedure TTestRegexpr.RunTest3;
  795. begin
  796. RunRETest(3);
  797. end;
  798. procedure TTestRegexpr.RunTest4;
  799. begin
  800. RunRETest(4);
  801. end;
  802. procedure TTestRegexpr.RunTest5;
  803. begin
  804. RunRETest(5);
  805. end;
  806. procedure TTestRegexpr.RunTest6;
  807. begin
  808. RunRETest(6);
  809. end;
  810. procedure TTestRegexpr.RunTest7;
  811. begin
  812. RunRETest(7);
  813. end;
  814. procedure TTestRegexpr.RunTest8;
  815. begin
  816. RunRETest(8);
  817. end;
  818. procedure TTestRegexpr.RunTest9;
  819. begin
  820. RunRETest(9);
  821. end;
  822. procedure TTestRegexpr.RunTest10;
  823. begin
  824. RunRETest(10);
  825. end;
  826. procedure TTestRegexpr.RunTest11;
  827. begin
  828. RunRETest(11);
  829. end;
  830. procedure TTestRegexpr.RunTest12;
  831. begin
  832. RunRETest(12);
  833. end;
  834. procedure TTestRegexpr.RunTest13;
  835. begin
  836. RunRETest(13);
  837. end;
  838. procedure TTestRegexpr.RunTest14;
  839. begin
  840. RunRETest(14);
  841. end;
  842. procedure TTestRegexpr.RunTest15;
  843. begin
  844. RunRETest(15);
  845. end;
  846. procedure TTestRegexpr.RunTest16;
  847. begin
  848. RunRETest(16);
  849. end;
  850. procedure TTestRegexpr.RunTest17;
  851. begin
  852. RunRETest(17);
  853. end;
  854. procedure TTestRegexpr.RunTest18;
  855. begin
  856. RunRETest(18);
  857. end;
  858. procedure TTestRegexpr.RunTest19;
  859. begin
  860. RunRETest(19);
  861. end;
  862. procedure TTestRegexpr.RunTest20;
  863. begin
  864. RunRETest(20);
  865. end;
  866. procedure TTestRegexpr.RunTest21;
  867. begin
  868. RunRETest(21);
  869. end;
  870. procedure TTestRegexpr.RunTest22;
  871. begin
  872. RunRETest(22);
  873. end;
  874. procedure TTestRegexpr.RunTest23;
  875. begin
  876. RunRETest(23);
  877. end;
  878. procedure TTestRegexpr.RunTest24;
  879. begin
  880. RunRETest(24);
  881. end;
  882. procedure TTestRegexpr.RunTest25;
  883. begin
  884. RunRETest(25);
  885. end;
  886. procedure TTestRegexpr.RunTest26;
  887. begin
  888. RunRETest(26);
  889. end;
  890. procedure TTestRegexpr.RunTest27;
  891. begin
  892. RunRETest(27);
  893. end;
  894. procedure TTestRegexpr.RunTest28;
  895. begin
  896. RunRETest(28);
  897. end;
  898. procedure TTestRegexpr.RunTest29;
  899. begin
  900. RunRETest(29);
  901. end;
  902. procedure TTestRegexpr.RunTest30;
  903. begin
  904. RunRETest(30);
  905. end;
  906. procedure TTestRegexpr.RunTest31;
  907. begin
  908. RunRETest(31);
  909. end;
  910. procedure TTestRegexpr.RunTest32;
  911. begin
  912. RunRETest(32);
  913. end;
  914. procedure TTestRegexpr.RunTest33;
  915. begin
  916. RunRETest(33);
  917. end;
  918. procedure TTestRegexpr.RunTest34;
  919. begin
  920. RunRETest(34);
  921. end;
  922. procedure TTestRegexpr.RunTest35;
  923. begin
  924. RunRETest(35);
  925. end;
  926. procedure TTestRegexpr.RunTest36;
  927. begin
  928. RunRETest(36);
  929. end;
  930. procedure TTestRegexpr.RunTest37;
  931. begin
  932. RunRETest(37);
  933. end;
  934. procedure TTestRegexpr.RunTest38;
  935. begin
  936. RunRETest(38);
  937. end;
  938. procedure TTestRegexpr.RunTest39;
  939. begin
  940. RunRETest(39);
  941. end;
  942. procedure TTestRegexpr.RunTest40;
  943. begin
  944. RunRETest(40);
  945. end;
  946. procedure TTestRegexpr.RunTest41;
  947. begin
  948. RunRETest(41);
  949. end;
  950. procedure TTestRegexpr.RunTest42;
  951. begin
  952. RunRETest(42);
  953. end;
  954. procedure TTestRegexpr.RunTest43;
  955. begin
  956. RunRETest(43);
  957. end;
  958. procedure TTestRegexpr.RunTest44;
  959. begin
  960. RunRETest(44);
  961. end;
  962. procedure TTestRegexpr.RunTest45;
  963. begin
  964. RunRETest(45);
  965. end;
  966. procedure TTestRegexpr.RunTest46;
  967. begin
  968. RunRETest(46);
  969. end;
  970. procedure TTestRegexpr.RunTest47;
  971. begin
  972. RunRETest(47);
  973. end;
  974. procedure TTestRegexpr.RunTest48;
  975. begin
  976. RunRETest(48);
  977. end;
  978. procedure TTestRegexpr.RunTest49;
  979. begin
  980. RunRETest(49);
  981. end;
  982. procedure TTestRegexpr.RunTest50;
  983. begin
  984. RunRETest(50);
  985. end;
  986. {$IFDEF Unicode}
  987. {$IFDEF FastUnicodeData}
  988. procedure TTestRegexpr.RunTest51unicode;
  989. begin
  990. RunRETest(51);
  991. end;
  992. procedure TTestRegexpr.RunTest52unicode;
  993. begin
  994. RunRETest(52);
  995. end;
  996. {$ENDIF}
  997. procedure TTestRegexpr.RunTest70russian;
  998. //Alexey: if I add Russian test directly to array of tests,
  999. //I have problems with UTF8 coding then, which I cannot solve in this test
  1000. var
  1001. T: TRegExTest;
  1002. begin
  1003. T.Expression:= UTF8Decode('[а-я]+');
  1004. T.InputText:= UTF8Decode('12морошка');
  1005. T.ExpectedResult:= UTF8Decode('морошка');
  1006. T.MatchStart:= 3;
  1007. T.SubstitutionText:= '';
  1008. CompileRE(T.Expression);
  1009. RE.Exec(T.inputText);
  1010. AreEqual('Search position', T.MatchStart, RE.MatchPos[0]);
  1011. AreEqual('Matched text', PrintableString(T.ExpectedResult), PrintableString(RE.Match[0]));
  1012. end;
  1013. {$ENDIF}
  1014. procedure TTestRegexpr.RunTest53;
  1015. begin
  1016. RunRETest(53);
  1017. end;
  1018. procedure TTestRegexpr.RunTest54;
  1019. begin
  1020. RunRETest(54);
  1021. end;
  1022. procedure TTestRegexpr.RunTest55;
  1023. begin
  1024. RunRETest(55);
  1025. end;
  1026. procedure TTestRegexpr.RunTest56;
  1027. begin
  1028. RunRETest(56);
  1029. end;
  1030. procedure TTestRegexpr.RunTest57;
  1031. begin
  1032. RunRETest(57);
  1033. end;
  1034. procedure TTestRegexpr.RunTest58;
  1035. begin
  1036. RunRETest(58);
  1037. end;
  1038. procedure TTestRegexpr.RunTest59;
  1039. begin
  1040. RunRETest(59);
  1041. end;
  1042. procedure TTestRegexpr.RunTest60;
  1043. begin
  1044. RunRETest(60);
  1045. end;
  1046. procedure TTestRegexpr.RunTest61;
  1047. begin
  1048. RunRETest(61);
  1049. end;
  1050. procedure TTestRegexpr.RunTest62;
  1051. begin
  1052. RunRETest(62);
  1053. end;
  1054. procedure TTestRegexpr.RunTest63;
  1055. begin
  1056. RunRETest(63);
  1057. end;
  1058. procedure TTestRegexpr.RunTest64;
  1059. begin
  1060. RunRETest(64);
  1061. end;
  1062. procedure TTestRegexpr.RunTest65;
  1063. begin
  1064. RunRETest(65);
  1065. end;
  1066. procedure TTestRegexpr.RunTest66;
  1067. begin
  1068. RunRETest(66);
  1069. end;
  1070. procedure TTestRegexpr.RunTest67;
  1071. begin
  1072. RunRETest(67);
  1073. end;
  1074. procedure TTestRegexpr.RunTest68;
  1075. begin
  1076. RunRETest(68);
  1077. end;
  1078. procedure TTestRegexpr.RunTest69;
  1079. begin
  1080. RunRETest(69);
  1081. end;
  1082. procedure TTestRegexpr.TestGroups;
  1083. var
  1084. R: TRegExpr;
  1085. begin
  1086. R:= TRegExpr.Create;
  1087. try
  1088. R.Expression:= '(\w+) (?:\w+) (\w+) (?:\w+) (\d+)';
  1089. R.InputString:= 'abc wall dirt wert 234';
  1090. R.ExecPos(1);
  1091. AreEqual('Group finder failed', 1, R.MatchPos[0]);
  1092. AreEqual('Group counter failed', 3, R.SubExprMatchCount);
  1093. finally
  1094. FreeAndNil(R);
  1095. end;
  1096. end;
  1097. procedure TTestRegexpr.CompileRE(const AExpression: RegExprString);
  1098. begin
  1099. if (RE = Nil) then
  1100. begin
  1101. RE := TRegExpr.Create;
  1102. RE.ReplaceLineEnd := #10;
  1103. end;
  1104. RE.Expression := AExpression;
  1105. RE.Compile;
  1106. {$IFDEF DUMPTESTS}
  1107. writeln(' Modifiers "', RE.ModifierStr, '"');
  1108. writeln(' Regular expression: ', T.Expression,' ,');
  1109. writeln(' compiled into p-code: ');
  1110. writeln(' ', RE.Dump);
  1111. writeln(' Input text: "', PrintableString(T.inputText), '"');
  1112. if (T.substitutionText <> '') then
  1113. Writeln(' Substitution text: "', PrintableString(T.substitutionText), '"');
  1114. {$ENDIF}
  1115. end;
  1116. procedure TTestRegexpr.RunRETest(aIndex: Integer);
  1117. var
  1118. T: TRegExTest;
  1119. S: RegExprString;
  1120. begin
  1121. T:= testCases[aIndex];
  1122. {$IFDEF DUMPTESTS}
  1123. Writeln('Test: ',TestName);
  1124. {$ENDIF}
  1125. CompileRE(T.Expression);
  1126. if T.SubstitutionText<>'' then
  1127. begin
  1128. S:= RE.Replace(T.InputText, T.SubstitutionText, True);
  1129. AreEqual('Replace failed', PrintableString(T.ExpectedResult), PrintableString(S))
  1130. end
  1131. else
  1132. begin
  1133. RE.Exec(T.inputText);
  1134. AreEqual('Search position', T.MatchStart, RE.MatchPos[0]);
  1135. AreEqual('Matched text', PrintableString(T.ExpectedResult), PrintableString(RE.Match[0]));
  1136. end;
  1137. end;
  1138. initialization
  1139. {$IFDEF FPC}
  1140. RegisterTest(TTestRegexpr);
  1141. {$ENDIF}
  1142. end.