system.regularexpressionscore.pp 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481
  1. {
  2. This file is part of the Free Component Library (FCL)
  3. Copyright (c) 2023 The Free Pascal team
  4. Delphi-compatible Regular expressions unit.
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. {
  12. Note that the original Delphi unit (by Jan Goyvaerts) uses PCRE1,
  13. but this unit uses PCRE2. The string type depends on how the packages were
  14. compiled.
  15. }
  16. unit System.RegularExpressionsCore;
  17. {$MODE OBJFPC}
  18. {$H+}
  19. interface
  20. {.$DEFINE USEWIDESTRING} // uncomment if you want to force widestring...
  21. // We cannot detect the char size before the uses clause is parsed, it will return 1, the compiler default.
  22. // So we need a define here, maybe a compiler switch is needed to set the default size (-Sw ?) which would allow to set the default type.
  23. // The detection here is based on the assumption that the dotted units use widestring...
  24. {$IFDEF FPC_DOTTEDUNITS}
  25. {$DEFINE USEWIDESTRING}
  26. {$ENDIF}
  27. uses
  28. {$IFDEF FPC_DOTTEDUNITS}
  29. System.SysUtils, System.Classes, System.Contnrs, System.CTypes,
  30. {$IFNDEF CPUWASM}
  31. {$IFNDEF USEWIDESTRING}
  32. Api.PCRE2_8
  33. {$ELSE}
  34. Api.PCRE2_16
  35. {$ENDIF},
  36. {$ELSE}
  37. wasm.pcrebridge,
  38. {$ENDIF}
  39. System.RegularExpressionsConsts;
  40. {$ELSE}
  41. SysUtils, Classes, Contnrs,ctypes,
  42. {$IFNDEF CPUWASM}
  43. {$IFNDEF USEWIDESTRING}
  44. libpcre2_8
  45. {$ELSE}
  46. libpcre2_16
  47. {$ENDIF},
  48. {$ELSE}
  49. wasm.pcrebridge,
  50. {$ENDIF}
  51. System.RegularExpressionsConsts;
  52. {$ENDIF}
  53. const
  54. MAX_SUBEXPRESSIONS = 99;
  55. type
  56. {$IFDEF USEWIDESTRING}
  57. TREString = UnicodeString;
  58. {$ElSE}
  59. TREString = AnsiString;
  60. {$ENDIF}
  61. TREStringDynArray = Array of TREString;
  62. TPerlRegExOption = (preCaseLess,preMultiLine,preSingleLine,preExtended,preAnchored,preUnGreedy,preNoAutoCapture,
  63. preAllowEmptyClass, preAltBSUX, preAltCircumFlex, preAltVerbNames,
  64. preDollarEndOnly, preDupNames, preEndAnchored, preFirstLine, preLiteral, preMatchInvalidUTF,
  65. preMatchUnsetBackRef, preNeverBackslashC, preNoAutoPossess, preNoDotStarAnchor, preNoStartOptimize,
  66. preNoUTFCheck, preUseOffsetLimit);
  67. TPerlRegExOptions = set of TPerlRegExOption;
  68. TPerlRegExStateItem = (preNotBOL,preNotEOL,preNotEmpty);
  69. TPerlRegExState = set of TPerlRegExStateItem;
  70. TPerlRegExReplaceEvent = procedure(Sender: TObject; var ReplaceWith: TREString) of object;
  71. { TPerlRegEx }
  72. TPerlRegEx = class
  73. Private
  74. Type
  75. TTransformation = (tNone,tLowerCase,tUpperCase,tFirstCap,tInitialCap);
  76. TMatchResult = (mrFound,mrNotFound,mrAfterStop);
  77. class function TransForm(aTransform: TTransformation; const S: TREString): TREString;
  78. private
  79. {$IFDEF USEWIDESTRING}
  80. FCode : Ppcre2_code_16;
  81. {$ELSE}
  82. FCode : Ppcre2_code_8;
  83. {$ENDIF}
  84. FOnMatch: TNotifyEvent;
  85. FOnReplace: TPerlRegExReplaceEvent;
  86. FOptions: TPerlRegExOptions;
  87. FRegEx: TREString;
  88. FState: TPerlRegExState;
  89. FStart,
  90. FStop: Integer;
  91. FStudied: Boolean;
  92. FResultVector : Psize_t;
  93. FResultCount : Cardinal;
  94. FMatchData : ppcre2_match_data;
  95. FModifiedSubject,
  96. FSubject: TREString;
  97. FSubjectLength : cuint32;
  98. FNameCount : cuint32;
  99. FNameTable : PCRE2_SPTR;
  100. FNameEntrySize : cuint32;
  101. FLastModifiedEnd: SizeInt;
  102. FReplacement : TREString;
  103. FStoredGroups: array of TREString;
  104. FCrLFIsNewLine,
  105. FIsUtf : Boolean;
  106. Procedure CheckMatch; inline;
  107. function DoMatch(Opts: CUInt32): TMatchResult;
  108. function GetBackRefIndex(const Ref: TREString; var I: Integer): Integer;
  109. function GetCompiled: Boolean;
  110. function GetFoundMatch: Boolean; inline;
  111. function GetGroupCount: Integer;
  112. function GetGroupLengths(aIndex: Integer): Integer;
  113. function GetGroupOffsets(aIndex: Integer): Integer;
  114. function GetGroups(aIndex: Integer): TREString;
  115. function GetMatchedLength: Integer;
  116. function GetMatchedOffset: Integer;
  117. function GetMatchedText: TREString;
  118. function GetModifiedSubject: TREString;
  119. function GetNamedGroup(const aName : TREString): TREString;
  120. procedure GetNamedGroupInfo;
  121. function GetNames(aIndex : Integer): TREString;
  122. function GetPCREErrorMsg(ErrorNr: Integer): TREString;
  123. function GetResultString(aIndex: Integer): TREString;
  124. function GetStart: Integer;
  125. function GetStop: Integer;
  126. function GetSubject: TREString;
  127. function GetSubjectLeft: TREString;
  128. function GetSubjectRight: TREString;
  129. function MakeOptions(aOptions: TPerlRegExOptions): Integer;
  130. procedure SetOptions(aValue: TPerlRegExOptions);
  131. procedure SetRegEx(const aValue: TREString);
  132. procedure SetReplacement(const aValue: TREString);
  133. procedure SetStart(aValue: Integer);
  134. procedure SetStop(aValue: Integer);
  135. procedure SetSubject(const aValue: TREString);
  136. protected
  137. procedure FreeCodeData;
  138. procedure FreeMatchData;
  139. procedure CleanUp; virtual;
  140. procedure ClearStoredGroups;
  141. function FirstOffset : Cardinal;
  142. function FirstLength : Cardinal;
  143. public
  144. constructor Create;
  145. destructor Destroy; override;
  146. // Use this to escape special characters.
  147. class function EscapeRegExChars(const aString: TREString): TREString;
  148. // Compile the regex.
  149. procedure Compile;
  150. // Study regex (may result in faster execution);
  151. procedure Study;
  152. // Try to match, starting at beginning. Returns true if a match was found.
  153. function Match: Boolean;
  154. // Try to match again, starting previous match end. Returns true if a new match was found.
  155. function MatchAgain: Boolean;
  156. // Replace current match in Subject with ComputeReplacement. Returns computed replacement
  157. function Replace: TREString;
  158. // Replace all matches in Subject with their ComputeReplacement. Returns true if a match was found.
  159. function ReplaceAll: Boolean;
  160. // Compute replacement text.
  161. function ComputeReplacement: TREString;
  162. // Store groups for faster access.
  163. procedure StoreGroups;
  164. // Index in groups of name.
  165. function NamedGroup(const aName: TREString): Integer;
  166. // Split subject TREString based on regex. aStrings will contain everything outside the matches.
  167. procedure Split(const aStrings: TStrings; aLimit: Integer = 0);
  168. // Split subject TREString based on regex. Result will contain everything outside the matches.
  169. function Split(aLimit: Integer = 0) : TREStringDynArray;
  170. // Split subject TREString based on regex, but include matches in result.
  171. procedure SplitCapture(const aStrings: TStrings; aLimit: Integer); overload;
  172. // Split subject TREString based on regex, but include matches in result.
  173. // if aoffset is > 1 then everything till offset is put in the first TREString.
  174. procedure SplitCapture(const aStrings: TStrings; aLimit: Integer; aOffset : Integer); overload;
  175. // Same with result in array
  176. function SplitCapture(aLimit: Integer; aOffset : Integer) : TREStringDynArray; overload;
  177. // Was the regex compiled ?
  178. property Compiled: Boolean read GetCompiled;
  179. // Match found ?
  180. property FoundMatch: Boolean read GetFoundMatch;
  181. // Did study ?
  182. property Studied: Boolean read FStudied;
  183. // Fast access, group 0.
  184. property MatchedText: TREString read GetMatchedText;
  185. property MatchedLength: Integer read GetMatchedLength;
  186. property MatchedOffset: Integer read GetMatchedOffset;
  187. // Minimum search position, 1-based.
  188. property Start: Integer read GetStart write SetStart;
  189. // Maximum search position, 1-based.
  190. property Stop: Integer read GetStop write SetStop;
  191. property State: TPerlRegExState read FState write FState;
  192. // Group count.
  193. property GroupCount: Integer read GetGroupCount;
  194. // Group Texts. Index 0 - GroupCount. 0 is whole matched text. on original search text.
  195. property Groups[aIndex: Integer]: TREString read GetGroups;
  196. // Group lengths & Offsets. Index 0 - GroupCount. 0 is whole matched text, on original search text.
  197. property GroupLengths[aIndex: Integer]: Integer read GetGroupLengths;
  198. property GroupOffsets[aIndex: Integer]: Integer read GetGroupOffsets;
  199. // Named access to groups.
  200. property NamedGroups[aName : TREString] : TREString Read GetNamedGroup;
  201. // Names available in current match.
  202. Property NameCount : Cardinal Read FNameCount;
  203. Property Names[aIndex : Integer] : TREString Read GetNames;
  204. // Subject TREString. Will be modified by replace !
  205. property Subject: TREString read GetModifiedSubject write SetSubject;
  206. // Original subject TREString. Not modified by replace !
  207. property OriginalSubject: TREString read FSubject write SetSubject;
  208. // Left of original subject.
  209. property SubjectLeft: TREString read GetSubjectLeft;
  210. // Right of original subject.
  211. property SubjectRight: TREString read GetSubjectRight;
  212. public
  213. // Set options.
  214. property Options: TPerlRegExOptions read FOptions write SetOptions;
  215. // The regular expression
  216. property RegEx: TREString read FRegEx write SetRegEx;
  217. // The replacement expression.
  218. property Replacement: TREString read FReplacement write SetReplacement;
  219. // Called on every match.
  220. property OnMatch: TNotifyEvent read FOnMatch write FOnMatch;
  221. // Set this to modify the computed replacement text.
  222. property OnReplace: TPerlRegExReplaceEvent read FOnReplace write FOnReplace;
  223. end;
  224. TRegExStudyOption = (preJIT, preJITPartialHard, preJITPartialSoft);
  225. TRegExStudyOptions = set of TRegExStudyOption;
  226. { TPerlRegExList }
  227. TPerlRegExList = class
  228. private
  229. FMatch: TPerlRegEx;
  230. FList : TFPObjectList;
  231. FStart : Integer;
  232. FStop : Integer;
  233. FSubject : TREString;
  234. function GetCount: Integer;
  235. function GetOwnsRegex: Boolean;
  236. function GetRegEx(aIndex: Integer): TPerlRegEx;
  237. function GetStart: Integer;
  238. function GetStop: Integer;
  239. function GetSubject: TREString;
  240. procedure SetRegEx(aIndex: Integer; aValue: TPerlRegEx);
  241. procedure SetStart(aValue: Integer);
  242. procedure SetStop(aValue: Integer);
  243. procedure SetSubject(aValue: TREString);
  244. protected
  245. procedure UpdateRegEx(const aRegEx: TPerlRegEx);
  246. public
  247. constructor Create(OwnsRegex : Boolean);
  248. destructor Destroy; override;
  249. public
  250. function Add(const aRegEx: TPerlRegEx): Integer;
  251. procedure Clear;
  252. procedure Delete(aIndex: Integer);
  253. function IndexOf(const aRegEx: TPerlRegEx): Integer;
  254. procedure Insert(aIndex: Integer; const aRegEx: TPerlRegEx);
  255. public
  256. function Match: Boolean;
  257. function MatchAgain: Boolean;
  258. property RegEx[aIndex: Integer]: TPerlRegEx read GetRegEx write SetRegEx;
  259. property Count: Integer read GetCount;
  260. property Subject: TREString read GetSubject write SetSubject;
  261. property Start: Integer read GetStart write SetStart;
  262. property Stop: Integer read GetStop write SetStop;
  263. property MatchedRegEx: TPerlRegEx read FMatch;
  264. Property OwnsRegex : Boolean Read GetOwnsRegex;
  265. end;
  266. ERegularExpressionError = class(Exception);
  267. // Todo: move to strutils ?
  268. Function InitialCaps(const S : TREString) : TREString;
  269. implementation
  270. {$IFNDEF USEWIDESTRING}
  271. function GetStrLen(p : PAnsiChar; len : Integer) : AnsiString;
  272. var
  273. L : Integer;
  274. begin
  275. Result:='';
  276. L:=StrLen(P);
  277. if L>Len then
  278. L:=Len;
  279. SetLength(Result,L);
  280. if L>0 then
  281. Move(P^,Result[1],L);
  282. end;
  283. {$ELSE}
  284. function GetStrLen(p : PWideChar; len : Integer) : UnicodeString;
  285. var
  286. L : Integer;
  287. begin
  288. Result:='';
  289. L:=StrLen(P);
  290. if L>Len then
  291. L:=Len;
  292. SetLength(Result,L);
  293. if Len>0 then
  294. Move(P^,Result[1],L*2);
  295. end;
  296. {$ENDIF}
  297. Function InitialCaps(const S : TREString) : TREString;
  298. const
  299. NonWord = [#0..'&', '(', '*', '+', ',', '-', '.', '?', '<', '[', '{', #$B7];
  300. var
  301. L : TREString;
  302. Len,Last,I : Integer;
  303. Upper : Boolean;
  304. begin
  305. L:=LowerCase(S);
  306. Len:=Length(L);
  307. Last:=1;
  308. I:=1;
  309. Upper:=True;
  310. Result:='';
  311. While I<=Len do
  312. begin
  313. if L[i] in NonWord then
  314. Upper:=True
  315. else if Upper then
  316. begin
  317. if I>Last then
  318. Result:=Result+Copy(L,Last,I-Last);
  319. Result:=Result+UpperCase(L[i]);
  320. inc(I);
  321. Last:=I;
  322. Upper:=False;
  323. end;
  324. Inc(i);
  325. end;
  326. Result:=Result+Copy(L,Last,I-Last);
  327. end;
  328. { TPerlRegEx }
  329. function TPerlRegEx.GetFoundMatch: Boolean;
  330. begin
  331. Result:=FResultCount>0;
  332. end;
  333. function TPerlRegEx.GetCompiled: Boolean;
  334. begin
  335. Result:=Assigned(FCode);
  336. end;
  337. procedure TPerlRegEx.CheckMatch;
  338. begin
  339. if not FoundMatch then
  340. raise ERegularExpressionError.Create(SRegExMatchRequired);
  341. end;
  342. function TPerlRegEx.GetGroupCount: Integer;
  343. begin
  344. CheckMatch;
  345. Result:=FResultCount-1;
  346. end;
  347. function TPerlRegEx.GetGroupLengths(aIndex: Integer): Integer;
  348. begin
  349. CheckMatch;
  350. Result:=FResultVector[2*aIndex+1]-FResultVector[2*aIndex];
  351. end;
  352. function TPerlRegEx.GetGroupOffsets(aIndex: Integer): Integer;
  353. begin
  354. CheckMatch;
  355. Result:=FResultVector[2*aIndex]+1;
  356. end;
  357. function TPerlRegEx.GetResultString(aIndex: Integer): TREString;
  358. var
  359. astart,aLength : Ptrint;
  360. begin
  361. // Writeln('AIndex ',aIndex,' ',FResultCount);
  362. aStart:=FResultVector[2*aIndex];
  363. aLength:=FResultVector[2*aIndex+1]-aStart;
  364. inc(aStart); // 1-based
  365. Result:=Copy(FSubject,AStart,aLength);
  366. end;
  367. function TPerlRegEx.GetGroups(aIndex: Integer): TREString;
  368. begin
  369. CheckMatch;
  370. if Length(FStoredGroups)>0 then
  371. Result:=FStoredGroups[aIndex]
  372. else
  373. Result:=GetResultString(aIndex);
  374. end;
  375. function TPerlRegEx.GetMatchedLength: Integer;
  376. begin
  377. Result:=GetGroupLengths(0)
  378. end;
  379. function TPerlRegEx.GetMatchedOffset: Integer;
  380. begin
  381. Result:=GetGroupOffsets(0);
  382. end;
  383. function TPerlRegEx.GetMatchedText: TREString;
  384. begin
  385. Result:=GetResultString(0)
  386. end;
  387. function TPerlRegEx.GetModifiedSubject: TREString;
  388. begin
  389. Result:=FModifiedSubject;
  390. end;
  391. function TPerlRegEx.GetNamedGroup(const aName: TREString): TREString;
  392. var
  393. Idx : integer;
  394. begin
  395. Result:='';
  396. Idx:=NamedGroup(aName);
  397. if Idx<>-1 then
  398. Result:=Groups[Idx];
  399. end;
  400. function TPerlRegEx.GetStart: Integer;
  401. begin
  402. Result:=FStart+1;
  403. end;
  404. function TPerlRegEx.GetStop: Integer;
  405. begin
  406. Result:=FStop+1;
  407. end;
  408. function TPerlRegEx.GetSubject: TREString;
  409. begin
  410. Result:=FSubject;
  411. end;
  412. function TPerlRegEx.GetSubjectLeft: TREString;
  413. begin
  414. // Resultvector is 0 based
  415. Result:=Copy(FSubject,1,FResultVector[0]);
  416. end;
  417. function TPerlRegEx.GetSubjectRight: TREString;
  418. var
  419. SPos : Integer;
  420. begin
  421. SPos:=FResultVector[1]; // 0-based
  422. Result:=Copy(FSubject,SPos+1,FSubjectLength-SPos);
  423. end;
  424. procedure TPerlRegEx.SetOptions(aValue: TPerlRegExOptions);
  425. begin
  426. if FOptions=AValue then Exit;
  427. FOptions:=AValue;
  428. CleanUp; // Need to reset...
  429. end;
  430. procedure TPerlRegEx.SetRegEx(const aValue: TREString);
  431. begin
  432. if FRegEx=AValue then Exit;
  433. FRegEx:=aValue;
  434. end;
  435. procedure TPerlRegEx.SetReplacement(const aValue: TREString);
  436. begin
  437. FReplacement:=AValue;
  438. end;
  439. procedure TPerlRegEx.SetStart(aValue: Integer);
  440. begin
  441. FStart:=aValue-1;
  442. end;
  443. procedure TPerlRegEx.SetStop(aValue: Integer);
  444. begin
  445. if FStop=aValue-1 then Exit;
  446. FStop:=aValue-1;
  447. end;
  448. procedure TPerlRegEx.SetSubject(const aValue: TREString);
  449. begin
  450. FSubject:=aValue;
  451. FSubjectLength:=Length(FSubject);
  452. FModifiedSubject:=aValue;
  453. CleanUp;
  454. FStart:=0;
  455. FStop:=Length(FSubject);
  456. end;
  457. procedure TPerlRegEx.CleanUp;
  458. begin
  459. FreeMatchData;
  460. FreeCodeData;
  461. ClearStoredGroups;
  462. FResultCount:=0;
  463. FResultVector:=Nil;
  464. FLastModifiedEnd:=0;
  465. end;
  466. procedure TPerlRegEx.ClearStoredGroups;
  467. begin
  468. SetLength(FStoredGroups,0);
  469. end;
  470. function TPerlRegEx.FirstOffset: Cardinal;
  471. begin
  472. Result:=FResultVector[0];
  473. end;
  474. function TPerlRegEx.FirstLength: Cardinal;
  475. begin
  476. Result:=FResultVector[1]-FResultVector[0];
  477. end;
  478. constructor TPerlRegEx.Create;
  479. begin
  480. if not libpcre28loaded then
  481. Loadlibpcre28;
  482. end;
  483. destructor TPerlRegEx.Destroy;
  484. begin
  485. inherited Destroy;
  486. end;
  487. class function TPerlRegEx.EscapeRegExChars(const aString: TREString): TREString;
  488. Const
  489. NeedEscape = ['\','[',']','^','$','.','|','?','*','+','-','(',')','{','}','&','<','>'];
  490. var
  491. I : Integer;
  492. PSrc,PDest,PStart : PChar;
  493. begin
  494. Result:='';
  495. SetLength(Result,2*Length(aString));
  496. PSrc:=PChar(aString);
  497. PDest:=PChar(Result);
  498. PStart:=PDest;
  499. for I:=1 to Length(aString) do
  500. begin
  501. if PSrc^=#0 then
  502. begin
  503. PDest^:='\';
  504. Inc(PDest);
  505. PDest^:='0';
  506. end
  507. else if CharInSet(PSrc^,NeedEscape) then
  508. begin
  509. PDest^:='\';
  510. Inc(PDest);
  511. PDest^:=PSrc^;
  512. end
  513. else
  514. PDest^:=PSrc^;
  515. Inc(PSrc);
  516. Inc(PDest);
  517. end;
  518. SetLength(Result,(PDest-PStart));
  519. end;
  520. function TPerlRegEx.MakeOptions(aOptions: TPerlRegExOptions): Integer;
  521. Procedure AddOption(aOpt : TPerlRegExOption; aValue : cuint32);
  522. begin
  523. if aOpt in AOptions then
  524. Result:=Result or aValue;
  525. end;
  526. begin
  527. Result:=PCRE2_NEWLINE_ANY or PCRE2_UTF;
  528. AddOption(preCaseLess,PCRE2_CASELESS);
  529. AddOption(preMultiLine,PCRE2_MULTILINE);
  530. AddOption(preSingleLine,PCRE2_DOTALL);
  531. AddOption(preExtended,PCRE2_EXTENDED);
  532. AddOption(preAnchored,PCRE2_ANCHORED);
  533. AddOption(preUnGreedy,PCRE2_UNGREEDY);
  534. AddOption(preNoAutoCapture,PCRE2_NO_AUTO_CAPTURE);
  535. AddOption(preAllowEmptyClass,PCRE2_ALLOW_EMPTY_CLASS);
  536. AddOption(preAltBSUX,PCRE2_ALT_BSUX);
  537. AddOption(preAltCircumFlex,PCRE2_ALT_CIRCUMFLEX);
  538. AddOption(preAltVerbNames,PCRE2_ALT_VERBNAMES);
  539. AddOption(preDollarEndOnly,PCRE2_DOLLAR_ENDONLY);
  540. AddOption(preDupNames,PCRE2_DUPNAMES);
  541. AddOption(preEndAnchored,PCRE2_ENDANCHORED);
  542. AddOption(preFirstLine,PCRE2_FIRSTLINE);
  543. AddOption(preLiteral,PCRE2_LITERAL);
  544. AddOption(preMatchInvalidUTF,PCRE2_MATCH_INVALID_UTF);
  545. AddOption(preMatchUnsetBackRef,PCRE2_MATCH_UNSET_BACKREF);
  546. AddOption(preNeverBackslashC,PCRE2_NEVER_BACKSLASH_C);
  547. AddOption(preNoAutoPossess,PCRE2_NO_AUTO_POSSESS);
  548. AddOption(preNoDotStarAnchor,PCRE2_NO_DOTSTAR_ANCHOR);
  549. AddOption(preNoStartOptimize,PCRE2_NO_START_OPTIMIZE);
  550. // maybe we should enable by default ?
  551. AddOption(preNoUTFCheck,PCRE2_NO_UTF_CHECK);
  552. AddOption(preUseOffsetLimit,PCRE2_USE_OFFSET_LIMIT);
  553. // AddOption(preUTF,PCRE2_UTF);
  554. end;
  555. function TPerlRegEx.GetPCREErrorMsg(ErrorNr: Integer): TREString;
  556. var
  557. Buffer : Array[0..255] of ansichar;
  558. begin
  559. pcre2_get_error_message(ErrorNr,@Buffer,SizeOf(Buffer));
  560. Result:=strpas(@Buffer);
  561. end;
  562. procedure TPerlRegEx.Compile;
  563. var
  564. ErrorNr: Integer;
  565. ErrorPos: Integer;
  566. begin
  567. if (FRegEx='') then
  568. raise ERegularExpressionError.CreateRes(@SRegExMissingExpression);
  569. CleanUp;
  570. FCode:=pcre2_compile(TPCRE2_SPTR8(FRegEx),Length(FRegEx),MakeOptions(FOptions),@ErrorNr,@ErrorPos,Nil);
  571. if (FCode=nil) then
  572. raise ERegularExpressionError.CreateFmt(SRegExExpressionError,[ErrorPos+1,GetPCREErrorMsg(ErrorNr)]);
  573. FMatchData:=pcre2_match_data_create_from_pattern(FCode,Nil);
  574. end;
  575. procedure TPerlRegEx.Study;
  576. begin
  577. end;
  578. procedure TPerlRegEx.FreeMatchData;
  579. var
  580. Data : ppcre2_match_data;
  581. begin
  582. if FMatchData=Nil then exit;
  583. Data:=FMatchData;
  584. FMatchData:=Nil;
  585. pcre2_match_data_free(Data);
  586. FResultVector:=Nil;
  587. end;
  588. procedure TPerlRegEx.FreeCodeData;
  589. var
  590. {$IFDEF USEWIDESTRING}
  591. Data : Ppcre2_code_16;
  592. {$ELSE}
  593. Data : Ppcre2_code_8;
  594. {$ENDIF}
  595. begin
  596. if (FCode=Nil) then
  597. exit;
  598. Data:=FCode;
  599. FCode:=Nil;
  600. pcre2_code_free(Data);
  601. end;
  602. procedure TPerlRegEx.GetNamedGroupInfo;
  603. begin
  604. FNameEntrySize:=0;
  605. FNameTable:=Nil;
  606. pcre2_pattern_info(
  607. FCode, (* the compiled pattern *)
  608. PCRE2_INFO_NAMECOUNT, (* get the number of named substrings *)
  609. @FNameCount); (* where to put the answer *)
  610. if (FNameCount = 0) then
  611. Exit;
  612. pcre2_pattern_info(
  613. FCode, (* the compiled pattern *)
  614. PCRE2_INFO_NAMETABLE, (* address of the table *)
  615. @FNameTable); (* where to put the answer *)
  616. pcre2_pattern_info(
  617. FCODE, (* the compiled pattern *)
  618. PCRE2_INFO_NAMEENTRYSIZE, (* size of each entry in the table *)
  619. @FNameEntrySize);
  620. end;
  621. function TPerlRegEx.GetNames(aIndex : Integer): TREString;
  622. var
  623. Ptr : PCRE2_SPTR;
  624. I : Integer;
  625. begin
  626. Ptr:=FNameTable;
  627. if (aIndex<0) or (aIndex>FNameCount) then
  628. Raise ERegularExpressionError.CreateFmt(SErrInvalidNameIndex,[aIndex,FNameCount]);
  629. for i:=0 to aIndex-1 do
  630. Inc(Ptr,FNameEntrySize);
  631. {$IFDEF USEWIDESTRING}
  632. Result:=GetStrLen((Ptr+1),FNameEntrySize-2);
  633. {$ELSE}
  634. Result:=GetStrLen((Ptr+2),FNameEntrySize-3);
  635. {$ENDIF}
  636. end;
  637. function TPerlRegEx.Match: Boolean;
  638. var
  639. newline,option_bits : cuint32;
  640. begin
  641. Result:=False;
  642. ClearStoredGroups;
  643. if not Compiled then
  644. Compile;
  645. FMatchData:=pcre2_match_data_create_from_pattern(FCode,Nil);
  646. Result:=DoMatch(0)=mrFound;
  647. if Result then
  648. begin
  649. pcre2_pattern_info(FCode,PCRE2_INFO_ALLOPTIONS, @option_bits);
  650. FIsUtf:=((option_bits and PCRE2_UTF) <> 0);
  651. pcre2_pattern_info(FCode,PCRE2_INFO_NEWLINE,@newline);
  652. FCrLFIsNewLine:= (newline=PCRE2_NEWLINE_ANY) or
  653. (newline=PCRE2_NEWLINE_CRLF) or
  654. (newline=PCRE2_NEWLINE_ANYCRLF);
  655. end;
  656. end;
  657. function TPerlRegEx.DoMatch(Opts : CUInt32): TMatchResult;
  658. var
  659. len,rc : cInt;
  660. S : TREString;
  661. begin
  662. Result:=mrNotFound;
  663. {$IF SIZEOF(CHAR)=2}
  664. rc:=pcre2_match_w(
  665. {$ELSE}
  666. rc:=pcre2_match(
  667. {$ENDIF}
  668. FCode, (* the compiled pattern *)
  669. PChar(FSubject), (* the subject TREString *)
  670. FSubjectLength, (* the length of the subject *)
  671. FStart, (* start at offset 0 in the subject *)
  672. Opts, (* default options *)
  673. FMatchData, (* block for storing the result *)
  674. Nil);
  675. if (rc <= 0) then
  676. begin
  677. FreeMatchData;
  678. FreeCodeData;
  679. if (rc=PCRE2_ERROR_NOMATCH) then
  680. Exit(mrNotFound)
  681. else if (rc = 0) then
  682. raise ERegularExpressionError.CreateFmt(SRegExMatchError,[SErrRegexOvectorTooSmall])
  683. else
  684. raise ERegularExpressionError.CreateFmt(SRegExMatchError,[GetPCREErrorMsg(rc)]);
  685. end;
  686. Result:=mrFound;
  687. FResultCount:=rc;
  688. FResultVector:=pcre2_get_ovector_pointer(FMatchData);
  689. if FResultVector[0]>FStop then
  690. Exit(mrAfterStop);
  691. {For i:=0 to FResultCount-1 do
  692. Writeln(I,': ',FResultVector[2*I],' - ',FResultVector[2*I+1]);}
  693. if (FResultVector[0]>FResultVector[1]) then
  694. begin
  695. Len:=integer(FResultVector[0]-FResultVector[1]);
  696. S:=Copy(FSubject,FResultVector[1],Len);
  697. FreeMatchData;
  698. FreeCodeData;
  699. raise ERegularExpressionError.CreateFmt(SRegExMatcStartAfterEnd,[S]);
  700. end;
  701. // Next should start after current
  702. FStart:=FResultVector[1];
  703. GetNamedGroupInfo;
  704. if Assigned(OnMatch) then
  705. OnMatch(Self);
  706. end;
  707. function TPerlRegEx.MatchAgain: Boolean;
  708. var
  709. StartChar,Opts : cuint32;
  710. begin
  711. Result:=False;
  712. Opts:=0;
  713. // Special case, empty TREString.
  714. if (FResultVector[0]=FResultVector[1]) then
  715. begin
  716. if (FResultVector[0]>=FSubjectLength) then
  717. Exit;
  718. Opts:=PCRE2_NOTEMPTY_ATSTART or PCRE2_ANCHORED;
  719. end
  720. else
  721. begin
  722. // Check whether start empty
  723. Startchar:=pcre2_get_startchar(FMatchData);
  724. if (FStart<=Startchar) then
  725. begin
  726. (* Reached end of subject. *)
  727. if (startchar>=FSubjectLength) then
  728. Exit;
  729. (* Advance by one character. *)
  730. FStart:=StartChar+1;
  731. (* If UTF-8, it may be more than one code unit. *)
  732. if FIsUtf then
  733. begin
  734. While (FStart<FSubjectLength) do
  735. begin
  736. if ((Ord(Subject[FStart+1]) and $c0)<>$80) then
  737. Exit;
  738. Inc(FStart);
  739. end;
  740. end;
  741. end;
  742. end;
  743. // If we're behind stop, exit at once.
  744. Case DoMatch(Opts) of
  745. mrAfterStop : Exit(False);
  746. mrNotFound : Result:=False;
  747. mrFound: Result:=True;
  748. end;
  749. (*
  750. This time, a result of NOMATCH isn't an error. If the value in 'options'
  751. is zero, it just means we have found all possible matches, so the loop ends.
  752. Otherwise, it means we have failed to find a non-empty-TREString match at a
  753. point where there was a previous empty-TREString match. In this case, we do what
  754. Perl does: advance the matching position by one character, and continue. We
  755. do this by setting the 'end of previous match' offset, because that is picked
  756. up at the top of the loop as the point at which to start again.
  757. There are two complications: (a) When CRLF is a valid newline sequence, and
  758. the current position is just before it, advance by an extra byte. (b)
  759. Otherwise we must ensure that we skip an entire UTF character if we are in
  760. UTF mode.
  761. *)
  762. While not Result do
  763. begin
  764. if Opts=0 then
  765. Break;
  766. FResultVector[1]:=FStart+1; (* Advance one code unit *)
  767. if FCrLFIsNewLine and (* If CRLF is a newline & *)
  768. (FStart<FSubjectLength-2) and (* we are at CRLF *)
  769. (FSubject[FStart+1]=#13) and
  770. (FSubject[Fstart+2]=#10) then
  771. inc(FResultVector[1]) (* Advance by one more. *)
  772. else if (FIsUtf) then (* Otherwise, ensure we advance a whole UTF-8 character. *)
  773. begin
  774. while (FResultVector[1]<FSubjectLength-1) do
  775. begin
  776. if ((Ord(subject[FResultVector[1]]) and $c0) <> $80) then
  777. break;
  778. inc(FResultVector[1]);
  779. end;
  780. end;
  781. Case DoMatch(Opts) of
  782. mrAfterStop :
  783. begin
  784. Result:=False;
  785. Break;
  786. end;
  787. mrNotFound : Result:=False;
  788. mrFound: Result:=True;
  789. end;
  790. end;
  791. end;
  792. function TPerlRegEx.Replace: TREString;
  793. var
  794. NewSubject,Tmp : TREString;
  795. begin
  796. CheckMatch;
  797. Result:=ComputeReplacement;
  798. if Assigned(OnReplace) then
  799. OnReplace(Self, Result);
  800. Tmp:=Result;
  801. if FLastModifiedEnd=0 then
  802. FLastModifiedEnd:=GetMatchedOffset-1;
  803. NewSubject:=Copy(FModifiedSubject,1,FLastModifiedEnd)+Tmp;
  804. FLastModifiedEnd:=Length(NewSubject)+1;
  805. tmp:=GetSubjectRight;
  806. FModifiedSubject:=NewSubject+tmp;
  807. ClearStoredGroups;
  808. end;
  809. function TPerlRegEx.ReplaceAll: Boolean;
  810. begin
  811. Result:=Match;
  812. if Not Result then
  813. exit;
  814. repeat
  815. Replace;
  816. until not MatchAgain;
  817. end;
  818. function IsAlphaAndUnderline(const C: Char): Boolean;
  819. Const
  820. allowed = ['A'..'Z', 'a'..'z', '_'];
  821. begin
  822. Result:=CharInSet(C,Allowed);
  823. end;
  824. function IsNumeric(const C: Char): Boolean;
  825. Const
  826. allowed = ['0'..'9'];
  827. begin
  828. Result:=CharInSet(C,Allowed);
  829. end;
  830. { Return values:
  831. >=0 : group number.
  832. -1 : whole subject.
  833. -2 : Left of match.
  834. -3 : Right of match.
  835. -99 : invalid.
  836. On return, I is the index of the next character to process.
  837. }
  838. function TPerlRegEx.GetBackRefIndex(const Ref: TREString; var I: Integer): Integer;
  839. var
  840. Len,P,N,Group : Integer;
  841. begin
  842. Len:=Length(Ref);
  843. Group:=-99;
  844. Case Ref[I] of
  845. '0'..'9':
  846. begin
  847. Group:=Ord(Ref[i])-Ord('0');
  848. Inc(I);
  849. // Only consume as much integers as there are groups.
  850. // So if there are 15 groups then $16 -> $1 + literal 6.
  851. While (I<=Len) and (Ref[i] in ['0'..'9']) do
  852. begin
  853. N:=(Group*10)+Ord(Ref[i])-Ord('0');
  854. if N>GroupCount then
  855. Break;
  856. Group:=N;
  857. Inc(I);
  858. end;
  859. end;
  860. '{':
  861. begin
  862. Inc(I);
  863. if (Ref[I] in ['0'..'9']) then
  864. // \{123}
  865. begin
  866. Group:=0;
  867. while (I<Len) and IsNumeric(Ref[I]) do
  868. begin
  869. Group:=(Group*10)+Ord(Ref[i])-Ord('0');
  870. Inc(I);
  871. end;
  872. if (I>Len) or (Ref[I]<>'}') then
  873. Group:=-99
  874. else
  875. Inc(I);
  876. end
  877. else
  878. // \{named}
  879. begin
  880. P:=I;
  881. while (I<Len) and IsAlphaAndUnderline(Ref[I]) do
  882. Inc(I);
  883. if (I>Len) or (Ref[I]<>'}') then
  884. Group:=-99
  885. else
  886. begin
  887. Group:=NamedGroup(Copy(Ref,P,I-P));
  888. if Group=-1 then
  889. group:=-99;
  890. Inc(I);
  891. end
  892. end;
  893. end;
  894. '_': // Whole subject
  895. begin
  896. Group:=-1;
  897. Inc(I);
  898. end;
  899. '&': // \& or $& (whole regex match)
  900. begin
  901. Group:=0;
  902. Inc(I);
  903. end;
  904. '+': // Last group
  905. begin
  906. Group:=GroupCount;
  907. Inc(I);
  908. end;
  909. '`': // Subject to left of match.
  910. begin
  911. Group:=-2;
  912. inc(I);
  913. end;
  914. #39: // Subject to right of match.
  915. begin
  916. Group:=-3;
  917. inc(I);
  918. end
  919. end;
  920. Result:=Group;
  921. end;
  922. class function TPerlRegEx.TransForm(aTransform: TTransformation; const S: TREString): TREString;
  923. begin
  924. Case aTransform of
  925. tFirstCap : Result:=UpperCase(Copy(S,1,1))+LowerCase(Copy(S,2,Length(S)-1));
  926. tInitialCap : Result:=InitialCaps(S);
  927. tUpperCase : Result:=UpperCase(S);
  928. tLowerCase : Result:=LowerCase(S);
  929. else
  930. Result:=S;
  931. end;
  932. end;
  933. function TPerlRegEx.ComputeReplacement: TREString;
  934. var
  935. Res : TREString;
  936. Len : Integer;
  937. Procedure AddToResult(aStart,aNext : Integer); inline;
  938. begin
  939. Res:=Res+Copy(FReplacement,aStart,aNext-aStart);
  940. end;
  941. Procedure AddNamedGroup(const aName : TREString); inline;
  942. begin
  943. Res:=Res+NamedGroups[aName];
  944. end;
  945. Function AddBackRef(aTransform : TTransformation; I : Integer) : Integer;
  946. var
  947. P,N,Group : Integer;
  948. begin
  949. Group:=GetBackRefIndex(FReplacement,I);
  950. Case Group of
  951. -99 : ; // invalid
  952. -1 : Res:=Res+TransForm(aTransform,FSubject);
  953. -2 : Res:=Res+TransForm(aTransform,SubjectLeft);
  954. -3 : Res:=Res+TransForm(aTransform,SubjectRight);
  955. else
  956. if Group<=GroupCount then
  957. Res:=Res+TransForm(aTransform,Groups[Group]);
  958. end;
  959. Result:=I;
  960. end;
  961. var
  962. I, P, Last : Integer;
  963. updatelast : boolean;
  964. begin
  965. Len:=Length(FReplacement);
  966. if Len=0 then
  967. Exit('');
  968. I:=1;
  969. Last:=1;
  970. while I<=Len do
  971. begin
  972. case FReplacement[I] of
  973. '\':
  974. begin
  975. if (I=Len) then
  976. raise ERegularExpressionError.CreateFmt(SRegExIndexOutOfBounds,[I]);
  977. AddToResult(Last,I);
  978. Inc(I);
  979. UpdateLast:=True;
  980. case FReplacement[I] of
  981. '$', '\':
  982. begin
  983. Inc(I);
  984. AddToResult(I-1,I);
  985. end;
  986. 'g':
  987. begin
  988. if (I+2<Len) and (FReplacement[I+1] = '<') then
  989. begin
  990. Inc(I,2); // First char
  991. P:=I;
  992. while (I<Len) and IsAlphaAndUnderline(FReplacement[I]) do
  993. Inc(I);
  994. // We should now be on closing >
  995. if (I<=Len) and (FReplacement[I]='>') then
  996. begin
  997. AddNamedGroup(Copy(FReplaceMent,P,I-P));
  998. Inc(I);
  999. Last:=I;
  1000. end
  1001. else
  1002. begin
  1003. I:=I+2; // Skip everything.
  1004. UpdateLast:=False
  1005. end;
  1006. end
  1007. else
  1008. UpdateLast:=False;
  1009. end;
  1010. 'l','L' : I:=AddBackRef(tLowerCase,I);
  1011. 'u','U' : I:=AddBackRef(tLowerCase,I);
  1012. 'f','F' : I:=AddBackRef(tFirstCap,I);
  1013. 'i','I' : I:=AddBackRef(tInitialCap,I);
  1014. else
  1015. I:=AddBackRef(tNone,I);
  1016. end;
  1017. if UpdateLast then
  1018. Last:=I;
  1019. end;
  1020. '$':
  1021. begin
  1022. if I=Len then
  1023. raise ERegularExpressionError.CreateFmt(SRegExIndexOutOfBounds,[I]);
  1024. AddToResult(Last,I);
  1025. Inc(I);
  1026. if FReplacement[I]='$' then
  1027. begin
  1028. AddToResult(Last,I);
  1029. Inc(I);
  1030. end
  1031. else
  1032. I:=AddBackRef(tNone,I);
  1033. Last:=I;
  1034. end;
  1035. else // Case
  1036. Inc(I);
  1037. end;
  1038. end;
  1039. if I>Last then
  1040. AddToResult(Last,I);
  1041. Result:=Res;
  1042. end;
  1043. procedure TPerlRegEx.StoreGroups;
  1044. var
  1045. I : Integer;
  1046. begin
  1047. CheckMatch;
  1048. SetLength(FStoredGroups,GroupCount+1);
  1049. For I:=0 to GroupCount do
  1050. FStoredGroups[i]:=GetResultString(I);
  1051. end;
  1052. function TPerlRegEx.NamedGroup(const aName: TREString): Integer;
  1053. var
  1054. Ptr : PCRE2_SPTR;
  1055. N,I : Integer;
  1056. tblName : TREString;
  1057. begin
  1058. Ptr:=FNameTable;
  1059. for i:=0 to FNameCount-1 do
  1060. begin
  1061. {$IFDEF USEWIDESTRING}
  1062. n:=ord(ptr[0]);
  1063. tblName:=GetStrLen((Ptr+1),FNameEntrySize-2);
  1064. {$ELSE}
  1065. n:=(ord(ptr[0]) shl 8) or ord(ptr[1]);
  1066. tblName:=GetStrLen((Ptr+2),FNameEntrySize-3);
  1067. {$ENDIF}
  1068. if SameText(TblName,aName) then
  1069. Exit(n);
  1070. Inc(Ptr,FNameEntrySize);
  1071. end ;
  1072. Result:=-1;
  1073. end;
  1074. procedure TPerlRegEx.Split(const aStrings: TStrings; aLimit: Integer);
  1075. var
  1076. NewStart,LastEnd,Matches: Integer;
  1077. begin
  1078. if Not Assigned(aStrings) then
  1079. raise ERegularExpressionError.Create(SRegExStringsRequired);
  1080. if (aLimit=1) or not Match then
  1081. begin
  1082. aStrings.Add(Subject);
  1083. Exit;
  1084. end;
  1085. LastEnd:=0; // Last match pos
  1086. Matches:=1;
  1087. repeat
  1088. NewStart:=FirstOffset; // Start of current match
  1089. aStrings.Add(Copy(Subject,LastEnd+1,NewStart-LastEnd)); // Copy everything since last match.
  1090. Inc(Matches);
  1091. LastEnd:=NewStart+MatchedLength; // update last match pos.
  1092. until ((aLimit>1) and (Matches>=aLimit)) or not MatchAgain;
  1093. aStrings.Add(TREString(Copy(FSubject,LastEnd+1,FSubjectLength -LastEnd)));
  1094. end;
  1095. function TPerlRegEx.Split(aLimit: Integer): TREStringDynArray;
  1096. var
  1097. L: TStrings;
  1098. I : integer;
  1099. begin
  1100. L:=TStringList.Create;
  1101. try
  1102. Split(L,aLimit);
  1103. // We cannot use L.ToStringArray, because the string type may differ :/
  1104. SetLength(Result,L.Count);
  1105. For I:=0 to L.Count-1 do
  1106. Result[I]:=L[I];
  1107. finally
  1108. L.Free;
  1109. end;
  1110. end;
  1111. procedure TPerlRegEx.SplitCapture(const aStrings: TStrings; aLimit: Integer);
  1112. begin
  1113. SplitCapture(aStrings,aLimit,1);
  1114. end;
  1115. procedure TPerlRegEx.SplitCapture(const aStrings: TStrings; aLimit: Integer; aOffset: Integer);
  1116. var
  1117. NewStart,LastEnd,Matches: Integer;
  1118. DoCopy : Boolean;
  1119. begin
  1120. if Not Assigned(aStrings) then
  1121. raise ERegularExpressionError.Create(SRegExStringsRequired);
  1122. if (aLimit=1) or not Match then
  1123. begin
  1124. aStrings.Add(Subject);
  1125. Exit;
  1126. end;
  1127. Dec(aOffset);
  1128. if (aOffset>0) then
  1129. Dec(aLimit);
  1130. LastEnd:=0; // Last match pos
  1131. Matches:=1;
  1132. repeat
  1133. NewStart:=FirstOffset; // Start of current match
  1134. DoCopy:=(NewStart>aOffset);
  1135. if DoCopy then
  1136. begin
  1137. aStrings.Add(Copy(Subject,LastEnd+1,NewStart-LastEnd)); // Copy everything since last match.
  1138. if GroupCount > 0 then
  1139. aStrings.Add(Groups[GroupCount]);
  1140. Inc(Matches);
  1141. LastEnd:=NewStart+MatchedLength; // update last match pos.
  1142. end;
  1143. until ((aLimit>1) and (Matches>=aLimit)) or not MatchAgain;
  1144. aStrings.Add(TREString(Copy(FSubject,LastEnd+1,FSubjectLength-LastEnd)));
  1145. end;
  1146. function TPerlRegEx.SplitCapture(aLimit: Integer; aOffset: Integer): TREStringDynArray;
  1147. var
  1148. L: TStrings;
  1149. I : integer;
  1150. begin
  1151. L:=TStringList.Create;
  1152. try
  1153. SplitCapture(L,aLimit,aOffset);
  1154. // We cannot use L.ToStringArray, because the string type may differ :/
  1155. SetLength(Result,L.Count);
  1156. For I:=0 to L.Count-1 do
  1157. Result[I]:=L[I];
  1158. finally
  1159. L.Free;
  1160. end;
  1161. end;
  1162. { TPerlRegExList }
  1163. function TPerlRegExList.GetCount: Integer;
  1164. begin
  1165. Result:=FList.Count;
  1166. end;
  1167. function TPerlRegExList.GetOwnsRegex: Boolean;
  1168. begin
  1169. Result:=FList.OwnsObjects;
  1170. end;
  1171. function TPerlRegExList.GetRegEx(aIndex: Integer): TPerlRegEx;
  1172. begin
  1173. Result:=TPerlRegEx(Flist[aIndex])
  1174. end;
  1175. function TPerlRegExList.GetStart: Integer;
  1176. begin
  1177. Result:=FStart;
  1178. end;
  1179. function TPerlRegExList.GetStop: Integer;
  1180. begin
  1181. Result:=FStop;
  1182. end;
  1183. function TPerlRegExList.GetSubject: TREString;
  1184. begin
  1185. Result:=FSubject;
  1186. end;
  1187. procedure TPerlRegExList.SetRegEx(aIndex: Integer; aValue: TPerlRegEx);
  1188. begin
  1189. FList[aIndex]:=aValue;
  1190. end;
  1191. procedure TPerlRegExList.SetStart(AValue: Integer);
  1192. var
  1193. I : Integer;
  1194. begin
  1195. if AValue=FStart then exit;
  1196. FStart:=aValue;
  1197. For I:=0 to Count-1 do
  1198. RegEx[I].Start:=aValue;
  1199. end;
  1200. procedure TPerlRegExList.SetStop(AValue: Integer);
  1201. var
  1202. I : Integer;
  1203. begin
  1204. if AValue=FStart then exit;
  1205. FStop:=aValue;
  1206. For I:=0 to Count-1 do
  1207. RegEx[I].Stop:=aValue;
  1208. end;
  1209. procedure TPerlRegExList.SetSubject(aValue: TREString);
  1210. var
  1211. I: Integer;
  1212. begin
  1213. if aValue=FSUbject then exit;
  1214. FSubject:=aValue;
  1215. for I:=Count-1 downto 0 do
  1216. RegEx[I].Subject:=Subject;
  1217. FMatch:=nil;
  1218. end;
  1219. procedure TPerlRegExList.UpdateRegEx(const aRegEx: TPerlRegEx);
  1220. begin
  1221. aRegEx.Subject:=FSubject;
  1222. ARegEx.Start:=FStart;
  1223. ARegEx.Stop:=FStop;
  1224. end;
  1225. constructor TPerlRegExList.Create(OwnsRegex: Boolean);
  1226. begin
  1227. FList:=TFPObjectList.Create(OwnsRegex);
  1228. end;
  1229. destructor TPerlRegExList.Destroy;
  1230. begin
  1231. FreeAndNil(FList);
  1232. inherited Destroy;
  1233. end;
  1234. function TPerlRegExList.Add(const aRegEx: TPerlRegEx): Integer;
  1235. begin
  1236. Result:=FList.Add(aRegEx);
  1237. UpdateRegEx(aRegEx);
  1238. end;
  1239. procedure TPerlRegExList.Clear;
  1240. begin
  1241. FList.Clear;
  1242. end;
  1243. procedure TPerlRegExList.Delete(aIndex: Integer);
  1244. begin
  1245. FList.Delete(aIndex);
  1246. end;
  1247. function TPerlRegExList.IndexOf(const aRegEx: TPerlRegEx): Integer;
  1248. begin
  1249. Result:=FList.IndexOf(aRegex);
  1250. end;
  1251. procedure TPerlRegExList.Insert(aIndex: Integer; const aRegEx: TPerlRegEx);
  1252. begin
  1253. FList.Insert(aIndex,aRegex);
  1254. end;
  1255. function TPerlRegExList.Match: Boolean;
  1256. begin
  1257. SetStart(1);
  1258. FMatch:=nil;
  1259. Result:=MatchAgain;
  1260. end;
  1261. function TPerlRegExList.MatchAgain: Boolean;
  1262. var
  1263. PRE : TPerlRegEx;
  1264. I,StartAt,Current: Integer;
  1265. begin
  1266. // Determine start position
  1267. if not Assigned(FMatch) then
  1268. StartAt:=Start
  1269. else
  1270. With FMatch do
  1271. StartAt:=0; // MVC todo {InternalGetMatchedOffset+InternalGetMatchedLength};
  1272. FMatch:=nil;
  1273. Current:=-1;
  1274. // Check all regexes for new closest match.
  1275. I:=0;
  1276. While (I<Count) and (Current>StartAt) do
  1277. begin
  1278. PRE:=RegEx[I];
  1279. // Should we search this regex again ?
  1280. if (not PRE.FoundMatch) or (PRE.FirstOffset<StartAt) then
  1281. begin
  1282. PRE.Start:=StartAt;
  1283. PRE.MatchAgain;
  1284. end;
  1285. // New first position found ?
  1286. if PRE.FoundMatch and ((FMatch=Nil) or (PRE.FirstOffset<Current)) then
  1287. begin
  1288. Current:=Pre.FirstOffset;
  1289. FMatch:=PRE;
  1290. end;
  1291. Inc(I);
  1292. end;
  1293. Result:=Current<>-1;
  1294. end;
  1295. end.