|
@@ -0,0 +1,1382 @@
|
|
|
+{
|
|
|
+ This file is part of the Free Component Library (FCL)
|
|
|
+ Copyright (c) 2023 The free pascal team
|
|
|
+
|
|
|
+ Delphi-compatible Regular expressions unit.
|
|
|
+
|
|
|
+ See the file COPYING.FPC, included in this distribution,
|
|
|
+ for details about the copyright.
|
|
|
+
|
|
|
+ This program is distributed in the hope that it will be useful,
|
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
+
|
|
|
+ **********************************************************************}
|
|
|
+{
|
|
|
+ Note that the original Delphi unit (by Jan Goyvaerts) uses PCRE1,
|
|
|
+ but this unit uses PCRE2. The string type depends on how the packages were
|
|
|
+ compiled.
|
|
|
+}
|
|
|
+
|
|
|
+unit System.RegularExpressionsCore;
|
|
|
+
|
|
|
+{$MODE OBJFPC}
|
|
|
+{$H+}
|
|
|
+
|
|
|
+interface
|
|
|
+
|
|
|
+{.$DEFINE USEWIDESTRING} // uncomment if you want to force widestring...
|
|
|
+
|
|
|
+// We cannot detect the char size before the uses clause is parsed, it will return 1, the compiler default.
|
|
|
+// So we need a define here, maybe a compiler switch is needed to set the default size (-Sw ?) which would allow to set the default type.
|
|
|
+// The detection here is based on the assumption that the dotted units use widestring...
|
|
|
+{$IFDEF FPC_DOTTEDUNITS}
|
|
|
+{$DEFINE USEWIDESTRING}
|
|
|
+{$ENDIF}
|
|
|
+
|
|
|
+
|
|
|
+uses
|
|
|
+{$IFDEF FPC_DOTTEDUNITS}
|
|
|
+ System.SysUtils, System.Classes, System.Contnrs, {$IFNDEF USEWIDESTRING} Api.PCRE2_8 {$ELSE} Api.PCRE2_16 {$ENDIF}, System.CTypes, System.RegularExpressionsConsts;
|
|
|
+{$ELSE}
|
|
|
+ SysUtils, Classes, Contnrs, {$IFNDEF USEWIDESTRING} libpcre2_8 {$ELSE} libpcre2_16 {$ENDIF}, ctypes, System.RegularExpressionsConsts;
|
|
|
+{$ENDIF}
|
|
|
+
|
|
|
+const
|
|
|
+ MAX_SUBEXPRESSIONS = 99;
|
|
|
+
|
|
|
+type
|
|
|
+ {$IFDEF USEWIDESTRING}
|
|
|
+ TREString = UnicodeString;
|
|
|
+ {$ElSE}
|
|
|
+ TREString = AnsiString;
|
|
|
+ {$ENDIF}
|
|
|
+
|
|
|
+ TPerlRegExOption = (preCaseLess,preMultiLine,preSingleLine,preExtended,preAnchored,preUnGreedy,preNoAutoCapture,
|
|
|
+ preAllowEmptyClass, preAltBSUX, preAltCircumFlex, preAltVerbNames,
|
|
|
+ preDollarEndOnly, preDupNames, preEndAnchored, preFirstLine, preLiteral, preMatchInvalidUTF,
|
|
|
+ preMatchUnsetBackRef, preNeverBackslashC, preNoAutoPossess, preNoDotStarAnchor, preNoStartOptimize,
|
|
|
+ preNoUTFCheck, preUseOffsetLimit);
|
|
|
+
|
|
|
+ TPerlRegExOptions = set of TPerlRegExOption;
|
|
|
+
|
|
|
+ TPerlRegExStateItem = (preNotBOL,preNotEOL,preNotEmpty);
|
|
|
+ TPerlRegExState = set of TPerlRegExStateItem;
|
|
|
+
|
|
|
+ TPerlRegExReplaceEvent = procedure(Sender: TObject; var ReplaceWith: TREString) of object;
|
|
|
+
|
|
|
+ { TPerlRegEx }
|
|
|
+
|
|
|
+ TPerlRegEx = class
|
|
|
+ Private
|
|
|
+ Type
|
|
|
+ TTransformation = (tNone,tLowerCase,tUpperCase,tFirstCap,tInitialCap);
|
|
|
+
|
|
|
+ class function TransForm(aTransform: TTransformation; S: TREString): TREString;
|
|
|
+ private
|
|
|
+ {$IFDEF USEWIDESTRING}
|
|
|
+ FCode : Ppcre2_code_16;
|
|
|
+ {$ELSE}
|
|
|
+ FCode : Ppcre2_code_8;
|
|
|
+ {$ENDIF}
|
|
|
+ FOnMatch: TNotifyEvent;
|
|
|
+ FOnReplace: TPerlRegExReplaceEvent;
|
|
|
+ FOptions: TPerlRegExOptions;
|
|
|
+ FRegEx: TREString;
|
|
|
+ FState: TPerlRegExState;
|
|
|
+ FStart,
|
|
|
+ FStop: Integer;
|
|
|
+ FStudied: Boolean;
|
|
|
+ FResultVector : Psize_t;
|
|
|
+ FResultCount : Cardinal;
|
|
|
+ FMatchData : ppcre2_match_data;
|
|
|
+ FModifiedSubject,
|
|
|
+ FSubject: TREString;
|
|
|
+ FSubjectLength : cuint32;
|
|
|
+ FNameCount : cuint32;
|
|
|
+ FNameTable : PCRE2_SPTR;
|
|
|
+ FNameEntrySize : cuint32;
|
|
|
+ FLastModifiedEnd: SizeInt;
|
|
|
+ FReplacement : TREString;
|
|
|
+ FStoredGroups: array of TREString;
|
|
|
+ FCrLFIsNewLine,
|
|
|
+ FIsUtf : Boolean;
|
|
|
+ Procedure CheckMatch; inline;
|
|
|
+ function DoMatch(Opts: CUInt32): Boolean;
|
|
|
+ function GetBackRefIndex(const Ref: TREString; var I: Integer): Integer;
|
|
|
+ function GetCompiled: Boolean;
|
|
|
+ function GetFoundMatch: Boolean; inline;
|
|
|
+ function GetGroupCount: Integer;
|
|
|
+ function GetGroupLengths(aIndex: Integer): Integer;
|
|
|
+ function GetGroupOffsets(aIndex: Integer): Integer;
|
|
|
+ function GetGroups(aIndex: Integer): TREString;
|
|
|
+ function GetMatchedLength: Integer;
|
|
|
+ function GetMatchedOffset: Integer;
|
|
|
+ function GetMatchedText: TREString;
|
|
|
+ function GetModifiedSubject: TREString;
|
|
|
+ function GetNamedGroup(aName : TREString): TREString;
|
|
|
+ procedure GetNamedGroupInfo;
|
|
|
+ function GetPCREErrorMsg(ErrorNr: Integer): TREString;
|
|
|
+ function GetResultString(aIndex: Integer): TREString;
|
|
|
+ function GetStart: Integer;
|
|
|
+ function GetStop: Integer;
|
|
|
+ function GetSubject: TREString;
|
|
|
+ function GetSubjectLeft: TREString;
|
|
|
+ function GetSubjectRight: TREString;
|
|
|
+ function MakeOptions(aOptions: TPerlRegExOptions): Integer;
|
|
|
+ procedure SetOptions(aValue: TPerlRegExOptions);
|
|
|
+ procedure SetRegEx(aValue: TREString);
|
|
|
+ procedure SetReplacement(aValue: TREString);
|
|
|
+ procedure SetStart(aValue: Integer);
|
|
|
+ procedure SetStop(aValue: Integer);
|
|
|
+ procedure SetSubject(aValue: TREString);
|
|
|
+ protected
|
|
|
+ procedure FreeCodeData;
|
|
|
+ procedure FreeMatchData;
|
|
|
+ procedure CleanUp; virtual;
|
|
|
+ procedure ClearStoredGroups;
|
|
|
+ function FirstOffset : Cardinal;
|
|
|
+ function FirstLength : Cardinal;
|
|
|
+ public
|
|
|
+ constructor Create;
|
|
|
+ destructor Destroy; override;
|
|
|
+ // Use this to escape special characters.
|
|
|
+ class function EscapeRegExChars(const aString: TREString): TREString;
|
|
|
+ // Compile the regex.
|
|
|
+ procedure Compile;
|
|
|
+ // Study regex (may result in faster execution);
|
|
|
+ procedure Study;
|
|
|
+ // Try to match, starting at beginning. Returns true if a match was found.
|
|
|
+ function Match: Boolean;
|
|
|
+ // Try to match again, starting previous match end. Returns true if a new match was found.
|
|
|
+ function MatchAgain: Boolean;
|
|
|
+ // Replace current match in Subject with ComputeReplacement. Returns computed replacement
|
|
|
+ function Replace: TREString;
|
|
|
+ // Replace all matches in Subject with their ComputeReplacement. Returns true if a match was found.
|
|
|
+ function ReplaceAll: Boolean;
|
|
|
+ // Compute replacement text.
|
|
|
+ function ComputeReplacement: TREString;
|
|
|
+ // Store groups for faster access.
|
|
|
+ procedure StoreGroups;
|
|
|
+ // Index in groups of name.
|
|
|
+ function NamedGroup(const aName: TREString): Integer;
|
|
|
+ // Split subject TREString based on regex. aStrings will contain everything outside the matches.
|
|
|
+ procedure Split(const aStrings: TStrings; aLimit: Integer);
|
|
|
+ // Split subject TREString based on regex, but include matches in result.
|
|
|
+ procedure SplitCapture(const aStrings: TStrings; aLimit: Integer); overload;
|
|
|
+ // Split subject TREString based on regex, but include matches in result.
|
|
|
+ // if aoffset is > 1 then everything till offset is put in the first TREString.
|
|
|
+ procedure SplitCapture(const aStrings: TStrings; aLimit: Integer; aOffset : Integer); overload;
|
|
|
+ // Was the regex compiled ?
|
|
|
+ property Compiled: Boolean read GetCompiled;
|
|
|
+ // Match found ?
|
|
|
+ property FoundMatch: Boolean read GetFoundMatch;
|
|
|
+ // Did study ?
|
|
|
+ property Studied: Boolean read FStudied;
|
|
|
+ // Fast access, group 0.
|
|
|
+ property MatchedText: TREString read GetMatchedText;
|
|
|
+ property MatchedLength: Integer read GetMatchedLength;
|
|
|
+ property MatchedOffset: Integer read GetMatchedOffset;
|
|
|
+ // Minimum search position, 1-based.
|
|
|
+ property Start: Integer read GetStart write SetStart;
|
|
|
+ // Maximum search position, 1-based.
|
|
|
+ property Stop: Integer read GetStop write SetStop;
|
|
|
+ property State: TPerlRegExState read FState write FState;
|
|
|
+ // Group count.
|
|
|
+ property GroupCount: Integer read GetGroupCount;
|
|
|
+ // Group Texts. Index 0 - GroupCount. 0 is whole matched text. on original search text.
|
|
|
+ property Groups[aIndex: Integer]: TREString read GetGroups;
|
|
|
+ // Group lengths & Offsets. Index 0 - GroupCount. 0 is whole matched text, on original search text.
|
|
|
+ property GroupLengths[aIndex: Integer]: Integer read GetGroupLengths;
|
|
|
+ property GroupOffsets[aIndex: Integer]: Integer read GetGroupOffsets;
|
|
|
+ // Named access to groups.
|
|
|
+ property NamedGroups[aName : TREString] : TREString Read GetNamedGroup;
|
|
|
+ // Subject TREString. Will be modified by replace !
|
|
|
+ property Subject: TREString read GetModifiedSubject write SetSubject;
|
|
|
+ // Original subject TREString. Not modified by replace !
|
|
|
+ property OriginalSubject: TREString read FSubject write SetSubject;
|
|
|
+ // Left of original subject.
|
|
|
+ property SubjectLeft: TREString read GetSubjectLeft;
|
|
|
+ // Right of original subject.
|
|
|
+ property SubjectRight: TREString read GetSubjectRight;
|
|
|
+ public
|
|
|
+ // Set options.
|
|
|
+ property Options: TPerlRegExOptions read FOptions write SetOptions;
|
|
|
+ // The regular expression
|
|
|
+ property RegEx: TREString read FRegEx write SetRegEx;
|
|
|
+ // The replacement expression.
|
|
|
+ property Replacement: TREString read FReplacement write SetReplacement;
|
|
|
+ // Called on every match.
|
|
|
+ property OnMatch: TNotifyEvent read FOnMatch write FOnMatch;
|
|
|
+ // Set this to modify the computed replacement text.
|
|
|
+ property OnReplace: TPerlRegExReplaceEvent read FOnReplace write FOnReplace;
|
|
|
+ end;
|
|
|
+
|
|
|
+
|
|
|
+ TRegExStudyOption = (preJIT, preJITPartialHard, preJITPartialSoft);
|
|
|
+ TRegExStudyOptions = set of TRegExStudyOption;
|
|
|
+
|
|
|
+ { TPerlRegExList }
|
|
|
+
|
|
|
+ TPerlRegExList = class
|
|
|
+ private
|
|
|
+ FMatch: TPerlRegEx;
|
|
|
+ FList : TFPObjectList;
|
|
|
+ FStart : Integer;
|
|
|
+ FStop : Integer;
|
|
|
+ FSubject : TREString;
|
|
|
+ function GetCount: Integer;
|
|
|
+ function GetOwnsRegex: Boolean;
|
|
|
+ function GetRegEx(aIndex: Integer): TPerlRegEx;
|
|
|
+ function GetStart: Integer;
|
|
|
+ function GetStop: Integer;
|
|
|
+ function GetSubject: TREString;
|
|
|
+ procedure SetRegEx(aIndex: Integer; aValue: TPerlRegEx);
|
|
|
+ procedure SetStart(aValue: Integer);
|
|
|
+ procedure SetStop(aValue: Integer);
|
|
|
+ procedure SetSubject(aValue: TREString);
|
|
|
+ protected
|
|
|
+ procedure UpdateRegEx(const aRegEx: TPerlRegEx);
|
|
|
+ public
|
|
|
+ constructor Create(OwnsRegex : Boolean);
|
|
|
+ destructor Destroy; override;
|
|
|
+ public
|
|
|
+ function Add(const aRegEx: TPerlRegEx): Integer;
|
|
|
+ procedure Clear;
|
|
|
+ procedure Delete(aIndex: Integer);
|
|
|
+ function IndexOf(const aRegEx: TPerlRegEx): Integer;
|
|
|
+ procedure Insert(aIndex: Integer; const aRegEx: TPerlRegEx);
|
|
|
+ public
|
|
|
+ function Match: Boolean;
|
|
|
+ function MatchAgain: Boolean;
|
|
|
+ property RegEx[aIndex: Integer]: TPerlRegEx read GetRegEx write SetRegEx;
|
|
|
+ property Count: Integer read GetCount;
|
|
|
+ property Subject: TREString read GetSubject write SetSubject;
|
|
|
+ property Start: Integer read GetStart write SetStart;
|
|
|
+ property Stop: Integer read GetStop write SetStop;
|
|
|
+ property MatchedRegEx: TPerlRegEx read FMatch;
|
|
|
+ Property OwnsRegex : Boolean Read GetOwnsRegex;
|
|
|
+ end;
|
|
|
+
|
|
|
+ ERegularExpressionError = class(Exception);
|
|
|
+
|
|
|
+// Todo: move to strutils ?
|
|
|
+Function InitialCaps(const S : TREString) : TREString;
|
|
|
+
|
|
|
+implementation
|
|
|
+
|
|
|
+{$IFNDEF USEWIDESTRING}
|
|
|
+function GetStrLen(p : PAnsiChar; len : Integer) : AnsiString;
|
|
|
+
|
|
|
+var
|
|
|
+ L : Integer;
|
|
|
+
|
|
|
+begin
|
|
|
+ Result:='';
|
|
|
+ L:=StrLen(P);
|
|
|
+ if L>Len then
|
|
|
+ L:=Len;
|
|
|
+ SetLength(Result,L);
|
|
|
+ if L>0 then
|
|
|
+ Move(P^,Result[1],L);
|
|
|
+end;
|
|
|
+{$ELSE}
|
|
|
+function GetStrLen(p : PWideChar; len : Integer) : UnicodeString;
|
|
|
+
|
|
|
+var
|
|
|
+ L : Integer;
|
|
|
+
|
|
|
+begin
|
|
|
+ Result:='';
|
|
|
+ L:=StrLen(P);
|
|
|
+ if L>Len then
|
|
|
+ L:=Len;
|
|
|
+ SetLength(Result,L);
|
|
|
+ if Len>0 then
|
|
|
+ Move(P^,Result[1],L*2);
|
|
|
+end;
|
|
|
+{$ENDIF}
|
|
|
+
|
|
|
+Function InitialCaps(const S : TREString) : TREString;
|
|
|
+
|
|
|
+const
|
|
|
+ NonWord = [#0..'&', '(', '*', '+', ',', '-', '.', '?', '<', '[', '{', #$B7];
|
|
|
+
|
|
|
+var
|
|
|
+ L : TREString;
|
|
|
+ Len,Last,I : Integer;
|
|
|
+ Upper : Boolean;
|
|
|
+
|
|
|
+begin
|
|
|
+ L:=LowerCase(S);
|
|
|
+ Len:=Length(L);
|
|
|
+ Last:=1;
|
|
|
+ I:=1;
|
|
|
+ Upper:=True;
|
|
|
+ Result:='';
|
|
|
+ While I<=Len do
|
|
|
+ begin
|
|
|
+ if L[i] in NonWord then
|
|
|
+ Upper:=True
|
|
|
+ else if Upper then
|
|
|
+ begin
|
|
|
+ if I>Last then
|
|
|
+ Result:=Result+Copy(L,Last,I-Last);
|
|
|
+ Result:=Result+UpperCase(L[i]);
|
|
|
+ inc(I);
|
|
|
+ Last:=I;
|
|
|
+ Upper:=False;
|
|
|
+ end;
|
|
|
+ Inc(i);
|
|
|
+ end;
|
|
|
+ Result:=Result+Copy(L,Last,I-Last);
|
|
|
+end;
|
|
|
+
|
|
|
+
|
|
|
+{ TPerlRegEx }
|
|
|
+
|
|
|
+
|
|
|
+function TPerlRegEx.GetFoundMatch: Boolean;
|
|
|
+begin
|
|
|
+ Result:=FResultCount>0;
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.GetCompiled: Boolean;
|
|
|
+begin
|
|
|
+ Result:=Assigned(FCode);
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegEx.CheckMatch;
|
|
|
+begin
|
|
|
+ if not FoundMatch then
|
|
|
+ raise ERegularExpressionError.Create(SRegExMatchRequired);
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.GetGroupCount: Integer;
|
|
|
+begin
|
|
|
+ CheckMatch;
|
|
|
+ Result:=FResultCount-1;
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.GetGroupLengths(aIndex: Integer): Integer;
|
|
|
+
|
|
|
+begin
|
|
|
+ CheckMatch;
|
|
|
+ Result:=FResultVector[2*aIndex+1]-FResultVector[2*aIndex];
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.GetGroupOffsets(aIndex: Integer): Integer;
|
|
|
+begin
|
|
|
+ CheckMatch;
|
|
|
+ Result:=FResultVector[2*aIndex]+1;
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.GetResultString(aIndex: Integer): TREString;
|
|
|
+
|
|
|
+var
|
|
|
+ astart,aLength : Ptrint;
|
|
|
+
|
|
|
+begin
|
|
|
+ // Writeln('AIndex ',aIndex,' ',FResultCount);
|
|
|
+ aStart:=FResultVector[2*aIndex];
|
|
|
+ aLength:=FResultVector[2*aIndex+1]-aStart;
|
|
|
+ inc(aStart); // 1-based
|
|
|
+ Result:=Copy(FSubject,AStart,aLength);
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.GetGroups(aIndex: Integer): TREString;
|
|
|
+
|
|
|
+begin
|
|
|
+ CheckMatch;
|
|
|
+ if Length(FStoredGroups)>0 then
|
|
|
+ Result:=FStoredGroups[aIndex]
|
|
|
+ else
|
|
|
+ Result:=GetResultString(aIndex);
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.GetMatchedLength: Integer;
|
|
|
+
|
|
|
+begin
|
|
|
+ Result:=GetGroupLengths(0)
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.GetMatchedOffset: Integer;
|
|
|
+begin
|
|
|
+ Result:=GetGroupOffsets(0);
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.GetMatchedText: TREString;
|
|
|
+begin
|
|
|
+ Result:=GetResultString(0)
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.GetModifiedSubject: TREString;
|
|
|
+begin
|
|
|
+ Result:=FModifiedSubject;
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.GetNamedGroup(aName : TREString): TREString;
|
|
|
+
|
|
|
+var
|
|
|
+ Idx : integer;
|
|
|
+
|
|
|
+begin
|
|
|
+ Result:='';
|
|
|
+ Idx:=NamedGroup(aName);
|
|
|
+ if Idx<>-1 then
|
|
|
+ Result:=Groups[Idx];
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.GetStart: Integer;
|
|
|
+begin
|
|
|
+ Result:=FStart+1;
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.GetStop: Integer;
|
|
|
+begin
|
|
|
+ Result:=FStop+1;
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.GetSubject: TREString;
|
|
|
+begin
|
|
|
+ Result:=FSubject;
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.GetSubjectLeft: TREString;
|
|
|
+begin
|
|
|
+ // Resultvector is 0 based
|
|
|
+ Result:=Copy(FSubject,1,FResultVector[0]);
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.GetSubjectRight: TREString;
|
|
|
+var
|
|
|
+ SPos : Integer;
|
|
|
+begin
|
|
|
+ SPos:=FResultVector[1]; // 0-based
|
|
|
+ Result:=Copy(FSubject,SPos+1,FSubjectLength-SPos);
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegEx.SetOptions(aValue: TPerlRegExOptions);
|
|
|
+begin
|
|
|
+ if FOptions=AValue then Exit;
|
|
|
+ FOptions:=AValue;
|
|
|
+ CleanUp; // Need to reset...
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegEx.SetRegEx(aValue: TREString);
|
|
|
+begin
|
|
|
+ if FRegEx=AValue then Exit;
|
|
|
+ FRegEx:=aValue;
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegEx.SetReplacement(aValue: TREString);
|
|
|
+begin
|
|
|
+ FReplacement:=AValue;
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegEx.SetStart(aValue: Integer);
|
|
|
+begin
|
|
|
+ FStart:=aValue-1;
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegEx.SetStop(aValue: Integer);
|
|
|
+begin
|
|
|
+ if FStop=aValue-1 then Exit;
|
|
|
+ FStop:=aValue-1;
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegEx.SetSubject(aValue: TREString);
|
|
|
+begin
|
|
|
+ FSubject:=aValue;
|
|
|
+ FSubjectLength:=Length(FSubject);
|
|
|
+ FModifiedSubject:=aValue;
|
|
|
+ CleanUp;
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegEx.CleanUp;
|
|
|
+begin
|
|
|
+ FreeMatchData;
|
|
|
+ FreeCodeData;
|
|
|
+ ClearStoredGroups;
|
|
|
+ FResultCount:=0;
|
|
|
+ FResultVector:=Nil;
|
|
|
+ FStart:=0;
|
|
|
+ FStop:=Length(FSubject);
|
|
|
+ FLastModifiedEnd:=0;
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegEx.ClearStoredGroups;
|
|
|
+begin
|
|
|
+ SetLength(FStoredGroups,0);
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.FirstOffset: Cardinal;
|
|
|
+begin
|
|
|
+ Result:=FResultVector[0];
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.FirstLength: Cardinal;
|
|
|
+begin
|
|
|
+ Result:=FResultVector[1]-FResultVector[0];
|
|
|
+end;
|
|
|
+
|
|
|
+constructor TPerlRegEx.Create;
|
|
|
+begin
|
|
|
+ if not libpcre28loaded then
|
|
|
+ Loadlibpcre28;
|
|
|
+end;
|
|
|
+
|
|
|
+destructor TPerlRegEx.Destroy;
|
|
|
+begin
|
|
|
+ inherited Destroy;
|
|
|
+end;
|
|
|
+
|
|
|
+class function TPerlRegEx.EscapeRegExChars(const aString: TREString): TREString;
|
|
|
+
|
|
|
+Const
|
|
|
+ NeedEscape = ['\','[',']','^','$','.','|','?','*','+','-','(',')','{','}','&','<','>'];
|
|
|
+
|
|
|
+var
|
|
|
+ I : Integer;
|
|
|
+ PSrc,PDest,PStart : PChar;
|
|
|
+
|
|
|
+begin
|
|
|
+ Result:='';
|
|
|
+ SetLength(Result,2*Length(aString));
|
|
|
+ PSrc:=PChar(aString);
|
|
|
+ PDest:=PChar(Result);
|
|
|
+ PStart:=PDest;
|
|
|
+ for I:=1 to Length(aString) do
|
|
|
+ begin
|
|
|
+ if PSrc^=#0 then
|
|
|
+ begin
|
|
|
+ PDest^:='\';
|
|
|
+ Inc(PDest);
|
|
|
+ PDest^:='0';
|
|
|
+ end
|
|
|
+ else if CharInSet(PSrc^,NeedEscape) then
|
|
|
+ begin
|
|
|
+ PDest^:='\';
|
|
|
+ Inc(PDest);
|
|
|
+ PDest^:=PSrc^;
|
|
|
+ end
|
|
|
+ else
|
|
|
+ PDest^:=PSrc^;
|
|
|
+ Inc(PSrc);
|
|
|
+ Inc(PDest);
|
|
|
+ end;
|
|
|
+ SetLength(Result,(PDest-PStart));
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.MakeOptions(aOptions: TPerlRegExOptions): Integer;
|
|
|
+
|
|
|
+ Procedure AddOption(aOpt : TPerlRegExOption; aValue : cuint32);
|
|
|
+
|
|
|
+ begin
|
|
|
+ if aOpt in AOptions then
|
|
|
+ Result:=Result or aValue;
|
|
|
+ end;
|
|
|
+
|
|
|
+begin
|
|
|
+ Result:=PCRE2_NEWLINE_ANY or PCRE2_UTF;
|
|
|
+ AddOption(preCaseLess,PCRE2_CASELESS);
|
|
|
+ AddOption(preMultiLine,PCRE2_MULTILINE);
|
|
|
+ AddOption(preSingleLine,PCRE2_DOTALL);
|
|
|
+ AddOption(preExtended,PCRE2_EXTENDED);
|
|
|
+ AddOption(preAnchored,PCRE2_ANCHORED);
|
|
|
+ AddOption(preUnGreedy,PCRE2_UNGREEDY);
|
|
|
+ AddOption(preNoAutoCapture,PCRE2_NO_AUTO_CAPTURE);
|
|
|
+ AddOption(preAllowEmptyClass,PCRE2_ALLOW_EMPTY_CLASS);
|
|
|
+ AddOption(preAltBSUX,PCRE2_ALT_BSUX);
|
|
|
+ AddOption(preAltCircumFlex,PCRE2_ALT_CIRCUMFLEX);
|
|
|
+ AddOption(preAltVerbNames,PCRE2_ALT_VERBNAMES);
|
|
|
+ AddOption(preDollarEndOnly,PCRE2_DOLLAR_ENDONLY);
|
|
|
+ AddOption(preDupNames,PCRE2_DUPNAMES);
|
|
|
+ AddOption(preEndAnchored,PCRE2_ENDANCHORED);
|
|
|
+ AddOption(preFirstLine,PCRE2_FIRSTLINE);
|
|
|
+ AddOption(preLiteral,PCRE2_LITERAL);
|
|
|
+ AddOption(preMatchInvalidUTF,PCRE2_MATCH_INVALID_UTF);
|
|
|
+ AddOption(preMatchUnsetBackRef,PCRE2_MATCH_UNSET_BACKREF);
|
|
|
+ AddOption(preNeverBackslashC,PCRE2_NEVER_BACKSLASH_C);
|
|
|
+ AddOption(preNoAutoPossess,PCRE2_NO_AUTO_POSSESS);
|
|
|
+ AddOption(preNoDotStarAnchor,PCRE2_NO_DOTSTAR_ANCHOR);
|
|
|
+ AddOption(preNoStartOptimize,PCRE2_NO_START_OPTIMIZE);
|
|
|
+ // maybe we should enable by default ?
|
|
|
+ AddOption(preNoUTFCheck,PCRE2_NO_UTF_CHECK);
|
|
|
+ AddOption(preUseOffsetLimit,PCRE2_USE_OFFSET_LIMIT);
|
|
|
+// AddOption(preUTF,PCRE2_UTF);
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.GetPCREErrorMsg(ErrorNr: Integer): TREString;
|
|
|
+
|
|
|
+var
|
|
|
+ Buffer : Array[0..255] of ansichar;
|
|
|
+
|
|
|
+begin
|
|
|
+ pcre2_get_error_message(ErrorNr,@Buffer,SizeOf(Buffer));
|
|
|
+ Result:=strpas(@Buffer);
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegEx.Compile;
|
|
|
+
|
|
|
+var
|
|
|
+ ErrorNr: Integer;
|
|
|
+ ErrorPos: Integer;
|
|
|
+
|
|
|
+begin
|
|
|
+ if (FRegEx='') then
|
|
|
+ raise ERegularExpressionError.CreateRes(@SRegExMissingExpression);
|
|
|
+ CleanUp;
|
|
|
+ FCode:=pcre2_compile(TPCRE2_SPTR8(FRegEx),Length(FRegEx),MakeOptions(FOptions),@ErrorNr,@ErrorPos,Nil);
|
|
|
+ if (FCode=nil) then
|
|
|
+ raise ERegularExpressionError.CreateFmt(SRegExExpressionError,[ErrorPos+1,GetPCREErrorMsg(ErrorNr)]);
|
|
|
+ FMatchData:=pcre2_match_data_create_from_pattern(FCode,Nil);
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegEx.Study;
|
|
|
+begin
|
|
|
+
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegEx.FreeMatchData;
|
|
|
+
|
|
|
+var
|
|
|
+ Data : ppcre2_match_data;
|
|
|
+
|
|
|
+begin
|
|
|
+ if FMatchData=Nil then exit;
|
|
|
+ Data:=FMatchData;
|
|
|
+ FMatchData:=Nil;
|
|
|
+ pcre2_match_data_free(Data);
|
|
|
+ FResultVector:=Nil;
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegEx.FreeCodeData;
|
|
|
+
|
|
|
+var
|
|
|
+ {$IFDEF USEWIDESTRING}
|
|
|
+ Data : Ppcre2_code_16;
|
|
|
+ {$ELSE}
|
|
|
+ Data : Ppcre2_code_8;
|
|
|
+ {$ENDIF}
|
|
|
+
|
|
|
+begin
|
|
|
+ if (FCode=Nil) then
|
|
|
+ exit;
|
|
|
+ Data:=FCode;
|
|
|
+ FCode:=Nil;
|
|
|
+ pcre2_code_free(Data);
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegEx.GetNamedGroupInfo;
|
|
|
+
|
|
|
+
|
|
|
+begin
|
|
|
+ FNameEntrySize:=0;
|
|
|
+ FNameTable:=Nil;
|
|
|
+ pcre2_pattern_info(
|
|
|
+ FCode, (* the compiled pattern *)
|
|
|
+ PCRE2_INFO_NAMECOUNT, (* get the number of named substrings *)
|
|
|
+ @FNameCount); (* where to put the answer *)
|
|
|
+ if (FNameCount = 0) then
|
|
|
+ Exit;
|
|
|
+ pcre2_pattern_info(
|
|
|
+ FCode, (* the compiled pattern *)
|
|
|
+ PCRE2_INFO_NAMETABLE, (* address of the table *)
|
|
|
+ @FNameTable); (* where to put the answer *)
|
|
|
+ pcre2_pattern_info(
|
|
|
+ FCODE, (* the compiled pattern *)
|
|
|
+ PCRE2_INFO_NAMEENTRYSIZE, (* size of each entry in the table *)
|
|
|
+ @FNameEntrySize);
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.Match: Boolean;
|
|
|
+
|
|
|
+var
|
|
|
+ newline,option_bits : cuint32;
|
|
|
+
|
|
|
+begin
|
|
|
+ Result:=False;
|
|
|
+ ClearStoredGroups;
|
|
|
+ if not Compiled then
|
|
|
+ Compile;
|
|
|
+ FMatchData:=pcre2_match_data_create_from_pattern(FCode,Nil);
|
|
|
+ Result:=DoMatch(0);
|
|
|
+ if Result then
|
|
|
+ begin
|
|
|
+ pcre2_pattern_info(FCode,PCRE2_INFO_ALLOPTIONS, @option_bits);
|
|
|
+ FIsUtf:=((option_bits and PCRE2_UTF) <> 0);
|
|
|
+ pcre2_pattern_info(FCode,PCRE2_INFO_NEWLINE,@newline);
|
|
|
+ FCrLFIsNewLine:= (newline=PCRE2_NEWLINE_ANY) or
|
|
|
+ (newline=PCRE2_NEWLINE_CRLF) or
|
|
|
+ (newline=PCRE2_NEWLINE_ANYCRLF);
|
|
|
+ end;
|
|
|
+end;
|
|
|
+
|
|
|
+
|
|
|
+function TPerlRegEx.DoMatch(Opts : CUInt32): Boolean;
|
|
|
+
|
|
|
+var
|
|
|
+ len,rc : cInt;
|
|
|
+ S : TREString;
|
|
|
+
|
|
|
+begin
|
|
|
+ Result:=False;
|
|
|
+{$IF SIZEOF(CHAR)=2}
|
|
|
+ rc:=pcre2_match_w(
|
|
|
+{$ELSE}
|
|
|
+ rc:=pcre2_match(
|
|
|
+{$ENDIF}
|
|
|
+ FCode, (* the compiled pattern *)
|
|
|
+ PChar(FSubject), (* the subject TREString *)
|
|
|
+ FSubjectLength, (* the length of the subject *)
|
|
|
+ FStart, (* start at offset 0 in the subject *)
|
|
|
+ Opts, (* default options *)
|
|
|
+ FMatchData, (* block for storing the result *)
|
|
|
+ Nil);
|
|
|
+ if (rc <= 0) then
|
|
|
+ begin
|
|
|
+ FreeMatchData;
|
|
|
+ FreeCodeData;
|
|
|
+ if (rc=PCRE2_ERROR_NOMATCH) then
|
|
|
+ Exit(False)
|
|
|
+ else if (rc = 0) then
|
|
|
+ raise ERegularExpressionError.CreateFmt(SRegExMatchError,[SErrRegexOvectorTooSmall])
|
|
|
+ else
|
|
|
+ raise ERegularExpressionError.CreateFmt(SRegExMatchError,[GetPCREErrorMsg(rc)]);
|
|
|
+ end;
|
|
|
+ Result:=True;
|
|
|
+ FResultCount:=rc;
|
|
|
+ FResultVector:=pcre2_get_ovector_pointer(FMatchData);
|
|
|
+ {For i:=0 to FResultCount-1 do
|
|
|
+ Writeln(I,': ',FResultVector[2*I],' - ',FResultVector[2*I+1]);}
|
|
|
+ if (FResultVector[0]>FResultVector[1]) then
|
|
|
+ begin
|
|
|
+ Len:=integer(FResultVector[0]-FResultVector[1]);
|
|
|
+ S:=Copy(FSubject,FResultVector[1],Len);
|
|
|
+ FreeMatchData;
|
|
|
+ FreeCodeData;
|
|
|
+ raise ERegularExpressionError.CreateFmt(SRegExMatcStartAfterEnd,[S]);
|
|
|
+ end;
|
|
|
+ // Next should start after current
|
|
|
+ FStart:=FResultVector[1];
|
|
|
+ GetNamedGroupInfo;
|
|
|
+ if Assigned(OnMatch) then
|
|
|
+ OnMatch(Self);
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.MatchAgain: Boolean;
|
|
|
+
|
|
|
+var
|
|
|
+ StartChar,Opts : cuint32;
|
|
|
+begin
|
|
|
+ Result:=False;
|
|
|
+ Opts:=0;
|
|
|
+ // Special case, empty TREString.
|
|
|
+ if (FResultVector[0]=FResultVector[1]) then
|
|
|
+ begin
|
|
|
+ if (FResultVector[0]>=FSubjectLength) then
|
|
|
+ Exit;
|
|
|
+ Opts:=PCRE2_NOTEMPTY_ATSTART or PCRE2_ANCHORED;
|
|
|
+ end
|
|
|
+ else
|
|
|
+ begin
|
|
|
+ // Check whether start empty
|
|
|
+ Startchar:=pcre2_get_startchar(FMatchData);
|
|
|
+ if (FStart<=Startchar) then
|
|
|
+ begin
|
|
|
+ (* Reached end of subject. *)
|
|
|
+ if (startchar>=FSubjectLength) then
|
|
|
+ Exit;
|
|
|
+ (* Advance by one character. *)
|
|
|
+ FStart:=StartChar+1;
|
|
|
+ (* If UTF-8, it may be more than one code unit. *)
|
|
|
+ if FIsUtf then
|
|
|
+ begin
|
|
|
+ While (FStart<FSubjectLength) do
|
|
|
+ begin
|
|
|
+ if ((Ord(Subject[FStart+1]) and $c0)<>$80) then
|
|
|
+ Exit;
|
|
|
+ Inc(FStart);
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ Result:=DoMatch(Opts);
|
|
|
+ (*
|
|
|
+ This time, a result of NOMATCH isn't an error. If the value in 'options'
|
|
|
+ is zero, it just means we have found all possible matches, so the loop ends.
|
|
|
+ Otherwise, it means we have failed to find a non-empty-TREString match at a
|
|
|
+ point where there was a previous empty-TREString match. In this case, we do what
|
|
|
+ Perl does: advance the matching position by one character, and continue. We
|
|
|
+ do this by setting the 'end of previous match' offset, because that is picked
|
|
|
+ up at the top of the loop as the point at which to start again.
|
|
|
+
|
|
|
+ There are two complications: (a) When CRLF is a valid newline sequence, and
|
|
|
+ the current position is just before it, advance by an extra byte. (b)
|
|
|
+ Otherwise we must ensure that we skip an entire UTF character if we are in
|
|
|
+ UTF mode.
|
|
|
+ *)
|
|
|
+ While not Result do
|
|
|
+ begin
|
|
|
+ if Opts=0 then
|
|
|
+ Break;
|
|
|
+ FResultVector[1]:=FStart+1; (* Advance one code unit *)
|
|
|
+ if FCrLFIsNewLine and (* If CRLF is a newline & *)
|
|
|
+ (FStart<FSubjectLength-2) and (* we are at CRLF *)
|
|
|
+ (FSubject[FStart+1]=#13) and
|
|
|
+ (FSubject[Fstart+2]=#10) then
|
|
|
+ inc(FResultVector[1]) (* Advance by one more. *)
|
|
|
+ else if (FIsUtf) then (* Otherwise, ensure we advance a whole UTF-8 character. *)
|
|
|
+ begin
|
|
|
+ while (FResultVector[1]<FSubjectLength-1) do
|
|
|
+ begin
|
|
|
+ if ((Ord(subject[FResultVector[1]]) and $c0) <> $80) then
|
|
|
+ break;
|
|
|
+ inc(FResultVector[1]);
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ Result:=DoMatch(Opts);
|
|
|
+ end;
|
|
|
+end;
|
|
|
+
|
|
|
+
|
|
|
+function TPerlRegEx.Replace: TREString;
|
|
|
+
|
|
|
+var
|
|
|
+ NewSubject,Tmp : TREString;
|
|
|
+
|
|
|
+begin
|
|
|
+ CheckMatch;
|
|
|
+ Result:=ComputeReplacement;
|
|
|
+ if Assigned(OnReplace) then
|
|
|
+ OnReplace(Self, Result);
|
|
|
+ Tmp:=Result;
|
|
|
+ if FLastModifiedEnd=0 then
|
|
|
+ FLastModifiedEnd:=GetMatchedOffset-1;
|
|
|
+ NewSubject:=Copy(FModifiedSubject,1,FLastModifiedEnd)+Tmp;
|
|
|
+ FLastModifiedEnd:=Length(NewSubject)+1;
|
|
|
+ tmp:=GetSubjectRight;
|
|
|
+ FModifiedSubject:=NewSubject+tmp;
|
|
|
+ ClearStoredGroups;
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.ReplaceAll: Boolean;
|
|
|
+begin
|
|
|
+ Result:=Match;
|
|
|
+ if Not Result then
|
|
|
+ exit;
|
|
|
+ repeat
|
|
|
+ Replace;
|
|
|
+ until not MatchAgain;
|
|
|
+end;
|
|
|
+
|
|
|
+function IsAlphaAndUnderline(const C: Char): Boolean;
|
|
|
+
|
|
|
+Const
|
|
|
+ allowed = ['A'..'Z', 'a'..'z', '_'];
|
|
|
+
|
|
|
+begin
|
|
|
+ Result:=CharInSet(C,Allowed);
|
|
|
+end;
|
|
|
+
|
|
|
+function IsNumeric(const C: Char): Boolean;
|
|
|
+
|
|
|
+Const
|
|
|
+ allowed = ['0'..'9'];
|
|
|
+
|
|
|
+begin
|
|
|
+ Result:=CharInSet(C,Allowed);
|
|
|
+end;
|
|
|
+
|
|
|
+
|
|
|
+{ Return values:
|
|
|
+ >=0 : group number.
|
|
|
+ -1 : whole subject.
|
|
|
+ -2 : Left of match.
|
|
|
+ -3 : Right of match.
|
|
|
+ -99 : invalid.
|
|
|
+ On return, I is the index of the next character to process.
|
|
|
+}
|
|
|
+
|
|
|
+Function TPerlRegEx.GetBackRefIndex(const Ref : TREString; var I : Integer) : Integer;
|
|
|
+
|
|
|
+var
|
|
|
+ Len,P,N,Group : Integer;
|
|
|
+
|
|
|
+begin
|
|
|
+ Len:=Length(Ref);
|
|
|
+ Group:=-99;
|
|
|
+ Case Ref[I] of
|
|
|
+ '0'..'9':
|
|
|
+ begin
|
|
|
+ Group:=Ord(Ref[i])-Ord('0');
|
|
|
+ Inc(I);
|
|
|
+ // Only consume as much integers as there are groups.
|
|
|
+ // So if there are 15 groups then $16 -> $1 + literal 6.
|
|
|
+ While (I<=Len) and (Ref[i] in ['0'..'9']) do
|
|
|
+ begin
|
|
|
+ N:=(Group*10)+Ord(Ref[i])-Ord('0');
|
|
|
+ if N>GroupCount then
|
|
|
+ Break;
|
|
|
+ Group:=N;
|
|
|
+ Inc(I);
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ '{':
|
|
|
+ begin
|
|
|
+ Inc(I);
|
|
|
+ if (Ref[I] in ['0'..'9']) then
|
|
|
+ // \{123}
|
|
|
+ begin
|
|
|
+ Group:=0;
|
|
|
+ while (I<Len) and IsNumeric(Ref[I]) do
|
|
|
+ begin
|
|
|
+ Group:=(Group*10)+Ord(Ref[i])-Ord('0');
|
|
|
+ Inc(I);
|
|
|
+ end;
|
|
|
+ if (I>Len) or (Ref[I]<>'}') then
|
|
|
+ Group:=-99
|
|
|
+ else
|
|
|
+ Inc(I);
|
|
|
+ end
|
|
|
+ else
|
|
|
+ // \{named}
|
|
|
+ begin
|
|
|
+ P:=I;
|
|
|
+ while (I<Len) and IsAlphaAndUnderline(Ref[I]) do
|
|
|
+ Inc(I);
|
|
|
+ if (I>Len) or (Ref[I]<>'}') then
|
|
|
+ Group:=-99
|
|
|
+ else
|
|
|
+ begin
|
|
|
+ Group:=NamedGroup(Copy(Ref,P,I-P));
|
|
|
+ if Group=-1 then
|
|
|
+ group:=-99;
|
|
|
+ Inc(I);
|
|
|
+ end
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ '_': // Whole subject
|
|
|
+ begin
|
|
|
+ Group:=-1;
|
|
|
+ Inc(I);
|
|
|
+ end;
|
|
|
+ '&': // \& or $& (whole regex match)
|
|
|
+ begin
|
|
|
+ Group:=0;
|
|
|
+ Inc(I);
|
|
|
+ end;
|
|
|
+ '+': // Last group
|
|
|
+ begin
|
|
|
+ Group:=GroupCount;
|
|
|
+ Inc(I);
|
|
|
+ end;
|
|
|
+ '`': // Subject to left of match.
|
|
|
+ begin
|
|
|
+ Group:=-2;
|
|
|
+ inc(I);
|
|
|
+ end;
|
|
|
+ #39: // Subject to right of match.
|
|
|
+ begin
|
|
|
+ Group:=-3;
|
|
|
+ inc(I);
|
|
|
+ end
|
|
|
+ end;
|
|
|
+ Result:=Group;
|
|
|
+end;
|
|
|
+
|
|
|
+Class function TPerlRegEx.TransForm(aTransform : TTransformation; S : TREString): TREString;
|
|
|
+
|
|
|
+begin
|
|
|
+ Case aTransform of
|
|
|
+ tFirstCap : Result:=UpperCase(Copy(S,1,1))+LowerCase(Copy(S,2,Length(S)-1));
|
|
|
+ tInitialCap : Result:=InitialCaps(S);
|
|
|
+ tUpperCase : Result:=UpperCase(S);
|
|
|
+ tLowerCase : Result:=LowerCase(S);
|
|
|
+ else
|
|
|
+ Result:=S;
|
|
|
+ end;
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.ComputeReplacement: TREString;
|
|
|
+
|
|
|
+var
|
|
|
+ Res : TREString;
|
|
|
+ Len : Integer;
|
|
|
+
|
|
|
+ Procedure AddToResult(aStart,aNext : Integer); inline;
|
|
|
+
|
|
|
+ begin
|
|
|
+ Res:=Res+Copy(FReplacement,aStart,aNext-aStart);
|
|
|
+ end;
|
|
|
+
|
|
|
+ Procedure AddNamedGroup(const aName : TREString); inline;
|
|
|
+
|
|
|
+ begin
|
|
|
+ Res:=Res+NamedGroups[aName];
|
|
|
+ end;
|
|
|
+
|
|
|
+ Function AddBackRef(aTransform : TTransformation; I : Integer) : Integer;
|
|
|
+
|
|
|
+ var
|
|
|
+ P,N,Group : Integer;
|
|
|
+
|
|
|
+ begin
|
|
|
+ Group:=GetBackRefIndex(FReplacement,I);
|
|
|
+ Case Group of
|
|
|
+ -99 : ; // invalid
|
|
|
+ -1 : Res:=Res+TransForm(aTransform,FSubject);
|
|
|
+ -2 : Res:=Res+TransForm(aTransform,SubjectLeft);
|
|
|
+ -3 : Res:=Res+TransForm(aTransform,SubjectRight);
|
|
|
+ else
|
|
|
+ if Group<=GroupCount then
|
|
|
+ Res:=Res+TransForm(aTransform,Groups[Group]);
|
|
|
+ end;
|
|
|
+ Result:=I;
|
|
|
+ end;
|
|
|
+
|
|
|
+var
|
|
|
+ I, P, Last : Integer;
|
|
|
+ updatelast : boolean;
|
|
|
+
|
|
|
+begin
|
|
|
+ Len:=Length(FReplacement);
|
|
|
+ if Len=0 then
|
|
|
+ Exit('');
|
|
|
+ I:=1;
|
|
|
+ Last:=1;
|
|
|
+ while I<=Len do
|
|
|
+ begin
|
|
|
+ case FReplacement[I] of
|
|
|
+ '\':
|
|
|
+ begin
|
|
|
+ if (I=Len) then
|
|
|
+ raise ERegularExpressionError.CreateFmt(SRegExIndexOutOfBounds,[I]);
|
|
|
+ AddToResult(Last,I);
|
|
|
+ Inc(I);
|
|
|
+ UpdateLast:=True;
|
|
|
+ case FReplacement[I] of
|
|
|
+ '$', '\':
|
|
|
+ begin
|
|
|
+ Inc(I);
|
|
|
+ AddToResult(I-1,I);
|
|
|
+ end;
|
|
|
+ 'g':
|
|
|
+ begin
|
|
|
+ if (I+2<Len) and (FReplacement[I+1] = '<') then
|
|
|
+ begin
|
|
|
+ Inc(I,2); // First char
|
|
|
+ P:=I;
|
|
|
+ while (I<Len) and IsAlphaAndUnderline(FReplacement[I]) do
|
|
|
+ Inc(I);
|
|
|
+ // We should now be on closing >
|
|
|
+ if (I<=Len) and (FReplacement[I]='>') then
|
|
|
+ begin
|
|
|
+ AddNamedGroup(Copy(FReplaceMent,P,I-P));
|
|
|
+ Inc(I);
|
|
|
+ Last:=I;
|
|
|
+ end
|
|
|
+ else
|
|
|
+ begin
|
|
|
+ I:=I+2; // Skip everything.
|
|
|
+ UpdateLast:=False
|
|
|
+ end;
|
|
|
+ end
|
|
|
+ else
|
|
|
+ UpdateLast:=False;
|
|
|
+ end;
|
|
|
+ 'l','L' : I:=AddBackRef(tLowerCase,I);
|
|
|
+ 'u','U' : I:=AddBackRef(tLowerCase,I);
|
|
|
+ 'f','F' : I:=AddBackRef(tFirstCap,I);
|
|
|
+ 'i','I' : I:=AddBackRef(tInitialCap,I);
|
|
|
+ else
|
|
|
+ I:=AddBackRef(tNone,I);
|
|
|
+ end;
|
|
|
+ if UpdateLast then
|
|
|
+ Last:=I;
|
|
|
+ end;
|
|
|
+ '$':
|
|
|
+ begin
|
|
|
+ if I=Len then
|
|
|
+ raise ERegularExpressionError.CreateFmt(SRegExIndexOutOfBounds,[I]);
|
|
|
+ AddToResult(Last,I);
|
|
|
+ Inc(I);
|
|
|
+ if FReplacement[I]='$' then
|
|
|
+ begin
|
|
|
+ AddToResult(Last,I);
|
|
|
+ Inc(I);
|
|
|
+ end
|
|
|
+ else
|
|
|
+ I:=AddBackRef(tNone,I);
|
|
|
+ Last:=I;
|
|
|
+ end;
|
|
|
+ else // Case
|
|
|
+ Inc(I);
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ if I>Last then
|
|
|
+ AddToResult(Last,I);
|
|
|
+ Result:=Res;
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegEx.StoreGroups;
|
|
|
+
|
|
|
+var
|
|
|
+ I : Integer;
|
|
|
+
|
|
|
+begin
|
|
|
+ CheckMatch;
|
|
|
+ SetLength(FStoredGroups,GroupCount+1);
|
|
|
+ For I:=0 to GroupCount do
|
|
|
+ FStoredGroups[i]:=GetResultString(I);
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegEx.NamedGroup(const aName: TREString): Integer;
|
|
|
+
|
|
|
+var
|
|
|
+ Ptr : PCRE2_SPTR;
|
|
|
+ N,I : Integer;
|
|
|
+ tblName : TREString;
|
|
|
+
|
|
|
+begin
|
|
|
+ Ptr:=FNameTable;
|
|
|
+ for i:=0 to FNameCount-1 do
|
|
|
+ begin
|
|
|
+{$IFDEF USEWIDESTRING}
|
|
|
+ n:=ord(ptr[0]);
|
|
|
+ tblName:=GetStrLen((Ptr+1),FNameEntrySize-2);
|
|
|
+{$ELSE}
|
|
|
+ n:=(ord(ptr[0]) shl 8) or ord(ptr[1]);
|
|
|
+ tblName:=GetStrLen((Ptr+2),FNameEntrySize-3);
|
|
|
+{$ENDIF}
|
|
|
+ if SameText(TblName,aName) then
|
|
|
+ Exit(n);
|
|
|
+ Inc(Ptr,FNameEntrySize);
|
|
|
+ end ;
|
|
|
+ Result:=-1;
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegEx.Split(const aStrings: TStrings; aLimit: Integer);
|
|
|
+
|
|
|
+var
|
|
|
+ NewStart,LastEnd,Matches: Integer;
|
|
|
+
|
|
|
+begin
|
|
|
+ if Not Assigned(aStrings) then
|
|
|
+ raise ERegularExpressionError.Create(SRegExStringsRequired);
|
|
|
+ if (aLimit=1) or not Match then
|
|
|
+ begin
|
|
|
+ aStrings.Add(Subject);
|
|
|
+ Exit;
|
|
|
+ end;
|
|
|
+ LastEnd:=0; // Last match pos
|
|
|
+ Matches:=1;
|
|
|
+ repeat
|
|
|
+ NewStart:=FirstOffset; // Start of current match
|
|
|
+ aStrings.Add(Copy(Subject,LastEnd+1,NewStart-LastEnd)); // Copy everything since last match.
|
|
|
+ Inc(Matches);
|
|
|
+ LastEnd:=NewStart+MatchedLength; // update last match pos.
|
|
|
+ until ((aLimit>1) and (Matches>=aLimit)) or not MatchAgain;
|
|
|
+ aStrings.Add(TREString(Copy(FSubject,LastEnd+1,FSubjectLength -LastEnd)));
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegEx.SplitCapture(const aStrings: TStrings; aLimit: Integer);
|
|
|
+
|
|
|
+begin
|
|
|
+ SplitCapture(aStrings,aLimit,1);
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegEx.SplitCapture(const aStrings: TStrings; aLimit: Integer; aOffset: Integer);
|
|
|
+
|
|
|
+var
|
|
|
+ NewStart,LastEnd,Matches: Integer;
|
|
|
+ DoCopy : Boolean;
|
|
|
+
|
|
|
+begin
|
|
|
+ if Not Assigned(aStrings) then
|
|
|
+ raise ERegularExpressionError.Create(SRegExStringsRequired);
|
|
|
+ if (aLimit=1) or not Match then
|
|
|
+ begin
|
|
|
+ aStrings.Add(Subject);
|
|
|
+ Exit;
|
|
|
+ end;
|
|
|
+ Dec(aOffset);
|
|
|
+ if (aOffset>0) then
|
|
|
+ Dec(aLimit);
|
|
|
+ LastEnd:=0; // Last match pos
|
|
|
+ Matches:=1;
|
|
|
+ repeat
|
|
|
+ NewStart:=FirstOffset; // Start of current match
|
|
|
+ DoCopy:=(NewStart>aOffset);
|
|
|
+ if DoCopy then
|
|
|
+ begin
|
|
|
+ aStrings.Add(Copy(Subject,LastEnd+1,NewStart-LastEnd)); // Copy everything since last match.
|
|
|
+ if GroupCount > 0 then
|
|
|
+ aStrings.Add(Groups[GroupCount]);
|
|
|
+ Inc(Matches);
|
|
|
+ LastEnd:=NewStart+MatchedLength; // update last match pos.
|
|
|
+ end;
|
|
|
+ until ((aLimit>1) and (Matches>=aLimit)) or not MatchAgain;
|
|
|
+ aStrings.Add(TREString(Copy(FSubject,LastEnd+1,FSubjectLength-LastEnd)));
|
|
|
+end;
|
|
|
+
|
|
|
+{ TPerlRegExList }
|
|
|
+
|
|
|
+function TPerlRegExList.GetCount: Integer;
|
|
|
+begin
|
|
|
+ Result:=FList.Count;
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegExList.GetOwnsRegex: Boolean;
|
|
|
+begin
|
|
|
+ Result:=FList.OwnsObjects;
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegExList.GetRegEx(aIndex: Integer): TPerlRegEx;
|
|
|
+begin
|
|
|
+ Result:=TPerlRegEx(Flist[aIndex])
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegExList.GetStart: Integer;
|
|
|
+begin
|
|
|
+ Result:=FStart;
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegExList.GetStop: Integer;
|
|
|
+begin
|
|
|
+ Result:=FStop;
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegExList.GetSubject: TREString;
|
|
|
+begin
|
|
|
+ Result:=FSubject;
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegExList.SetRegEx(aIndex: Integer; aValue: TPerlRegEx);
|
|
|
+begin
|
|
|
+ FList[aIndex]:=aValue;
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegExList.SetStart(AValue: Integer);
|
|
|
+var
|
|
|
+ I : Integer;
|
|
|
+begin
|
|
|
+ if AValue=FStart then exit;
|
|
|
+ FStart:=aValue;
|
|
|
+ For I:=0 to Count-1 do
|
|
|
+ RegEx[I].Start:=aValue;
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegExList.SetStop(AValue: Integer);
|
|
|
+var
|
|
|
+ I : Integer;
|
|
|
+begin
|
|
|
+ if AValue=FStart then exit;
|
|
|
+ FStop:=aValue;
|
|
|
+ For I:=0 to Count-1 do
|
|
|
+ RegEx[I].Stop:=aValue;
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegExList.SetSubject(aValue: TREString);
|
|
|
+var
|
|
|
+ I: Integer;
|
|
|
+begin
|
|
|
+ if aValue=FSUbject then exit;
|
|
|
+ FSubject:=aValue;
|
|
|
+ for I:=Count-1 downto 0 do
|
|
|
+ RegEx[I].Subject:=Subject;
|
|
|
+ FMatch:=nil;
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegExList.UpdateRegEx(const aRegEx: TPerlRegEx);
|
|
|
+begin
|
|
|
+ aRegEx.Subject:=FSubject;
|
|
|
+ ARegEx.Start:=FStart;
|
|
|
+ ARegEx.Stop:=FStop;
|
|
|
+end;
|
|
|
+
|
|
|
+constructor TPerlRegExList.Create(OwnsRegex: Boolean);
|
|
|
+begin
|
|
|
+ FList:=TFPObjectList.Create(OwnsRegex);
|
|
|
+end;
|
|
|
+
|
|
|
+destructor TPerlRegExList.Destroy;
|
|
|
+begin
|
|
|
+ FreeAndNil(FList);
|
|
|
+ inherited Destroy;
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegExList.Add(const aRegEx: TPerlRegEx): Integer;
|
|
|
+begin
|
|
|
+ Result:=FList.Add(aRegEx);
|
|
|
+ UpdateRegEx(aRegEx);
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegExList.Clear;
|
|
|
+begin
|
|
|
+ FList.Clear;
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegExList.Delete(aIndex: Integer);
|
|
|
+begin
|
|
|
+ FList.Delete(aIndex);
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegExList.IndexOf(const aRegEx: TPerlRegEx): Integer;
|
|
|
+begin
|
|
|
+ Result:=FList.IndexOf(aRegex);
|
|
|
+end;
|
|
|
+
|
|
|
+procedure TPerlRegExList.Insert(aIndex: Integer; const aRegEx: TPerlRegEx);
|
|
|
+begin
|
|
|
+ FList.Insert(aIndex,aRegex);
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegExList.Match: Boolean;
|
|
|
+begin
|
|
|
+ SetStart(1);
|
|
|
+ FMatch:=nil;
|
|
|
+ Result:=MatchAgain;
|
|
|
+end;
|
|
|
+
|
|
|
+function TPerlRegExList.MatchAgain: Boolean;
|
|
|
+
|
|
|
+var
|
|
|
+ PRE : TPerlRegEx;
|
|
|
+ I,StartAt,Current: Integer;
|
|
|
+
|
|
|
+begin
|
|
|
+ // Determine start position
|
|
|
+ if not Assigned(FMatch) then
|
|
|
+ StartAt:=Start
|
|
|
+ else
|
|
|
+ With FMatch do
|
|
|
+ StartAt:=0; // MVC todo {InternalGetMatchedOffset+InternalGetMatchedLength};
|
|
|
+ FMatch:=nil;
|
|
|
+ Current:=-1;
|
|
|
+ // Check all regexes for new closest match.
|
|
|
+ I:=0;
|
|
|
+ While (I<Count) and (Current>StartAt) do
|
|
|
+ begin
|
|
|
+ PRE:=RegEx[I];
|
|
|
+ // Should we search this regex again ?
|
|
|
+ if (not PRE.FoundMatch) or (PRE.FirstOffset<StartAt) then
|
|
|
+ begin
|
|
|
+ PRE.Start:=StartAt;
|
|
|
+ PRE.MatchAgain;
|
|
|
+ end;
|
|
|
+ // New first position found ?
|
|
|
+ if PRE.FoundMatch and ((FMatch=Nil) or (PRE.FirstOffset<Current)) then
|
|
|
+ begin
|
|
|
+ Current:=Pre.FirstOffset;
|
|
|
+ FMatch:=PRE;
|
|
|
+ end;
|
|
|
+ Inc(I);
|
|
|
+ end;
|
|
|
+ Result:=Current<>-1;
|
|
|
+end;
|
|
|
+
|
|
|
+end.
|