Browse Source

* Add Delphi-compatible regular expressions based on libpcre

Michaël Van Canneyt 1 năm trước cách đây
mục cha
commit
8d2e2c6c21

+ 5 - 0
packages/vcl-compat/fpmake.pp

@@ -36,6 +36,7 @@ begin
     P.Dependencies.Add('fcl-json');
     P.Dependencies.Add('fcl-hash');
     P.Dependencies.Add('hash');
+    P.Dependencies.Add('libpcre');
     P.SourcePath.Add('src');
     P.IncludePath.Add('src');
 
@@ -61,6 +62,10 @@ begin
     T.Dependencies.AddUnit('system.json');
     T:=P.Targets.AddUnit('system.hash.pp');
     T.ResourceStrings := True;
+    T:=P.Targets.AddUnit('system.regularexpressionsconsts.pp');
+    T.ResourceStrings := True;
+    T:=P.Targets.AddUnit('system.regularexpressionscore.pp');
+    T.Dependencies.AddUnit('system.regularexpressionsconsts');
 
 
 {$ifndef ALLPACKAGES}

+ 22 - 0
packages/vcl-compat/src/system.regularexpressionsconsts.pp

@@ -0,0 +1,22 @@
+unit System.RegularExpressionsConsts;
+
+interface
+
+resourcestring
+  SRegExExpressionError   = 'Error in regular expression at offset %d: %s';
+  SRegExIndexOutOfBounds  = 'Index out of bounds (%d)';
+  SRegExInvalidGroupName  = 'Invalid group name (%s)';
+  SRegExInvalidIndexType  = 'Invalid index type';
+  SRegExMatchRequired     = 'Successful match required';
+  SRegExMatchError        = 'Error matching the regex: %s';
+  SRegExMissingExpression = 'A regular expression specified in RegEx is required';
+  SRegExStringsRequired   = 'Strings parameter cannot be nil';
+  SRegExStudyError        = 'Error studying the regex: %s';
+  SErrRegexOvectorTooSmall = 'output vector was not big enough for all the captured substrings';
+  SRegExMatcStartAfterEnd = '\K was used in an assertion to set the match start after its end.'+sLineBreak+
+                            'From end to start the match was: %s';
+
+implementation
+
+end.
+

+ 1382 - 0
packages/vcl-compat/src/system.regularexpressionscore.pp

@@ -0,0 +1,1382 @@
+{
+    This file is part of the Free Component Library (FCL)
+    Copyright (c) 2023 The free pascal team
+
+    Delphi-compatible Regular expressions unit.
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+{
+  Note that the original Delphi unit (by Jan Goyvaerts) uses PCRE1,
+  but this unit uses PCRE2. The string type depends on how the packages were
+  compiled.
+}
+
+unit System.RegularExpressionsCore;
+
+{$MODE OBJFPC}
+{$H+}
+
+interface
+
+{.$DEFINE USEWIDESTRING} // uncomment if you want to force widestring...
+
+// We cannot detect the char size before the uses clause is parsed, it will return 1, the compiler default.
+// So we need a define here, maybe a compiler switch is needed to set the default size (-Sw ?) which would allow to set the default type.
+// The detection here is based on the assumption that the dotted units use widestring...
+{$IFDEF FPC_DOTTEDUNITS}
+{$DEFINE USEWIDESTRING}
+{$ENDIF}
+
+
+uses
+{$IFDEF FPC_DOTTEDUNITS}
+  System.SysUtils, System.Classes, System.Contnrs, {$IFNDEF USEWIDESTRING} Api.PCRE2_8 {$ELSE} Api.PCRE2_16 {$ENDIF}, System.CTypes, System.RegularExpressionsConsts;
+{$ELSE}
+  SysUtils, Classes, Contnrs, {$IFNDEF USEWIDESTRING} libpcre2_8 {$ELSE} libpcre2_16 {$ENDIF}, ctypes, System.RegularExpressionsConsts;
+{$ENDIF}
+
+const
+  MAX_SUBEXPRESSIONS = 99;
+
+type
+  {$IFDEF USEWIDESTRING}
+  TREString = UnicodeString;
+  {$ElSE}
+  TREString = AnsiString;
+  {$ENDIF}
+
+  TPerlRegExOption = (preCaseLess,preMultiLine,preSingleLine,preExtended,preAnchored,preUnGreedy,preNoAutoCapture,
+                      preAllowEmptyClass, preAltBSUX, preAltCircumFlex, preAltVerbNames,
+                      preDollarEndOnly, preDupNames, preEndAnchored, preFirstLine, preLiteral, preMatchInvalidUTF,
+                      preMatchUnsetBackRef, preNeverBackslashC, preNoAutoPossess, preNoDotStarAnchor, preNoStartOptimize,
+                      preNoUTFCheck, preUseOffsetLimit);
+
+  TPerlRegExOptions = set of TPerlRegExOption;
+
+  TPerlRegExStateItem = (preNotBOL,preNotEOL,preNotEmpty);
+  TPerlRegExState = set of TPerlRegExStateItem;
+
+  TPerlRegExReplaceEvent = procedure(Sender: TObject; var ReplaceWith: TREString) of object;
+
+  { TPerlRegEx }
+
+  TPerlRegEx = class
+  Private
+    Type
+      TTransformation = (tNone,tLowerCase,tUpperCase,tFirstCap,tInitialCap);
+
+    class function TransForm(aTransform: TTransformation; S: TREString): TREString;
+  private
+  {$IFDEF USEWIDESTRING}
+    FCode : Ppcre2_code_16;
+  {$ELSE}
+    FCode : Ppcre2_code_8;
+  {$ENDIF}
+    FOnMatch: TNotifyEvent;
+    FOnReplace: TPerlRegExReplaceEvent;
+    FOptions: TPerlRegExOptions;
+    FRegEx: TREString;
+    FState: TPerlRegExState;
+    FStart,
+    FStop: Integer;
+    FStudied: Boolean;
+    FResultVector : Psize_t;
+    FResultCount : Cardinal;
+    FMatchData : ppcre2_match_data;
+    FModifiedSubject,
+    FSubject: TREString;
+    FSubjectLength : cuint32;
+    FNameCount : cuint32;
+    FNameTable : PCRE2_SPTR;
+    FNameEntrySize : cuint32;
+    FLastModifiedEnd: SizeInt;
+    FReplacement : TREString;
+    FStoredGroups: array of TREString;
+    FCrLFIsNewLine,
+    FIsUtf : Boolean;
+    Procedure CheckMatch; inline;
+    function DoMatch(Opts: CUInt32): Boolean;
+    function GetBackRefIndex(const Ref: TREString; var I: Integer): Integer;
+    function GetCompiled: Boolean;
+    function GetFoundMatch: Boolean; inline;
+    function GetGroupCount: Integer;
+    function GetGroupLengths(aIndex: Integer): Integer;
+    function GetGroupOffsets(aIndex: Integer): Integer;
+    function GetGroups(aIndex: Integer): TREString;
+    function GetMatchedLength: Integer;
+    function GetMatchedOffset: Integer;
+    function GetMatchedText: TREString;
+    function GetModifiedSubject: TREString;
+    function GetNamedGroup(aName : TREString): TREString;
+    procedure GetNamedGroupInfo;
+    function GetPCREErrorMsg(ErrorNr: Integer): TREString;
+    function GetResultString(aIndex: Integer): TREString;
+    function GetStart: Integer;
+    function GetStop: Integer;
+    function GetSubject: TREString;
+    function GetSubjectLeft: TREString;
+    function GetSubjectRight: TREString;
+    function MakeOptions(aOptions: TPerlRegExOptions): Integer;
+    procedure SetOptions(aValue: TPerlRegExOptions);
+    procedure SetRegEx(aValue: TREString);
+    procedure SetReplacement(aValue: TREString);
+    procedure SetStart(aValue: Integer);
+    procedure SetStop(aValue: Integer);
+    procedure SetSubject(aValue: TREString);
+  protected
+    procedure FreeCodeData;
+    procedure FreeMatchData;
+    procedure CleanUp; virtual;
+    procedure ClearStoredGroups;
+    function FirstOffset : Cardinal;
+    function FirstLength : Cardinal;
+  public
+    constructor Create;
+    destructor Destroy; override;
+    // Use this to escape special characters.
+    class function EscapeRegExChars(const aString: TREString): TREString;
+    // Compile the regex.
+    procedure Compile;
+    // Study regex (may result in faster execution);
+    procedure Study;
+    // Try to match, starting at beginning. Returns true if a match was found.
+    function Match: Boolean;
+    // Try to match again, starting previous match end. Returns true if a new match was found.
+    function MatchAgain: Boolean;
+    // Replace current match in Subject with ComputeReplacement. Returns computed replacement
+    function Replace: TREString;
+    // Replace all matches in Subject with their ComputeReplacement. Returns true if a match was found.
+    function ReplaceAll: Boolean;
+    // Compute replacement text.
+    function ComputeReplacement: TREString;
+    // Store groups for faster access.
+    procedure StoreGroups;
+    // Index in groups of name.
+    function NamedGroup(const aName: TREString): Integer;
+    // Split subject TREString based on regex. aStrings will contain everything outside the matches.
+    procedure Split(const aStrings: TStrings; aLimit: Integer);
+    // Split subject TREString based on regex, but include matches in result.
+    procedure SplitCapture(const aStrings: TStrings; aLimit: Integer); overload;
+    // Split subject TREString based on regex, but include matches in result.
+    // if aoffset is > 1 then everything till offset is put in the first TREString.
+    procedure SplitCapture(const aStrings: TStrings; aLimit: Integer; aOffset : Integer); overload;
+    // Was the regex compiled ?
+    property Compiled: Boolean read GetCompiled;
+    // Match found ?
+    property FoundMatch: Boolean read GetFoundMatch;
+    // Did study ?
+    property Studied: Boolean read FStudied;
+    // Fast access, group 0.
+    property MatchedText: TREString read GetMatchedText;
+    property MatchedLength: Integer read GetMatchedLength;
+    property MatchedOffset: Integer read GetMatchedOffset;
+    // Minimum search position, 1-based.
+    property Start: Integer read GetStart write SetStart;
+    // Maximum search position, 1-based.
+    property Stop: Integer read GetStop write SetStop;
+    property State: TPerlRegExState read FState write FState;
+    // Group count.
+    property GroupCount: Integer read GetGroupCount;
+    // Group Texts. Index 0 - GroupCount. 0 is whole matched text. on original search text.
+    property Groups[aIndex: Integer]: TREString read GetGroups;
+    // Group lengths & Offsets. Index 0 - GroupCount. 0 is whole matched text, on original search text.
+    property GroupLengths[aIndex: Integer]: Integer read GetGroupLengths;
+    property GroupOffsets[aIndex: Integer]: Integer read GetGroupOffsets;
+    // Named access to groups.
+    property NamedGroups[aName : TREString] : TREString Read GetNamedGroup;
+    // Subject TREString. Will be modified by replace !
+    property Subject: TREString read GetModifiedSubject write SetSubject;
+    // Original subject TREString. Not modified by replace !
+    property OriginalSubject: TREString read FSubject write SetSubject;
+    // Left of original subject.
+    property SubjectLeft: TREString read GetSubjectLeft;
+    // Right of original subject.
+    property SubjectRight: TREString read GetSubjectRight;
+  public
+    // Set options.
+    property Options: TPerlRegExOptions read FOptions write SetOptions;
+    // The regular expression
+    property RegEx: TREString read FRegEx write SetRegEx;
+    // The replacement expression.
+    property Replacement: TREString read FReplacement write SetReplacement;
+    // Called on every match.
+    property OnMatch: TNotifyEvent read FOnMatch write FOnMatch;
+    // Set this to modify the computed replacement text.
+    property OnReplace: TPerlRegExReplaceEvent read FOnReplace write FOnReplace;
+  end;
+
+                                                                             
+  TRegExStudyOption = (preJIT, preJITPartialHard, preJITPartialSoft);
+  TRegExStudyOptions = set of TRegExStudyOption;
+
+  { TPerlRegExList }
+
+  TPerlRegExList = class
+  private
+    FMatch: TPerlRegEx;
+    FList : TFPObjectList;
+    FStart : Integer;
+    FStop : Integer;
+    FSubject : TREString;
+    function GetCount: Integer;
+    function GetOwnsRegex: Boolean;
+    function GetRegEx(aIndex: Integer): TPerlRegEx;
+    function GetStart: Integer;
+    function GetStop: Integer;
+    function GetSubject: TREString;
+    procedure SetRegEx(aIndex: Integer; aValue: TPerlRegEx);
+    procedure SetStart(aValue: Integer);
+    procedure SetStop(aValue: Integer);
+    procedure SetSubject(aValue: TREString);
+  protected
+    procedure UpdateRegEx(const aRegEx: TPerlRegEx);
+  public
+    constructor Create(OwnsRegex : Boolean);
+    destructor Destroy; override;
+  public
+    function Add(const aRegEx: TPerlRegEx): Integer;
+    procedure Clear;
+    procedure Delete(aIndex: Integer);
+    function IndexOf(const aRegEx: TPerlRegEx): Integer;
+    procedure Insert(aIndex: Integer; const aRegEx: TPerlRegEx);
+  public
+    function Match: Boolean;
+    function MatchAgain: Boolean;
+    property RegEx[aIndex: Integer]: TPerlRegEx read GetRegEx write SetRegEx;
+    property Count: Integer read GetCount;
+    property Subject: TREString read GetSubject write SetSubject;
+    property Start: Integer read GetStart write SetStart;
+    property Stop: Integer read GetStop write SetStop;
+    property MatchedRegEx: TPerlRegEx read FMatch;
+    Property OwnsRegex : Boolean Read GetOwnsRegex;
+  end;
+
+  ERegularExpressionError = class(Exception);
+
+// Todo: move to strutils ?
+Function InitialCaps(const S : TREString) : TREString;
+
+implementation
+
+{$IFNDEF USEWIDESTRING}
+function GetStrLen(p : PAnsiChar; len : Integer) : AnsiString;
+
+var
+  L : Integer;
+
+begin
+  Result:='';
+  L:=StrLen(P);
+  if L>Len then
+    L:=Len;
+  SetLength(Result,L);
+  if L>0 then
+    Move(P^,Result[1],L);
+end;
+{$ELSE}
+function GetStrLen(p : PWideChar; len : Integer) : UnicodeString;
+
+var
+  L : Integer;
+
+begin
+  Result:='';
+  L:=StrLen(P);
+  if L>Len then
+    L:=Len;
+  SetLength(Result,L);
+  if Len>0 then
+    Move(P^,Result[1],L*2);
+end;
+{$ENDIF}
+
+Function InitialCaps(const S : TREString) : TREString;
+
+const
+  NonWord = [#0..'&', '(', '*', '+', ',', '-', '.', '?', '<', '[', '{', #$B7];
+
+var
+  L : TREString;
+  Len,Last,I : Integer;
+  Upper : Boolean;
+
+begin
+  L:=LowerCase(S);
+  Len:=Length(L);
+  Last:=1;
+  I:=1;
+  Upper:=True;
+  Result:='';
+  While I<=Len do
+    begin
+    if L[i] in NonWord then
+      Upper:=True
+    else if Upper then
+      begin
+      if I>Last then
+        Result:=Result+Copy(L,Last,I-Last);
+      Result:=Result+UpperCase(L[i]);
+      inc(I);
+      Last:=I;
+      Upper:=False;
+      end;
+    Inc(i);
+    end;
+  Result:=Result+Copy(L,Last,I-Last);
+end;
+
+
+{ TPerlRegEx }
+
+
+function TPerlRegEx.GetFoundMatch: Boolean;
+begin
+  Result:=FResultCount>0;
+end;
+
+function TPerlRegEx.GetCompiled: Boolean;
+begin
+  Result:=Assigned(FCode);
+end;
+
+procedure TPerlRegEx.CheckMatch;
+begin
+  if not FoundMatch then
+    raise ERegularExpressionError.Create(SRegExMatchRequired);
+end;
+
+function TPerlRegEx.GetGroupCount: Integer;
+begin
+  CheckMatch;
+  Result:=FResultCount-1;
+end;
+
+function TPerlRegEx.GetGroupLengths(aIndex: Integer): Integer;
+
+begin
+  CheckMatch;
+  Result:=FResultVector[2*aIndex+1]-FResultVector[2*aIndex];
+end;
+
+function TPerlRegEx.GetGroupOffsets(aIndex: Integer): Integer;
+begin
+  CheckMatch;
+  Result:=FResultVector[2*aIndex]+1;
+end;
+
+function TPerlRegEx.GetResultString(aIndex: Integer): TREString;
+
+var
+  astart,aLength : Ptrint;
+
+begin
+  // Writeln('AIndex ',aIndex,' ',FResultCount);
+  aStart:=FResultVector[2*aIndex];
+  aLength:=FResultVector[2*aIndex+1]-aStart;
+  inc(aStart); // 1-based
+  Result:=Copy(FSubject,AStart,aLength);
+end;
+
+function TPerlRegEx.GetGroups(aIndex: Integer): TREString;
+
+begin
+  CheckMatch;
+  if Length(FStoredGroups)>0 then
+    Result:=FStoredGroups[aIndex]
+  else
+    Result:=GetResultString(aIndex);
+end;
+
+function TPerlRegEx.GetMatchedLength: Integer;
+
+begin
+  Result:=GetGroupLengths(0)
+end;
+
+function TPerlRegEx.GetMatchedOffset: Integer;
+begin
+  Result:=GetGroupOffsets(0);
+end;
+
+function TPerlRegEx.GetMatchedText: TREString;
+begin
+  Result:=GetResultString(0)
+end;
+
+function TPerlRegEx.GetModifiedSubject: TREString;
+begin
+  Result:=FModifiedSubject;
+end;
+
+function TPerlRegEx.GetNamedGroup(aName : TREString): TREString;
+
+var
+  Idx : integer;
+
+begin
+  Result:='';
+  Idx:=NamedGroup(aName);
+  if Idx<>-1 then
+    Result:=Groups[Idx];
+end;
+
+function TPerlRegEx.GetStart: Integer;
+begin
+  Result:=FStart+1;
+end;
+
+function TPerlRegEx.GetStop: Integer;
+begin
+  Result:=FStop+1;
+end;
+
+function TPerlRegEx.GetSubject: TREString;
+begin
+  Result:=FSubject;
+end;
+
+function TPerlRegEx.GetSubjectLeft: TREString;
+begin
+  // Resultvector is 0 based
+  Result:=Copy(FSubject,1,FResultVector[0]);
+end;
+
+function TPerlRegEx.GetSubjectRight: TREString;
+var
+  SPos : Integer;
+begin
+  SPos:=FResultVector[1]; // 0-based
+  Result:=Copy(FSubject,SPos+1,FSubjectLength-SPos);
+end;
+
+procedure TPerlRegEx.SetOptions(aValue: TPerlRegExOptions);
+begin
+  if FOptions=AValue then Exit;
+  FOptions:=AValue;
+  CleanUp; // Need to reset...
+end;
+
+procedure TPerlRegEx.SetRegEx(aValue: TREString);
+begin
+  if FRegEx=AValue then Exit;
+  FRegEx:=aValue;
+end;
+
+procedure TPerlRegEx.SetReplacement(aValue: TREString);
+begin
+  FReplacement:=AValue;
+end;
+
+procedure TPerlRegEx.SetStart(aValue: Integer);
+begin
+  FStart:=aValue-1;
+end;
+
+procedure TPerlRegEx.SetStop(aValue: Integer);
+begin
+  if FStop=aValue-1 then Exit;
+  FStop:=aValue-1;
+end;
+
+procedure TPerlRegEx.SetSubject(aValue: TREString);
+begin
+  FSubject:=aValue;
+  FSubjectLength:=Length(FSubject);
+  FModifiedSubject:=aValue;
+  CleanUp;
+end;
+
+procedure TPerlRegEx.CleanUp;
+begin
+  FreeMatchData;
+  FreeCodeData;
+  ClearStoredGroups;
+  FResultCount:=0;
+  FResultVector:=Nil;
+  FStart:=0;
+  FStop:=Length(FSubject);
+  FLastModifiedEnd:=0;
+end;
+
+procedure TPerlRegEx.ClearStoredGroups;
+begin
+  SetLength(FStoredGroups,0);
+end;
+
+function TPerlRegEx.FirstOffset: Cardinal;
+begin
+  Result:=FResultVector[0];
+end;
+
+function TPerlRegEx.FirstLength: Cardinal;
+begin
+  Result:=FResultVector[1]-FResultVector[0];
+end;
+
+constructor TPerlRegEx.Create;
+begin
+  if not libpcre28loaded then
+    Loadlibpcre28;
+end;
+
+destructor TPerlRegEx.Destroy;
+begin
+  inherited Destroy;
+end;
+
+class function TPerlRegEx.EscapeRegExChars(const aString: TREString): TREString;
+
+Const
+  NeedEscape = ['\','[',']','^','$','.','|','?','*','+','-','(',')','{','}','&','<','>'];
+
+var
+  I : Integer;
+  PSrc,PDest,PStart : PChar;
+
+begin
+  Result:='';
+  SetLength(Result,2*Length(aString));
+  PSrc:=PChar(aString);
+  PDest:=PChar(Result);
+  PStart:=PDest;
+  for I:=1 to Length(aString) do
+    begin
+    if PSrc^=#0 then
+      begin
+      PDest^:='\';
+      Inc(PDest);
+      PDest^:='0';
+      end
+    else if CharInSet(PSrc^,NeedEscape) then
+      begin
+      PDest^:='\';
+      Inc(PDest);
+      PDest^:=PSrc^;
+      end
+    else
+      PDest^:=PSrc^;
+    Inc(PSrc);
+    Inc(PDest);
+    end;
+  SetLength(Result,(PDest-PStart));
+end;
+
+function TPerlRegEx.MakeOptions(aOptions: TPerlRegExOptions): Integer;
+
+  Procedure AddOption(aOpt : TPerlRegExOption; aValue : cuint32);
+
+  begin
+    if aOpt in AOptions then
+      Result:=Result or aValue;
+  end;
+
+begin
+  Result:=PCRE2_NEWLINE_ANY or PCRE2_UTF;
+  AddOption(preCaseLess,PCRE2_CASELESS);
+  AddOption(preMultiLine,PCRE2_MULTILINE);
+  AddOption(preSingleLine,PCRE2_DOTALL);
+  AddOption(preExtended,PCRE2_EXTENDED);
+  AddOption(preAnchored,PCRE2_ANCHORED);
+  AddOption(preUnGreedy,PCRE2_UNGREEDY);
+  AddOption(preNoAutoCapture,PCRE2_NO_AUTO_CAPTURE);
+  AddOption(preAllowEmptyClass,PCRE2_ALLOW_EMPTY_CLASS);
+  AddOption(preAltBSUX,PCRE2_ALT_BSUX);
+  AddOption(preAltCircumFlex,PCRE2_ALT_CIRCUMFLEX);
+  AddOption(preAltVerbNames,PCRE2_ALT_VERBNAMES);
+  AddOption(preDollarEndOnly,PCRE2_DOLLAR_ENDONLY);
+  AddOption(preDupNames,PCRE2_DUPNAMES);
+  AddOption(preEndAnchored,PCRE2_ENDANCHORED);
+  AddOption(preFirstLine,PCRE2_FIRSTLINE);
+  AddOption(preLiteral,PCRE2_LITERAL);
+  AddOption(preMatchInvalidUTF,PCRE2_MATCH_INVALID_UTF);
+  AddOption(preMatchUnsetBackRef,PCRE2_MATCH_UNSET_BACKREF);
+  AddOption(preNeverBackslashC,PCRE2_NEVER_BACKSLASH_C);
+  AddOption(preNoAutoPossess,PCRE2_NO_AUTO_POSSESS);
+  AddOption(preNoDotStarAnchor,PCRE2_NO_DOTSTAR_ANCHOR);
+  AddOption(preNoStartOptimize,PCRE2_NO_START_OPTIMIZE);
+  // maybe we should enable by default ?
+  AddOption(preNoUTFCheck,PCRE2_NO_UTF_CHECK);
+  AddOption(preUseOffsetLimit,PCRE2_USE_OFFSET_LIMIT);
+//  AddOption(preUTF,PCRE2_UTF);
+
+end;
+
+function TPerlRegEx.GetPCREErrorMsg(ErrorNr: Integer): TREString;
+
+var
+  Buffer : Array[0..255] of ansichar;
+
+begin
+  pcre2_get_error_message(ErrorNr,@Buffer,SizeOf(Buffer));
+  Result:=strpas(@Buffer);
+end;
+
+procedure TPerlRegEx.Compile;
+
+var
+  ErrorNr: Integer;
+  ErrorPos: Integer;
+
+begin
+  if (FRegEx='') then
+    raise ERegularExpressionError.CreateRes(@SRegExMissingExpression);
+  CleanUp;
+  FCode:=pcre2_compile(TPCRE2_SPTR8(FRegEx),Length(FRegEx),MakeOptions(FOptions),@ErrorNr,@ErrorPos,Nil);
+  if (FCode=nil) then
+    raise ERegularExpressionError.CreateFmt(SRegExExpressionError,[ErrorPos+1,GetPCREErrorMsg(ErrorNr)]);
+  FMatchData:=pcre2_match_data_create_from_pattern(FCode,Nil);
+
+end;
+
+procedure TPerlRegEx.Study;
+begin
+
+end;
+
+procedure TPerlRegEx.FreeMatchData;
+
+var
+  Data : ppcre2_match_data;
+
+begin
+  if FMatchData=Nil then exit;
+  Data:=FMatchData;
+  FMatchData:=Nil;
+  pcre2_match_data_free(Data);
+  FResultVector:=Nil;
+end;
+
+procedure TPerlRegEx.FreeCodeData;
+
+var
+  {$IFDEF USEWIDESTRING}
+  Data : Ppcre2_code_16;
+  {$ELSE}
+  Data : Ppcre2_code_8;
+  {$ENDIF}
+
+begin
+  if (FCode=Nil) then
+    exit;
+  Data:=FCode;
+  FCode:=Nil;
+  pcre2_code_free(Data);
+end;
+
+procedure TPerlRegEx.GetNamedGroupInfo;
+
+
+begin
+  FNameEntrySize:=0;
+  FNameTable:=Nil;
+  pcre2_pattern_info(
+    FCode,                 (* the compiled pattern *)
+    PCRE2_INFO_NAMECOUNT,  (* get the number of named substrings *)
+    @FNameCount);          (* where to put the answer *)
+  if (FNameCount = 0) then
+    Exit;
+  pcre2_pattern_info(
+    FCode,                    (* the compiled pattern *)
+    PCRE2_INFO_NAMETABLE,     (* address of the table *)
+    @FNameTable);             (* where to put the answer *)
+  pcre2_pattern_info(
+    FCODE,                       (* the compiled pattern *)
+    PCRE2_INFO_NAMEENTRYSIZE, (* size of each entry in the table *)
+    @FNameEntrySize);
+end;
+
+function TPerlRegEx.Match: Boolean;
+
+var
+  newline,option_bits : cuint32;
+
+begin
+  Result:=False;
+  ClearStoredGroups;
+  if not Compiled then
+    Compile;
+  FMatchData:=pcre2_match_data_create_from_pattern(FCode,Nil);
+  Result:=DoMatch(0);
+  if Result then
+    begin
+    pcre2_pattern_info(FCode,PCRE2_INFO_ALLOPTIONS, @option_bits);
+    FIsUtf:=((option_bits and PCRE2_UTF) <> 0);
+    pcre2_pattern_info(FCode,PCRE2_INFO_NEWLINE,@newline);
+    FCrLFIsNewLine:= (newline=PCRE2_NEWLINE_ANY) or
+                     (newline=PCRE2_NEWLINE_CRLF) or
+                     (newline=PCRE2_NEWLINE_ANYCRLF);
+    end;
+end;
+
+
+function TPerlRegEx.DoMatch(Opts : CUInt32): Boolean;
+
+var
+  len,rc : cInt;
+  S : TREString;
+
+begin
+  Result:=False;
+{$IF SIZEOF(CHAR)=2}
+  rc:=pcre2_match_w(
+{$ELSE}
+  rc:=pcre2_match(
+{$ENDIF}
+    FCode,                   (* the compiled pattern *)
+    PChar(FSubject),         (* the subject TREString *)
+    FSubjectLength,         (* the length of the subject *)
+    FStart,                  (* start at offset 0 in the subject *)
+    Opts,                    (* default options *)
+    FMatchData,              (* block for storing the result *)
+    Nil);
+  if (rc <= 0) then
+    begin
+    FreeMatchData;
+    FreeCodeData;
+    if (rc=PCRE2_ERROR_NOMATCH) then
+      Exit(False)
+    else if (rc = 0) then
+      raise ERegularExpressionError.CreateFmt(SRegExMatchError,[SErrRegexOvectorTooSmall])
+    else
+      raise ERegularExpressionError.CreateFmt(SRegExMatchError,[GetPCREErrorMsg(rc)]);
+    end;
+  Result:=True;
+  FResultCount:=rc;
+  FResultVector:=pcre2_get_ovector_pointer(FMatchData);
+  {For i:=0 to FResultCount-1 do
+    Writeln(I,': ',FResultVector[2*I],' - ',FResultVector[2*I+1]);}
+  if (FResultVector[0]>FResultVector[1]) then
+    begin
+    Len:=integer(FResultVector[0]-FResultVector[1]);
+    S:=Copy(FSubject,FResultVector[1],Len);
+    FreeMatchData;
+    FreeCodeData;
+    raise ERegularExpressionError.CreateFmt(SRegExMatcStartAfterEnd,[S]);
+    end;
+  // Next should start after current
+  FStart:=FResultVector[1];
+  GetNamedGroupInfo;
+  if Assigned(OnMatch) then
+   OnMatch(Self);
+end;
+
+function TPerlRegEx.MatchAgain: Boolean;
+
+var
+  StartChar,Opts : cuint32;
+begin
+  Result:=False;
+  Opts:=0;
+  // Special case, empty TREString.
+  if (FResultVector[0]=FResultVector[1]) then
+    begin
+    if (FResultVector[0]>=FSubjectLength) then
+      Exit;
+    Opts:=PCRE2_NOTEMPTY_ATSTART or PCRE2_ANCHORED;
+    end
+  else
+    begin
+    // Check whether start empty
+    Startchar:=pcre2_get_startchar(FMatchData);
+    if (FStart<=Startchar) then
+      begin
+      (* Reached end of subject.   *)
+      if (startchar>=FSubjectLength) then
+        Exit;
+      (* Advance by one character. *)
+      FStart:=StartChar+1;
+      (* If UTF-8, it may be more than one code unit. *)
+      if FIsUtf then
+        begin
+        While (FStart<FSubjectLength) do
+          begin
+          if ((Ord(Subject[FStart+1]) and $c0)<>$80) then
+            Exit;
+          Inc(FStart);
+          end;
+        end;
+      end;
+    end;
+  Result:=DoMatch(Opts);
+  (*
+    This time, a result of NOMATCH isn't an error. If the value in 'options'
+    is zero, it just means we have found all possible matches, so the loop ends.
+    Otherwise, it means we have failed to find a non-empty-TREString match at a
+    point where there was a previous empty-TREString match. In this case, we do what
+    Perl does: advance the matching position by one character, and continue. We
+    do this by setting the 'end of previous match' offset, because that is picked
+    up at the top of the loop as the point at which to start again.
+
+    There are two complications: (a) When CRLF is a valid newline sequence, and
+    the current position is just before it, advance by an extra byte. (b)
+    Otherwise we must ensure that we skip an entire UTF character if we are in
+    UTF mode.
+  *)
+  While not Result do
+    begin
+    if Opts=0 then
+      Break;
+    FResultVector[1]:=FStart+1;             (* Advance one code unit *)
+    if FCrLFIsNewLine and                   (* If CRLF is a newline & *)
+       (FStart<FSubjectLength-2) and        (* we are at CRLF *)
+       (FSubject[FStart+1]=#13) and
+       (FSubject[Fstart+2]=#10) then
+       inc(FResultVector[1])                (* Advance by one more. *)
+    else if (FIsUtf) then                     (* Otherwise, ensure we advance a whole UTF-8 character. *)
+      begin
+      while (FResultVector[1]<FSubjectLength-1) do
+        begin
+        if ((Ord(subject[FResultVector[1]]) and $c0) <> $80) then
+          break;
+        inc(FResultVector[1]);
+        end;
+      end;
+    Result:=DoMatch(Opts);
+    end;
+end;
+
+
+function TPerlRegEx.Replace: TREString;
+
+var
+  NewSubject,Tmp : TREString;
+
+begin
+  CheckMatch;
+  Result:=ComputeReplacement;
+  if Assigned(OnReplace) then
+    OnReplace(Self, Result);
+  Tmp:=Result;
+  if FLastModifiedEnd=0 then
+    FLastModifiedEnd:=GetMatchedOffset-1;
+  NewSubject:=Copy(FModifiedSubject,1,FLastModifiedEnd)+Tmp;
+  FLastModifiedEnd:=Length(NewSubject)+1;
+  tmp:=GetSubjectRight;
+  FModifiedSubject:=NewSubject+tmp;
+  ClearStoredGroups;
+end;
+
+function TPerlRegEx.ReplaceAll: Boolean;
+begin
+  Result:=Match;
+  if Not Result then
+    exit;
+  repeat
+    Replace;
+  until not MatchAgain;
+end;
+
+function IsAlphaAndUnderline(const C: Char): Boolean;
+
+Const
+  allowed = ['A'..'Z', 'a'..'z', '_'];
+
+begin
+  Result:=CharInSet(C,Allowed);
+end;
+
+function IsNumeric(const C: Char): Boolean;
+
+Const
+  allowed = ['0'..'9'];
+
+begin
+  Result:=CharInSet(C,Allowed);
+end;
+
+
+{ Return values:
+  >=0 : group number.
+  -1 : whole subject.
+  -2 : Left of match.
+  -3 : Right of match.
+  -99  : invalid.
+  On return, I is the index of the next character to process.
+}
+
+Function TPerlRegEx.GetBackRefIndex(const Ref : TREString; var I : Integer) : Integer;
+
+var
+  Len,P,N,Group : Integer;
+
+begin
+  Len:=Length(Ref);
+  Group:=-99;
+  Case Ref[I] of
+  '0'..'9':
+    begin
+    Group:=Ord(Ref[i])-Ord('0');
+    Inc(I);
+    // Only consume as much integers as there are groups.
+    // So if there are 15 groups then $16 -> $1 + literal 6.
+    While (I<=Len) and (Ref[i] in ['0'..'9']) do
+      begin
+      N:=(Group*10)+Ord(Ref[i])-Ord('0');
+      if N>GroupCount then
+        Break;
+      Group:=N;
+      Inc(I);
+      end;
+    end;
+  '{':
+    begin
+    Inc(I);
+    if (Ref[I] in ['0'..'9']) then
+      // \{123}
+      begin
+      Group:=0;
+      while (I<Len) and IsNumeric(Ref[I]) do
+        begin
+        Group:=(Group*10)+Ord(Ref[i])-Ord('0');
+        Inc(I);
+        end;
+      if (I>Len) or (Ref[I]<>'}') then
+        Group:=-99
+      else
+        Inc(I);
+      end
+    else
+      // \{named}
+      begin
+      P:=I;
+      while (I<Len) and IsAlphaAndUnderline(Ref[I]) do
+        Inc(I);
+      if (I>Len) or (Ref[I]<>'}') then
+        Group:=-99
+      else
+        begin
+        Group:=NamedGroup(Copy(Ref,P,I-P));
+        if Group=-1 then
+          group:=-99;
+        Inc(I);
+        end
+      end;
+    end;
+  '_': // Whole subject
+    begin
+    Group:=-1;
+    Inc(I);
+    end;
+  '&': // \& or $& (whole regex match)
+    begin
+    Group:=0;
+    Inc(I);
+    end;
+  '+': // Last group
+    begin
+    Group:=GroupCount;
+    Inc(I);
+    end;
+  '`': // Subject to left of match.
+    begin
+    Group:=-2;
+    inc(I);
+    end;
+  #39:  // Subject to right of match.
+    begin
+    Group:=-3;
+    inc(I);
+    end
+  end;
+  Result:=Group;
+end;
+
+Class function TPerlRegEx.TransForm(aTransform : TTransformation; S : TREString): TREString;
+
+begin
+  Case aTransform of
+    tFirstCap : Result:=UpperCase(Copy(S,1,1))+LowerCase(Copy(S,2,Length(S)-1));
+    tInitialCap : Result:=InitialCaps(S);
+    tUpperCase : Result:=UpperCase(S);
+    tLowerCase : Result:=LowerCase(S);
+  else
+    Result:=S;
+  end;
+end;
+
+function TPerlRegEx.ComputeReplacement: TREString;
+
+var
+  Res : TREString;
+  Len : Integer;
+
+  Procedure AddToResult(aStart,aNext : Integer); inline;
+
+  begin
+    Res:=Res+Copy(FReplacement,aStart,aNext-aStart);
+  end;
+
+  Procedure AddNamedGroup(const aName : TREString); inline;
+
+  begin
+    Res:=Res+NamedGroups[aName];
+  end;
+
+  Function AddBackRef(aTransform : TTransformation; I : Integer) : Integer;
+
+  var
+    P,N,Group : Integer;
+
+  begin
+    Group:=GetBackRefIndex(FReplacement,I);
+    Case Group of
+      -99 : ; // invalid
+      -1 : Res:=Res+TransForm(aTransform,FSubject);
+      -2 : Res:=Res+TransForm(aTransform,SubjectLeft);
+      -3 : Res:=Res+TransForm(aTransform,SubjectRight);
+    else
+      if Group<=GroupCount then
+        Res:=Res+TransForm(aTransform,Groups[Group]);
+    end;
+    Result:=I;
+  end;
+
+var
+  I, P, Last : Integer;
+  updatelast : boolean;
+
+begin
+  Len:=Length(FReplacement);
+  if Len=0 then
+    Exit('');
+  I:=1;
+  Last:=1;
+  while I<=Len do
+    begin
+    case FReplacement[I] of
+    '\':
+        begin
+        if (I=Len) then
+          raise ERegularExpressionError.CreateFmt(SRegExIndexOutOfBounds,[I]);
+        AddToResult(Last,I);
+        Inc(I);
+        UpdateLast:=True;
+        case FReplacement[I] of
+          '$', '\':
+            begin
+            Inc(I);
+            AddToResult(I-1,I);
+            end;
+          'g':
+            begin
+            if (I+2<Len) and (FReplacement[I+1] = '<') then
+              begin
+              Inc(I,2); // First char
+              P:=I;
+              while (I<Len) and IsAlphaAndUnderline(FReplacement[I]) do
+                Inc(I);
+              // We should now be on closing >
+              if (I<=Len) and (FReplacement[I]='>') then
+                begin
+                AddNamedGroup(Copy(FReplaceMent,P,I-P));
+                Inc(I);
+                Last:=I;
+                end
+              else
+                begin
+                I:=I+2; // Skip everything.
+                UpdateLast:=False
+                end;
+              end
+            else
+              UpdateLast:=False;
+            end;
+          'l','L' : I:=AddBackRef(tLowerCase,I);
+          'u','U' : I:=AddBackRef(tLowerCase,I);
+          'f','F' : I:=AddBackRef(tFirstCap,I);
+          'i','I' : I:=AddBackRef(tInitialCap,I);
+        else
+          I:=AddBackRef(tNone,I);
+        end;
+        if UpdateLast then
+          Last:=I;
+      end;
+    '$':
+      begin
+      if I=Len then
+        raise ERegularExpressionError.CreateFmt(SRegExIndexOutOfBounds,[I]);
+      AddToResult(Last,I);
+      Inc(I);
+      if FReplacement[I]='$' then
+        begin
+        AddToResult(Last,I);
+        Inc(I);
+        end
+      else
+        I:=AddBackRef(tNone,I);
+       Last:=I;
+       end;
+    else // Case
+      Inc(I);
+    end;
+    end;
+  if I>Last then
+    AddToResult(Last,I);
+  Result:=Res;
+end;
+
+procedure TPerlRegEx.StoreGroups;
+
+var
+  I : Integer;
+
+begin
+  CheckMatch;
+  SetLength(FStoredGroups,GroupCount+1);
+  For I:=0 to GroupCount do
+    FStoredGroups[i]:=GetResultString(I);
+end;
+
+function TPerlRegEx.NamedGroup(const aName: TREString): Integer;
+
+var
+  Ptr : PCRE2_SPTR;
+  N,I : Integer;
+  tblName : TREString;
+
+begin
+  Ptr:=FNameTable;
+  for i:=0 to FNameCount-1 do
+    begin
+{$IFDEF USEWIDESTRING}
+    n:=ord(ptr[0]);
+    tblName:=GetStrLen((Ptr+1),FNameEntrySize-2);
+{$ELSE}
+    n:=(ord(ptr[0]) shl 8) or ord(ptr[1]);
+    tblName:=GetStrLen((Ptr+2),FNameEntrySize-3);
+{$ENDIF}
+    if SameText(TblName,aName) then
+      Exit(n);
+    Inc(Ptr,FNameEntrySize);
+    end ;
+  Result:=-1;
+end;
+
+procedure TPerlRegEx.Split(const aStrings: TStrings; aLimit: Integer);
+
+var
+  NewStart,LastEnd,Matches: Integer;
+
+begin
+  if Not Assigned(aStrings) then
+    raise ERegularExpressionError.Create(SRegExStringsRequired);
+  if (aLimit=1) or not Match then
+    begin
+    aStrings.Add(Subject);
+    Exit;
+    end;
+  LastEnd:=0; // Last match pos
+  Matches:=1;
+  repeat
+    NewStart:=FirstOffset; // Start of current match
+    aStrings.Add(Copy(Subject,LastEnd+1,NewStart-LastEnd)); // Copy everything since last match.
+    Inc(Matches);
+    LastEnd:=NewStart+MatchedLength; // update last match pos.
+  until ((aLimit>1) and (Matches>=aLimit)) or not MatchAgain;
+  aStrings.Add(TREString(Copy(FSubject,LastEnd+1,FSubjectLength -LastEnd)));
+end;
+
+procedure TPerlRegEx.SplitCapture(const aStrings: TStrings; aLimit: Integer);
+
+begin
+  SplitCapture(aStrings,aLimit,1);
+end;
+
+procedure TPerlRegEx.SplitCapture(const aStrings: TStrings; aLimit: Integer; aOffset: Integer);
+
+var
+  NewStart,LastEnd,Matches: Integer;
+  DoCopy : Boolean;
+
+begin
+  if Not Assigned(aStrings) then
+    raise ERegularExpressionError.Create(SRegExStringsRequired);
+  if (aLimit=1) or not Match then
+    begin
+    aStrings.Add(Subject);
+    Exit;
+    end;
+  Dec(aOffset);
+  if (aOffset>0) then
+    Dec(aLimit);
+  LastEnd:=0; // Last match pos
+  Matches:=1;
+  repeat
+    NewStart:=FirstOffset; // Start of current match
+    DoCopy:=(NewStart>aOffset);
+    if DoCopy then
+      begin
+      aStrings.Add(Copy(Subject,LastEnd+1,NewStart-LastEnd)); // Copy everything since last match.
+      if GroupCount > 0 then
+        aStrings.Add(Groups[GroupCount]);
+      Inc(Matches);
+      LastEnd:=NewStart+MatchedLength; // update last match pos.
+      end;
+  until ((aLimit>1) and (Matches>=aLimit)) or not MatchAgain;
+  aStrings.Add(TREString(Copy(FSubject,LastEnd+1,FSubjectLength-LastEnd)));
+end;
+
+{ TPerlRegExList }
+
+function TPerlRegExList.GetCount: Integer;
+begin
+  Result:=FList.Count;
+end;
+
+function TPerlRegExList.GetOwnsRegex: Boolean;
+begin
+  Result:=FList.OwnsObjects;
+end;
+
+function TPerlRegExList.GetRegEx(aIndex: Integer): TPerlRegEx;
+begin
+  Result:=TPerlRegEx(Flist[aIndex])
+end;
+
+function TPerlRegExList.GetStart: Integer;
+begin
+  Result:=FStart;
+end;
+
+function TPerlRegExList.GetStop: Integer;
+begin
+  Result:=FStop;
+end;
+
+function TPerlRegExList.GetSubject: TREString;
+begin
+  Result:=FSubject;
+end;
+
+procedure TPerlRegExList.SetRegEx(aIndex: Integer; aValue: TPerlRegEx);
+begin
+  FList[aIndex]:=aValue;
+end;
+
+procedure TPerlRegExList.SetStart(AValue: Integer);
+var
+  I : Integer;
+begin
+  if AValue=FStart then exit;
+  FStart:=aValue;
+  For I:=0 to Count-1 do
+    RegEx[I].Start:=aValue;
+end;
+
+procedure TPerlRegExList.SetStop(AValue: Integer);
+var
+  I : Integer;
+begin
+  if AValue=FStart then exit;
+  FStop:=aValue;
+  For I:=0 to Count-1 do
+    RegEx[I].Stop:=aValue;
+end;
+
+procedure TPerlRegExList.SetSubject(aValue: TREString);
+var
+  I: Integer;
+begin
+  if aValue=FSUbject then exit;
+  FSubject:=aValue;
+  for I:=Count-1 downto 0 do
+    RegEx[I].Subject:=Subject;
+  FMatch:=nil;
+end;
+
+procedure TPerlRegExList.UpdateRegEx(const aRegEx: TPerlRegEx);
+begin
+  aRegEx.Subject:=FSubject;
+  ARegEx.Start:=FStart;
+  ARegEx.Stop:=FStop;
+end;
+
+constructor TPerlRegExList.Create(OwnsRegex: Boolean);
+begin
+  FList:=TFPObjectList.Create(OwnsRegex);
+end;
+
+destructor TPerlRegExList.Destroy;
+begin
+  FreeAndNil(FList);
+  inherited Destroy;
+end;
+
+function TPerlRegExList.Add(const aRegEx: TPerlRegEx): Integer;
+begin
+  Result:=FList.Add(aRegEx);
+  UpdateRegEx(aRegEx);
+end;
+
+procedure TPerlRegExList.Clear;
+begin
+  FList.Clear;
+end;
+
+procedure TPerlRegExList.Delete(aIndex: Integer);
+begin
+  FList.Delete(aIndex);
+end;
+
+function TPerlRegExList.IndexOf(const aRegEx: TPerlRegEx): Integer;
+begin
+  Result:=FList.IndexOf(aRegex);
+end;
+
+procedure TPerlRegExList.Insert(aIndex: Integer; const aRegEx: TPerlRegEx);
+begin
+  FList.Insert(aIndex,aRegex);
+end;
+
+function TPerlRegExList.Match: Boolean;
+begin
+  SetStart(1);
+  FMatch:=nil;
+  Result:=MatchAgain;
+end;
+
+function TPerlRegExList.MatchAgain: Boolean;
+
+var
+  PRE : TPerlRegEx;
+  I,StartAt,Current: Integer;
+
+begin
+  // Determine start position
+  if not Assigned(FMatch) then
+    StartAt:=Start
+  else
+    With FMatch do
+      StartAt:=0;  // MVC todo {InternalGetMatchedOffset+InternalGetMatchedLength};
+  FMatch:=nil;
+  Current:=-1;
+  // Check all regexes for new closest match.
+  I:=0;
+  While (I<Count) and (Current>StartAt) do
+    begin
+    PRE:=RegEx[I];
+    // Should we search this regex again ?
+    if (not PRE.FoundMatch) or (PRE.FirstOffset<StartAt) then
+      begin
+      PRE.Start:=StartAt;
+      PRE.MatchAgain;
+      end;
+    // New first position found ?
+    if PRE.FoundMatch and ((FMatch=Nil) or (PRE.FirstOffset<Current)) then
+      begin
+      Current:=Pre.FirstOffset;
+      FMatch:=PRE;
+      end;
+    Inc(I);
+    end;
+  Result:=Current<>-1;
+end;
+
+end.

+ 4 - 0
packages/vcl-compat/tests/testcompat.lpi

@@ -68,6 +68,10 @@
         <Filename Value="utchash.pp"/>
         <IsPartOfProject Value="True"/>
       </Unit>
+      <Unit>
+        <Filename Value="utregex.pas"/>
+        <IsPartOfProject Value="True"/>
+      </Unit>
     </Units>
   </ProjectOptions>
   <CompilerOptions>

+ 1 - 1
packages/vcl-compat/tests/testcompat.lpr

@@ -6,7 +6,7 @@ uses
   {$IFDEF UNIX}cwstring,{$ENDIF}
   Classes, consoletestrunner, tcnetencoding, tciotuils, 
   utmessagemanager, utcdevices, utcanalytics, utcimagelist, 
-  utcnotifications, utcjson, utcpush, utchash;
+  utcnotifications, utcjson, utcpush, utchash, utcregex;
 
 type
 

+ 315 - 0
packages/vcl-compat/tests/utcregex.pas

@@ -0,0 +1,315 @@
+unit utcregex;
+
+{$mode objfpc}{$H+}
+{ $DEFINE USEWIDESTRING}
+
+interface
+
+uses
+  Classes, SysUtils, fpcunit, testutils, testregistry, system.regularexpressionscore;
+
+type
+
+  { TTestRegExpCore }
+
+  TTestRegExpCore = class(TTestCase)
+  private
+    FRegex: TPerlRegEx;
+    FMatchEventCount : Integer;
+    FSplitSubject: TStrings;
+    procedure AssertMatch(const Msg, aMatch: TREString; aPos, aLength: Integer; Groups: array of TREString);
+    procedure DoMatch(Sender: TObject);
+  protected
+    procedure SetUp; override;
+    procedure TearDown; override;
+    Property RegEx : TPerlRegEx Read FRegex;
+    Property SplitSubject : TStrings Read FSplitSubject;
+  published
+    Procedure TestHookup;
+    procedure TestMatch;
+    procedure TestNamedGroups;
+    procedure TestReplace;
+    procedure TestReplaceAll;
+    procedure TestSplitAll;
+    procedure TestSplitLimit;
+    procedure TestInitialCaps;
+    procedure TestReplaceGroupBackslash;
+    procedure TestReplaceGroupDollar;
+    procedure TestReplaceGroupQuoted;
+    procedure TestReplaceGroupNamed;
+    procedure TestReplaceGroupNamed2;
+    procedure TestReplaceGroupNamedInvalidName;
+    procedure TestReplaceWholeSubject;
+    procedure TestReplaceLeftOfMatch;
+    procedure TestReplaceRightOfMatch;
+    procedure TestReplaceWholeMatch;
+    procedure TestReplaceLastMatch;
+  end;
+
+implementation
+
+procedure TTestRegExpCore.AssertMatch(Const Msg,aMatch : TREString; aPos,aLength : Integer; Groups : Array of TREString);
+
+var
+  I : Integer;
+begin
+  AssertEquals(Msg+': matched text',aMatch,Regex.MatchedText);
+  AssertEquals(Msg+': offset',aPos,Regex.MatchedOffset);
+  AssertEquals(Msg+': length',aLength,Regex.MatchedLength);
+  AssertEquals(Msg+': group count',Length(Groups),Regex.GroupCount);
+  For I:=1 to Regex.GroupCount do
+    AssertEquals(Msg+' group['+IntToStr(I)+']',Groups[I-1],Regex.Groups[I]);
+end;
+
+procedure TTestRegExpCore.DoMatch(Sender: TObject);
+begin
+  Inc(FMatchEventCount);
+end;
+
+procedure TTestRegExpCore.TestMatch;
+
+begin
+  Regex.subject:='xyz abba abbba abbbba zyx';
+  Regex.RegEx:='a(b*)a';
+  AssertTrue('First match found',Regex.Match);
+  AssertEquals('Match event called',1,FMatchEventCount);
+  AssertMatch('Match 1','abba',5,4,['bb']);
+  AssertEquals('Left of match','xyz ',Regex.SubjectLeft);
+  AssertEquals('Right of match',' abbba abbbba zyx',Regex.SubjectRight);
+  AssertTrue('Second match found',Regex.MatchAgain);
+  AssertMatch('Match 2','abbba',10,5,['bbb']);
+  AssertTrue('Third match found',Regex.MatchAgain);
+  AssertMatch('Match 3','abbbba',16,6,['bbbb']);
+  AssertFalse('No more matches',Regex.MatchAgain);
+  AssertEquals('Match event called',3,FMatchEventCount);
+end;
+
+procedure TTestRegExpCore.TestNamedGroups;
+
+Const
+  Rec1 = 'Name:"John" Surname:"Doe" Email:"[email protected]"';
+  Rec2 = 'Name:"Jane" Surname:"Dolina" Email:"[email protected]"';
+
+begin
+  Regex.Subject:=Rec1+#10+Rec2;
+  Regex.RegEx:='Name:"(?<Name>[\w]+?)".*?Surname:"(?<Surname>[\w]+?)".*?Email:"(?<Email>\b[\w.%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}\b)"';
+  AssertTrue('First match found',Regex.Match);
+  AssertMatch('Match 1',Rec1,1,Length(Rec1),['John','Doe','[email protected]']);
+  AssertEquals('Nonexisting group','',Regex.NamedGroups['nonexisting']);
+  AssertEquals('Name group','John',Regex.NamedGroups['Name']);
+  AssertEquals('Surname group','Doe',Regex.NamedGroups['Surname']);
+  AssertEquals('Email group','[email protected]',Regex.NamedGroups['Email']);
+  AssertTrue('Second match found',Regex.MatchAgain);
+  AssertMatch('Match 2',Rec2,Length(Rec1)+2,Length(Rec2),['Jane','Dolina','[email protected]']);
+  AssertFalse('No more matches',Regex.MatchAgain);
+end;
+
+procedure TTestRegExpCore.TestReplace;
+begin
+  Regex.subject:='xyz abba abbba abbbba zyx';
+  Regex.RegEx:='a(b*)a';
+  Regex.Replacement:='c';
+  AssertTrue('First match found',Regex.Match);
+  AssertEquals('Replace','c',Regex.Replace);
+  AssertEquals('Replace result','xyz c abbba abbbba zyx',Regex.Subject);
+  AssertTrue('Second match found',Regex.MatchAgain);
+  AssertEquals('Replace 2','c',Regex.Replace);
+  AssertEquals('Replace 2 result','xyz c c abbbba zyx',Regex.Subject);
+  AssertTrue('Third match found',Regex.MatchAgain);
+  AssertEquals('Replace 3','c',Regex.Replace);
+  AssertEquals('Replace 3 result','xyz c c c zyx',Regex.Subject);
+  AssertFalse('No more matches',Regex.MatchAgain);
+end;
+
+procedure TTestRegExpCore.TestReplaceAll;
+begin
+  Regex.subject:='xyz abba abbba abbbba zyx';
+  Regex.RegEx:='a(b*)a';
+  Regex.Replacement:='c';
+  AssertTrue('Replacements done',Regex.ReplaceAll);
+  AssertEquals('ReplaceAll result','xyz c c c zyx',Regex.Subject);
+end;
+
+
+procedure TTestRegExpCore.TestReplaceGroupBackslash;
+// \n
+begin
+  Regex.subject:='*abba*';
+  Regex.RegEx:='a(b*)a';
+  Regex.Replacement:='\1';
+  AssertTrue('Match',Regex.Match);
+  AssertEquals('ReplaceText','bb',Regex.Replace);
+  AssertEquals('Result','*bb*',Regex.Subject);
+end;
+
+procedure TTestRegExpCore.TestReplaceGroupDollar;
+// $N
+begin
+  Regex.subject:='*abba*';
+  Regex.RegEx:='a(b*)a';
+  Regex.Replacement:='$1';
+  AssertTrue('Match',Regex.Match);
+  AssertEquals('ReplaceText','bb',Regex.Replace);
+  AssertEquals('Result','*bb*',Regex.Subject);
+end;
+
+procedure TTestRegExpCore.TestReplaceGroupQuoted;
+// \{N}
+begin
+  Regex.subject:='*abba*';
+  Regex.RegEx:='a(b*)a';
+  Regex.Replacement:='\{1}';
+  AssertTrue('Match',Regex.Match);
+  AssertEquals('ReplaceText','bb',Regex.Replace);
+  AssertEquals('Result','*bb*',Regex.Subject);
+end;
+
+procedure TTestRegExpCore.TestReplaceGroupNamed;
+// \{name}
+
+begin
+  Regex.subject:='*abba*';
+  Regex.RegEx:='a(?<Name>b*?)a';
+  Regex.Replacement:='\{Name}';
+  AssertTrue('Match',Regex.Match);
+  AssertEquals('ReplaceText','bb',Regex.Replace);
+  AssertEquals('Result','*bb*',Regex.Subject);
+end;
+
+procedure TTestRegExpCore.TestReplaceGroupNamed2;
+// \{name}
+begin
+  Regex.subject:='*abba*';
+  Regex.RegEx:='a(?<Name>b*?)a';
+  Regex.Replacement:='<\{Name}>';
+  AssertTrue('Match',Regex.Match);
+  AssertEquals('ReplaceText','<bb>',Regex.Replace);
+  AssertEquals('Result','*<bb>*',Regex.Subject);
+end;
+
+procedure TTestRegExpCore.TestReplaceGroupNamedInvalidName;
+// \{name} with invalid name
+begin
+  Regex.subject:='*abba*';
+  Regex.RegEx:='a(?<Name>b*?)a';
+  Regex.Replacement:='<\{NameX}>';
+  AssertTrue('Match',Regex.Match);
+  AssertEquals('ReplaceText','<>',Regex.Replace);
+  AssertEquals('Result','*<>*',Regex.Subject);
+end;
+
+procedure TTestRegExpCore.TestReplaceWholeSubject;
+begin
+  Regex.subject:='*abba*';
+  Regex.RegEx:='a(b*)a';
+  Regex.Replacement:='<\_>';
+  AssertTrue('Match',Regex.Match);
+  AssertEquals('ReplaceText','<*abba*>',Regex.Replace);
+  AssertEquals('Result','*<*abba*>*',Regex.Subject);
+end;
+
+procedure TTestRegExpCore.TestReplaceLeftOfMatch;
+// \`
+begin
+  Regex.subject:='x*abba*';
+  Regex.RegEx:='a(b*)a';
+  Regex.Replacement:='<\`>';
+  AssertTrue('Match',Regex.Match);
+  AssertEquals('ReplaceText','<x*>',Regex.Replace);
+  AssertEquals('Result','x*<x*>*',Regex.Subject);
+end;
+
+procedure TTestRegExpCore.TestReplaceRightOfMatch;
+// \'
+begin
+  Regex.subject:='*abba*x';
+  Regex.RegEx:='a(b*)a';
+  Regex.Replacement:='<\''>';
+  AssertTrue('Match',Regex.Match);
+  AssertEquals('ReplaceText','<*x>',Regex.Replace);
+  AssertEquals('Result','*<*x>*x',Regex.Subject);
+end;
+
+procedure TTestRegExpCore.TestReplaceWholeMatch;
+// \&
+begin
+  Regex.subject:='*abba*';
+  Regex.RegEx:='a(b*)a';
+  Regex.Replacement:='<\&>';
+  AssertTrue('Match',Regex.Match);
+  AssertEquals('ReplaceText','<abba>',Regex.Replace);
+  AssertEquals('Result','*<abba>*',Regex.Subject);
+end;
+
+procedure TTestRegExpCore.TestReplaceLastMatch;
+// \&
+begin
+  Regex.subject:='*abbcca*';
+  Regex.RegEx:='a(b*)(c*)a';
+  Regex.Replacement:='<\+>';
+  AssertTrue('Match',Regex.Match);
+  AssertEquals('ReplaceText','<cc>',Regex.Replace);
+  AssertEquals('Result','*<cc>*',Regex.Subject);
+end;
+
+
+procedure TTestRegExpCore.TestSplitAll;
+begin
+  Regex.subject:='xyz abba abbba abbbba zyx';
+  Regex.RegEx:='\s';
+  Regex.Split(SplitSubject,0);
+  AssertEquals('Count',5,SplitSubject.Count);
+  AssertEquals('Item 0','xyz',SplitSubject[0]);
+  AssertEquals('Item 1','abba',SplitSubject[1]);
+  AssertEquals('Item 2','abbba',SplitSubject[2]);
+  AssertEquals('Item 3','abbbba',SplitSubject[3]);
+  AssertEquals('Item 4','zyx',SplitSubject[4]);
+end;
+
+procedure TTestRegExpCore.TestSplitLimit;
+
+begin
+  Regex.subject:='xyz abba abbba abbbba zyx';
+  Regex.RegEx:='\s';
+  Regex.Split(SplitSubject,2);
+  AssertEquals('Count',2,SplitSubject.Count);
+  AssertEquals('Item 0','xyz',SplitSubject[0]);
+  AssertEquals('Item 1','abba abbba abbbba zyx',SplitSubject[1]);
+end;
+
+procedure TTestRegExpCore.TestInitialCaps;
+begin
+  AssertEquals('Initialcaps 1','Abc',InitialCaps('aBc'));
+  AssertEquals('Initialcaps 2',' Abc',InitialCaps(' aBc'));
+  AssertEquals('Initialcaps 3','Dad Abc',InitialCaps('dAd aBc'));
+  AssertEquals('Initialcaps 4','Dad Abc ',InitialCaps('dAd aBc '));
+end;
+
+procedure TTestRegExpCore.SetUp;
+begin
+  FRegex:=TPerlRegEx.Create;
+  FRegEx.OnMatch:=@DoMatch;
+  FMatchEventCount:=0;
+  FSplitSubject:=TStringList.Create;
+end;
+
+procedure TTestRegExpCore.TearDown;
+
+begin
+  FreeAndNil(FSplitSubject);
+  FreeAndNil(FRegex);
+end;
+
+procedure TTestRegExpCore.TestHookup;
+begin
+  AssertNotNull('Regex',Regex);
+  AssertTrue('Assigned OnMatch event',Assigned(Regex.OnMatch));
+  AssertEquals('Match event count',0,FMatchEventCount);
+
+end;
+
+initialization
+
+  RegisterTest(TTestRegExpCore);
+end.
+