Browse Source

* Added Regex unit with an NFA regexpression parser, based on code from Julian Bucknall which is used with his permission
* Added RegExprCompat unit which contains an interface to the Regex unit which is compatible with the existing RegExpr unit
* Added some more tests and added a define UseRegexCompat to make it possible to choose what regex-implementation to test. Test which fail on the existing regex-parser are placed between ifdefs

git-svn-id: trunk@10611 -

joost 17 years ago
parent
commit
44fe103c46

+ 2 - 0
.gitattributes

@@ -3980,7 +3980,9 @@ packages/pxlib/src/pxlib.pp svneol=native#text/plain
 packages/regexpr/Makefile svneol=native#text/plain
 packages/regexpr/Makefile.fpc svneol=native#text/plain
 packages/regexpr/fpmake.pp svneol=native#text/plain
+packages/regexpr/src/regex.pp svneol=native#text/plain
 packages/regexpr/src/regexpr.pp svneol=native#text/plain
+packages/regexpr/src/regexprcompat.pp svneol=native#text/plain
 packages/regexpr/tests/testreg1.pp svneol=native#text/plain
 packages/rexx/Makefile svneol=native#text/plain
 packages/rexx/Makefile.fpc svneol=native#text/plain

+ 57 - 57
packages/regexpr/Makefile

@@ -1,5 +1,5 @@
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2008/03/24]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2008/03/27]
 #
 default: all
 MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-wince arm-gba arm-nds arm-embedded arm-symbian powerpc64-linux powerpc64-darwin powerpc64-embedded avr-embedded armeb-linux armeb-embedded
@@ -254,172 +254,172 @@ PACKAGESDIR:=$(wildcard $(FPCDIR) $(FPCDIR)/packages $(FPCDIR)/packages/base $(F
 override PACKAGE_NAME=regexpr
 override PACKAGE_VERSION=2.0.0
 ifeq ($(FULL_TARGET),i386-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),i386-go32v2)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),i386-win32)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),i386-os2)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),i386-freebsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),i386-beos)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),i386-netbsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),i386-solaris)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),i386-qnx)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),i386-netware)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),i386-openbsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),i386-wdosx)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),i386-darwin)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),i386-emx)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),i386-watcom)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),i386-netwlibc)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),i386-wince)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),i386-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),i386-symbian)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),m68k-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),m68k-freebsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),m68k-netbsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),m68k-amiga)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),m68k-atari)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),m68k-openbsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),m68k-palmos)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),m68k-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),powerpc-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),powerpc-netbsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),powerpc-amiga)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),powerpc-macos)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),powerpc-darwin)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),powerpc-morphos)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),powerpc-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),sparc-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),sparc-netbsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),sparc-solaris)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),sparc-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),x86_64-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),x86_64-freebsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),x86_64-darwin)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),x86_64-win64)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),arm-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),arm-palmos)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),arm-wince)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),arm-gba)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),arm-nds)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),arm-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),arm-symbian)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),powerpc64-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),powerpc64-darwin)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),powerpc64-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),avr-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),armeb-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),armeb-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regexpr regex regexprcompat
 endif
 ifeq ($(FULL_TARGET),i386-linux)
 override TARGET_EXAMPLES+=testreg1

+ 1 - 1
packages/regexpr/Makefile.fpc

@@ -7,7 +7,7 @@ name=regexpr
 version=2.0.0
 
 [target]
-units=regexpr
+units=regexpr regex regexprcompat
 examples=testreg1
 
 [compiler]

+ 1228 - 0
packages/regexpr/src/regex.pp

@@ -0,0 +1,1228 @@
+{
+    This file is part of the Free Pascal packages library.
+    Copyright (c) 2008 by Joost van der Sluis, member of the
+    Free Pascal development team
+    
+    Regexpression parser
+    
+    This code is based on the examples in the book
+    'Tomes of Delphi: Algorithms and Data Structures' by Julian M Bucknall
+    The code is used with his permission. For an excellent explanation of
+    this unit, see the book...
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+unit Regex;
+
+{$mode Delphi}{$H+}
+{$INLINE ON}
+
+interface
+
+{Notes:
+  these classes parse regular expressions that follow this grammar:
+
+    <anchorexpr> ::= <expr> |
+                     '^' <expr> |
+                     <expr> '$' |
+                     '^' <expr> '$'
+    <expr> ::= <term> |
+               <term> '|' <expr>                 - alternation
+    <term> ::= <factor> |
+               <factor><term>                    - concatenation
+    <factor> ::= <atom> |
+                 <atom> '?' |                    - zero or one
+                 <atom> '*' |                    - zero or more
+                 <atom> 'n,m' |                  - min n, max m (added by Joost)
+                 <atom> '+'                      - one or more
+    <atom> ::= <char> |
+               '.' |                             - any char
+               '(' <expr> ') |                   - parentheses
+               '[' <charclass> ']' |             - normal class
+               '[^' <charclass> ']'              - negated class
+    <charclass> ::= <charrange> |
+                    <charrange><charclass>
+    <charrange> ::= <ccchar> |
+                    <ccchar> '-' <ccchar>
+    <char> ::= <any character except metacharacters> |
+               '\' <any character at all>
+    <ccchar> ::= <any character except '-' and ']'> |
+                 '\' <any character at all>
+
+  This means that parentheses have maximum precedence, followed
+  by square brackets, followed by the closure operators,
+  followed by concatenation, finally followed by alternation.
+}
+
+uses
+  SysUtils,
+  Classes;
+
+type
+  TUpcaseFunc = function(aCh : AnsiChar) : AnsiChar;
+
+  TNFAMatchType = (  {types of matching performed...}
+     mtNone,           {..no match (an epsilon no-cost move)}
+     mtAnyChar,        {..any character}
+     mtChar,           {..a particular character}
+     mtClass,          {..a character class}
+     mtNegClass,       {..a negated character class}
+     mtTerminal,       {..the final state--no matching}
+     mtUnused);        {..an unused state--no matching}
+
+  TRegexError = (    {error codes for invalid regex strings}
+     recNone,          {..no error}
+     recSuddenEnd,     {..unexpected end of string}
+     recMetaChar,      {..read metacharacter, but needed normal char}
+     recNoCloseParen,  {..expected close paren, but not there}
+     recExtraChars     {..not at end of string after parsing regex}
+     );
+
+  TRegexType = (
+     rtRegEx,
+     rtChars,
+     rtSingleChar
+     );
+
+  PCharSet = ^TCharSet;
+  TCharSet = set of Char;
+
+  { TtdRegexEngine }
+
+  TNFAState = record
+    sdNextState1: integer;
+    sdNextState2: integer;
+    sdClass     : PCharSet;
+    sdMatchType : TNFAMatchType;
+    sdChar      : AnsiChar;
+  end;
+
+
+  { TRegexEngine }
+
+  TRegexEngine = class
+    private
+      FAnchorEnd  : boolean;
+      FAnchorStart: boolean;
+      FErrorCode  : TRegexError;
+      FIgnoreCase : boolean;
+      FMultiLine  : boolean;
+      FPosn       : PAnsiChar;
+      FRegexStr   : string;
+      FStartState : integer;
+      FStateTable : Array of TNFAState;
+      FStateCount : integer;
+      FUpcase     : TUpcaseFunc;
+
+      // The deque (double-ended queue)
+      FList : array of integer;
+      FCapacity : integer;
+      FHead : integer;
+      FTail : integer;
+      
+      FRegexType : TRegexType;
+    protected
+      procedure DequeEnqueue(aValue : integer);
+      procedure DequePush(aValue : integer);
+      function DequePop : integer;
+      procedure DequeGrow;
+
+      procedure rcSetIgnoreCase(aValue : boolean); virtual;
+      procedure rcSetRegexStr(const aRegexStr : string); virtual;
+      procedure rcSetUpcase(aValue : TUpcaseFunc); virtual;
+      procedure rcSetMultiLine(aValue : Boolean); virtual;
+
+      procedure rcClear; virtual;
+      procedure rcError(aIndex      : integer); virtual;
+      procedure rcLevel1Optimize; virtual;
+      function rcMatchSubString(const S   : string;
+                                StartPosn : integer;
+                                var Len   : integer) : boolean; virtual;
+      function rcAddState(aMatchType : TNFAMatchType;
+                          aChar      : AnsiChar;
+                          aCharClass : PCharSet;
+                          aNextState1: integer;
+                          aNextState2: integer) : integer;
+      function rcSetState(aState     : integer;
+                          aNextState1: integer;
+                          aNextState2: integer) : integer;
+
+      function rcParseAnchorExpr : integer; virtual;
+      function rcParseAtom : integer; virtual;
+      function rcParseCCChar(out EscapeChar : Boolean) : AnsiChar; virtual;
+      function rcParseChar : integer; virtual;
+      function rcParseCharClass(aClass : PCharSet) : boolean; virtual;
+      function rcParseCharRange(aClass : PCharSet) : boolean; virtual;
+      function rcParseExpr : integer; virtual;
+      function rcParseFactor : integer; virtual;
+      function rcParseTerm : integer; virtual;
+      Function rcReturnEscapeChar : AnsiChar; virtual;
+    public
+      procedure WriteTable;
+      constructor Create(const aRegexStr : string);
+      destructor Destroy; override;
+
+      function Parse(var aErrorPos : integer;
+                     var aErrorCode: TRegexError) : boolean; virtual;
+      function MatchString(const S : string; out MatchPos : integer; var Offset : integer) : boolean; virtual;
+      function ReplaceAllString(const src, newstr: ansistring; out DestStr : string): Integer;
+
+
+      property IgnoreCase : boolean
+                  read FIgnoreCase write rcSetIgnoreCase;
+      property MultiLine : boolean
+                  read FMultiLine write rcSetMultiLine;
+      property RegexString : string
+                  read FRegexStr write rcSetRegexStr;
+      property Upcase : TUpcaseFunc
+                  read FUpcase write rcSetUpcase;
+  end;
+
+
+Resourcestring
+  eRegexParseError = 'Error at %d when parsing regular expression';
+
+implementation
+
+uses strutils;
+
+const
+  MetaCharacters : set of AnsiChar =
+                   ['[', ']', '(', ')', '|', '*', '+', '?', '-', '.',
+                    '^', '$', '{', '}'];
+  newline : TCharSet = [#10,#13,#$85];
+  {some handy constants}
+  UnusedState = -1;
+  NewFinalState = -2;
+  CreateNewState = -3;
+  ErrorState = -4;
+  MustScan = -5;
+
+  cs_allchars : tcharset = [#0..#255];
+  cs_wordchars : tcharset = ['A'..'Z','a'..'z','_','0'..'9'];
+  cs_newline : tcharset = [#10];
+  cs_digits : tcharset = ['0'..'9'];
+  cs_whitespace : tcharset = [' ',#9];
+
+
+{===Helper routines==================================================}
+function SystemUpcase(aCh : AnsiChar) : AnsiChar; far;
+begin
+  Result := System.Upcase(aCh);
+end;
+{====================================================================}
+
+
+{===TRegexEngine===================================================}
+constructor TRegexEngine.Create(const aRegexStr : string);
+begin
+  inherited Create;
+  FRegexStr := aRegexStr;
+  FIgnoreCase := false;
+  FUpcase := SystemUpcase;
+  SetLength(FStateTable,64);
+  FStateCount:=0;
+  FCapacity:=64;
+  setlength(FList,FCapacity);
+  {let's help out the user of the deque by putting the head and
+   tail pointers in the middle: it's probably more efficient}
+  FHead := FCapacity div 2;
+  FTail := FHead;
+
+  MultiLine:=False;
+end;
+{--------}
+destructor TRegexEngine.Destroy;
+begin
+  if (FStateTable <> nil) then
+    rcClear;
+  inherited Destroy;
+end;
+{--------}
+function TRegexEngine.MatchString(const S : string; out MatchPos : integer; var Offset : integer): boolean;
+var
+  i : integer;
+  ErrorPos  : integer;
+  ErrorCode : TRegexError;
+  pc : pchar;
+  x:integer;
+begin
+  {if the regex string hasn't been parsed yet, do so}
+  if (FStateCount = 0) then begin
+    if not Parse(ErrorPos, ErrorCode) then
+      rcError(ErrorPos);
+  end;
+
+  case FRegexType of
+    rtSingleChar :
+      begin
+      MatchPos := PosEx(char(FRegexStr[1]),s,Offset);
+      Offset := MatchPos+1;
+      Result := (MatchPos>0);
+      end;
+    rtChars :
+      begin
+      MatchPos := PosEx(FRegexStr,s,Offset);
+      Offset := MatchPos+length(FRegexStr);
+      Result := (MatchPos>0);
+      end
+    else
+      begin
+      {now try and see if the string matches (empty strings don't)}
+      Result := False;
+      MatchPos := 0;
+      if (S <> '') then
+        {if the regex specified a start anchor then we
+         need to check the string starting at the first position}
+        if FAnchorStart then begin
+          if rcMatchSubString(S, 1, Offset) then
+            begin
+            MatchPos:=1;
+            Result := True;
+            end
+          {If the first position did not match ang MultiLine is false, the string
+           doesn't match. If MultiLine is true, start at every position after a
+           newline }
+          else if FMultiLine then begin
+            for i := Offset to length(S)-1 do
+              if S[i] in newline then
+               if rcMatchSubString(S, i+1, Offset) then begin
+                MatchPos := i+1;
+                Result := True;
+                Break;
+              end;
+          end
+        end
+        {otherwise we try and match the string at every position and
+         return at the first success}
+        else begin
+          for i := Offset to length(S) do
+            if rcMatchSubString(S, i, Offset) then begin
+              MatchPos:=i;
+              Result := True;
+              Break;
+            end;
+        end;
+      end;
+    end; {case}
+end;
+
+function TRegexEngine.ReplaceAllString(const src, newstr: ansistring; out DestStr : string): Integer;
+
+type TReplRec = record
+                  Pos : integer;
+                  Len : integer;
+                end;
+
+var ofs         : Integer;
+    size_newstr,
+    size, pos   : Integer;
+    ReplArr     : array of TReplRec;
+    racount     : integer;
+    MatchPos    : integer;
+    DestSize    : integer;
+    LastPos     : integer;
+    MoveLen     : integer;
+    i           : integer;
+
+begin
+  setlength(ReplArr,64);
+
+  racount := 0;
+  DestSize:=length(src);
+  size_newstr := length(newstr);
+  Ofs := 1;
+  while MatchString(src,MatchPos,Ofs) do
+    begin
+    if racount = length(ReplArr) then
+      setlength(ReplArr,racount+racount div 2);
+    ReplArr[racount].Pos := MatchPos;
+    ReplArr[racount].Len := ofs;
+    DestSize:=DestSize-ofs+MatchPos+size_newstr;
+    inc(racount);
+    end;
+
+  SetLength(DestStr, SizeOf(Char)*DestSize);
+  MatchPos:=1; LastPos:=1;
+
+  if size_newstr<>0 then for i := 0 to racount -1 do
+    begin
+    MoveLen := ReplArr[i].Pos-LastPos;
+    move(src[LastPos],DestStr[MatchPos],MoveLen);
+    MatchPos:=MatchPos+MoveLen;
+    LastPos := ReplArr[i].Len;
+    move(newstr[1],DestStr[MatchPos],size_newstr);
+    Matchpos := MatchPos+size_newstr;
+    end
+  else for i := 0 to racount -1 do
+    begin
+    MoveLen := ReplArr[i].Pos-LastPos;
+    move(src[LastPos],DestStr[MatchPos],MoveLen);
+    MatchPos:=MatchPos+MoveLen;
+    LastPos := ReplArr[i].Len;
+    end;
+
+  move(src[LastPos],DestStr[MatchPos],length(src)-LastPos+1);
+  Result := racount;
+end;
+
+{--------}
+function TRegexEngine.Parse(var aErrorPos : integer;
+                              var aErrorCode: TRegexError)
+                                                            : boolean;
+begin
+  {clear the current transition table}
+  rcClear;
+  {empty regex strings are not allowed}
+  if (FRegexStr = '') then begin
+    Result := false;
+    aErrorPos := 1;
+    aErrorCode := recSuddenEnd;
+
+    Exit;
+  end;
+  {parse the regex string}
+  if not IgnoreCase then
+    begin
+    if length(FRegexStr)=1 then
+      FRegexType:=rtSingleChar
+    else
+      FRegexType:=rtChars
+    end
+  else
+    FRegexType:=rtRegEx;
+
+  FPosn := PAnsiChar(FRegexStr);
+  FStartState := rcParseAnchorExpr;
+  {if an error occurred or we're not at the end of the regex string,
+   clear the transition table, return false and the error position}
+  if (FStartState = ErrorState) or (FPosn^ <> #0) then begin
+    if (FStartState <> ErrorState) and (FPosn^ <> #0) then
+      FErrorCode := recExtraChars;
+    rcClear;
+    Result := false;
+    aErrorPos := succ(FPosn - PAnsiChar(FRegexStr));
+    aErrorCode := FErrorCode;
+  end
+  {otherwise add a terminal state, optimize, return true}
+  else begin
+    rcAddState(mtTerminal, #0, nil, UnusedState, UnusedState);
+    rcLevel1Optimize;
+    if FAnchorStart or FAnchorEnd then FRegexType:= rtRegEx;
+    Result := true;
+    aErrorPos := 0;
+    aErrorCode := recNone;
+  end;
+end;
+{--------}
+function TRegexEngine.rcAddState(aMatchType : TNFAMatchType;
+                                   aChar      : AnsiChar;
+                                   aCharClass : PCharSet;
+                                   aNextState1: integer;
+                                   aNextState2: integer) : integer;
+begin
+  {set up the fields in the state record}
+  with FStateTable[FStateCount] do
+    begin
+    if (aNextState1 = NewFinalState) then
+      sdNextState1 := FStateCount+1
+    else
+      sdNextState1 := aNextState1;
+    sdNextState2 := aNextState2;
+    sdMatchType := aMatchType;
+    if (aMatchType = mtChar) then
+      sdChar := aChar
+    else if (aMatchType = mtClass) or (aMatchType = mtNegClass) then
+      sdClass := aCharClass;
+    end;
+  Result := FStateCount;
+  inc(FStateCount);
+  if FStateCount=length(FStateTable) then
+    setlength(FStateTable,(FStateCount * 3) div 2);
+
+  if not (aMatchType in [mtChar,mtTerminal]) then FRegexType := rtRegEx;
+end;
+{--------}
+procedure TRegexEngine.rcClear;
+var
+  i : integer;
+begin
+  {free all items in the state transition table}
+  for i := 0 to FStateCount-1 do begin
+    with FStateTable[i] do begin
+      if (sdMatchType = mtClass) or
+         (sdMatchType = mtNegClass) then
+        if (sdClass <> nil) then
+          FreeMem(sdClass, sizeof(TCharSet));
+    end;
+  end;
+  {clear the state transition table}
+  FStateCount:=0;
+  FAnchorStart := false;
+  FAnchorEnd := false;
+end;
+{--------}
+procedure TRegexEngine.rcError(aIndex      : integer);
+begin
+  raise Exception.Create(Format(eRegexParseError,[aIndex]));
+end;
+{--------}
+procedure TRegexEngine.rcLevel1Optimize;
+var
+  i : integer;
+  Walker : integer;
+begin
+  {level 1 optimization removes all states that have only a single
+   no-cost move to another state}
+
+  {cycle through all the state records, except for the last one}
+  for i := 0 to FStateCount - 2 do begin
+    {get this state}
+    with FStateTable[i] do begin
+      {walk the chain pointed to by the first next state, unlinking
+       the states that are simple single no-cost moves}
+      Walker := sdNextState1;
+      while (FStateTable[walker].sdMatchType = mtNone) and
+            (FStateTable[walker].sdNextState2 = UnusedState) do begin
+        sdNextState1 := FStateTable[walker].sdNextState1;
+        Walker := sdNextState1;
+      end;
+      {walk the chain pointed to by the second next state, unlinking
+       the states that are simple single no-cost moves}
+      if (sdNextState2 <> UnusedState) then begin
+        Walker := sdNextState2;
+        while (FStateTable[walker].sdMatchType = mtNone) and
+              (FStateTable[walker].sdNextState2 = UnusedState) do begin
+          sdNextState2 := FStateTable[walker].sdNextState1;
+          Walker := sdNextState2;
+        end;
+      end;
+    end;
+  end;
+
+  {cycle through all the state records, except for the last one,
+   marking unused ones--not strictly necessary but good for debugging}
+  for i := 0 to FStateCount - 2 do begin
+    with FStateTable[i] do begin
+      if (sdMatchType = mtNone) and
+         (sdNextState2 = UnusedState) then
+        sdMatchType := mtUnused;
+    end;
+  end;
+end;
+{--------}
+function TRegexEngine.rcMatchSubString(const s   : string;
+                                         StartPosn : integer;
+                                         var Len   : integer)
+                                                            : boolean;
+var
+  Ch     : AnsiChar;
+  State  : integer;
+  StrInx : integer;
+begin
+  {assume we fail to match}
+  Result := false;
+  Len := StartPosn;
+  {clear the deque}
+  FHead := FCapacity div 2;
+  FTail := FHead;
+
+  
+  {enqueue the special value to start scanning}
+  DequeEnqueue(MustScan);
+  {enqueue the first state}
+  DequeEnqueue(FStartState);
+  {prepare the string index}
+  StrInx := StartPosn;
+  {loop until the deque is empty or we run out of string}
+  repeat
+    {pop the top state from the deque}
+    State := DequePop;
+    {process the "must scan" state first}
+    if (State = MustScan) then begin
+      {if the deque is empty at this point, we might as well give up
+       since there are no states left to process new characters}
+      if (FHead <> FTail) then begin
+        {if we haven't run out of string, get the character, and
+         enqueue the "must scan" state again}
+          if IgnoreCase then
+            Ch := Upcase(s[StrInx])
+          else
+            Ch := s[StrInx];
+          DequeEnqueue(MustScan);
+        inc(StrInx);
+      end;
+    end
+    {otherwise, process the state}
+    else with FStateTable[State] do begin
+      case sdMatchType of
+        mtChar :
+          begin
+            {for a match of a character, enqueue the next state}
+            if (Ch = sdChar) then
+              DequeEnqueue(sdNextState1);
+          end;
+        mtAnyChar :
+          begin
+            {for a match of any character, enqueue the next state}
+            if not (Ch in newline) then
+              DequeEnqueue(sdNextState1);
+          end;
+        mtClass :
+          begin
+            {for a match within a class, enqueue the next state}
+            if (Ch in sdClass^) then
+              DequeEnqueue(sdNextState1);
+          end;
+        mtNegClass :
+          begin
+            {for a match not within a class, enqueue the next state}
+            if not (Ch in sdClass^) then
+              DequeEnqueue(sdNextState1);
+          end;
+        mtTerminal :
+          begin
+            {for a terminal state, the string successfully matched
+             if the regex had no end anchor, or we're at the end
+             of the string or line}
+            if (not FAnchorEnd) or (ch=#0) or (FMultiLine and (ch in newline)) then begin
+              Result := true;
+              Len := StrInx-1;
+//                Exit;
+            end;
+          end;
+        mtNone :
+          begin
+            {for free moves, push the next states onto the deque}
+            Assert(sdNextState2 <> UnusedState,
+                   'optimization should remove all states with one no-cost move');
+            DequePush(sdNextState2);
+            DequePush(sdNextState1);
+          end;
+        mtUnused :
+          begin
+            Assert(false, 'unused states shouldn''t be seen');
+          end;
+      end;
+    end;
+  until (FHead = FTail) or (ch = #0); // deque empty or end of string
+  {if we reach this point we've either exhausted the deque or we've
+   run out of string; if the former, the substring did not match
+   since there are no more states. If the latter, we need to check
+   the states left on the deque to see if one is the terminating
+   state; if so the string matched the regular expression defined by
+   the transition table}
+  while (FHead <> FTail) do begin
+    State := DequePop;
+    with FStateTable[State] do begin
+      case sdMatchType of
+        mtNone :
+          begin
+            {for free moves, push the next states onto the deque}
+            Assert(sdNextState2 <> UnusedState,
+                   'optimization should remove all states with one no-cost move');
+            DequePush(sdNextState2);
+            DequePush(sdNextState1);
+          end;
+        mtTerminal :
+          begin
+            {for a terminal state, the string successfully matched
+             if the regex had no end anchor, or we're at the end
+             of the string or line}
+            if (not FAnchorEnd) or (ch=#0) or (FMultiLine and (ch in newline)) then begin
+              Result := true;
+              Len := StrInx -1;
+              Exit;
+            end;
+          end;
+      end;{case}
+    end;
+  end;
+end;
+{--------}
+function TRegexEngine.rcParseAnchorExpr : integer;
+begin
+  {check for an initial '^'}
+  if (FPosn^ = '^') then begin
+    FAnchorStart := true;
+    inc(FPosn);
+  end;
+
+  {parse an expression}
+  Result := rcParseExpr;
+
+  {if we were successful, check for the final '$'}
+  if (Result <> ErrorState) then begin
+    if (FPosn^ = '$') then begin
+      FAnchorEnd := true;
+      inc(FPosn);
+    end;
+  end;
+end;
+{--------}
+function TRegexEngine.rcParseAtom : integer;
+var
+  MatchType : TNFAMatchType;
+  CharClass : PCharSet;
+begin
+  case FPosn^ of
+    '(' :
+      begin
+        {move past the open parenthesis}
+        inc(FPosn);
+
+        {parse a complete regex between the parentheses}
+        Result := rcParseExpr;
+        if (Result = ErrorState) then
+          Exit;
+        {if the current character is not a close parenthesis,
+         there's an error}
+        if (FPosn^ <> ')') then begin
+          FErrorCode := recNoCloseParen;
+          Result := ErrorState;
+          Exit;
+        end;
+        {move past the close parenthesis}
+        inc(FPosn);
+      end;
+    '[' :
+      begin
+        {move past the open square bracket}
+        inc(FPosn);
+
+        {if the first character in the class is a '^' then the
+         class if negated, otherwise it's a normal one}
+        if (FPosn^ = '^') then begin
+          inc(FPosn);
+          MatchType := mtNegClass;
+        end
+        else begin
+          MatchType := mtClass;
+        end;
+        {allocate the class character set and parse the character
+         class; this will return either with an error, or when the
+         closing square bracket is encountered}
+        New(CharClass);
+        CharClass^ := [];
+        if not rcParseCharClass(CharClass) then begin
+          Dispose(CharClass);
+          Result := ErrorState;
+          Exit;
+        end;
+        {move past the closing square bracket}
+        Assert(FPosn^ = ']',
+               'the rcParseCharClass terminated without finding a "]"');
+        inc(FPosn);
+
+        {add a new state for the character class}
+        Result := rcAddState(MatchType, #0, CharClass,
+                             NewFinalState, UnusedState);
+      end;
+    '.' :
+      begin
+        {move past the period metacharacter}
+        inc(FPosn);
+
+        {add a new state for the 'any character' token}
+        Result := rcAddState(mtAnyChar, #0, nil,
+                             NewFinalState, UnusedState);
+      end;
+  else
+    {otherwise parse a single character}
+    Result := rcParseChar;
+  end;{case}
+end;
+{--------}
+function TRegexEngine.rcParseCCChar(out EscapeChar : Boolean) : AnsiChar;
+begin
+  EscapeChar:=False;
+  {if we hit the end of the string, it's an error}
+  if (FPosn^ = #0) then begin
+    FErrorCode := recSuddenEnd;
+    Result := #0;
+    Exit;
+  end;
+  {if the current char is a metacharacter (at least in terms of a
+   character class), it's an error}
+  if FPosn^ in [']', '-'] then begin
+    FErrorCode := recMetaChar;
+    Result := #0;
+    Exit;
+  end;
+  {otherwise return the character and advance past it}
+  if (FPosn^ = '\') then
+    {..it's an escaped character: get the next character instead}
+    begin
+    inc(FPosn);
+    EscapeChar:=True;
+    Result := rcReturnEscapeChar;
+    end
+  else
+    Result := FPosn^;
+  inc(FPosn);
+end;
+{--------}
+function TRegexEngine.rcParseChar : integer;
+var
+  Ch : AnsiChar;
+begin
+  {if we hit the end of the string, it's an error}
+  if (FPosn^ = #0) then begin
+    Result := ErrorState;
+    FErrorCode := recSuddenEnd;
+    Exit;
+  end;
+  {if the current char is one of the metacharacters, it's an error}
+  if FPosn^ in MetaCharacters then begin
+    Result := ErrorState;
+    FErrorCode := recMetaChar;
+    Exit;
+  end;
+  {otherwise add a state for the character}
+  {..if it's an escaped character: get the next character instead}
+  if (FPosn^ = '\') then
+    begin
+    inc(FPosn);
+    ch := rcReturnEscapeChar;
+    end
+  else
+    ch :=FPosn^;
+  if IgnoreCase then
+    Ch := Upcase(ch);
+  Result := rcAddState(mtChar, Ch, nil, NewFinalState, UnusedState);
+  inc(FPosn);
+end;
+{--------}
+function TRegexEngine.rcParseCharClass(aClass : PCharSet) : boolean;
+begin
+  {assume we can't parse a character class properly}
+  Result := false;
+  {parse a character range; if we can't there was an error and the
+   caller will take care of it}
+  if not rcParseCharRange(aClass) then
+    Exit;
+  {if the current character was not the right bracket, parse another
+   character class (note: we're removing the tail recursion here)}
+  while (FPosn^ <> ']') do begin
+    if not rcParseCharRange(aClass) then
+      Exit;
+  end;
+  {if we reach here we were successful}
+  Result := true;
+end;
+{--------}
+function TRegexEngine.rcParseCharRange(aClass : PCharSet) : boolean;
+var
+  StartChar : AnsiChar;
+  EndChar   : AnsiChar;
+  Ch        : AnsiChar;
+  EscChar   : Boolean;
+begin
+  {assume we can't parse a character range properly}
+  Result := false;
+  {parse a single character; if it's null there was an error}
+  StartChar := rcParseCCChar(EscChar);
+  if (StartChar = #0) then
+    Exit;
+  if EscChar then
+    begin
+    case StartChar of
+      'd' : aClass^ := aClass^ + cs_digits;
+      'D' : aClass^ := aClass^ + cs_allchars-cs_digits;
+      's' : aClass^ := aClass^ + cs_whitespace;
+      'S' : aClass^ := aClass^ + cs_allchars-cs_whitespace;
+      'w' : aClass^ := aClass^ + cs_wordchars;
+      'W' : aClass^ := aClass^ + cs_allchars-cs_wordchars
+    else
+      EscChar := False;
+    end;
+    if EscChar then
+      begin
+      Result := True;
+      Exit;
+      end;
+    end;
+  {if the current character is not a dash, the range consisted of a
+   single character}
+  if (FPosn^ <> '-') then begin
+    if IgnoreCase then
+      Include(aClass^, Upcase(StartChar))
+    else
+      Include(aClass^, StartChar)
+  end
+  {otherwise it's a real range, so get the character at the end of the
+   range; if that's null, there was an error}
+  else begin
+    inc(FPosn); {move past the '-'}
+    EndChar := rcParseCCChar(EscChar);
+    if (EndChar = #0) then
+      Exit;
+    {build the range as a character set}
+    if (StartChar > EndChar) then begin
+      Ch := StartChar;
+      StartChar := EndChar;
+      EndChar := Ch;
+    end;
+    for Ch := StartChar to EndChar do begin
+      Include(aClass^, Ch);
+      if IgnoreCase then
+        Include(aClass^, Upcase(Ch));
+    end;
+  end;
+  {if we reach here we were successful}
+  Result := true;
+end;
+{--------}
+function TRegexEngine.rcParseExpr : integer;
+var
+  StartState1 : integer;
+  StartState2 : integer;
+  EndState1   : integer;
+  OverallStartState : integer;
+begin
+  {assume the worst}
+  Result := ErrorState;
+  {parse an initial term}
+  StartState1 := rcParseTerm;
+  if (StartState1 = ErrorState) then
+    Exit;
+  {if the current character is *not* a pipe character, no alternation
+   is present so return the start state of the initial term as our
+   start state}
+  if (FPosn^ <> '|') then
+    Result := StartState1
+  {otherwise, we need to parse another expr and join the two together
+   in the transition table}
+  else begin
+
+    {advance past the pipe}
+    inc(FPosn);
+    {the initial term's end state does not exist yet (although there
+     is a state in the term that points to it), so create it}
+    EndState1 := rcAddState(mtNone, #0, nil, UnusedState, UnusedState);
+    {for the OR construction we need a new initial state: it will
+     point to the initial term and the second just-about-to-be-parsed
+     expr}
+    OverallStartState := rcAddState(mtNone, #0, nil,
+                                    UnusedState, UnusedState);
+    {parse another expr}
+    StartState2 := rcParseExpr;
+    if (StartState2 = ErrorState) then
+      Exit;
+    {alter the state state for the overall expr so that the second
+     link points to the start of the second expr}
+    Result := rcSetState(OverallStartState, StartState1, StartState2);
+    {now set the end state for the initial term to point to the final
+     end state for the second expr and the overall expr}
+    rcSetState(EndState1, FStateCount, UnusedState);
+  end;
+end;
+{--------}
+function TRegexEngine.rcParseFactor : integer;
+var
+  StartStateAtom : integer;
+  EndStateAtom   : integer;
+  TempEndStateAtom : integer;
+  Int            : string;
+  n,m,nState     : integer;
+  i              : integer;
+begin
+  {assume the worst}
+  Result := ErrorState;
+  {first parse an atom}
+  StartStateAtom := rcParseAtom;
+  if (StartStateAtom = ErrorState) then
+    Exit;
+  {check for a closure operator}
+  case FPosn^ of
+    '?' : begin
+            {move past the ? operator}
+            inc(FPosn);
+            {the atom's end state doesn't exist yet, so create one}
+            EndStateAtom := rcAddState(mtNone, #0, nil,
+                                       UnusedState, UnusedState);
+            {create a new start state for the overall regex}
+            Result := rcAddState(mtNone, #0, nil,
+                                 StartStateAtom, EndStateAtom);
+            {make sure the new end state points to the next unused
+             state}
+            rcSetState(EndStateAtom, FStateCount, UnusedState);
+          end;
+    '*' : begin
+            {move past the * operator}
+            inc(FPosn);
+            {the atom's end state doesn't exist yet, so create one;
+             it'll be the start of the overall regex subexpression}
+            Result := rcAddState(mtNone, #0, nil,
+                                 NewFinalState, StartStateAtom);
+          end;
+    '+' : begin
+            {move past the + operator}
+            inc(FPosn);
+            {the atom's end state doesn't exist yet, so create one}
+            rcAddState(mtNone, #0, nil, NewFinalState, StartStateAtom);
+            {the start of the overall regex subexpression will be the
+             atom's start state}
+            Result := StartStateAtom;
+          end;
+    '{' : begin // {n,m}
+            {move past the brace }
+            inc(FPosn);
+
+            {Parse the value of n}
+            Int := '';
+            while not (FPosn^ in [',','}',#0]) do
+              begin
+              int := int+FPosn^;
+              inc(FPosn);
+              end;
+            if FPosn^ = #0 then exit; // No end-brace or comma -> invalid regex
+            if int <> '' then
+              n := StrToIntDef(Int,-2)
+            else
+              n := -1; // if n is 'empty', set it to -1
+            if n = -2 then exit; // Invalid value for n -> invalid RegEx
+
+            if FPosn^ <> '}' then
+              begin
+
+              {move past the , }
+              inc(FPosn);
+              {Parse the value of m}
+              Int := '';
+              while not (FPosn^ in ['}',#0]) do
+                begin
+                int := int+FPosn^;
+                inc(FPosn);
+                end;
+              if FPosn^ <> '}' then exit; // No end-brace -> invalid regex
+              if int <> '' then m := StrToIntDef(Int,-2)
+              else m := -1;
+              if m = -2 then exit; // Invalid RegEx
+              end
+            else
+              m := -3;
+
+            {move past the brace }
+            inc(FPosn);
+
+            if (n=0) and (m=-1) then
+            {the atom's end state doesn't exist yet, so create one;
+             it'll be the start of the overall regex subexpression}
+              Result := rcAddState(mtNone, #0, nil, NewFinalState, StartStateAtom)
+            else
+              begin
+              EndStateAtom := FStateCount-1;
+              TempEndStateAtom:=StartStateAtom;
+              for i := 1 to n-1 do
+                begin
+                TempEndStateAtom:=FStateCount;
+                for nState:=StartStateAtom to EndStateAtom do
+                  begin
+                  FStateTable[FStateCount]:=FStateTable[nState];
+                  if FStateTable[FStateCount].sdNextState1 in [StartStateAtom..EndStateAtom+1] then
+                    FStateTable[FStateCount].sdNextState1 := i+FStateTable[FStateCount].sdNextState1+ (EndStateAtom-StartStateAtom) *i;
+                  if FStateTable[FStateCount].sdNextState2 in [StartStateAtom..EndStateAtom+1] then
+                    FStateTable[FStateCount].sdNextState2 := i+FStateTable[FStateCount].sdNextState2 + (EndStateAtom-StartStateAtom) *i;
+                  inc(FStateCount);
+
+                  if FStateCount=length(FStateTable) then
+                    setlength(FStateTable,(FStateCount * 3) div 2);
+                  end;
+                end;
+
+            for i := n to m-1 do
+              begin
+              rcAddState(mtNone, #0, nil, NewFinalState, EndStateAtom+(EndStateAtom-StartStateAtom+1) * (m-1) + (m-n)+1);
+
+              TempEndStateAtom:=FStateCount;
+              for nState:=StartStateAtom to EndStateAtom do
+                begin
+                FStateTable[FStateCount]:=FStateTable[nState];
+                if FStateTable[FStateCount].sdNextState1 in [StartStateAtom..EndStateAtom+1] then
+                  FStateTable[FStateCount].sdNextState1 := i+FStateTable[FStateCount].sdNextState1+ (EndStateAtom-StartStateAtom) * i+(i-n+1);
+                if FStateTable[FStateCount].sdNextState2 in [StartStateAtom..EndStateAtom+1] then
+                  FStateTable[FStateCount].sdNextState2 := i+FStateTable[FStateCount].sdNextState2 + (EndStateAtom-StartStateAtom) * i+(i-n+1);
+                inc(FStateCount);
+
+                if FStateCount=length(FStateTable) then
+                  setlength(FStateTable,(FStateCount * 3) div 2);
+                end;
+              end;
+
+              if m = -1 then
+                rcAddState(mtNone, #0, nil, NewFinalState, TempEndStateAtom);
+
+              Result := StartStateAtom;
+              end;
+          end;
+
+  else
+    Result := StartStateAtom;
+  end;{case}
+end;
+{--------}
+function TRegexEngine.rcParseTerm : integer;
+var
+  StartState2 : integer;
+  EndState1   : integer;
+begin
+  {parse an initial factor, the state number returned will also be our
+   return state number}
+  Result := rcParseFactor;
+  if (Result = ErrorState) then
+    Exit;
+  {Note: we have to "break the grammar" here. We've parsed a regular
+         subexpression and we're possibly following on with another
+         regular subexpression. There's no nice operator to key off
+         for concatenation: we just have to know that for
+         concatenating two subexpressions, the current character will
+         be
+           - an open parenthesis
+           - an open square bracket
+           - an any char operator
+           - a character that's not a metacharacter
+         i.e., the three possibilities for the start of an "atom" in
+         our grammar}
+  if (FPosn^ = '(') or
+     (FPosn^ = '[') or
+     (FPosn^ = '.') or
+     ((FPosn^ <> #0) and not (FPosn^ in MetaCharacters)) then begin
+
+    {the initial factor's end state does not exist yet (although there
+     is a state in the term that points to it), so create it}
+    EndState1 := rcAddState(mtNone, #0, nil, UnusedState, UnusedState);
+    {parse another term}
+    StartState2 := rcParseTerm;
+    if (StartState2 = ErrorState) then begin
+      Result := ErrorState;
+      Exit;
+    end;
+    {join the first factor to the second term}
+    rcSetState(EndState1, StartState2, UnusedState);
+  end;
+end;
+
+procedure TRegexEngine.WriteTable;
+var i : integer;
+begin
+  for i := 0 to FStateCount-1 do with FStateTable[i] do
+    writeln('s:',i,' mt:',sdMatchType ,' ns1:',sdNextState1,' ns2:',sdNextState2,' char:',sdChar);
+end;
+
+procedure TRegexEngine.DequeEnqueue(aValue: integer);
+begin
+  FList[FTail] := aValue;
+  inc(FTail);
+  if (FTail = FCapacity) then
+    FTail := 0
+  else if (FTail = FHead) then
+    DequeGrow;
+end;
+
+procedure TRegexEngine.DequePush(aValue: integer);
+begin
+  if (FHead = 0) then
+    FHead := FCapacity;
+  dec(FHead);
+  FList[FHead] := aValue;
+  if (FTail = FHead) then
+    DequeGrow;
+end;
+
+function TRegexEngine.DequePop: integer;
+begin
+  Result := FList[FHead];
+  inc(FHead);
+  if (FHead = FCapacity) then
+    FHead := 0;
+end;
+
+procedure TRegexEngine.DequeGrow;
+var
+  OldCount : integer;
+  i, j     : integer;
+begin
+  {grow the list by 50%}
+  OldCount := FCapacity;
+  FCapacity:=(OldCount * 3) div 2;
+  SetLength(FList,FCapacity);
+  {expand the data into the increased space, maintaining the deque}
+  if (FHead = 0) then
+    FTail := OldCount
+  else begin
+    j := FCapacity;
+    for i := pred(OldCount) downto FHead do begin
+      dec(j);
+      FList[j] := FList[i]
+    end;
+    FHead := j;
+  end;
+end;
+
+function TRegexEngine.rcReturnEscapeChar: AnsiChar;
+begin
+  case FPosn^ of
+    't' : Result := #9;
+    'n' : Result := #10;
+    'r' : Result := #13;
+    'f' : Result := #12;
+    'a' : Result := #7;
+  else
+    Result := FPosn^;
+  end;
+end;
+
+{--------}
+procedure TRegexEngine.rcSetIgnoreCase(aValue : boolean);
+begin
+  if (aValue <> FIgnoreCase) then begin
+    rcClear;
+    FIgnoreCase := aValue;
+  end;
+end;
+{--------}
+procedure TRegexEngine.rcSetRegexStr(const aRegexStr : string);
+begin
+  if (aRegexStr <> FRegexStr) then begin
+    rcClear;
+    FRegexStr := aRegexStr;
+  end;
+end;
+{--------}
+function TRegexEngine.rcSetState(aState     : integer;
+                                   aNextState1: integer;
+                                   aNextState2: integer) : integer;
+begin
+  Assert((0 <= aState) and (aState < FStateCount),
+         'trying to change an invalid state');
+
+  {get the state record and change the transition information}
+  FStateTable[aState].sdNextState1 := aNextState1;
+  FStateTable[aState].sdNextState2 := aNextState2;
+  Result := aState;
+end;
+{--------}
+procedure TRegexEngine.rcSetUpcase(aValue : TUpcaseFunc);
+begin
+  if not Assigned(aValue) then
+    FUpcase := SystemUpcase
+  else
+    FUpcase := aValue;
+end;
+
+procedure TRegexEngine.rcSetMultiLine(aValue: Boolean);
+begin
+  FMultiLine:=aValue;
+end;
+
+{====================================================================}
+
+end.

+ 117 - 0
packages/regexpr/src/regexprcompat.pp

@@ -0,0 +1,117 @@
+{
+    This file is part of the Free Pascal packages library.
+    Copyright (c) 2008 by Joost van der Sluis, member of the
+    Free Pascal development team
+
+    Compatibility unit for the old regexpr unit.
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+unit RegExprCompat;
+
+{$mode objfpc}{$H+}
+
+interface
+
+uses
+  Regex;
+
+type
+   tregexprflag = (
+     ref_singleline,
+     {** This indicates that a start of line is either the
+         start of the pattern or a linebreak. }
+     ref_multiline,
+     {** The match will be done in a case-insensitive way
+          according to US-ASCII character set. }
+     ref_caseinsensitive);
+   tregexprflags = set of tregexprflag;
+
+   TRegExprEngine = TRegexEngine;
+
+function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags;var RegExprEngine: TRegExprEngine): boolean;
+function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags): TREGExprEngine;
+procedure DestroyRegExprEngine(var regexpr : TRegExprEngine);
+
+function RegExprPos(RegExprEngine : TRegExprEngine;p : pchar;var index,len : longint) : boolean;
+function RegExprReplaceAll(RegExprEngine : TRegExprEngine;const src,newstr : ansistring;var dest : ansistring) : sizeint;
+
+function RegExprEscapeStr (const S : string) : string;
+
+implementation
+
+function GenerateRegExprEngine(regexpr: pchar; flags: tregexprflags;
+  var RegExprEngine: TRegExprEngine): boolean;
+var ErrorPos  : Integer;
+    ErrorCode : TRegexError;
+
+begin
+  RegExprEngine := TRegExprEngine.Create(regexpr);
+  if ref_multiline in flags then RegExprEngine.MultiLine:=True;
+  if ref_caseinsensitive in flags then RegExprEngine.IgnoreCase:=True;
+  Result := RegExprEngine.Parse(ErrorPos,ErrorCode);
+end;
+
+function GenerateRegExprEngine(regexpr: pchar; flags: tregexprflags
+  ): TREGExprEngine;
+  
+var r: TRegExprEngine;
+
+begin
+  GenerateRegExprEngine(regexpr,flags,r);
+  GenerateRegExprEngine:=r;
+end;
+
+procedure DestroyRegExprEngine(var regexpr: TRegExprEngine);
+begin
+  regexpr.Free;
+end;
+
+function RegExprPos(RegExprEngine: TRegExprEngine; p: pchar; var index,
+  len: longint): boolean;
+begin
+  Len := 1;
+  Result := RegExprEngine.MatchString(p,index,len);
+  Len := Len - index;
+  Dec(Index);
+end;
+
+function RegExprReplaceAll(RegExprEngine: TRegExprEngine; const src,
+  newstr: ansistring; var dest: ansistring): sizeint;
+begin
+  result := RegExprEngine.ReplaceAllString(src,newstr,Dest);
+end;
+
+function RegExprEscapeStr(const S: string): string;
+var
+  i, len   : integer;
+  s1: string;
+begin
+  RegExprEscapeStr:= '';
+  s1:='';
+  if (S = '') then
+   exit;
+
+  len := Length (S);
+
+  for i := 1 to len do
+    begin
+      if (S [i] in ['(','|', '.', '*', '?', '^', '$', '-', '[', '{', '}', ']', ')', '\']) then
+        begin
+          s1 := s1 + '\';
+        end;
+
+      s1 := s1 + S[i];
+    end;
+  RegExprEscapeStr:=s1;
+end;
+
+end.
+

+ 34 - 7
packages/regexpr/tests/testreg1.pp

@@ -1,9 +1,16 @@
 {$IFDEF FPC}
 {$MODE OBJFPC}
 {$ENDIF}
+
+{$DEFINE UseRegexCompat}
+
 program testreg1;
 uses
+{$IFDEF UseRegexCompat}
+   regexprcompat;
+{$ELSE}
    regexpr;
+{$ENDIF}
 
 var
    r         : tregexprengine;
@@ -318,6 +325,16 @@ begin
      do_error(705);
    DestroyregExprEngine(r);
 
+{$IFDEF UseRegexCompat}
+   initok:=GenerateRegExprEngine('Cat(AZ){2,}Q',[],r);
+   if not initok then
+     do_error(705);
+   if not(RegExprPos(r,'BCatAZAZAZAZQDABCD',index,len)) or
+     (index<>1) or (len<>12) then
+     do_error(705);
+   DestroyregExprEngine(r);
+{$ENDIF}
+
    initok:=GenerateRegExprEngine('CatAZ{0,}',[],r);
    if not initok then
      do_error(706);
@@ -364,6 +381,14 @@ begin
      do_error(725);
    DestroyregExprEngine(r);
 
+   initok:=GenerateRegExprEngine('Cat(AZ){1,3}',[],r);
+   if not initok then
+     do_error(725);
+   if not(RegExprPos(r,'BCatAZAZDABCD',index,len)) or
+     (index<>1) or (len<>7) then
+     do_error(725);
+   DestroyregExprEngine(r);
+
    initok:=GenerateRegExprEngine('CatAz{1,5}',[],r);
    if not initok then
      do_error(726);
@@ -650,11 +675,13 @@ begin
 
    { test real backtracking }
 
-(*   r:=GenerateRegExprEngine('nofoo|foo',[]);
+{$IFDEF UseRegexCompat}
+   r:=GenerateRegExprEngine('nofoo|foo',[]);
    if not(RegExprPos(r,'1234   foo1234XXXX',index,len)) or
      (index<>7) or (len<>3) then
      do_error(1300);
-   DestroyregExprEngine(r);*)
+   DestroyregExprEngine(r);
+{$ENDIF}
 
   GenerateRegExprEngine('abc\(123\)$',[],r);
   if not (RegExprPos(r,'1234 abc(123)', index, len)) or
@@ -762,7 +789,6 @@ begin
      do_error(1505);
    DestroyregExprEngine(r);
 
-{
   initok:=GenerateRegExprEngine('\.localhost$',[],r);
   if not initok then
      do_error(1506);
@@ -778,6 +804,7 @@ begin
      do_error(1507);
    DestroyregExprEngine(r);
 
+{$IFDEF UseRegexCompat}
   initok:=GenerateRegExprEngine('.*[^e]\.localhost$',[],r);
   if not initok then
      do_error(1508);
@@ -796,22 +823,22 @@ begin
   if not initok then
      do_error(1500);
    if not(RegExprPos(r,'1234   nofoo1234XXXX',index,len)) or
-     (index<>8) or (len<>9) then
+     (index<>7) or (len<>9) then
      do_error(1500);
    DestroyregExprEngine(r);
 
    r:=GenerateRegExprEngine('(nofoo|foo|anotherfoo)1234',[]);
    if not(RegExprPos(r,'1234   nofoo1234XXXX',index,len)) or
-     (index<>8) or (len<>9) then
+     (index<>7) or (len<>9) then
      do_error(1009);
    DestroyregExprEngine(r);
 
    r:=GenerateRegExprEngine('nofoo1234|foo1234',[]);
-   if (r.data=nil) or not(RegExprPos(r,'1234   foo1234XXXX',index,len)) or
+   if {(r.data=nil) or} not(RegExprPos(r,'1234   foo1234XXXX',index,len)) or
      (index<>7) or (len<>7) then
      do_error(1010);
    DestroyregExprEngine(r);
-   }
+{$ENDIF}
 
   { *************************************************************************
                               replacement tests