浏览代码

Merged revisions 10611-10613,10617,10622-10624,10639-10641,10645-10646,10649,10658,10660-10662,10664-10667,10673-10675,10678-10680,10684-10687,10689,10695,10698-10701,10703,10709,10713-10716,10718-10719,10723,10735-10739,10749,10751,10755-10757,10759-10761,10769,10772-10778,10782-10785,10787-10790,10794-10795,10798,10830-10832,10837,10848,10858,10860,10862-10864,10875,10882,10891,10907,10909,10915-10916,10922-10923,10928,10930,10933,10935,10939,10942,10948,10952-10956,10960,10964-10965,10972,10974-10975,10977,10980,10982-10983,10985,10987,10989-10990 via svnmerge from
svn+ssh://[email protected]/FPC/svn/fpc/trunk

........
r10611 | joost | 2008-04-06 23:23:13 +0200 (Sun, 06 Apr 2008) | 3 lines

* Added Regex unit with an NFA regexpression parser, based on code from Julian Bucknall which is used with his permission
* Added RegExprCompat unit which contains an interface to the Regex unit which is compatible with the existing RegExpr unit
* Added some more tests and added a define UseRegexCompat to make it possible to choose what regex-implementation to test. Test which fail on the existing regex-parser are placed between ifdefs
........
r10987 | joost | 2008-05-17 21:24:47 +0200 (Sat, 17 May 2008) | 1 line

* Moved regexpr unit to old
........
r10989 | joost | 2008-05-17 21:32:47 +0200 (Sat, 17 May 2008) | 1 line

* Renamed the RegExprCompat unit to RegExpr. This wrapper around the Regex unit now replaces the old RegExpr unit.
........
r10990 | joost | 2008-05-17 21:43:26 +0200 (Sat, 17 May 2008) | 1 line

* Removed define UseRegexCompat from test
........

git-svn-id: branches/fixes_2_2@11108 -

joost 17 年之前
父节点
当前提交
3a6ebb5bdc

+ 2 - 0
.gitattributes

@@ -3891,6 +3891,8 @@ packages/pxlib/src/pxlib.pp svneol=native#text/plain
 packages/regexpr/Makefile svneol=native#text/plain
 packages/regexpr/Makefile.fpc svneol=native#text/plain
 packages/regexpr/fpmake.pp svneol=native#text/plain
+packages/regexpr/src/old/regexpr.pp svneol=native#text/plain
+packages/regexpr/src/regex.pp svneol=native#text/plain
 packages/regexpr/src/regexpr.pp svneol=native#text/plain
 packages/regexpr/tests/testreg1.pp svneol=native#text/plain
 packages/rexx/Makefile svneol=native#text/plain

+ 170 - 58
packages/regexpr/Makefile

@@ -1,8 +1,8 @@
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2008/01/26]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2008/05/26]
 #
 default: all
-MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-wince arm-gba arm-nds arm-embedded arm-symbian powerpc64-linux powerpc64-darwin powerpc64-embedded
+MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian powerpc64-linux powerpc64-darwin powerpc64-embedded avr-embedded armeb-linux armeb-embedded
 BSDs = freebsd netbsd openbsd darwin
 UNIXs = linux $(BSDs) solaris qnx
 LIMIT83fs = go32v2 os2 emx watcom
@@ -167,6 +167,17 @@ OS_TARGET:=$(word 5,$(FPC_COMPILERINFO))
 endif
 FULL_TARGET=$(CPU_TARGET)-$(OS_TARGET)
 FULL_SOURCE=$(CPU_SOURCE)-$(OS_SOURCE)
+ifeq ($(CPU_TARGET),armeb)
+ARCH=arm
+override FPCOPT+=-Cb
+else
+ifeq ($(CPU_TARGET),armel)
+ARCH=arm
+override FPCOPT+=-CaEABI
+else
+ARCH=$(CPU_TARGET)
+endif
+endif
 ifneq ($(findstring $(OS_SOURCE),$(LIMIT83fs)),)
 TARGETSUFFIX=$(OS_TARGET)
 SOURCESUFFIX=$(OS_SOURCE)
@@ -174,9 +185,15 @@ else
 TARGETSUFFIX=$(FULL_TARGET)
 SOURCESUFFIX=$(FULL_SOURCE)
 endif
+ifeq  ($(OS_TARGET),darwin)
+ifneq ($(OS_SOURCE),darwin)
+CROSSCOMPILE=1
+endif
+else
 ifneq ($(FULL_TARGET),$(FULL_SOURCE))
 CROSSCOMPILE=1
 endif
+endif
 ifeq ($(findstring makefile,$(MAKECMDGOALS)),)
 ifeq ($(findstring $(FULL_TARGET),$(MAKEFILETARGETS)),)
 $(error The Makefile doesn't support target $(FULL_TARGET), please run fpcmake first)
@@ -188,7 +205,7 @@ endif
 ifeq ($(OS_TARGET),linux)
 linuxHier=1
 endif
-export OS_TARGET OS_SOURCE CPU_TARGET CPU_SOURCE FULL_TARGET FULL_SOURCE TARGETSUFFIX SOURCESUFFIX CROSSCOMPILE
+export OS_TARGET OS_SOURCE ARCH CPU_TARGET CPU_SOURCE FULL_TARGET FULL_SOURCE TARGETSUFFIX SOURCESUFFIX CROSSCOMPILE
 ifdef FPCDIR
 override FPCDIR:=$(subst \,/,$(FPCDIR))
 ifeq ($(wildcard $(addprefix $(FPCDIR)/,rtl units)),)
@@ -243,163 +260,178 @@ PACKAGESDIR:=$(wildcard $(FPCDIR) $(FPCDIR)/packages $(FPCDIR)/packages/base $(F
 override PACKAGE_NAME=regexpr
 override PACKAGE_VERSION=2.0.0
 ifeq ($(FULL_TARGET),i386-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),i386-go32v2)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),i386-win32)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),i386-os2)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),i386-freebsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),i386-beos)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
+endif
+ifeq ($(FULL_TARGET),i386-haiku)
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),i386-netbsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),i386-solaris)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),i386-qnx)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),i386-netware)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),i386-openbsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),i386-wdosx)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),i386-darwin)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),i386-emx)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),i386-watcom)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),i386-netwlibc)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),i386-wince)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),i386-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),i386-symbian)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),m68k-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),m68k-freebsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),m68k-netbsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),m68k-amiga)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),m68k-atari)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),m68k-openbsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),m68k-palmos)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),m68k-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),powerpc-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),powerpc-netbsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),powerpc-amiga)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),powerpc-macos)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),powerpc-darwin)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),powerpc-morphos)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),powerpc-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),sparc-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),sparc-netbsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),sparc-solaris)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),sparc-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),x86_64-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),x86_64-freebsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),x86_64-darwin)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),x86_64-win64)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),arm-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),arm-palmos)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
+endif
+ifeq ($(FULL_TARGET),arm-darwin)
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),arm-wince)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),arm-gba)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),arm-nds)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),arm-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),arm-symbian)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),powerpc64-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),powerpc64-darwin)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),powerpc64-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
+endif
+ifeq ($(FULL_TARGET),avr-embedded)
+override TARGET_UNITS+=regex regexpr
+endif
+ifeq ($(FULL_TARGET),armeb-linux)
+override TARGET_UNITS+=regex regexpr
+endif
+ifeq ($(FULL_TARGET),armeb-embedded)
+override TARGET_UNITS+=regex regexpr
 endif
 ifeq ($(FULL_TARGET),i386-linux)
 override TARGET_EXAMPLES+=testreg1
@@ -419,6 +451,9 @@ endif
 ifeq ($(FULL_TARGET),i386-beos)
 override TARGET_EXAMPLES+=testreg1
 endif
+ifeq ($(FULL_TARGET),i386-haiku)
+override TARGET_EXAMPLES+=testreg1
+endif
 ifeq ($(FULL_TARGET),i386-netbsd)
 override TARGET_EXAMPLES+=testreg1
 endif
@@ -536,6 +571,9 @@ endif
 ifeq ($(FULL_TARGET),arm-palmos)
 override TARGET_EXAMPLES+=testreg1
 endif
+ifeq ($(FULL_TARGET),arm-darwin)
+override TARGET_EXAMPLES+=testreg1
+endif
 ifeq ($(FULL_TARGET),arm-wince)
 override TARGET_EXAMPLES+=testreg1
 endif
@@ -560,6 +598,15 @@ endif
 ifeq ($(FULL_TARGET),powerpc64-embedded)
 override TARGET_EXAMPLES+=testreg1
 endif
+ifeq ($(FULL_TARGET),avr-embedded)
+override TARGET_EXAMPLES+=testreg1
+endif
+ifeq ($(FULL_TARGET),armeb-linux)
+override TARGET_EXAMPLES+=testreg1
+endif
+ifeq ($(FULL_TARGET),armeb-embedded)
+override TARGET_EXAMPLES+=testreg1
+endif
 override INSTALL_FPCPACKAGE=y
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_OPTIONS+=-S2
@@ -579,6 +626,9 @@ endif
 ifeq ($(FULL_TARGET),i386-beos)
 override COMPILER_OPTIONS+=-S2
 endif
+ifeq ($(FULL_TARGET),i386-haiku)
+override COMPILER_OPTIONS+=-S2
+endif
 ifeq ($(FULL_TARGET),i386-netbsd)
 override COMPILER_OPTIONS+=-S2
 endif
@@ -696,6 +746,9 @@ endif
 ifeq ($(FULL_TARGET),arm-palmos)
 override COMPILER_OPTIONS+=-S2
 endif
+ifeq ($(FULL_TARGET),arm-darwin)
+override COMPILER_OPTIONS+=-S2
+endif
 ifeq ($(FULL_TARGET),arm-wince)
 override COMPILER_OPTIONS+=-S2
 endif
@@ -720,6 +773,15 @@ endif
 ifeq ($(FULL_TARGET),powerpc64-embedded)
 override COMPILER_OPTIONS+=-S2
 endif
+ifeq ($(FULL_TARGET),avr-embedded)
+override COMPILER_OPTIONS+=-S2
+endif
+ifeq ($(FULL_TARGET),armeb-linux)
+override COMPILER_OPTIONS+=-S2
+endif
+ifeq ($(FULL_TARGET),armeb-embedded)
+override COMPILER_OPTIONS+=-S2
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_INCLUDEDIR+=src
 endif
@@ -738,6 +800,9 @@ endif
 ifeq ($(FULL_TARGET),i386-beos)
 override COMPILER_INCLUDEDIR+=src
 endif
+ifeq ($(FULL_TARGET),i386-haiku)
+override COMPILER_INCLUDEDIR+=src
+endif
 ifeq ($(FULL_TARGET),i386-netbsd)
 override COMPILER_INCLUDEDIR+=src
 endif
@@ -855,6 +920,9 @@ endif
 ifeq ($(FULL_TARGET),arm-palmos)
 override COMPILER_INCLUDEDIR+=src
 endif
+ifeq ($(FULL_TARGET),arm-darwin)
+override COMPILER_INCLUDEDIR+=src
+endif
 ifeq ($(FULL_TARGET),arm-wince)
 override COMPILER_INCLUDEDIR+=src
 endif
@@ -879,6 +947,15 @@ endif
 ifeq ($(FULL_TARGET),powerpc64-embedded)
 override COMPILER_INCLUDEDIR+=src
 endif
+ifeq ($(FULL_TARGET),avr-embedded)
+override COMPILER_INCLUDEDIR+=src
+endif
+ifeq ($(FULL_TARGET),armeb-linux)
+override COMPILER_INCLUDEDIR+=src
+endif
+ifeq ($(FULL_TARGET),armeb-embedded)
+override COMPILER_INCLUDEDIR+=src
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_SOURCEDIR+=src tests examples
 endif
@@ -897,6 +974,9 @@ endif
 ifeq ($(FULL_TARGET),i386-beos)
 override COMPILER_SOURCEDIR+=src tests examples
 endif
+ifeq ($(FULL_TARGET),i386-haiku)
+override COMPILER_SOURCEDIR+=src tests examples
+endif
 ifeq ($(FULL_TARGET),i386-netbsd)
 override COMPILER_SOURCEDIR+=src tests examples
 endif
@@ -1014,6 +1094,9 @@ endif
 ifeq ($(FULL_TARGET),arm-palmos)
 override COMPILER_SOURCEDIR+=src tests examples
 endif
+ifeq ($(FULL_TARGET),arm-darwin)
+override COMPILER_SOURCEDIR+=src tests examples
+endif
 ifeq ($(FULL_TARGET),arm-wince)
 override COMPILER_SOURCEDIR+=src tests examples
 endif
@@ -1038,6 +1121,15 @@ endif
 ifeq ($(FULL_TARGET),powerpc64-embedded)
 override COMPILER_SOURCEDIR+=src tests examples
 endif
+ifeq ($(FULL_TARGET),avr-embedded)
+override COMPILER_SOURCEDIR+=src tests examples
+endif
+ifeq ($(FULL_TARGET),armeb-linux)
+override COMPILER_SOURCEDIR+=src tests examples
+endif
+ifeq ($(FULL_TARGET),armeb-embedded)
+override COMPILER_SOURCEDIR+=src tests examples
+endif
 ifdef REQUIRE_UNITSDIR
 override UNITSDIR+=$(REQUIRE_UNITSDIR)
 endif
@@ -1327,6 +1419,11 @@ BATCHEXT=.sh
 EXEEXT=
 SHORTSUFFIX=be
 endif
+ifeq ($(OS_TARGET),haiku)
+BATCHEXT=.sh
+EXEEXT=
+SHORTSUFFIX=hai
+endif
 ifeq ($(OS_TARGET),solaris)
 BATCHEXT=.sh
 EXEEXT=
@@ -1804,6 +1901,9 @@ endif
 ifeq ($(FULL_TARGET),i386-beos)
 REQUIRE_PACKAGES_RTL=1
 endif
+ifeq ($(FULL_TARGET),i386-haiku)
+REQUIRE_PACKAGES_RTL=1
+endif
 ifeq ($(FULL_TARGET),i386-netbsd)
 REQUIRE_PACKAGES_RTL=1
 endif
@@ -1921,6 +2021,9 @@ endif
 ifeq ($(FULL_TARGET),arm-palmos)
 REQUIRE_PACKAGES_RTL=1
 endif
+ifeq ($(FULL_TARGET),arm-darwin)
+REQUIRE_PACKAGES_RTL=1
+endif
 ifeq ($(FULL_TARGET),arm-wince)
 REQUIRE_PACKAGES_RTL=1
 endif
@@ -1945,6 +2048,15 @@ endif
 ifeq ($(FULL_TARGET),powerpc64-embedded)
 REQUIRE_PACKAGES_RTL=1
 endif
+ifeq ($(FULL_TARGET),avr-embedded)
+REQUIRE_PACKAGES_RTL=1
+endif
+ifeq ($(FULL_TARGET),armeb-linux)
+REQUIRE_PACKAGES_RTL=1
+endif
+ifeq ($(FULL_TARGET),armeb-embedded)
+REQUIRE_PACKAGES_RTL=1
+endif
 ifdef REQUIRE_PACKAGES_RTL
 PACKAGEDIR_RTL:=$(firstword $(subst /Makefile.fpc,,$(strip $(wildcard $(addsuffix /rtl/Makefile.fpc,$(PACKAGESDIR))))))
 ifneq ($(PACKAGEDIR_RTL),)
@@ -1972,13 +2084,13 @@ override COMPILER_UNITDIR+=$(UNITDIR_RTL)
 endif
 endif
 ifndef NOCPUDEF
-override FPCOPTDEF=$(CPU_TARGET)
+override FPCOPTDEF=$(ARCH)
 endif
 ifneq ($(OS_TARGET),$(OS_SOURCE))
 override FPCOPT+=-T$(OS_TARGET)
 endif
 ifneq ($(CPU_TARGET),$(CPU_SOURCE))
-override FPCOPT+=-P$(CPU_TARGET)
+override FPCOPT+=-P$(ARCH)
 endif
 ifeq ($(OS_SOURCE),openbsd)
 override FPCOPT+=-FD$(NEW_BINUTILS_PATH)

+ 1 - 1
packages/regexpr/Makefile.fpc

@@ -7,7 +7,7 @@ name=regexpr
 version=2.0.0
 
 [target]
-units=regexpr
+units=regex regexpr
 examples=testreg1
 
 [compiler]

+ 1207 - 0
packages/regexpr/src/old/regexpr.pp

@@ -0,0 +1,1207 @@
+{ $DEFINE DEBUG}
+
+{
+    This unit implements basic regular expression support
+
+    This file is part of the Free Pascal run time library.
+    Copyright (c) 2000-2006 by Florian Klaempfland Carl Eric Codere
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+{.$ define DEBUG}
+
+(*
+  - newline handling (uses all known formats of ASCII, #10,#13,#13#10 and #$85
+
+  TODO:
+     - correct backtracking, for example in (...)*
+     - full | support (currently requires to put all items with | operator
+        between parenthesis (in a group) to take care over order priority.
+          Therefore the following would work: (foo)|(nofoo) but not
+          foo|nofoo
+     - getting substrings and using substrings with \1 etc.
+     - do we treat several multiline characters in a row as a single
+        newline character for $ and ^?
+*)
+
+{$IFDEF FPC}
+{$mode objfpc}
+{$ENDIF}
+
+{** @abstract(Regular expression unit)
+
+    This unit implements a basic regular expression parser that mostly conforms
+    to the POSIX extended-regular expression syntax. It also supports standard
+    escape characters for patterns (as defined in PERL).
+}
+unit regexpr;
+
+  interface
+
+    { the following declarions are only in the interface because }
+    { some procedures return pregexprentry but programs which   }
+    { use this unit shouldn't access this data structures        }
+    type
+       tcharset = set of char;
+       tregexprentrytype = (ret_charset,ret_or,
+          ret_illegalend,ret_backtrace,ret_startline,
+          ret_endline,ret_pattern);
+
+       pregexprentry = ^tregexprentry;
+       tregexprentry = record
+          next,nextdestroy : pregexprentry;
+          case typ : tregexprentrytype of
+             ret_charset : (chars : tcharset; elsepath : pregexprentry);
+             {** This is a complete pattern path ()+ , ()* or ()?, n,m }
+             ret_pattern: (pattern: pregexprentry; minoccurs: integer; maxoccurs: integer; alternative : pregexprentry);
+       end;
+
+       tregexprflag = (
+         ref_singleline,
+         {** This indicates that a start of line is either the
+             start of the pattern or a linebreak. }
+         ref_multiline,
+         {** The match will be done in a case-insensitive way
+              according to US-ASCII character set. }
+         ref_caseinsensitive);
+       tregexprflags = set of tregexprflag;
+
+       TRegExprEngine = record
+          Data : pregexprentry;
+          DestroyList : pregexprentry;
+          Flags : TRegExprFlags;
+       end;
+
+     const
+        cs_allchars : tcharset = [#0..#255];
+        cs_wordchars : tcharset = ['A'..'Z','a'..'z','_','0'..'9'];
+        cs_newline : tcharset = [#10];
+        cs_digits : tcharset = ['0'..'9'];
+        cs_whitespace : tcharset = [' ',#9];
+
+     var
+        { these are initilized in the init section of the unit }
+        cs_nonwordchars : tcharset;
+        cs_nondigits : tcharset;
+        cs_nonwhitespace : tcharset;
+
+     { the following procedures can be used by units basing }
+     { on the regexpr unit                                  }
+
+     {** From a regular expression, compile an encoded version of the regular expression.
+
+         @param(regexpr Regular expression to compile)
+         @param(flags Flags relating to the type of parsing that will occur)
+         @param(RegExprEngine The actual encoded version of the regular expression)
+         @returns(true if success, otherwise syntax error in compiling regular expression)
+     }
+     function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags;var RegExprEngine: TRegExprEngine): boolean;
+
+{$IFDEF FPC}
+    {** Backward compatibility routine }
+     function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags): TREGExprEngine;
+{$ENDIF}
+
+     {** Frees all up resources used for the encoded regular expression }
+     procedure DestroyRegExprEngine(var regexpr : TRegExprEngine);
+
+     {** @abstract(Matches a regular expression)
+
+        @param(RegExprEngine The actual compiled regular expression to match against)
+        @param(p The text to search for for a match)
+        @param(index zero-based index to the start of the match -1 if no match in p)
+        @param(len length of the match)
+        @returns(true if there was a match, otherwise false)
+     }
+     function RegExprPos(RegExprEngine : TRegExprEngine;p : pchar;var index,len : longint) : boolean;
+
+     function RegExprReplaceAll(RegExprEngine : TRegExprEngine;const src,newstr : ansistring;var dest : ansistring) : sizeint;
+
+     { This function Escape known regex chars and place the result on Return. If something went wrong the
+       function will return false. }
+     function RegExprEscapeStr (const S : string) : string;
+
+  implementation
+
+{$ifdef DEBUG}
+     procedure writecharset(c : tcharset);
+
+       var
+          b : byte;
+
+       begin
+          for b:=20 to 255 do
+            if chr(b) in c then
+              write(chr(b));
+          writeln;
+       end;
+
+
+    const
+
+      typ2str : array[tregexprentrytype] of string =
+      (
+        'ret_charset',
+        'ret_or',
+        'ret_illegalend',
+        'ret_backtrace',
+        'ret_startline',
+        'ret_endline',
+        'ret_pattern'
+      );
+
+
+     { Dumps all the next elements of a tree }
+     procedure dumptree(space: string; regentry: pregexprentry);
+      begin
+        while assigned(regentry) do
+          begin
+            WriteLn(space+'------- Node Type ',typ2str[regentry^.typ]);
+            if (regentry^.typ = ret_charset) then
+              WriteCharSet(regentry^.chars);
+            { dump embedded pattern information }
+            if regentry^.typ = ret_pattern then
+               begin
+                 dumptree(space+#9,regentry^.pattern);
+                 WriteLn(space+#9,' --- Alternative nodes ');
+                 if assigned(regentry^.alternative) then
+                   dumptree(space+#9#9,regentry^.alternative);
+               end;
+            if regentry^.typ = ret_startline then
+               dumptree(space+#9,regentry^.pattern);
+            regentry:=regentry^.next;
+          end;
+      end;
+{$endif DEBUG}
+
+
+     {** Determines the length of a pattern, including sub-patterns.
+
+         It goes through the nodes and returns the pattern length
+         between the two, using minOccurs as required.
+
+         Called recursively.
+     }
+     function patlength(hp1: pregexprentry): integer;
+       var
+        count: integer;
+        hp: pregexprentry;
+       begin
+        count:=0;
+        if hp1^.typ=ret_pattern then
+            hp:=hp1^.pattern
+        else
+            hp:=hp1;
+        { now go through all chars and get the length
+          does not currently take care of embedded patterns
+        }
+        while assigned(hp) do
+          begin
+            if hp^.typ = ret_pattern then
+              begin
+                inc(count,patlength(hp));
+              end
+            else
+            if hp^.typ = ret_charset then
+               inc(count);
+            hp:=hp^.next;
+          end;
+        if hp1^.typ=ret_pattern then
+          begin
+            count:=hp1^.minOccurs*count;
+          end;
+         patlength:=count;
+       end;
+
+     function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags; var RegExprEngine:TRegExprEngine) : boolean;
+
+       var
+          first : pregexprentry;
+
+       procedure doregister(p : pregexprentry);
+
+         begin
+           p^.nextdestroy:=first;
+           first:=p;
+         end;
+
+       var
+          currentpos : pchar;
+          error : boolean;
+
+       procedure readchars(var chars: tcharset);
+
+         var
+            c1 : char;
+
+         begin
+            chars:=[];
+            case currentpos^ of
+               #0:
+                  exit;
+               '.':
+                  begin
+                     inc(currentpos);
+                     chars:=cs_allchars-cs_newline;
+                  end;
+               '\':
+                  begin
+                     inc(currentpos);
+                     case currentpos^ of
+                        #0:
+                          begin
+                             error:=true;
+                             exit;
+                          end;
+                        't':
+                           begin
+                              inc(currentpos);
+                              chars:=[#9];
+                           end;
+                        'n':
+                           begin
+                              inc(currentpos);
+                              chars:=[#10];
+                           end;
+                        'r':
+                           begin
+                              inc(currentpos);
+                              chars:=[#13];
+                           end;
+                        'd':
+                          begin
+                             inc(currentpos);
+                             chars:=cs_digits;
+                          end;
+                        'D':
+                          begin
+                             inc(currentpos);
+                             chars:=cs_nondigits;
+                          end;
+                        's':
+                          begin
+                             inc(currentpos);
+                             chars:=cs_whitespace;
+                          end;
+                        'S':
+                          begin
+                             inc(currentpos);
+                             chars:=cs_nonwhitespace;
+                          end;
+                        'w':
+                           begin
+                              inc(currentpos);
+                              chars:=cs_wordchars;
+                           end;
+                        'W':
+                           begin
+                              inc(currentpos);
+                              chars:=cs_nonwordchars;
+                           end;
+                        'f' :
+                            begin
+                              inc(currentpos);
+                              chars:= [#12];
+                            end;
+                        'a' :
+                            begin
+                              inc(currentpos);
+                              chars:= [#7];
+                            end;
+                         else
+                           begin { Some basic escaping...}
+                              chars := [currentpos^];
+                              inc (currentpos);
+                              {error:=true;
+                              exit;}
+                           end;
+                     end;
+                  end;
+               else
+                 begin
+                    if ref_caseinsensitive in flags then
+                       c1:=upcase(currentpos^)
+                    else
+                       c1:=currentpos^;
+
+                    inc(currentpos);
+                    if currentpos^='-' then
+                      begin
+                         inc(currentpos);
+                         if currentpos^=#0 then
+                           begin
+                              error:=true;
+                              exit;
+                           end;
+                         if ref_caseinsensitive in flags then
+                           chars:=[c1..upcase(currentpos^)]
+                         else
+                           chars:=[c1..currentpos^];
+                         inc(currentpos);
+                      end
+                    else
+                      chars:=[c1];
+                 end;
+            end;
+         end;
+
+
+       procedure readcharset(var charset: tcharset);
+
+         var
+           chars: tcharset;
+         begin
+            charset:=[];
+            case currentpos^ of
+               #0:
+                  exit;
+               '[':
+                  begin
+                     inc(currentpos);
+                     while currentpos^<>']' do
+                       begin
+                          if currentpos^='^' then
+                            begin
+                               inc(currentpos);
+                               readchars(chars);
+                               charset:=charset+(cs_allchars-chars);
+                            end
+                          else
+                            begin
+                              readchars(chars);
+                              charset:=charset+chars;
+                            end;
+                          if error or (currentpos^=#0) then
+                            begin
+                               error:=true;
+                               exit;
+                            end;
+                       end;
+                     inc(currentpos);
+                  end;
+               '^':
+                  begin
+                     inc(currentpos);
+                     readchars(chars);
+                     charset:=cs_allchars-chars;
+                  end;
+               else
+                  begin
+                    readchars(chars);
+                    charset:=chars;
+                  end;
+            end;
+         end;
+
+
+       (* takes care of parsing the {n}, {n,} and {n,m} regular expression
+          elements. In case of error, sets error to true and returns false,
+          otherwise returns true and set minoccurs and maxoccurs accordingly
+          (-1 if not present). *)
+       function parseoccurences(var currentpos: pchar; var minoccurs,maxoccurs: integer): boolean;
+         var
+          minOccursString: string;
+          maxOccursString: string;
+         begin
+           parseoccurences:=false;
+           minOccurs:=-1;
+           maxOccurs:=-1;
+           inc(currentpos);
+           minOccursString:='';
+           if currentPos^ = #0 then
+             begin
+               error:=true;
+               exit;
+             end;
+            while (currentpos^<>#0) and (currentpos^ in ['0'..'9']) do
+                begin
+                   minOccursString:=minOccursString+currentPos^;
+                   inc(currentpos);
+                end;
+            if length(minOccursString) = 0 then
+                begin
+                  error:=true;
+                  exit;
+                end;
+            Val(minOccursString,minOccurs);
+            { possible cases here: commad or end bracket }
+            if currentpos^= '}' then
+              begin
+                inc(currentpos);
+                maxOccurs:=minOccurs;
+                parseoccurences:=true;
+                exit;
+              end;
+            if currentpos^= ',' then
+              begin
+                maxOccursString:='';
+                inc(currentpos);
+                while (currentpos^<>#0) and (currentpos^ in ['0'..'9']) do
+                begin
+                   maxOccursString:=maxOccursString+currentPos^;
+                   inc(currentpos);
+                end;
+                if currentpos^= '}' then
+                 begin
+                   { If the length of the string is zero, then there is
+                     no upper bound. }
+                   if length(maxOccursString) > 0 then
+                      Val(maxOccursString,maxOccurs)
+                   else
+                      maxOccurs:=high(integer);
+                   inc(currentpos);
+                   parseoccurences:=true;
+                   exit;
+                 end;
+              end;
+              error:=true;
+         end;
+
+
+       function parseregexpr(next,elsepath : pregexprentry) : pregexprentry;
+
+         var
+            hp : pregexprentry;
+            minOccurs,maxOccurs: integer;
+            hp3: pregexprentry;
+            cs : tcharset;
+            chaining : ^pregexprentry;
+
+         begin
+            chaining:=nil;
+            parseregexpr:=nil;
+            elsepath:=nil;
+            if error then
+              exit;
+            { this dummy allows us to redirect the elsepath later }
+{            new(ep);
+            doregister(ep);
+            ep^.typ:=ret_charset;
+            ep^.chars:=[];
+            ep^.elsepath:=elsepath;
+            elsepath:=ep;}
+            while true do
+              begin
+                 if error then
+                   exit;
+                 case currentpos^ of
+                    '(':
+                       begin
+                          inc(currentpos);
+                          hp:=parseregexpr(nil,nil);
+                          { Special characters after the bracket }
+                           if error then
+                              exit;
+                          if currentpos^<>')' then
+                            begin
+                               error:=true;
+                               exit;
+                            end;
+                          inc(currentpos);
+                            case currentpos^ of
+                            '*':
+                               begin
+                                  inc(currentpos);
+                                  new(hp3);
+                                  doregister(hp3);
+                                  hp3^.typ:=ret_pattern;
+                                  hp3^.alternative:=nil;
+                                  hp3^.pattern:=hp;
+                                  hp3^.elsepath:=elsepath;
+                                  hp3^.minoccurs:=0;
+                                  hp3^.maxoccurs:=high(integer);
+                                  hp3^.next:=nil;
+                                  if assigned(chaining) then
+                                    chaining^:=hp3
+                                  else
+                                    parseregexpr:=hp3;
+                                  chaining:=@hp3^.next;
+                               end;
+                            '+':
+                               begin
+                                  inc(currentpos);
+                                  new(hp3);
+                                  doregister(hp3);
+                                  hp3^.typ:=ret_pattern;
+                                  hp3^.alternative:=nil;
+                                  hp3^.pattern:=hp;
+                                  hp3^.elsepath:=elsepath;
+                                  hp3^.minoccurs:=1;
+                                  hp3^.maxoccurs:=high(integer);
+                                  hp3^.next:=nil;
+                                  if assigned(chaining) then
+                                    chaining^:=hp3
+                                  else
+                                    parseregexpr:=hp3;
+                                  chaining:=@hp3^.next;
+                               end;
+
+                            '?':
+                               begin
+                                  inc(currentpos);
+                                  new(hp3);
+                                  doregister(hp3);
+                                  hp3^.typ:=ret_pattern;
+                                  hp3^.alternative:=nil;
+                                  hp3^.pattern:=hp;
+                                  hp3^.elsepath:=elsepath;
+                                  hp3^.minoccurs:=0;
+                                  hp3^.maxoccurs:=1;
+                                  hp3^.next:=nil;
+                                  if assigned(chaining) then
+                                    chaining^:=hp3
+                                  else
+                                    parseregexpr:=hp3;
+                                  chaining:=@hp3^.next;
+                               end;
+                            '{':
+                               begin
+                                 if not parseOccurences(currentPos,minOccurs,maxOccurs) then
+                                   exit;
+                                  inc(currentpos);
+                                  new(hp3);
+                                  doregister(hp3);
+                                  hp3^.typ:=ret_pattern;
+                                  hp3^.alternative:=nil;
+                                  hp3^.pattern:=hp;
+                                  hp3^.elsepath:=elsepath;
+                                  hp3^.minoccurs:=minOccurs;
+                                  hp3^.maxoccurs:=maxOccurs;
+                                  hp3^.next:=nil;
+                                  if assigned(chaining) then
+                                    chaining^:=hp3
+                                  else
+                                    parseregexpr:=hp3;
+                                  chaining:=@hp3^.next;
+                               end;
+                            else
+                              begin
+                                { go to end of this list - always the
+                                  last next used }
+(*
+                                hp3:=hp;
+                                while assigned(hp3^.next) do
+                                  begin
+                                    hp3:=hp3^.next;
+                                  end;
+                                if assigned(chaining) then
+                                   chaining^:=hp
+                                else
+                                   parseregexpr:=hp;
+                                chaining:=@hp3^.next;*)
+                                  new(hp3);
+                                  doregister(hp3);
+                                  hp3^.typ:=ret_pattern;
+                                  hp3^.alternative:=nil;
+                                  hp3^.pattern:=hp;
+                                  hp3^.elsepath:=elsepath;
+                                  hp3^.minoccurs:=1;
+                                  hp3^.maxoccurs:=1;
+                                  hp3^.next:=nil;
+                                  if assigned(chaining) then
+                                    chaining^:=hp3
+                                  else
+                                    parseregexpr:=hp3;
+                                  chaining:=@hp3^.next;
+
+                              end;
+                          end;
+                       end;
+{ This is only partially implemented currently, as the terms before
+  the | character must be grouped together with parenthesis, which
+  is also compatible with other regular expressions.
+}
+                    '|':
+                       begin
+{$ifdef DEBUG}
+                          writeln('Creating or entry');
+{$endif DEBUG}
+                          if (not assigned (hp3)) then
+                            begin
+                              error:=true;
+                              exit;
+                            end;
+                          if (hp3^.typ <> ret_pattern) then
+                            begin
+                              error:=true;
+                              exit;
+                            end;
+                          while currentpos^='|' do
+                            begin
+                              inc(currentpos);
+                              if currentpos^=#0 then
+                                begin
+                                   error:=true;
+                                   exit;
+                                end;
+                              { always put the longest pattern first, so
+                                swap the trees as necessary.
+                              }
+                              hp := parseregexpr (next, elsepath);
+                              if patlength(hp) > patlength(hp3^.pattern) then
+                                begin
+                                  hp3^.alternative:=hp3^.pattern;
+                                  hp3^.pattern:=hp;
+                                end
+                              else
+                                 hp3^.alternative:=hp;
+                            end;
+                       end;
+                    ')':
+                       exit;
+                    '^':
+                       begin
+                          inc(currentpos);
+                          hp:=parseregexpr(nil,nil);
+                          { Special characters after the bracket }
+                           if error then
+                              exit;
+                           new(hp3);
+                           doregister(hp3);
+                           hp3^.typ:=ret_startline;
+                           hp3^.pattern:=hp;
+                           hp3^.elsepath:=elsepath;
+                           hp3^.next:=nil;
+                           if assigned(chaining) then
+                              chaining^:=hp3
+                           else
+                              parseregexpr:=hp3;
+                           chaining:=@hp3^.next;
+                       end;
+                    '$':
+                       begin
+                          inc(currentpos);
+                          new(hp);
+                          doregister(hp);
+                          hp^.typ:=ret_endline;
+                          hp^.elsepath:=elsepath;
+                          hp^.next:=nil;
+                          if assigned(chaining) then
+                            chaining^:=hp
+                          else
+                            parseregexpr:=hp;
+                          chaining:=@hp^.next;
+                       end;
+                    #0:
+                       exit;
+                    else
+                      begin
+                         readcharset(cs);
+                         if error then
+                           exit;
+                         case currentpos^ of
+                            '*':
+                               begin
+                                  inc(currentpos);
+                                  new(hp);
+                                  doregister(hp);
+                                  hp^.typ:=ret_charset;
+                                  hp^.chars:=cs;
+                                  hp^.elsepath:=nil;
+                                  hp^.next:=nil;
+                                  new(hp3);
+                                  doregister(hp3);
+                                  hp3^.typ:=ret_pattern;
+                                  hp3^.alternative:=nil;
+                                  hp3^.pattern:=hp;
+                                  hp3^.elsepath:=elsepath;
+                                  hp3^.minoccurs:=0;
+                                  hp3^.maxoccurs:=high(integer);
+                                  hp3^.next:=nil;
+                                  if assigned(chaining) then
+                                    chaining^:=hp3
+                                  else
+                                    parseregexpr:=hp3;
+                                  chaining:=@hp3^.next;
+                               end;
+                            '+':
+                               begin
+                                  inc(currentpos);
+                                  new(hp);
+                                  doregister(hp);
+                                  hp^.typ:=ret_charset;
+                                  hp^.chars:=cs;
+                                  hp^.elsepath:=nil;
+                                  hp^.next:=nil;
+
+                                  new(hp3);
+                                  doregister(hp3);
+                                  hp3^.typ:=ret_pattern;
+                                  hp3^.alternative:=nil;
+                                  hp3^.pattern:=hp;
+                                  hp3^.elsepath:=elsepath;
+                                  hp3^.minoccurs:=1;
+                                  hp3^.maxoccurs:=high(integer);
+                                  hp3^.next:=nil;
+                                  if assigned(chaining) then
+                                    chaining^:=hp3
+                                  else
+                                    parseregexpr:=hp3;
+                                  chaining:=@hp3^.next;
+                               end;
+                            '?':
+                               begin
+                                  inc(currentpos);
+                                  new(hp);
+                                  doregister(hp);
+                                  hp^.typ:=ret_charset;
+                                  hp^.chars:=cs;
+                                  hp^.elsepath:=nil;
+                                  hp^.next:=nil;
+
+                                  new(hp3);
+                                  doregister(hp3);
+                                  hp3^.typ:=ret_pattern;
+                                  hp3^.pattern:=hp;
+                                  hp3^.alternative:=nil;
+                                  hp3^.elsepath:=elsepath;
+                                  hp3^.minoccurs:=0;
+                                  hp3^.maxoccurs:=1;
+                                  hp3^.next:=nil;
+                                  if assigned(chaining) then
+                                    chaining^:=hp3
+                                  else
+                                    parseregexpr:=hp3;
+                                  chaining:=@hp3^.next;
+                               end;
+                            '{':
+                               begin
+                                 if not parseOccurences(currentPos,minOccurs,maxOccurs) then
+                                   exit;
+                                  new(hp);
+                                  doregister(hp);
+                                  hp^.typ:=ret_charset;
+                                  hp^.chars:=cs;
+                                  hp^.elsepath:=nil;
+                                  hp^.next:=nil;
+
+                                  new(hp3);
+                                  doregister(hp3);
+                                  hp3^.typ:=ret_pattern;
+                                  hp3^.alternative:=nil;
+                                  hp3^.pattern:=hp;
+                                  hp3^.elsepath:=elsepath;
+                                  hp3^.minoccurs:=minOccurs;
+                                  hp3^.maxoccurs:=maxOccurs;
+                                  hp3^.next:=nil;
+                                  if assigned(chaining) then
+                                    chaining^:=hp3
+                                  else
+                                    parseregexpr:=hp3;
+                                  chaining:=@hp3^.next;
+
+                                end;
+                            else
+                               { Normal character }
+                               begin
+                                  new(hp);
+                                  doregister(hp);
+                                  hp^.typ:=ret_charset;
+                                  hp^.chars:=cs;
+                                  hp^.elsepath:=elsepath;
+                                  hp^.next:=next;
+                                  if assigned(chaining) then
+                                    chaining^:=hp
+                                  else
+                                    parseregexpr:=hp;
+                                  chaining:=@hp^.next;
+                                  continue;
+                               end;
+                           { This was a pattern }
+                         end; { END CASE }
+                      end;
+                 end;
+              end;
+         end;
+
+       var
+          endp : pregexprentry;
+
+       begin
+          GenerateRegExprEngine:=false;
+          RegExprEngine.Data:=nil;
+          RegExprEngine.DestroyList:=nil;
+          if regexpr=nil then
+            exit;
+          first:=nil;
+          if (ref_singleline in flags) and (ref_multiline in flags) then
+            exit;
+          currentpos:=regexpr;
+          GenerateRegExprEngine:=true;
+          error:=false;
+          new(endp);
+          doregister(endp);
+          endp^.typ:=ret_illegalend;
+          RegExprEngine.flags:=flags;
+          RegExprEngine.Data:=parseregexpr(nil,endp);
+{$IFDEF DEBUG}
+          writeln('========== Generating tree ============');
+          dumptree('',RegExprEngine.Data);
+{$ENDIF}
+          RegExprEngine.DestroyList:=first;
+          if error or (currentpos^<>#0) then
+            begin
+              GenerateRegExprEngine:=false;
+              DestroyRegExprEngine(RegExprEngine);
+            end;
+       end;
+
+
+{$IFDEF FPC}
+    function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags): TREGExprEngine;
+    var
+     r: TRegExprEngine;
+    begin
+      GenerateRegExprEngine(regexpr,flags,r);
+      GenerateRegExprEngine:=r;
+    end;
+{$ENDIF}
+
+    procedure DestroyRegExprEngine(var regexpr : TRegExprEngine);
+
+       var
+          hp : pregexprentry;
+
+       begin
+          hp:=regexpr.DestroyList;
+          while assigned(hp) do
+            begin
+               regexpr.DestroyList:=hp^.nextdestroy;
+               dispose(hp);
+               hp:=regexpr.DestroyList;
+            end;
+          regexpr.Data:=nil;
+          regexpr.DestroyList:=nil;
+       end;
+
+     function RegExprPos(regexprengine : TRegExprEngine;p : pchar;var index,len : longint) : boolean;
+
+       var
+          lastpos : pchar;
+          firstpos: pchar;
+
+       { Does the actual search of the data - return true if the term was found }
+       function dosearch(regexprentry : pregexprentry;pos : pchar) : boolean;
+       var
+          found: boolean;
+          checkvalue: boolean;
+          savedpos: pchar;
+          counter: word;
+
+         begin
+            dosearch:=false;
+            while true do
+              begin
+                 {$IFDEF Debug}
+                 writeln('Entering ',typ2str[regexprentry^.typ]);
+                 writeln('Pattern length ',patlength(regexprentry));
+                 {$ENDIF Debug}
+                 case regexprentry^.typ of
+                    ret_endline:
+                      begin
+                         { automatically a match! }
+                         if pos^ = #0 then
+                            begin
+                              dosearch:=true;
+                              exit;
+                            end;
+                         if ref_multiline in regexprengine.flags then
+                            begin
+                              { Supports DOS/Commodore/UNIX/IBM Mainframe line formats }
+                              { avoid reading invalid memory also }
+                                  if (pos^=#13) and ((pos+1)^=#10) then
+                                    begin
+                                      regexprentry:=regexprentry^.next;
+                                    end
+                                  else
+                                  if (pos^=#$85) or (pos^=#10) or ((pos^=#13) and ((pos-1) >= firstpos) and ((pos-1)^ <> #10)) then
+                                    begin
+                                       regexprentry:=regexprentry^.next;
+                                    end
+                                  else
+                                    begin
+                                       dosearch:=false;
+                                       exit;
+                                    end;
+                             end
+                           else
+                             exit;
+                      end;
+                    ret_pattern:
+                      begin
+                         found:=false;
+                         { Take care of occurences here }
+                         savedpos:=pos;
+                         counter:=0;
+                         repeat
+                           found:=dosearch(regexprentry^.pattern,pos);
+                           if not found then
+                            break;
+                           pos:=lastpos;
+                           inc(counter);
+                         until (not found) or (counter >= regexprentry^.maxoccurs) or (pos^= #0);
+
+                         if counter = 0 then
+                           begin
+                             { If there was no occurence and the minimum occurence is > 0 then
+                               problem.
+                             }
+                             if (regexprentry^.minoccurs > 0) then
+                              begin
+                                dosearch:=false;
+                                { verify alternative path as required }
+                                if assigned(regexprentry^.alternative) then
+                                  begin
+                                     dosearch:=dosearch(regexprentry^.alternative,savedpos);
+                                     exit;
+                                  end;
+                                exit;
+                              end;
+                             dosearch:=true;
+                             lastpos:=savedpos;
+                           end
+                         else
+                           { found }
+                           begin
+                              { Possible choices :
+                                 - found and (counter >= minOccurences) and (counter =< maxOccurences) = true
+                                 - found and (counter < minOccurences) or (counter > maxOccurences) = false
+                              }
+                              if (counter < regexprentry^.minoccurs) or (counter > regexprentry^.maxoccurs) then
+                                begin
+                                  dosearch:=false;
+                                  exit;
+                                end;
+                              dosearch:=true;
+                              { if all matches were found, and the current position
+                                points to zero (processed all characters) }
+                              if pos^=#0 then
+                                begin
+                                  dosearch:=true;
+                                  exit;
+                                end;
+                           end;
+                         { If we are that means the matches were valid, go to next element to match
+                         }
+                         regexprentry:=regexprentry^.next;
+                         if (counter = 0) and not assigned(regexprentry) then
+                           exit;
+                      end;
+                    ret_startline:
+                      begin
+                         checkvalue:=pos=firstpos;
+                         if ref_multiline in regexprengine.flags then
+                           begin
+                             { Supports DOS/Commodore/UNIX/IBM Mainframe line formats }
+                             { avoid reading invalid memory also }
+                             if
+                                 (
+                                   ((pos-1) >= firstpos) and ((pos-1)^=#$85)
+                                  )
+                              or
+                                 (
+                                   ((pos-1) >= firstpos) and ((pos-1)^=#10)
+                                  )
+                              or
+                                (
+                                 ((pos-1) >= firstpos) and ((pos-1)^=#13) and
+                                 ((pos)^ <> #10)
+                                )
+                             then
+                               begin
+                                 checkvalue:=true;
+                               end;
+                           end;
+                          if checkvalue then
+                            begin
+                              dosearch:=dosearch(regexprentry^.pattern,pos);
+                              regexprentry:=regexprentry^.next;
+                              if not dosearch then
+                                exit;
+                              pos:=lastpos;
+                            end
+                          else
+                            begin
+                              dosearch:=false;
+                              exit;
+                            end;
+                      end;
+                    ret_charset:
+                      begin
+                         if (pos^ in regexprentry^.chars) or
+                           ((ref_caseinsensitive in regexprengine.flags) and
+                            (upcase(pos^) in regexprentry^.chars)) then
+                           begin
+{$ifdef DEBUG}
+                              writeln('Found matching: ',pos^);
+{$endif DEBUG}
+                              regexprentry:=regexprentry^.next;
+                              inc(pos);
+                           end
+                         else
+                           begin
+{$ifdef DEBUG}
+                              writeln('Found unmatching: ',pos^);
+{$endif DEBUG}
+                              exit;
+                           end;
+                      end;
+                    ret_backtrace:
+                      begin
+{$ifdef DEBUG}
+                         writeln('Starting backtrace');
+{$endif DEBUG}
+                         if dosearch(regexprentry^.next,pos) then
+                           begin
+                              dosearch:=true;
+                              exit;
+                           end
+                         else if dosearch(regexprentry^.elsepath,pos) then
+                           begin
+                              dosearch:=true;
+                              exit;
+                           end
+                         else
+                           exit;
+                      end;
+                 end;
+                 lastpos:=pos;
+                 if regexprentry=nil then
+                   begin
+                      dosearch:=true;
+                      exit;
+                   end;
+                 if regexprentry^.typ=ret_illegalend then
+                   exit;
+                 { end of string, and we were expecting an end of string }
+                 if (pos^=#0) and (regexprentry^.typ = ret_endline) and
+                    (not assigned(regexprentry^.next)) then
+                   begin
+                     dosearch:=true;
+                     exit;
+                   end;
+                 if pos^=#0 then
+                   exit;
+              end;
+         end;
+
+       begin
+          RegExprPos:=false;
+          index:=0;
+          len:=0;
+          firstpos:=p;
+          if regexprengine.Data=nil then
+            exit;
+          while p^<>#0 do
+            begin
+               if dosearch(regexprengine.Data,p) then
+                 begin
+                    len:=lastpos-p;
+                    RegExprPos:=true;
+                    exit;
+                 end
+               else
+                 begin
+                    inc(p);
+                    inc(index);
+                 end;
+            end;
+          index:=-1;
+       end;
+
+
+  function RegExprReplaceAll(RegExprEngine : TRegExprEngine;const src,newstr : ansistring;var dest : ansistring) : sizeint;
+    var
+      index,len : longint;
+      pos,lastpos : pchar;
+      first : boolean;
+      oldlength : PtrInt;
+    begin
+      pos:=pchar(src);
+      lastpos:=nil;
+      first:=true;
+      Result:=0;
+      { estimate some length }
+      SetLength(dest,length(src)+((length(src) div 10)*length(newstr)));
+      while RegExprPos(RegExprEngine,pos,index,len) do
+        begin
+          inc(pos,index);
+          if (lastpos = nil) or (pos>lastpos) then
+            begin
+              if lastpos = nil then lastpos := pchar(src);
+              { copy skipped part }
+
+              { because we cheat with SetLength a SetLength(...,0) isn't what we want
+                so we've to trick at the first SetLength call
+              }
+              if first then
+                begin
+                  SetLength(dest,(pos-lastpos));
+                  { cast dest here because it is already unified }
+                  move(lastpos^,char(dest[1]),pos-lastpos);
+                end
+              else
+                begin
+                  oldlength:=Length(dest);
+                  SetLength(dest,oldlength+(pos-lastpos));
+                  move(lastpos^,char(dest[oldlength+1]),pos-lastpos);
+                end;
+              first:=false;
+            end;
+          { found }
+          inc(Result);
+          dest:=dest+newstr;
+          inc(pos,len);
+          lastpos:=pos;
+        end;
+      { copy remainder }
+      len:=strlen(pos);
+      if first then
+        begin
+          SetLength(dest,len);
+          move(pos^,char(dest[length(dest)+1]),len);
+        end
+      else
+        begin
+          oldlength:=Length(dest);
+          SetLength(dest,oldlength+len);
+          move(pos^,char(dest[oldlength+1]),len);
+        end
+    end;
+
+
+  function RegExprEscapeStr (const S : string) : string;
+    var
+     i, len   : integer;
+     s1: string;
+    begin
+      RegExprEscapeStr:= '';
+      s1:='';
+      if (S = '') then
+       exit;
+
+      len := Length (S);
+
+      for i := 1 to len do
+        begin
+          if (S [i] in ['(','|', '.', '*', '?', '^', '$', '-', '[', '{', '}', ']', ')', '\']) then
+            begin
+              s1 := s1 + '\';
+            end;
+
+          s1 := s1 + S[i];
+        end;
+      RegExprEscapeStr:=s1;
+    end;
+
+begin
+   cs_nonwordchars:=cs_allchars-cs_wordchars;
+   cs_nondigits:=cs_allchars-cs_digits;
+   cs_nonwhitespace:=cs_allchars-cs_whitespace;
+end.

+ 1230 - 0
packages/regexpr/src/regex.pp

@@ -0,0 +1,1230 @@
+{
+    This file is part of the Free Pascal packages library.
+    Copyright (c) 2008 by Joost van der Sluis, member of the
+    Free Pascal development team
+    
+    Regexpression parser
+    
+    This code is based on the examples in the book
+    'Tomes of Delphi: Algorithms and Data Structures' by Julian M Bucknall
+    The code is used with his permission. For an excellent explanation of
+    this unit, see the book...
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+unit Regex;
+
+{$mode Delphi}{$H+}
+{$INLINE ON}
+
+interface
+
+{Notes:
+  these classes parse regular expressions that follow this grammar:
+
+    <anchorexpr> ::= <expr> |
+                     '^' <expr> |
+                     <expr> '$' |
+                     '^' <expr> '$'
+    <expr> ::= <term> |
+               <term> '|' <expr>                 - alternation
+    <term> ::= <factor> |
+               <factor><term>                    - concatenation
+    <factor> ::= <atom> |
+                 <atom> '?' |                    - zero or one
+                 <atom> '*' |                    - zero or more
+                 <atom> 'n,m' |                  - min n, max m (added by Joost)
+                 <atom> '+'                      - one or more
+    <atom> ::= <char> |
+               '.' |                             - any char
+               '(' <expr> ') |                   - parentheses
+               '[' <charclass> ']' |             - normal class
+               '[^' <charclass> ']'              - negated class
+    <charclass> ::= <charrange> |
+                    <charrange><charclass>
+    <charrange> ::= <ccchar> |
+                    <ccchar> '-' <ccchar>
+    <char> ::= <any character except metacharacters> |
+               '\' <any character at all>
+    <ccchar> ::= <any character except '-' and ']'> |
+                 '\' <any character at all>
+
+  This means that parentheses have maximum precedence, followed
+  by square brackets, followed by the closure operators,
+  followed by concatenation, finally followed by alternation.
+}
+
+uses
+  SysUtils,
+  Classes;
+
+type
+  TUpcaseFunc = function(aCh : AnsiChar) : AnsiChar;
+
+  TNFAMatchType = (  {types of matching performed...}
+     mtNone,           {..no match (an epsilon no-cost move)}
+     mtAnyChar,        {..any character}
+     mtChar,           {..a particular character}
+     mtClass,          {..a character class}
+     mtNegClass,       {..a negated character class}
+     mtTerminal,       {..the final state--no matching}
+     mtUnused);        {..an unused state--no matching}
+
+  TRegexError = (    {error codes for invalid regex strings}
+     recNone,          {..no error}
+     recSuddenEnd,     {..unexpected end of string}
+     recMetaChar,      {..read metacharacter, but needed normal char}
+     recNoCloseParen,  {..expected close paren, but not there}
+     recExtraChars     {..not at end of string after parsing regex}
+     );
+
+  TRegexType = (
+     rtRegEx,
+     rtChars,
+     rtSingleChar
+     );
+
+  PCharSet = ^TCharSet;
+  TCharSet = set of Char;
+
+  { TtdRegexEngine }
+
+  TNFAState = record
+    sdNextState1: integer;
+    sdNextState2: integer;
+    sdClass     : PCharSet;
+    sdMatchType : TNFAMatchType;
+    sdChar      : AnsiChar;
+  end;
+
+
+  { TRegexEngine }
+
+  TRegexEngine = class
+    private
+      FAnchorEnd  : boolean;
+      FAnchorStart: boolean;
+      FErrorCode  : TRegexError;
+      FIgnoreCase : boolean;
+      FMultiLine  : boolean;
+      FPosn       : PAnsiChar;
+      FRegexStr   : string;
+      FStartState : integer;
+      FStateTable : Array of TNFAState;
+      FStateCount : integer;
+      FUpcase     : TUpcaseFunc;
+
+      // The deque (double-ended queue)
+      FList : array of integer;
+      FCapacity : integer;
+      FHead : integer;
+      FTail : integer;
+      
+      FRegexType : TRegexType;
+    protected
+      procedure DequeEnqueue(aValue : integer);
+      procedure DequePush(aValue : integer);
+      function DequePop : integer;
+      procedure DequeGrow;
+
+      procedure rcSetIgnoreCase(aValue : boolean); virtual;
+      procedure rcSetRegexStr(const aRegexStr : string); virtual;
+      procedure rcSetUpcase(aValue : TUpcaseFunc); virtual;
+      procedure rcSetMultiLine(aValue : Boolean); virtual;
+
+      procedure rcClear; virtual;
+      procedure rcError(aIndex      : integer); virtual;
+      procedure rcLevel1Optimize; virtual;
+      function rcMatchSubString(const S   : string;
+                                StartPosn : integer;
+                                var Len   : integer) : boolean; virtual;
+      function rcAddState(aMatchType : TNFAMatchType;
+                          aChar      : AnsiChar;
+                          aCharClass : PCharSet;
+                          aNextState1: integer;
+                          aNextState2: integer) : integer;
+      function rcSetState(aState     : integer;
+                          aNextState1: integer;
+                          aNextState2: integer) : integer;
+
+      function rcParseAnchorExpr : integer; virtual;
+      function rcParseAtom : integer; virtual;
+      function rcParseCCChar(out EscapeChar : Boolean) : AnsiChar; virtual;
+      function rcParseChar : integer; virtual;
+      function rcParseCharClass(aClass : PCharSet) : boolean; virtual;
+      function rcParseCharRange(aClass : PCharSet) : boolean; virtual;
+      function rcParseExpr : integer; virtual;
+      function rcParseFactor : integer; virtual;
+      function rcParseTerm : integer; virtual;
+      Function rcReturnEscapeChar : AnsiChar; virtual;
+    public
+      procedure WriteTable;
+      constructor Create(const aRegexStr : string);
+      destructor Destroy; override;
+
+      function Parse(var aErrorPos : integer;
+                     var aErrorCode: TRegexError) : boolean; virtual;
+      function MatchString(const S : string; out MatchPos : integer; var Offset : integer) : boolean; virtual;
+      function ReplaceAllString(const src, newstr: ansistring; out DestStr : string): Integer;
+
+
+      property IgnoreCase : boolean
+                  read FIgnoreCase write rcSetIgnoreCase;
+      property MultiLine : boolean
+                  read FMultiLine write rcSetMultiLine;
+      property RegexString : string
+                  read FRegexStr write rcSetRegexStr;
+      property Upcase : TUpcaseFunc
+                  read FUpcase write rcSetUpcase;
+  end;
+
+
+Resourcestring
+  eRegexParseError = 'Error at %d when parsing regular expression';
+
+implementation
+
+uses strutils;
+
+const
+  MetaCharacters : set of AnsiChar =
+                   ['[', ']', '(', ')', '|', '*', '+', '?', '-', '.',
+                    '^', '$', '{', '}'];
+  newline : TCharSet = [#10,#13,#$85];
+  {some handy constants}
+  UnusedState = -1;
+  NewFinalState = -2;
+  CreateNewState = -3;
+  ErrorState = -4;
+  MustScan = -5;
+
+  cs_allchars : tcharset = [#0..#255];
+  cs_wordchars : tcharset = ['A'..'Z','a'..'z','_','0'..'9'];
+  cs_newline : tcharset = [#10];
+  cs_digits : tcharset = ['0'..'9'];
+  cs_whitespace : tcharset = [' ',#9];
+
+
+{===Helper routines==================================================}
+function SystemUpcase(aCh : AnsiChar) : AnsiChar; far;
+begin
+  Result := System.Upcase(aCh);
+end;
+{====================================================================}
+
+
+{===TRegexEngine===================================================}
+constructor TRegexEngine.Create(const aRegexStr : string);
+begin
+  inherited Create;
+  FRegexStr := aRegexStr;
+  FIgnoreCase := false;
+  FUpcase := SystemUpcase;
+  SetLength(FStateTable,64);
+  FStateCount:=0;
+  FCapacity:=64;
+  setlength(FList,FCapacity);
+  {let's help out the user of the deque by putting the head and
+   tail pointers in the middle: it's probably more efficient}
+  FHead := FCapacity div 2;
+  FTail := FHead;
+
+  MultiLine:=False;
+end;
+{--------}
+destructor TRegexEngine.Destroy;
+begin
+  if (FStateTable <> nil) then
+    rcClear;
+  inherited Destroy;
+end;
+{--------}
+function TRegexEngine.MatchString(const S : string; out MatchPos : integer; var Offset : integer): boolean;
+var
+  i : integer;
+  ErrorPos  : integer;
+  ErrorCode : TRegexError;
+  pc : pchar;
+  x:integer;
+begin
+  {if the regex string hasn't been parsed yet, do so}
+  if (FStateCount = 0) then begin
+    if not Parse(ErrorPos, ErrorCode) then
+      rcError(ErrorPos);
+  end;
+
+  case FRegexType of
+    rtSingleChar :
+      begin
+      MatchPos := PosEx(char(FRegexStr[1]),s,Offset);
+      Offset := MatchPos+1;
+      Result := (MatchPos>0);
+      end;
+    rtChars :
+      begin
+      MatchPos := PosEx(FRegexStr,s,Offset);
+      Offset := MatchPos+length(FRegexStr);
+      Result := (MatchPos>0);
+      end
+    else
+      begin
+      {now try and see if the string matches (empty strings don't)}
+      Result := False;
+      MatchPos := 0;
+      if (S <> '') then
+        {if the regex specified a start anchor then we
+         need to check the string starting at the first position}
+        if FAnchorStart then begin
+          if rcMatchSubString(S, 1, Offset) then
+            begin
+            MatchPos:=1;
+            Result := True;
+            end
+          {If the first position did not match ang MultiLine is false, the string
+           doesn't match. If MultiLine is true, start at every position after a
+           newline }
+          else if FMultiLine then begin
+            for i := Offset to length(S)-1 do
+              if S[i] in newline then
+               if rcMatchSubString(S, i+1, Offset) then begin
+                MatchPos := i+1;
+                Result := True;
+                Break;
+              end;
+          end
+        end
+        {otherwise we try and match the string at every position and
+         return at the first success}
+        else begin
+          for i := Offset to length(S) do
+            if rcMatchSubString(S, i, Offset) then begin
+              MatchPos:=i;
+              Result := True;
+              Break;
+            end;
+        end;
+      end;
+    end; {case}
+end;
+
+function TRegexEngine.ReplaceAllString(const src, newstr: ansistring; out DestStr : string): Integer;
+
+type TReplRec = record
+                  Pos : integer;
+                  Len : integer;
+                end;
+
+var ofs         : Integer;
+    size_newstr,
+    size, pos   : Integer;
+    ReplArr     : array of TReplRec;
+    racount     : integer;
+    MatchPos    : integer;
+    DestSize    : integer;
+    LastPos     : integer;
+    MoveLen     : integer;
+    i           : integer;
+
+begin
+  setlength(ReplArr,64);
+
+  racount := 0;
+  DestSize:=length(src);
+  size_newstr := length(newstr);
+  Ofs := 1;
+  while MatchString(src,MatchPos,Ofs) do
+    begin
+    if racount = length(ReplArr) then
+      setlength(ReplArr,racount+racount div 2);
+    ReplArr[racount].Pos := MatchPos;
+    ReplArr[racount].Len := ofs;
+    DestSize:=DestSize-ofs+MatchPos+size_newstr;
+    inc(racount);
+    end;
+
+  SetLength(DestStr, SizeOf(Char)*DestSize);
+  MatchPos:=1; LastPos:=1;
+
+  if size_newstr<>0 then for i := 0 to racount -1 do
+    begin
+    MoveLen := ReplArr[i].Pos-LastPos;
+    move(src[LastPos],DestStr[MatchPos],MoveLen);
+    MatchPos:=MatchPos+MoveLen;
+    LastPos := ReplArr[i].Len;
+    move(newstr[1],DestStr[MatchPos],size_newstr);
+    Matchpos := MatchPos+size_newstr;
+    end
+  else for i := 0 to racount -1 do
+    begin
+    MoveLen := ReplArr[i].Pos-LastPos;
+    move(src[LastPos],DestStr[MatchPos],MoveLen);
+    MatchPos:=MatchPos+MoveLen;
+    LastPos := ReplArr[i].Len;
+    end;
+
+  move(src[LastPos],DestStr[MatchPos],length(src)-LastPos+1);
+  Result := racount;
+end;
+
+{--------}
+function TRegexEngine.Parse(var aErrorPos : integer;
+                              var aErrorCode: TRegexError)
+                                                            : boolean;
+begin
+  {clear the current transition table}
+  rcClear;
+  {empty regex strings are not allowed}
+  if (FRegexStr = '') then begin
+    Result := false;
+    aErrorPos := 1;
+    aErrorCode := recSuddenEnd;
+
+    Exit;
+  end;
+  {parse the regex string}
+  if not IgnoreCase then
+    begin
+    if length(FRegexStr)=1 then
+      FRegexType:=rtSingleChar
+    else
+      FRegexType:=rtChars
+    end
+  else
+    FRegexType:=rtRegEx;
+
+  FPosn := PAnsiChar(FRegexStr);
+  FStartState := rcParseAnchorExpr;
+  {if an error occurred or we're not at the end of the regex string,
+   clear the transition table, return false and the error position}
+  if (FStartState = ErrorState) or (FPosn^ <> #0) then begin
+    if (FStartState <> ErrorState) and (FPosn^ <> #0) then
+      FErrorCode := recExtraChars;
+    rcClear;
+    Result := false;
+    aErrorPos := succ(FPosn - PAnsiChar(FRegexStr));
+    aErrorCode := FErrorCode;
+  end
+  {otherwise add a terminal state, optimize, return true}
+  else begin
+    rcAddState(mtTerminal, #0, nil, UnusedState, UnusedState);
+    rcLevel1Optimize;
+    if FAnchorStart or FAnchorEnd then FRegexType:= rtRegEx;
+    Result := true;
+    aErrorPos := 0;
+    aErrorCode := recNone;
+  end;
+end;
+{--------}
+function TRegexEngine.rcAddState(aMatchType : TNFAMatchType;
+                                   aChar      : AnsiChar;
+                                   aCharClass : PCharSet;
+                                   aNextState1: integer;
+                                   aNextState2: integer) : integer;
+begin
+  {set up the fields in the state record}
+  with FStateTable[FStateCount] do
+    begin
+    if (aNextState1 = NewFinalState) then
+      sdNextState1 := FStateCount+1
+    else
+      sdNextState1 := aNextState1;
+    sdNextState2 := aNextState2;
+    sdMatchType := aMatchType;
+    if (aMatchType = mtChar) then
+      sdChar := aChar
+    else if (aMatchType = mtClass) or (aMatchType = mtNegClass) then
+      sdClass := aCharClass;
+    end;
+  Result := FStateCount;
+  inc(FStateCount);
+  if FStateCount=length(FStateTable) then
+    setlength(FStateTable,(FStateCount * 3) div 2);
+
+  if not (aMatchType in [mtChar,mtTerminal]) then FRegexType := rtRegEx;
+end;
+{--------}
+procedure TRegexEngine.rcClear;
+var
+  i : integer;
+begin
+  {free all items in the state transition table}
+  for i := 0 to FStateCount-1 do begin
+    with FStateTable[i] do begin
+      if (sdMatchType = mtClass) or
+         (sdMatchType = mtNegClass) then
+        if (sdClass <> nil) then
+          FreeMem(sdClass, sizeof(TCharSet));
+    end;
+  end;
+  {clear the state transition table}
+  FStateCount:=0;
+  FAnchorStart := false;
+  FAnchorEnd := false;
+end;
+{--------}
+procedure TRegexEngine.rcError(aIndex      : integer);
+begin
+  raise Exception.Create(Format(eRegexParseError,[aIndex]));
+end;
+{--------}
+procedure TRegexEngine.rcLevel1Optimize;
+var
+  i : integer;
+  Walker : integer;
+begin
+  {level 1 optimization removes all states that have only a single
+   no-cost move to another state}
+
+  {cycle through all the state records, except for the last one}
+  for i := 0 to FStateCount - 2 do begin
+    {get this state}
+    with FStateTable[i] do begin
+      {walk the chain pointed to by the first next state, unlinking
+       the states that are simple single no-cost moves}
+      Walker := sdNextState1;
+      while (FStateTable[walker].sdMatchType = mtNone) and
+            (FStateTable[walker].sdNextState2 = UnusedState) do begin
+        sdNextState1 := FStateTable[walker].sdNextState1;
+        Walker := sdNextState1;
+      end;
+      {walk the chain pointed to by the second next state, unlinking
+       the states that are simple single no-cost moves}
+      if (sdNextState2 <> UnusedState) then begin
+        Walker := sdNextState2;
+        while (FStateTable[walker].sdMatchType = mtNone) and
+              (FStateTable[walker].sdNextState2 = UnusedState) do begin
+          sdNextState2 := FStateTable[walker].sdNextState1;
+          Walker := sdNextState2;
+        end;
+      end;
+    end;
+  end;
+
+  {cycle through all the state records, except for the last one,
+   marking unused ones--not strictly necessary but good for debugging}
+  for i := 0 to FStateCount - 2 do begin
+    with FStateTable[i] do begin
+      if (sdMatchType = mtNone) and
+         (sdNextState2 = UnusedState) then
+        sdMatchType := mtUnused;
+    end;
+  end;
+end;
+{--------}
+function TRegexEngine.rcMatchSubString(const s   : string;
+                                         StartPosn : integer;
+                                         var Len   : integer)
+                                                            : boolean;
+var
+  Ch     : AnsiChar;
+  State  : integer;
+  StrInx : integer;
+begin
+  {assume we fail to match}
+  Result := false;
+  Len := StartPosn;
+  {clear the deque}
+  FHead := FCapacity div 2;
+  FTail := FHead;
+
+  
+  {enqueue the special value to start scanning}
+  DequeEnqueue(MustScan);
+  {enqueue the first state}
+  DequeEnqueue(FStartState);
+  {prepare the string index}
+  StrInx := StartPosn;
+  {loop until the deque is empty or we run out of string}
+  repeat
+    {pop the top state from the deque}
+    State := DequePop;
+    {process the "must scan" state first}
+    if (State = MustScan) then begin
+      {if the deque is empty at this point, we might as well give up
+       since there are no states left to process new characters}
+      if (FHead <> FTail) then begin
+        {if we haven't run out of string, get the character, and
+         enqueue the "must scan" state again}
+          if IgnoreCase then
+            Ch := Upcase(s[StrInx])
+          else
+            Ch := s[StrInx];
+          DequeEnqueue(MustScan);
+        inc(StrInx);
+      end;
+    end
+    {otherwise, process the state}
+    else with FStateTable[State] do begin
+      case sdMatchType of
+        mtChar :
+          begin
+            {for a match of a character, enqueue the next state}
+            if (Ch = sdChar) then
+              DequeEnqueue(sdNextState1);
+          end;
+        mtAnyChar :
+          begin
+            {for a match of any character, enqueue the next state}
+            if not (Ch in newline) then
+              DequeEnqueue(sdNextState1);
+          end;
+        mtClass :
+          begin
+            {for a match within a class, enqueue the next state}
+            if (Ch in sdClass^) then
+              DequeEnqueue(sdNextState1);
+          end;
+        mtNegClass :
+          begin
+            {for a match not within a class, enqueue the next state}
+            if not (Ch in sdClass^) then
+              DequeEnqueue(sdNextState1);
+          end;
+        mtTerminal :
+          begin
+            {for a terminal state, the string successfully matched
+             if the regex had no end anchor, or we're at the end
+             of the string or line}
+            if (not FAnchorEnd) or (ch=#0) or (FMultiLine and (ch in newline)) then begin
+              Result := true;
+              Len := StrInx-1;
+//                Exit;
+            end;
+          end;
+        mtNone :
+          begin
+            {for free moves, push the next states onto the deque}
+            Assert(sdNextState2 <> UnusedState,
+                   'optimization should remove all states with one no-cost move');
+            DequePush(sdNextState2);
+            DequePush(sdNextState1);
+          end;
+        mtUnused :
+          begin
+            Assert(false, 'unused states shouldn''t be seen');
+          end;
+      end;
+    end;
+  until (FHead = FTail) or (ch = #0); // deque empty or end of string
+  {if we reach this point we've either exhausted the deque or we've
+   run out of string; if the former, the substring did not match
+   since there are no more states. If the latter, we need to check
+   the states left on the deque to see if one is the terminating
+   state; if so the string matched the regular expression defined by
+   the transition table}
+  while (FHead <> FTail) do begin
+    State := DequePop;
+    with FStateTable[State] do begin
+      case sdMatchType of
+        mtNone :
+          begin
+            {for free moves, push the next states onto the deque}
+            Assert(sdNextState2 <> UnusedState,
+                   'optimization should remove all states with one no-cost move');
+            DequePush(sdNextState2);
+            DequePush(sdNextState1);
+          end;
+        mtTerminal :
+          begin
+            {for a terminal state, the string successfully matched
+             if the regex had no end anchor, or we're at the end
+             of the string or line}
+            if (not FAnchorEnd) or (ch=#0) or (FMultiLine and (ch in newline)) then begin
+              Result := true;
+              Len := StrInx -1;
+              Exit;
+            end;
+          end;
+      end;{case}
+    end;
+  end;
+end;
+{--------}
+function TRegexEngine.rcParseAnchorExpr : integer;
+begin
+  {check for an initial '^'}
+  if (FPosn^ = '^') then begin
+    FAnchorStart := true;
+    inc(FPosn);
+  end;
+
+  {parse an expression}
+  Result := rcParseExpr;
+
+  {if we were successful, check for the final '$'}
+  if (Result <> ErrorState) then begin
+    if (FPosn^ = '$') then begin
+      FAnchorEnd := true;
+      inc(FPosn);
+    end;
+  end;
+end;
+{--------}
+function TRegexEngine.rcParseAtom : integer;
+var
+  MatchType : TNFAMatchType;
+  CharClass : PCharSet;
+begin
+  case FPosn^ of
+    '(' :
+      begin
+        {move past the open parenthesis}
+        inc(FPosn);
+
+        {parse a complete regex between the parentheses}
+        Result := rcParseExpr;
+        if (Result = ErrorState) then
+          Exit;
+        {if the current character is not a close parenthesis,
+         there's an error}
+        if (FPosn^ <> ')') then begin
+          FErrorCode := recNoCloseParen;
+          Result := ErrorState;
+          Exit;
+        end;
+        {move past the close parenthesis}
+        inc(FPosn);
+      end;
+    '[' :
+      begin
+        {move past the open square bracket}
+        inc(FPosn);
+
+        {if the first character in the class is a '^' then the
+         class if negated, otherwise it's a normal one}
+        if (FPosn^ = '^') then begin
+          inc(FPosn);
+          MatchType := mtNegClass;
+        end
+        else begin
+          MatchType := mtClass;
+        end;
+        {allocate the class character set and parse the character
+         class; this will return either with an error, or when the
+         closing square bracket is encountered}
+        New(CharClass);
+        CharClass^ := [];
+        if not rcParseCharClass(CharClass) then begin
+          Dispose(CharClass);
+          Result := ErrorState;
+          Exit;
+        end;
+        {move past the closing square bracket}
+        Assert(FPosn^ = ']',
+               'the rcParseCharClass terminated without finding a "]"');
+        inc(FPosn);
+
+        {add a new state for the character class}
+        Result := rcAddState(MatchType, #0, CharClass,
+                             NewFinalState, UnusedState);
+      end;
+    '.' :
+      begin
+        {move past the period metacharacter}
+        inc(FPosn);
+
+        {add a new state for the 'any character' token}
+        Result := rcAddState(mtAnyChar, #0, nil,
+                             NewFinalState, UnusedState);
+      end;
+  else
+    {otherwise parse a single character}
+    Result := rcParseChar;
+  end;{case}
+end;
+{--------}
+function TRegexEngine.rcParseCCChar(out EscapeChar : Boolean) : AnsiChar;
+begin
+  EscapeChar:=False;
+  {if we hit the end of the string, it's an error}
+  if (FPosn^ = #0) then begin
+    FErrorCode := recSuddenEnd;
+    Result := #0;
+    Exit;
+  end;
+  {if the current char is a metacharacter (at least in terms of a
+   character class), it's an error}
+  if FPosn^ in [']', '-'] then begin
+    FErrorCode := recMetaChar;
+    Result := #0;
+    Exit;
+  end;
+  {otherwise return the character and advance past it}
+  if (FPosn^ = '\') then
+    {..it's an escaped character: get the next character instead}
+    begin
+    inc(FPosn);
+    EscapeChar:=True;
+    Result := rcReturnEscapeChar;
+    end
+  else
+    Result := FPosn^;
+  inc(FPosn);
+end;
+{--------}
+function TRegexEngine.rcParseChar : integer;
+var
+  Ch : AnsiChar;
+begin
+  {if we hit the end of the string, it's an error}
+  if (FPosn^ = #0) then begin
+    Result := ErrorState;
+    FErrorCode := recSuddenEnd;
+    Exit;
+  end;
+  {if the current char is one of the metacharacters, it's an error}
+  if FPosn^ in MetaCharacters then begin
+    Result := ErrorState;
+    FErrorCode := recMetaChar;
+    Exit;
+  end;
+  {otherwise add a state for the character}
+  {..if it's an escaped character: get the next character instead}
+  if (FPosn^ = '\') then
+    begin
+    inc(FPosn);
+    ch := rcReturnEscapeChar;
+    end
+  else
+    ch :=FPosn^;
+  if IgnoreCase then
+    Ch := Upcase(ch);
+  Result := rcAddState(mtChar, Ch, nil, NewFinalState, UnusedState);
+  inc(FPosn);
+end;
+{--------}
+function TRegexEngine.rcParseCharClass(aClass : PCharSet) : boolean;
+begin
+  {assume we can't parse a character class properly}
+  Result := false;
+  {parse a character range; if we can't there was an error and the
+   caller will take care of it}
+  if not rcParseCharRange(aClass) then
+    Exit;
+  {if the current character was not the right bracket, parse another
+   character class (note: we're removing the tail recursion here)}
+  while (FPosn^ <> ']') do begin
+    if not rcParseCharRange(aClass) then
+      Exit;
+  end;
+  {if we reach here we were successful}
+  Result := true;
+end;
+{--------}
+function TRegexEngine.rcParseCharRange(aClass : PCharSet) : boolean;
+var
+  StartChar : AnsiChar;
+  EndChar   : AnsiChar;
+  Ch        : AnsiChar;
+  EscChar   : Boolean;
+begin
+  {assume we can't parse a character range properly}
+  Result := false;
+  {parse a single character; if it's null there was an error}
+  StartChar := rcParseCCChar(EscChar);
+  if (StartChar = #0) then
+    Exit;
+  if EscChar then
+    begin
+    case StartChar of
+      'd' : aClass^ := aClass^ + cs_digits;
+      'D' : aClass^ := aClass^ + cs_allchars-cs_digits;
+      's' : aClass^ := aClass^ + cs_whitespace;
+      'S' : aClass^ := aClass^ + cs_allchars-cs_whitespace;
+      'w' : aClass^ := aClass^ + cs_wordchars;
+      'W' : aClass^ := aClass^ + cs_allchars-cs_wordchars
+    else
+      EscChar := False;
+    end;
+    if EscChar then
+      begin
+      Result := True;
+      Exit;
+      end;
+    end;
+  {if the current character is not a dash, the range consisted of a
+   single character}
+  if (FPosn^ <> '-') then begin
+    if IgnoreCase then
+      Include(aClass^, Upcase(StartChar))
+    else
+      Include(aClass^, StartChar)
+  end
+  {otherwise it's a real range, so get the character at the end of the
+   range; if that's null, there was an error}
+  else begin
+    inc(FPosn); {move past the '-'}
+    EndChar := rcParseCCChar(EscChar);
+    if (EndChar = #0) then
+      Exit;
+    {build the range as a character set}
+    if (StartChar > EndChar) then begin
+      Ch := StartChar;
+      StartChar := EndChar;
+      EndChar := Ch;
+    end;
+    for Ch := StartChar to EndChar do begin
+      Include(aClass^, Ch);
+      if IgnoreCase then
+        Include(aClass^, Upcase(Ch));
+    end;
+  end;
+  {if we reach here we were successful}
+  Result := true;
+end;
+{--------}
+function TRegexEngine.rcParseExpr : integer;
+var
+  StartState1 : integer;
+  StartState2 : integer;
+  EndState1   : integer;
+  OverallStartState : integer;
+begin
+  {assume the worst}
+  Result := ErrorState;
+  {parse an initial term}
+  StartState1 := rcParseTerm;
+  if (StartState1 = ErrorState) then
+    Exit;
+  {if the current character is *not* a pipe character, no alternation
+   is present so return the start state of the initial term as our
+   start state}
+  if (FPosn^ <> '|') then
+    Result := StartState1
+  {otherwise, we need to parse another expr and join the two together
+   in the transition table}
+  else begin
+
+    {advance past the pipe}
+    inc(FPosn);
+    {the initial term's end state does not exist yet (although there
+     is a state in the term that points to it), so create it}
+    EndState1 := rcAddState(mtNone, #0, nil, UnusedState, UnusedState);
+    {for the OR construction we need a new initial state: it will
+     point to the initial term and the second just-about-to-be-parsed
+     expr}
+    OverallStartState := rcAddState(mtNone, #0, nil,
+                                    UnusedState, UnusedState);
+    {parse another expr}
+    StartState2 := rcParseExpr;
+    if (StartState2 = ErrorState) then
+      Exit;
+    {alter the state state for the overall expr so that the second
+     link points to the start of the second expr}
+    Result := rcSetState(OverallStartState, StartState1, StartState2);
+    {now set the end state for the initial term to point to the final
+     end state for the second expr and the overall expr}
+    rcSetState(EndState1, FStateCount, UnusedState);
+  end;
+end;
+{--------}
+function TRegexEngine.rcParseFactor : integer;
+var
+  StartStateAtom : integer;
+  EndStateAtom   : integer;
+  TempEndStateAtom : integer;
+  Int            : string;
+  n,m,nState     : integer;
+  i              : integer;
+begin
+  {assume the worst}
+  Result := ErrorState;
+  {first parse an atom}
+  StartStateAtom := rcParseAtom;
+  if (StartStateAtom = ErrorState) then
+    Exit;
+  {check for a closure operator}
+  case FPosn^ of
+    '?' : begin
+            {move past the ? operator}
+            inc(FPosn);
+            {the atom's end state doesn't exist yet, so create one}
+            EndStateAtom := rcAddState(mtNone, #0, nil,
+                                       UnusedState, UnusedState);
+            {create a new start state for the overall regex}
+            Result := rcAddState(mtNone, #0, nil,
+                                 StartStateAtom, EndStateAtom);
+            {make sure the new end state points to the next unused
+             state}
+            rcSetState(EndStateAtom, FStateCount, UnusedState);
+          end;
+    '*' : begin
+            {move past the * operator}
+            inc(FPosn);
+            {the atom's end state doesn't exist yet, so create one;
+             it'll be the start of the overall regex subexpression}
+            Result := rcAddState(mtNone, #0, nil,
+                                 NewFinalState, StartStateAtom);
+          end;
+    '+' : begin
+            {move past the + operator}
+            inc(FPosn);
+            {the atom's end state doesn't exist yet, so create one}
+            rcAddState(mtNone, #0, nil, NewFinalState, StartStateAtom);
+            {the start of the overall regex subexpression will be the
+             atom's start state}
+            Result := StartStateAtom;
+          end;
+    '{' : begin // {n,m}
+            {move past the brace }
+            inc(FPosn);
+
+            {Parse the value of n}
+            Int := '';
+            while not (FPosn^ in [',','}',#0]) do
+              begin
+              int := int+FPosn^;
+              inc(FPosn);
+              end;
+            if FPosn^ = #0 then exit; // No end-brace or comma -> invalid regex
+            if int <> '' then
+              n := StrToIntDef(Int,-2)
+            else
+              n := -1; // if n is 'empty', set it to -1
+            if n = -2 then exit; // Invalid value for n -> invalid RegEx
+
+            if FPosn^ <> '}' then
+              begin
+
+              {move past the , }
+              inc(FPosn);
+              {Parse the value of m}
+              Int := '';
+              while not (FPosn^ in ['}',#0]) do
+                begin
+                int := int+FPosn^;
+                inc(FPosn);
+                end;
+              if FPosn^ <> '}' then exit; // No end-brace -> invalid regex
+              if int <> '' then m := StrToIntDef(Int,-2)
+              else m := -1;
+              if m = -2 then exit; // Invalid RegEx
+              end
+            else
+              m := -3;
+
+            {move past the brace }
+            inc(FPosn);
+
+            if (n=0) and (m=-1) then
+            {the atom's end state doesn't exist yet, so create one;
+             it'll be the start of the overall regex subexpression}
+              Result := rcAddState(mtNone, #0, nil, NewFinalState, StartStateAtom)
+            else
+              begin
+              EndStateAtom := FStateCount-1;
+              TempEndStateAtom:=StartStateAtom;
+              for i := 1 to n-1 do
+                begin
+                TempEndStateAtom:=FStateCount;
+                for nState:=StartStateAtom to EndStateAtom do
+                  begin
+                  FStateTable[FStateCount]:=FStateTable[nState];
+                  if FStateTable[FStateCount].sdNextState1 in [StartStateAtom..EndStateAtom+1] then
+                    FStateTable[FStateCount].sdNextState1 := i+FStateTable[FStateCount].sdNextState1+ (EndStateAtom-StartStateAtom) *i;
+                  if FStateTable[FStateCount].sdNextState2 in [StartStateAtom..EndStateAtom+1] then
+                    FStateTable[FStateCount].sdNextState2 := i+FStateTable[FStateCount].sdNextState2 + (EndStateAtom-StartStateAtom) *i;
+                  inc(FStateCount);
+
+                  if FStateCount=length(FStateTable) then
+                    setlength(FStateTable,(FStateCount * 3) div 2);
+                  end;
+                end;
+
+            for i := n to m-1 do
+              begin
+              rcAddState(mtNone, #0, nil, NewFinalState, EndStateAtom+(EndStateAtom-StartStateAtom+1) * (m-1) + (m-n)+1);
+
+              TempEndStateAtom:=FStateCount;
+              for nState:=StartStateAtom to EndStateAtom do
+                begin
+                FStateTable[FStateCount]:=FStateTable[nState];
+                if FStateTable[FStateCount].sdNextState1 in [StartStateAtom..EndStateAtom+1] then
+                  FStateTable[FStateCount].sdNextState1 := i+FStateTable[FStateCount].sdNextState1+ (EndStateAtom-StartStateAtom) * i+(i-n+1);
+                if FStateTable[FStateCount].sdNextState2 in [StartStateAtom..EndStateAtom+1] then
+                  FStateTable[FStateCount].sdNextState2 := i+FStateTable[FStateCount].sdNextState2 + (EndStateAtom-StartStateAtom) * i+(i-n+1);
+                inc(FStateCount);
+
+                if FStateCount=length(FStateTable) then
+                  setlength(FStateTable,(FStateCount * 3) div 2);
+                end;
+              end;
+
+              if m = -1 then
+                rcAddState(mtNone, #0, nil, NewFinalState, TempEndStateAtom);
+
+              Result := StartStateAtom;
+              end;
+          end;
+
+  else
+    Result := StartStateAtom;
+  end;{case}
+end;
+{--------}
+function TRegexEngine.rcParseTerm : integer;
+var
+  StartState2 : integer;
+  EndState1   : integer;
+begin
+  {parse an initial factor, the state number returned will also be our
+   return state number}
+  Result := rcParseFactor;
+  if (Result = ErrorState) then
+    Exit;
+  {Note: we have to "break the grammar" here. We've parsed a regular
+         subexpression and we're possibly following on with another
+         regular subexpression. There's no nice operator to key off
+         for concatenation: we just have to know that for
+         concatenating two subexpressions, the current character will
+         be
+           - an open parenthesis
+           - an open square bracket
+           - an any char operator
+           - a character that's not a metacharacter
+         i.e., the three possibilities for the start of an "atom" in
+         our grammar}
+  if (FPosn^ = '(') or
+     (FPosn^ = '[') or
+     (FPosn^ = '.') or
+     ((FPosn^ <> #0) and not (FPosn^ in MetaCharacters)) then begin
+
+    {the initial factor's end state does not exist yet (although there
+     is a state in the term that points to it), so create it}
+    EndState1 := rcAddState(mtNone, #0, nil, UnusedState, UnusedState);
+    {parse another term}
+    StartState2 := rcParseTerm;
+    if (StartState2 = ErrorState) then begin
+      Result := ErrorState;
+      Exit;
+    end;
+    {join the first factor to the second term}
+    rcSetState(EndState1, StartState2, UnusedState);
+  end;
+end;
+
+procedure TRegexEngine.WriteTable;
+var i : integer;
+begin
+  for i := 0 to FStateCount-1 do with FStateTable[i] do
+//  FPC version 2.2.1 can not write enum-types. Disabled in fixes_2_2 only
+//    writeln('s:',i,' mt:',sdMatchType ,' ns1:',sdNextState1,' ns2:',sdNextState2,' char:',sdChar);
+    writeln('s:',i,' ns1:',sdNextState1,' ns2:',sdNextState2,' char:',sdChar);
+end;
+
+procedure TRegexEngine.DequeEnqueue(aValue: integer);
+begin
+  FList[FTail] := aValue;
+  inc(FTail);
+  if (FTail = FCapacity) then
+    FTail := 0
+  else if (FTail = FHead) then
+    DequeGrow;
+end;
+
+procedure TRegexEngine.DequePush(aValue: integer);
+begin
+  if (FHead = 0) then
+    FHead := FCapacity;
+  dec(FHead);
+  FList[FHead] := aValue;
+  if (FTail = FHead) then
+    DequeGrow;
+end;
+
+function TRegexEngine.DequePop: integer;
+begin
+  Result := FList[FHead];
+  inc(FHead);
+  if (FHead = FCapacity) then
+    FHead := 0;
+end;
+
+procedure TRegexEngine.DequeGrow;
+var
+  OldCount : integer;
+  i, j     : integer;
+begin
+  {grow the list by 50%}
+  OldCount := FCapacity;
+  FCapacity:=(OldCount * 3) div 2;
+  SetLength(FList,FCapacity);
+  {expand the data into the increased space, maintaining the deque}
+  if (FHead = 0) then
+    FTail := OldCount
+  else begin
+    j := FCapacity;
+    for i := pred(OldCount) downto FHead do begin
+      dec(j);
+      FList[j] := FList[i]
+    end;
+    FHead := j;
+  end;
+end;
+
+function TRegexEngine.rcReturnEscapeChar: AnsiChar;
+begin
+  case FPosn^ of
+    't' : Result := #9;
+    'n' : Result := #10;
+    'r' : Result := #13;
+    'f' : Result := #12;
+    'a' : Result := #7;
+  else
+    Result := FPosn^;
+  end;
+end;
+
+{--------}
+procedure TRegexEngine.rcSetIgnoreCase(aValue : boolean);
+begin
+  if (aValue <> FIgnoreCase) then begin
+    rcClear;
+    FIgnoreCase := aValue;
+  end;
+end;
+{--------}
+procedure TRegexEngine.rcSetRegexStr(const aRegexStr : string);
+begin
+  if (aRegexStr <> FRegexStr) then begin
+    rcClear;
+    FRegexStr := aRegexStr;
+  end;
+end;
+{--------}
+function TRegexEngine.rcSetState(aState     : integer;
+                                   aNextState1: integer;
+                                   aNextState2: integer) : integer;
+begin
+  Assert((0 <= aState) and (aState < FStateCount),
+         'trying to change an invalid state');
+
+  {get the state record and change the transition information}
+  FStateTable[aState].sdNextState1 := aNextState1;
+  FStateTable[aState].sdNextState2 := aNextState2;
+  Result := aState;
+end;
+{--------}
+procedure TRegexEngine.rcSetUpcase(aValue : TUpcaseFunc);
+begin
+  if not Assigned(aValue) then
+    FUpcase := SystemUpcase
+  else
+    FUpcase := aValue;
+end;
+
+procedure TRegexEngine.rcSetMultiLine(aValue: Boolean);
+begin
+  FMultiLine:=aValue;
+end;
+
+{====================================================================}
+
+end.

+ 80 - 1170
packages/regexpr/src/regexpr.pp

@@ -1,10 +1,9 @@
-{ $DEFINE DEBUG}
-
 {
-    This unit implements basic regular expression support
+    This file is part of the Free Pascal packages library.
+    Copyright (c) 2008 by Joost van der Sluis, member of the
+    Free Pascal development team
 
-    This file is part of the Free Pascal run time library.
-    Copyright (c) 2000-2006 by Florian Klaempfland Carl Eric Codere
+    Compatibility unit for the old regexpr unit.
 
     See the file COPYING.FPC, included in this distribution,
     for details about the copyright.
@@ -14,1194 +13,105 @@
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 
  **********************************************************************}
-{.$ define DEBUG}
-
-(*
-  - newline handling (uses all known formats of ASCII, #10,#13,#13#10 and #$85
-
-  TODO:
-     - correct backtracking, for example in (...)*
-     - full | support (currently requires to put all items with | operator
-        between parenthesis (in a group) to take care over order priority.
-          Therefore the following would work: (foo)|(nofoo) but not
-          foo|nofoo
-     - getting substrings and using substrings with \1 etc.
-     - do we treat several multiline characters in a row as a single
-        newline character for $ and ^?
-*)
-
-{$IFDEF FPC}
-{$mode objfpc}
-{$ENDIF}
-
-{** @abstract(Regular expression unit)
-
-    This unit implements a basic regular expression parser that mostly conforms
-    to the POSIX extended-regular expression syntax. It also supports standard
-    escape characters for patterns (as defined in PERL).
-}
-unit regexpr;
-
-  interface
-
-    { the following declarions are only in the interface because }
-    { some procedures return pregexprentry but programs which   }
-    { use this unit shouldn't access this data structures        }
-    type
-       tcharset = set of char;
-       tregexprentrytype = (ret_charset,ret_or,
-          ret_illegalend,ret_backtrace,ret_startline,
-          ret_endline,ret_pattern);
-
-       pregexprentry = ^tregexprentry;
-       tregexprentry = record
-          next,nextdestroy : pregexprentry;
-          case typ : tregexprentrytype of
-             ret_charset : (chars : tcharset; elsepath : pregexprentry);
-             {** This is a complete pattern path ()+ , ()* or ()?, n,m }
-             ret_pattern: (pattern: pregexprentry; minoccurs: integer; maxoccurs: integer; alternative : pregexprentry);
-       end;
-
-       tregexprflag = (
-         ref_singleline,
-         {** This indicates that a start of line is either the
-             start of the pattern or a linebreak. }
-         ref_multiline,
-         {** The match will be done in a case-insensitive way
-              according to US-ASCII character set. }
-         ref_caseinsensitive);
-       tregexprflags = set of tregexprflag;
-
-       TRegExprEngine = record
-          Data : pregexprentry;
-          DestroyList : pregexprentry;
-          Flags : TRegExprFlags;
-       end;
-
-     const
-        cs_allchars : tcharset = [#0..#255];
-        cs_wordchars : tcharset = ['A'..'Z','a'..'z','_','0'..'9'];
-        cs_newline : tcharset = [#10];
-        cs_digits : tcharset = ['0'..'9'];
-        cs_whitespace : tcharset = [' ',#9];
-
-     var
-        { these are initilized in the init section of the unit }
-        cs_nonwordchars : tcharset;
-        cs_nondigits : tcharset;
-        cs_nonwhitespace : tcharset;
-
-     { the following procedures can be used by units basing }
-     { on the regexpr unit                                  }
-
-     {** From a regular expression, compile an encoded version of the regular expression.
-
-         @param(regexpr Regular expression to compile)
-         @param(flags Flags relating to the type of parsing that will occur)
-         @param(RegExprEngine The actual encoded version of the regular expression)
-         @returns(true if success, otherwise syntax error in compiling regular expression)
-     }
-     function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags;var RegExprEngine: TRegExprEngine): boolean;
-
-{$IFDEF FPC}
-    {** Backward compatibility routine }
-     function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags): TREGExprEngine;
-{$ENDIF}
-
-     {** Frees all up resources used for the encoded regular expression }
-     procedure DestroyRegExprEngine(var regexpr : TRegExprEngine);
-
-     {** @abstract(Matches a regular expression)
-
-        @param(RegExprEngine The actual compiled regular expression to match against)
-        @param(p The text to search for for a match)
-        @param(index zero-based index to the start of the match -1 if no match in p)
-        @param(len length of the match)
-        @returns(true if there was a match, otherwise false)
-     }
-     function RegExprPos(RegExprEngine : TRegExprEngine;p : pchar;var index,len : longint) : boolean;
-
-     function RegExprReplaceAll(RegExprEngine : TRegExprEngine;const src,newstr : ansistring;var dest : ansistring) : sizeint;
-
-     { This function Escape known regex chars and place the result on Return. If something went wrong the
-       function will return false. }
-     function RegExprEscapeStr (const S : string) : string;
-
-  implementation
-
-{$ifdef DEBUG}
-     procedure writecharset(c : tcharset);
-
-       var
-          b : byte;
-
-       begin
-          for b:=20 to 255 do
-            if chr(b) in c then
-              write(chr(b));
-          writeln;
-       end;
-
-
-    const
-
-      typ2str : array[tregexprentrytype] of string =
-      (
-        'ret_charset',
-        'ret_or',
-        'ret_illegalend',
-        'ret_backtrace',
-        'ret_startline',
-        'ret_endline',
-        'ret_pattern'
-      );
-
-
-     { Dumps all the next elements of a tree }
-     procedure dumptree(space: string; regentry: pregexprentry);
-      begin
-        while assigned(regentry) do
-          begin
-            WriteLn(space+'------- Node Type ',typ2str[regentry^.typ]);
-            if (regentry^.typ = ret_charset) then
-              WriteCharSet(regentry^.chars);
-            { dump embedded pattern information }
-            if regentry^.typ = ret_pattern then
-               begin
-                 dumptree(space+#9,regentry^.pattern);
-                 WriteLn(space+#9,' --- Alternative nodes ');
-                 if assigned(regentry^.alternative) then
-                   dumptree(space+#9#9,regentry^.alternative);
-               end;
-            if regentry^.typ = ret_startline then
-               dumptree(space+#9,regentry^.pattern);
-            regentry:=regentry^.next;
-          end;
-      end;
-{$endif DEBUG}
-
-
-     {** Determines the length of a pattern, including sub-patterns.
-
-         It goes through the nodes and returns the pattern length
-         between the two, using minOccurs as required.
-
-         Called recursively.
-     }
-     function patlength(hp1: pregexprentry): integer;
-       var
-        count: integer;
-        hp: pregexprentry;
-       begin
-        count:=0;
-        if hp1^.typ=ret_pattern then
-            hp:=hp1^.pattern
-        else
-            hp:=hp1;
-        { now go through all chars and get the length
-          does not currently take care of embedded patterns
-        }
-        while assigned(hp) do
-          begin
-            if hp^.typ = ret_pattern then
-              begin
-                inc(count,patlength(hp));
-              end
-            else
-            if hp^.typ = ret_charset then
-               inc(count);
-            hp:=hp^.next;
-          end;
-        if hp1^.typ=ret_pattern then
-          begin
-            count:=hp1^.minOccurs*count;
-          end;
-         patlength:=count;
-       end;
-
-     function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags; var RegExprEngine:TRegExprEngine) : boolean;
-
-       var
-          first : pregexprentry;
-
-       procedure doregister(p : pregexprentry);
 
-         begin
-           p^.nextdestroy:=first;
-           first:=p;
-         end;
+unit RegExpr;
 
-       var
-          currentpos : pchar;
-          error : boolean;
+{$mode objfpc}{$H+}
 
-       procedure readchars(var chars: tcharset);
+interface
 
-         var
-            c1 : char;
+uses
+  Regex;
 
-         begin
-            chars:=[];
-            case currentpos^ of
-               #0:
-                  exit;
-               '.':
-                  begin
-                     inc(currentpos);
-                     chars:=cs_allchars-cs_newline;
-                  end;
-               '\':
-                  begin
-                     inc(currentpos);
-                     case currentpos^ of
-                        #0:
-                          begin
-                             error:=true;
-                             exit;
-                          end;
-                        't':
-                           begin
-                              inc(currentpos);
-                              chars:=[#9];
-                           end;
-                        'n':
-                           begin
-                              inc(currentpos);
-                              chars:=[#10];
-                           end;
-                        'r':
-                           begin
-                              inc(currentpos);
-                              chars:=[#13];
-                           end;
-                        'd':
-                          begin
-                             inc(currentpos);
-                             chars:=cs_digits;
-                          end;
-                        'D':
-                          begin
-                             inc(currentpos);
-                             chars:=cs_nondigits;
-                          end;
-                        's':
-                          begin
-                             inc(currentpos);
-                             chars:=cs_whitespace;
-                          end;
-                        'S':
-                          begin
-                             inc(currentpos);
-                             chars:=cs_nonwhitespace;
-                          end;
-                        'w':
-                           begin
-                              inc(currentpos);
-                              chars:=cs_wordchars;
-                           end;
-                        'W':
-                           begin
-                              inc(currentpos);
-                              chars:=cs_nonwordchars;
-                           end;
-                        'f' :
-                            begin
-                              inc(currentpos);
-                              chars:= [#12];
-                            end;
-                        'a' :
-                            begin
-                              inc(currentpos);
-                              chars:= [#7];
-                            end;
-                         else
-                           begin { Some basic escaping...}
-                              chars := [currentpos^];
-                              inc (currentpos);
-                              {error:=true;
-                              exit;}
-                           end;
-                     end;
-                  end;
-               else
-                 begin
-                    if ref_caseinsensitive in flags then
-                       c1:=upcase(currentpos^)
-                    else
-                       c1:=currentpos^;
+type
+   tregexprflag = (
+     ref_singleline,
+     {** This indicates that a start of line is either the
+         start of the pattern or a linebreak. }
+     ref_multiline,
+     {** The match will be done in a case-insensitive way
+          according to US-ASCII character set. }
+     ref_caseinsensitive);
+   tregexprflags = set of tregexprflag;
 
-                    inc(currentpos);
-                    if currentpos^='-' then
-                      begin
-                         inc(currentpos);
-                         if currentpos^=#0 then
-                           begin
-                              error:=true;
-                              exit;
-                           end;
-                         if ref_caseinsensitive in flags then
-                           chars:=[c1..upcase(currentpos^)]
-                         else
-                           chars:=[c1..currentpos^];
-                         inc(currentpos);
-                      end
-                    else
-                      chars:=[c1];
-                 end;
-            end;
-         end;
+   TRegExprEngine = TRegexEngine;
 
+function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags;var RegExprEngine: TRegExprEngine): boolean;
+function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags): TREGExprEngine;
+procedure DestroyRegExprEngine(var regexpr : TRegExprEngine);
 
-       procedure readcharset(var charset: tcharset);
+function RegExprPos(RegExprEngine : TRegExprEngine;p : pchar;var index,len : longint) : boolean;
+function RegExprReplaceAll(RegExprEngine : TRegExprEngine;const src,newstr : ansistring;var dest : ansistring) : sizeint;
 
-         var
-           chars: tcharset;
-         begin
-            charset:=[];
-            case currentpos^ of
-               #0:
-                  exit;
-               '[':
-                  begin
-                     inc(currentpos);
-                     while currentpos^<>']' do
-                       begin
-                          if currentpos^='^' then
-                            begin
-                               inc(currentpos);
-                               readchars(chars);
-                               charset:=charset+(cs_allchars-chars);
-                            end
-                          else
-                            begin
-                              readchars(chars);
-                              charset:=charset+chars;
-                            end;
-                          if error or (currentpos^=#0) then
-                            begin
-                               error:=true;
-                               exit;
-                            end;
-                       end;
-                     inc(currentpos);
-                  end;
-               '^':
-                  begin
-                     inc(currentpos);
-                     readchars(chars);
-                     charset:=cs_allchars-chars;
-                  end;
-               else
-                  begin
-                    readchars(chars);
-                    charset:=chars;
-                  end;
-            end;
-         end;
+function RegExprEscapeStr (const S : string) : string;
 
+implementation
 
-       (* takes care of parsing the {n}, {n,} and {n,m} regular expression
-          elements. In case of error, sets error to true and returns false,
-          otherwise returns true and set minoccurs and maxoccurs accordingly
-          (-1 if not present). *)
-       function parseoccurences(var currentpos: pchar; var minoccurs,maxoccurs: integer): boolean;
-         var
-          minOccursString: string;
-          maxOccursString: string;
-         begin
-           parseoccurences:=false;
-           minOccurs:=-1;
-           maxOccurs:=-1;
-           inc(currentpos);
-           minOccursString:='';
-           if currentPos^ = #0 then
-             begin
-               error:=true;
-               exit;
-             end;
-            while (currentpos^<>#0) and (currentpos^ in ['0'..'9']) do
-                begin
-                   minOccursString:=minOccursString+currentPos^;
-                   inc(currentpos);
-                end;
-            if length(minOccursString) = 0 then
-                begin
-                  error:=true;
-                  exit;
-                end;
-            Val(minOccursString,minOccurs);
-            { possible cases here: commad or end bracket }
-            if currentpos^= '}' then
-              begin
-                inc(currentpos);
-                maxOccurs:=minOccurs;
-                parseoccurences:=true;
-                exit;
-              end;
-            if currentpos^= ',' then
-              begin
-                maxOccursString:='';
-                inc(currentpos);
-                while (currentpos^<>#0) and (currentpos^ in ['0'..'9']) do
-                begin
-                   maxOccursString:=maxOccursString+currentPos^;
-                   inc(currentpos);
-                end;
-                if currentpos^= '}' then
-                 begin
-                   { If the length of the string is zero, then there is
-                     no upper bound. }
-                   if length(maxOccursString) > 0 then
-                      Val(maxOccursString,maxOccurs)
-                   else
-                      maxOccurs:=high(integer);
-                   inc(currentpos);
-                   parseoccurences:=true;
-                   exit;
-                 end;
-              end;
-              error:=true;
-         end;
+function GenerateRegExprEngine(regexpr: pchar; flags: tregexprflags;
+  var RegExprEngine: TRegExprEngine): boolean;
+var ErrorPos  : Integer;
+    ErrorCode : TRegexError;
 
+begin
+  RegExprEngine := TRegExprEngine.Create(regexpr);
+  if ref_multiline in flags then RegExprEngine.MultiLine:=True;
+  if ref_caseinsensitive in flags then RegExprEngine.IgnoreCase:=True;
+  Result := RegExprEngine.Parse(ErrorPos,ErrorCode);
+end;
 
-       function parseregexpr(next,elsepath : pregexprentry) : pregexprentry;
-
-         var
-            hp : pregexprentry;
-            minOccurs,maxOccurs: integer;
-            hp3: pregexprentry;
-            cs : tcharset;
-            chaining : ^pregexprentry;
-
-         begin
-            chaining:=nil;
-            parseregexpr:=nil;
-            elsepath:=nil;
-            if error then
-              exit;
-            { this dummy allows us to redirect the elsepath later }
-{            new(ep);
-            doregister(ep);
-            ep^.typ:=ret_charset;
-            ep^.chars:=[];
-            ep^.elsepath:=elsepath;
-            elsepath:=ep;}
-            while true do
-              begin
-                 if error then
-                   exit;
-                 case currentpos^ of
-                    '(':
-                       begin
-                          inc(currentpos);
-                          hp:=parseregexpr(nil,nil);
-                          { Special characters after the bracket }
-                           if error then
-                              exit;
-                          if currentpos^<>')' then
-                            begin
-                               error:=true;
-                               exit;
-                            end;
-                          inc(currentpos);
-                            case currentpos^ of
-                            '*':
-                               begin
-                                  inc(currentpos);
-                                  new(hp3);
-                                  doregister(hp3);
-                                  hp3^.typ:=ret_pattern;
-                                  hp3^.alternative:=nil;
-                                  hp3^.pattern:=hp;
-                                  hp3^.elsepath:=elsepath;
-                                  hp3^.minoccurs:=0;
-                                  hp3^.maxoccurs:=high(integer);
-                                  hp3^.next:=nil;
-                                  if assigned(chaining) then
-                                    chaining^:=hp3
-                                  else
-                                    parseregexpr:=hp3;
-                                  chaining:=@hp3^.next;
-                               end;
-                            '+':
-                               begin
-                                  inc(currentpos);
-                                  new(hp3);
-                                  doregister(hp3);
-                                  hp3^.typ:=ret_pattern;
-                                  hp3^.alternative:=nil;
-                                  hp3^.pattern:=hp;
-                                  hp3^.elsepath:=elsepath;
-                                  hp3^.minoccurs:=1;
-                                  hp3^.maxoccurs:=high(integer);
-                                  hp3^.next:=nil;
-                                  if assigned(chaining) then
-                                    chaining^:=hp3
-                                  else
-                                    parseregexpr:=hp3;
-                                  chaining:=@hp3^.next;
-                               end;
-
-                            '?':
-                               begin
-                                  inc(currentpos);
-                                  new(hp3);
-                                  doregister(hp3);
-                                  hp3^.typ:=ret_pattern;
-                                  hp3^.alternative:=nil;
-                                  hp3^.pattern:=hp;
-                                  hp3^.elsepath:=elsepath;
-                                  hp3^.minoccurs:=0;
-                                  hp3^.maxoccurs:=1;
-                                  hp3^.next:=nil;
-                                  if assigned(chaining) then
-                                    chaining^:=hp3
-                                  else
-                                    parseregexpr:=hp3;
-                                  chaining:=@hp3^.next;
-                               end;
-                            '{':
-                               begin
-                                 if not parseOccurences(currentPos,minOccurs,maxOccurs) then
-                                   exit;
-                                  inc(currentpos);
-                                  new(hp3);
-                                  doregister(hp3);
-                                  hp3^.typ:=ret_pattern;
-                                  hp3^.alternative:=nil;
-                                  hp3^.pattern:=hp;
-                                  hp3^.elsepath:=elsepath;
-                                  hp3^.minoccurs:=minOccurs;
-                                  hp3^.maxoccurs:=maxOccurs;
-                                  hp3^.next:=nil;
-                                  if assigned(chaining) then
-                                    chaining^:=hp3
-                                  else
-                                    parseregexpr:=hp3;
-                                  chaining:=@hp3^.next;
-                               end;
-                            else
-                              begin
-                                { go to end of this list - always the
-                                  last next used }
-(*
-                                hp3:=hp;
-                                while assigned(hp3^.next) do
-                                  begin
-                                    hp3:=hp3^.next;
-                                  end;
-                                if assigned(chaining) then
-                                   chaining^:=hp
-                                else
-                                   parseregexpr:=hp;
-                                chaining:=@hp3^.next;*)
-                                  new(hp3);
-                                  doregister(hp3);
-                                  hp3^.typ:=ret_pattern;
-                                  hp3^.alternative:=nil;
-                                  hp3^.pattern:=hp;
-                                  hp3^.elsepath:=elsepath;
-                                  hp3^.minoccurs:=1;
-                                  hp3^.maxoccurs:=1;
-                                  hp3^.next:=nil;
-                                  if assigned(chaining) then
-                                    chaining^:=hp3
-                                  else
-                                    parseregexpr:=hp3;
-                                  chaining:=@hp3^.next;
-
-                              end;
-                          end;
-                       end;
-{ This is only partially implemented currently, as the terms before
-  the | character must be grouped together with parenthesis, which
-  is also compatible with other regular expressions.
-}
-                    '|':
-                       begin
-{$ifdef DEBUG}
-                          writeln('Creating or entry');
-{$endif DEBUG}
-                          if (not assigned (hp3)) then
-                            begin
-                              error:=true;
-                              exit;
-                            end;
-                          if (hp3^.typ <> ret_pattern) then
-                            begin
-                              error:=true;
-                              exit;
-                            end;
-                          while currentpos^='|' do
-                            begin
-                              inc(currentpos);
-                              if currentpos^=#0 then
-                                begin
-                                   error:=true;
-                                   exit;
-                                end;
-                              { always put the longest pattern first, so
-                                swap the trees as necessary.
-                              }
-                              hp := parseregexpr (next, elsepath);
-                              if patlength(hp) > patlength(hp3^.pattern) then
-                                begin
-                                  hp3^.alternative:=hp3^.pattern;
-                                  hp3^.pattern:=hp;
-                                end
-                              else
-                                 hp3^.alternative:=hp;
-                            end;
-                       end;
-                    ')':
-                       exit;
-                    '^':
-                       begin
-                          inc(currentpos);
-                          hp:=parseregexpr(nil,nil);
-                          { Special characters after the bracket }
-                           if error then
-                              exit;
-                           new(hp3);
-                           doregister(hp3);
-                           hp3^.typ:=ret_startline;
-                           hp3^.pattern:=hp;
-                           hp3^.elsepath:=elsepath;
-                           hp3^.next:=nil;
-                           if assigned(chaining) then
-                              chaining^:=hp3
-                           else
-                              parseregexpr:=hp3;
-                           chaining:=@hp3^.next;
-                       end;
-                    '$':
-                       begin
-                          inc(currentpos);
-                          new(hp);
-                          doregister(hp);
-                          hp^.typ:=ret_endline;
-                          hp^.elsepath:=elsepath;
-                          hp^.next:=nil;
-                          if assigned(chaining) then
-                            chaining^:=hp
-                          else
-                            parseregexpr:=hp;
-                          chaining:=@hp^.next;
-                       end;
-                    #0:
-                       exit;
-                    else
-                      begin
-                         readcharset(cs);
-                         if error then
-                           exit;
-                         case currentpos^ of
-                            '*':
-                               begin
-                                  inc(currentpos);
-                                  new(hp);
-                                  doregister(hp);
-                                  hp^.typ:=ret_charset;
-                                  hp^.chars:=cs;
-                                  hp^.elsepath:=nil;
-                                  hp^.next:=nil;
-                                  new(hp3);
-                                  doregister(hp3);
-                                  hp3^.typ:=ret_pattern;
-                                  hp3^.alternative:=nil;
-                                  hp3^.pattern:=hp;
-                                  hp3^.elsepath:=elsepath;
-                                  hp3^.minoccurs:=0;
-                                  hp3^.maxoccurs:=high(integer);
-                                  hp3^.next:=nil;
-                                  if assigned(chaining) then
-                                    chaining^:=hp3
-                                  else
-                                    parseregexpr:=hp3;
-                                  chaining:=@hp3^.next;
-                               end;
-                            '+':
-                               begin
-                                  inc(currentpos);
-                                  new(hp);
-                                  doregister(hp);
-                                  hp^.typ:=ret_charset;
-                                  hp^.chars:=cs;
-                                  hp^.elsepath:=nil;
-                                  hp^.next:=nil;
-
-                                  new(hp3);
-                                  doregister(hp3);
-                                  hp3^.typ:=ret_pattern;
-                                  hp3^.alternative:=nil;
-                                  hp3^.pattern:=hp;
-                                  hp3^.elsepath:=elsepath;
-                                  hp3^.minoccurs:=1;
-                                  hp3^.maxoccurs:=high(integer);
-                                  hp3^.next:=nil;
-                                  if assigned(chaining) then
-                                    chaining^:=hp3
-                                  else
-                                    parseregexpr:=hp3;
-                                  chaining:=@hp3^.next;
-                               end;
-                            '?':
-                               begin
-                                  inc(currentpos);
-                                  new(hp);
-                                  doregister(hp);
-                                  hp^.typ:=ret_charset;
-                                  hp^.chars:=cs;
-                                  hp^.elsepath:=nil;
-                                  hp^.next:=nil;
-
-                                  new(hp3);
-                                  doregister(hp3);
-                                  hp3^.typ:=ret_pattern;
-                                  hp3^.pattern:=hp;
-                                  hp3^.alternative:=nil;
-                                  hp3^.elsepath:=elsepath;
-                                  hp3^.minoccurs:=0;
-                                  hp3^.maxoccurs:=1;
-                                  hp3^.next:=nil;
-                                  if assigned(chaining) then
-                                    chaining^:=hp3
-                                  else
-                                    parseregexpr:=hp3;
-                                  chaining:=@hp3^.next;
-                               end;
-                            '{':
-                               begin
-                                 if not parseOccurences(currentPos,minOccurs,maxOccurs) then
-                                   exit;
-                                  new(hp);
-                                  doregister(hp);
-                                  hp^.typ:=ret_charset;
-                                  hp^.chars:=cs;
-                                  hp^.elsepath:=nil;
-                                  hp^.next:=nil;
-
-                                  new(hp3);
-                                  doregister(hp3);
-                                  hp3^.typ:=ret_pattern;
-                                  hp3^.alternative:=nil;
-                                  hp3^.pattern:=hp;
-                                  hp3^.elsepath:=elsepath;
-                                  hp3^.minoccurs:=minOccurs;
-                                  hp3^.maxoccurs:=maxOccurs;
-                                  hp3^.next:=nil;
-                                  if assigned(chaining) then
-                                    chaining^:=hp3
-                                  else
-                                    parseregexpr:=hp3;
-                                  chaining:=@hp3^.next;
-
-                                end;
-                            else
-                               { Normal character }
-                               begin
-                                  new(hp);
-                                  doregister(hp);
-                                  hp^.typ:=ret_charset;
-                                  hp^.chars:=cs;
-                                  hp^.elsepath:=elsepath;
-                                  hp^.next:=next;
-                                  if assigned(chaining) then
-                                    chaining^:=hp
-                                  else
-                                    parseregexpr:=hp;
-                                  chaining:=@hp^.next;
-                                  continue;
-                               end;
-                           { This was a pattern }
-                         end; { END CASE }
-                      end;
-                 end;
-              end;
-         end;
-
-       var
-          endp : pregexprentry;
-
-       begin
-          GenerateRegExprEngine:=false;
-          RegExprEngine.Data:=nil;
-          RegExprEngine.DestroyList:=nil;
-          if regexpr=nil then
-            exit;
-          first:=nil;
-          if (ref_singleline in flags) and (ref_multiline in flags) then
-            exit;
-          currentpos:=regexpr;
-          GenerateRegExprEngine:=true;
-          error:=false;
-          new(endp);
-          doregister(endp);
-          endp^.typ:=ret_illegalend;
-          RegExprEngine.flags:=flags;
-          RegExprEngine.Data:=parseregexpr(nil,endp);
-{$IFDEF DEBUG}
-          writeln('========== Generating tree ============');
-          dumptree('',RegExprEngine.Data);
-{$ENDIF}
-          RegExprEngine.DestroyList:=first;
-          if error or (currentpos^<>#0) then
-            begin
-              GenerateRegExprEngine:=false;
-              DestroyRegExprEngine(RegExprEngine);
-            end;
-       end;
-
-
-{$IFDEF FPC}
-    function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags): TREGExprEngine;
-    var
-     r: TRegExprEngine;
-    begin
-      GenerateRegExprEngine(regexpr,flags,r);
-      GenerateRegExprEngine:=r;
-    end;
-{$ENDIF}
-
-    procedure DestroyRegExprEngine(var regexpr : TRegExprEngine);
-
-       var
-          hp : pregexprentry;
-
-       begin
-          hp:=regexpr.DestroyList;
-          while assigned(hp) do
-            begin
-               regexpr.DestroyList:=hp^.nextdestroy;
-               dispose(hp);
-               hp:=regexpr.DestroyList;
-            end;
-          regexpr.Data:=nil;
-          regexpr.DestroyList:=nil;
-       end;
-
-     function RegExprPos(regexprengine : TRegExprEngine;p : pchar;var index,len : longint) : boolean;
-
-       var
-          lastpos : pchar;
-          firstpos: pchar;
+function GenerateRegExprEngine(regexpr: pchar; flags: tregexprflags
+  ): TREGExprEngine;
+  
+var r: TRegExprEngine;
 
-       { Does the actual search of the data - return true if the term was found }
-       function dosearch(regexprentry : pregexprentry;pos : pchar) : boolean;
-       var
-          found: boolean;
-          checkvalue: boolean;
-          savedpos: pchar;
-          counter: word;
+begin
+  GenerateRegExprEngine(regexpr,flags,r);
+  GenerateRegExprEngine:=r;
+end;
 
-         begin
-            dosearch:=false;
-            while true do
-              begin
-                 {$IFDEF Debug}
-                 writeln('Entering ',typ2str[regexprentry^.typ]);
-                 writeln('Pattern length ',patlength(regexprentry));
-                 {$ENDIF Debug}
-                 case regexprentry^.typ of
-                    ret_endline:
-                      begin
-                         { automatically a match! }
-                         if pos^ = #0 then
-                            begin
-                              dosearch:=true;
-                              exit;
-                            end;
-                         if ref_multiline in regexprengine.flags then
-                            begin
-                              { Supports DOS/Commodore/UNIX/IBM Mainframe line formats }
-                              { avoid reading invalid memory also }
-                                  if (pos^=#13) and ((pos+1)^=#10) then
-                                    begin
-                                      regexprentry:=regexprentry^.next;
-                                    end
-                                  else
-                                  if (pos^=#$85) or (pos^=#10) or ((pos^=#13) and ((pos-1) >= firstpos) and ((pos-1)^ <> #10)) then
-                                    begin
-                                       regexprentry:=regexprentry^.next;
-                                    end
-                                  else
-                                    begin
-                                       dosearch:=false;
-                                       exit;
-                                    end;
-                             end
-                           else
-                             exit;
-                      end;
-                    ret_pattern:
-                      begin
-                         found:=false;
-                         { Take care of occurences here }
-                         savedpos:=pos;
-                         counter:=0;
-                         repeat
-                           found:=dosearch(regexprentry^.pattern,pos);
-                           if not found then
-                            break;
-                           pos:=lastpos;
-                           inc(counter);
-                         until (not found) or (counter >= regexprentry^.maxoccurs) or (pos^= #0);
+procedure DestroyRegExprEngine(var regexpr: TRegExprEngine);
+begin
+  regexpr.Free;
+end;
 
-                         if counter = 0 then
-                           begin
-                             { If there was no occurence and the minimum occurence is > 0 then
-                               problem.
-                             }
-                             if (regexprentry^.minoccurs > 0) then
-                              begin
-                                dosearch:=false;
-                                { verify alternative path as required }
-                                if assigned(regexprentry^.alternative) then
-                                  begin
-                                     dosearch:=dosearch(regexprentry^.alternative,savedpos);
-                                     exit;
-                                  end;
-                                exit;
-                              end;
-                             dosearch:=true;
-                             lastpos:=savedpos;
-                           end
-                         else
-                           { found }
-                           begin
-                              { Possible choices :
-                                 - found and (counter >= minOccurences) and (counter =< maxOccurences) = true
-                                 - found and (counter < minOccurences) or (counter > maxOccurences) = false
-                              }
-                              if (counter < regexprentry^.minoccurs) or (counter > regexprentry^.maxoccurs) then
-                                begin
-                                  dosearch:=false;
-                                  exit;
-                                end;
-                              dosearch:=true;
-                              { if all matches were found, and the current position
-                                points to zero (processed all characters) }
-                              if pos^=#0 then
-                                begin
-                                  dosearch:=true;
-                                  exit;
-                                end;
-                           end;
-                         { If we are that means the matches were valid, go to next element to match
-                         }
-                         regexprentry:=regexprentry^.next;
-                         if (counter = 0) and not assigned(regexprentry) then
-                           exit;
-                      end;
-                    ret_startline:
-                      begin
-                         checkvalue:=pos=firstpos;
-                         if ref_multiline in regexprengine.flags then
-                           begin
-                             { Supports DOS/Commodore/UNIX/IBM Mainframe line formats }
-                             { avoid reading invalid memory also }
-                             if
-                                 (
-                                   ((pos-1) >= firstpos) and ((pos-1)^=#$85)
-                                  )
-                              or
-                                 (
-                                   ((pos-1) >= firstpos) and ((pos-1)^=#10)
-                                  )
-                              or
-                                (
-                                 ((pos-1) >= firstpos) and ((pos-1)^=#13) and
-                                 ((pos)^ <> #10)
-                                )
-                             then
-                               begin
-                                 checkvalue:=true;
-                               end;
-                           end;
-                          if checkvalue then
-                            begin
-                              dosearch:=dosearch(regexprentry^.pattern,pos);
-                              regexprentry:=regexprentry^.next;
-                              if not dosearch then
-                                exit;
-                              pos:=lastpos;
-                            end
-                          else
-                            begin
-                              dosearch:=false;
-                              exit;
-                            end;
-                      end;
-                    ret_charset:
-                      begin
-                         if (pos^ in regexprentry^.chars) or
-                           ((ref_caseinsensitive in regexprengine.flags) and
-                            (upcase(pos^) in regexprentry^.chars)) then
-                           begin
-{$ifdef DEBUG}
-                              writeln('Found matching: ',pos^);
-{$endif DEBUG}
-                              regexprentry:=regexprentry^.next;
-                              inc(pos);
-                           end
-                         else
-                           begin
-{$ifdef DEBUG}
-                              writeln('Found unmatching: ',pos^);
-{$endif DEBUG}
-                              exit;
-                           end;
-                      end;
-                    ret_backtrace:
-                      begin
-{$ifdef DEBUG}
-                         writeln('Starting backtrace');
-{$endif DEBUG}
-                         if dosearch(regexprentry^.next,pos) then
-                           begin
-                              dosearch:=true;
-                              exit;
-                           end
-                         else if dosearch(regexprentry^.elsepath,pos) then
-                           begin
-                              dosearch:=true;
-                              exit;
-                           end
-                         else
-                           exit;
-                      end;
-                 end;
-                 lastpos:=pos;
-                 if regexprentry=nil then
-                   begin
-                      dosearch:=true;
-                      exit;
-                   end;
-                 if regexprentry^.typ=ret_illegalend then
-                   exit;
-                 { end of string, and we were expecting an end of string }
-                 if (pos^=#0) and (regexprentry^.typ = ret_endline) and
-                    (not assigned(regexprentry^.next)) then
-                   begin
-                     dosearch:=true;
-                     exit;
-                   end;
-                 if pos^=#0 then
-                   exit;
-              end;
-         end;
+function RegExprPos(RegExprEngine: TRegExprEngine; p: pchar; var index,
+  len: longint): boolean;
+begin
+  Len := 1;
+  Result := RegExprEngine.MatchString(p,index,len);
+  Len := Len - index;
+  Dec(Index);
+end;
+
+function RegExprReplaceAll(RegExprEngine: TRegExprEngine; const src,
+  newstr: ansistring; var dest: ansistring): sizeint;
+begin
+  result := RegExprEngine.ReplaceAllString(src,newstr,Dest);
+end;
 
-       begin
-          RegExprPos:=false;
-          index:=0;
-          len:=0;
-          firstpos:=p;
-          if regexprengine.Data=nil then
-            exit;
-          while p^<>#0 do
-            begin
-               if dosearch(regexprengine.Data,p) then
-                 begin
-                    len:=lastpos-p;
-                    RegExprPos:=true;
-                    exit;
-                 end
-               else
-                 begin
-                    inc(p);
-                    inc(index);
-                 end;
-            end;
-          index:=-1;
-       end;
+function RegExprEscapeStr(const S: string): string;
+var
+  i, len   : integer;
+  s1: string;
+begin
+  RegExprEscapeStr:= '';
+  s1:='';
+  if (S = '') then
+   exit;
 
+  len := Length (S);
 
-  function RegExprReplaceAll(RegExprEngine : TRegExprEngine;const src,newstr : ansistring;var dest : ansistring) : sizeint;
-    var
-      index,len : longint;
-      pos,lastpos : pchar;
-      first : boolean;
-      oldlength : PtrInt;
+  for i := 1 to len do
     begin
-      pos:=pchar(src);
-      lastpos:=nil;
-      first:=true;
-      Result:=0;
-      { estimate some length }
-      SetLength(dest,length(src)+((length(src) div 10)*length(newstr)));
-      while RegExprPos(RegExprEngine,pos,index,len) do
+      if (S [i] in ['(','|', '.', '*', '?', '^', '$', '-', '[', '{', '}', ']', ')', '\']) then
         begin
-          inc(pos,index);
-          if (lastpos = nil) or (pos>lastpos) then
-            begin
-              if lastpos = nil then lastpos := pchar(src);
-              { copy skipped part }
-
-              { because we cheat with SetLength a SetLength(...,0) isn't what we want
-                so we've to trick at the first SetLength call
-              }
-              if first then
-                begin
-                  SetLength(dest,(pos-lastpos));
-                  { cast dest here because it is already unified }
-                  move(lastpos^,char(dest[1]),pos-lastpos);
-                end
-              else
-                begin
-                  oldlength:=Length(dest);
-                  SetLength(dest,oldlength+(pos-lastpos));
-                  move(lastpos^,char(dest[oldlength+1]),pos-lastpos);
-                end;
-              first:=false;
-            end;
-          { found }
-          inc(Result);
-          dest:=dest+newstr;
-          inc(pos,len);
-          lastpos:=pos;
+          s1 := s1 + '\';
         end;
-      { copy remainder }
-      len:=strlen(pos);
-      if first then
-        begin
-          SetLength(dest,len);
-          move(pos^,char(dest[length(dest)+1]),len);
-        end
-      else
-        begin
-          oldlength:=Length(dest);
-          SetLength(dest,oldlength+len);
-          move(pos^,char(dest[oldlength+1]),len);
-        end
-    end;
 
-
-  function RegExprEscapeStr (const S : string) : string;
-    var
-     i, len   : integer;
-     s1: string;
-    begin
-      RegExprEscapeStr:= '';
-      s1:='';
-      if (S = '') then
-       exit;
-
-      len := Length (S);
-
-      for i := 1 to len do
-        begin
-          if (S [i] in ['(','|', '.', '*', '?', '^', '$', '-', '[', '{', '}', ']', ')', '\']) then
-            begin
-              s1 := s1 + '\';
-            end;
-
-          s1 := s1 + S[i];
-        end;
-      RegExprEscapeStr:=s1;
+      s1 := s1 + S[i];
     end;
+  RegExprEscapeStr:=s1;
+end;
 
-begin
-   cs_nonwordchars:=cs_allchars-cs_wordchars;
-   cs_nondigits:=cs_allchars-cs_digits;
-   cs_nonwhitespace:=cs_allchars-cs_whitespace;
 end.
+

+ 22 - 7
packages/regexpr/tests/testreg1.pp

@@ -1,6 +1,7 @@
 {$IFDEF FPC}
 {$MODE OBJFPC}
 {$ENDIF}
+
 program testreg1;
 uses
    regexpr;
@@ -318,6 +319,14 @@ begin
      do_error(705);
    DestroyregExprEngine(r);
 
+   initok:=GenerateRegExprEngine('Cat(AZ){2,}Q',[],r);
+   if not initok then
+     do_error(705);
+   if not(RegExprPos(r,'BCatAZAZAZAZQDABCD',index,len)) or
+     (index<>1) or (len<>12) then
+     do_error(705);
+   DestroyregExprEngine(r);
+
    initok:=GenerateRegExprEngine('CatAZ{0,}',[],r);
    if not initok then
      do_error(706);
@@ -364,6 +373,14 @@ begin
      do_error(725);
    DestroyregExprEngine(r);
 
+   initok:=GenerateRegExprEngine('Cat(AZ){1,3}',[],r);
+   if not initok then
+     do_error(725);
+   if not(RegExprPos(r,'BCatAZAZDABCD',index,len)) or
+     (index<>1) or (len<>7) then
+     do_error(725);
+   DestroyregExprEngine(r);
+
    initok:=GenerateRegExprEngine('CatAz{1,5}',[],r);
    if not initok then
      do_error(726);
@@ -650,11 +667,11 @@ begin
 
    { test real backtracking }
 
-(*   r:=GenerateRegExprEngine('nofoo|foo',[]);
+   r:=GenerateRegExprEngine('nofoo|foo',[]);
    if not(RegExprPos(r,'1234   foo1234XXXX',index,len)) or
      (index<>7) or (len<>3) then
      do_error(1300);
-   DestroyregExprEngine(r);*)
+   DestroyregExprEngine(r);
 
   GenerateRegExprEngine('abc\(123\)$',[],r);
   if not (RegExprPos(r,'1234 abc(123)', index, len)) or
@@ -762,7 +779,6 @@ begin
      do_error(1505);
    DestroyregExprEngine(r);
 
-{
   initok:=GenerateRegExprEngine('\.localhost$',[],r);
   if not initok then
      do_error(1506);
@@ -796,22 +812,21 @@ begin
   if not initok then
      do_error(1500);
    if not(RegExprPos(r,'1234   nofoo1234XXXX',index,len)) or
-     (index<>8) or (len<>9) then
+     (index<>7) or (len<>9) then
      do_error(1500);
    DestroyregExprEngine(r);
 
    r:=GenerateRegExprEngine('(nofoo|foo|anotherfoo)1234',[]);
    if not(RegExprPos(r,'1234   nofoo1234XXXX',index,len)) or
-     (index<>8) or (len<>9) then
+     (index<>7) or (len<>9) then
      do_error(1009);
    DestroyregExprEngine(r);
 
    r:=GenerateRegExprEngine('nofoo1234|foo1234',[]);
-   if (r.data=nil) or not(RegExprPos(r,'1234   foo1234XXXX',index,len)) or
+   if {(r.data=nil) or} not(RegExprPos(r,'1234   foo1234XXXX',index,len)) or
      (index<>7) or (len<>7) then
      do_error(1010);
    DestroyregExprEngine(r);
-   }
 
   { *************************************************************************
                               replacement tests