浏览代码

Merged revisions 10611-10613,10617,10622-10624,10639-10641,10645-10646,10649,10658,10660-10662,10664-10667,10673-10675,10678-10680,10684-10687,10689,10695,10698-10701,10703,10709,10713-10716,10718-10719,10723,10735-10739,10749,10751,10755-10757,10759-10761,10769,10772-10778,10782-10785,10787-10790,10794-10795,10798,10830-10832,10837,10848,10858,10860,10862-10864,10875,10882,10891,10907,10909,10915-10916,10922-10923,10928,10930,10933,10935,10939,10942,10948,10952-10956,10960,10964-10965,10972,10974-10975,10977,10980,10982-10983,10985,10987,10989-10990 via svnmerge from
svn+ssh://[email protected]/FPC/svn/fpc/trunk

........
r10611 | joost | 2008-04-06 23:23:13 +0200 (Sun, 06 Apr 2008) | 3 lines

* Added Regex unit with an NFA regexpression parser, based on code from Julian Bucknall which is used with his permission
* Added RegExprCompat unit which contains an interface to the Regex unit which is compatible with the existing RegExpr unit
* Added some more tests and added a define UseRegexCompat to make it possible to choose what regex-implementation to test. Test which fail on the existing regex-parser are placed between ifdefs
........
r10987 | joost | 2008-05-17 21:24:47 +0200 (Sat, 17 May 2008) | 1 line

* Moved regexpr unit to old
........
r10989 | joost | 2008-05-17 21:32:47 +0200 (Sat, 17 May 2008) | 1 line

* Renamed the RegExprCompat unit to RegExpr. This wrapper around the Regex unit now replaces the old RegExpr unit.
........
r10990 | joost | 2008-05-17 21:43:26 +0200 (Sat, 17 May 2008) | 1 line

* Removed define UseRegexCompat from test
........

git-svn-id: branches/fixes_2_2@11108 -

joost 17 年之前
父节点
当前提交
3a6ebb5bdc

+ 2 - 0
.gitattributes

@@ -3891,6 +3891,8 @@ packages/pxlib/src/pxlib.pp svneol=native#text/plain
 packages/regexpr/Makefile svneol=native#text/plain
 packages/regexpr/Makefile svneol=native#text/plain
 packages/regexpr/Makefile.fpc svneol=native#text/plain
 packages/regexpr/Makefile.fpc svneol=native#text/plain
 packages/regexpr/fpmake.pp svneol=native#text/plain
 packages/regexpr/fpmake.pp svneol=native#text/plain
+packages/regexpr/src/old/regexpr.pp svneol=native#text/plain
+packages/regexpr/src/regex.pp svneol=native#text/plain
 packages/regexpr/src/regexpr.pp svneol=native#text/plain
 packages/regexpr/src/regexpr.pp svneol=native#text/plain
 packages/regexpr/tests/testreg1.pp svneol=native#text/plain
 packages/regexpr/tests/testreg1.pp svneol=native#text/plain
 packages/rexx/Makefile svneol=native#text/plain
 packages/rexx/Makefile svneol=native#text/plain

+ 170 - 58
packages/regexpr/Makefile

@@ -1,8 +1,8 @@
 #
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2008/01/26]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2008/05/26]
 #
 #
 default: all
 default: all
-MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-wince arm-gba arm-nds arm-embedded arm-symbian powerpc64-linux powerpc64-darwin powerpc64-embedded
+MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian powerpc64-linux powerpc64-darwin powerpc64-embedded avr-embedded armeb-linux armeb-embedded
 BSDs = freebsd netbsd openbsd darwin
 BSDs = freebsd netbsd openbsd darwin
 UNIXs = linux $(BSDs) solaris qnx
 UNIXs = linux $(BSDs) solaris qnx
 LIMIT83fs = go32v2 os2 emx watcom
 LIMIT83fs = go32v2 os2 emx watcom
@@ -167,6 +167,17 @@ OS_TARGET:=$(word 5,$(FPC_COMPILERINFO))
 endif
 endif
 FULL_TARGET=$(CPU_TARGET)-$(OS_TARGET)
 FULL_TARGET=$(CPU_TARGET)-$(OS_TARGET)
 FULL_SOURCE=$(CPU_SOURCE)-$(OS_SOURCE)
 FULL_SOURCE=$(CPU_SOURCE)-$(OS_SOURCE)
+ifeq ($(CPU_TARGET),armeb)
+ARCH=arm
+override FPCOPT+=-Cb
+else
+ifeq ($(CPU_TARGET),armel)
+ARCH=arm
+override FPCOPT+=-CaEABI
+else
+ARCH=$(CPU_TARGET)
+endif
+endif
 ifneq ($(findstring $(OS_SOURCE),$(LIMIT83fs)),)
 ifneq ($(findstring $(OS_SOURCE),$(LIMIT83fs)),)
 TARGETSUFFIX=$(OS_TARGET)
 TARGETSUFFIX=$(OS_TARGET)
 SOURCESUFFIX=$(OS_SOURCE)
 SOURCESUFFIX=$(OS_SOURCE)
@@ -174,9 +185,15 @@ else
 TARGETSUFFIX=$(FULL_TARGET)
 TARGETSUFFIX=$(FULL_TARGET)
 SOURCESUFFIX=$(FULL_SOURCE)
 SOURCESUFFIX=$(FULL_SOURCE)
 endif
 endif
+ifeq  ($(OS_TARGET),darwin)
+ifneq ($(OS_SOURCE),darwin)
+CROSSCOMPILE=1
+endif
+else
 ifneq ($(FULL_TARGET),$(FULL_SOURCE))
 ifneq ($(FULL_TARGET),$(FULL_SOURCE))
 CROSSCOMPILE=1
 CROSSCOMPILE=1
 endif
 endif
+endif
 ifeq ($(findstring makefile,$(MAKECMDGOALS)),)
 ifeq ($(findstring makefile,$(MAKECMDGOALS)),)
 ifeq ($(findstring $(FULL_TARGET),$(MAKEFILETARGETS)),)
 ifeq ($(findstring $(FULL_TARGET),$(MAKEFILETARGETS)),)
 $(error The Makefile doesn't support target $(FULL_TARGET), please run fpcmake first)
 $(error The Makefile doesn't support target $(FULL_TARGET), please run fpcmake first)
@@ -188,7 +205,7 @@ endif
 ifeq ($(OS_TARGET),linux)
 ifeq ($(OS_TARGET),linux)
 linuxHier=1
 linuxHier=1
 endif
 endif
-export OS_TARGET OS_SOURCE CPU_TARGET CPU_SOURCE FULL_TARGET FULL_SOURCE TARGETSUFFIX SOURCESUFFIX CROSSCOMPILE
+export OS_TARGET OS_SOURCE ARCH CPU_TARGET CPU_SOURCE FULL_TARGET FULL_SOURCE TARGETSUFFIX SOURCESUFFIX CROSSCOMPILE
 ifdef FPCDIR
 ifdef FPCDIR
 override FPCDIR:=$(subst \,/,$(FPCDIR))
 override FPCDIR:=$(subst \,/,$(FPCDIR))
 ifeq ($(wildcard $(addprefix $(FPCDIR)/,rtl units)),)
 ifeq ($(wildcard $(addprefix $(FPCDIR)/,rtl units)),)
@@ -243,163 +260,178 @@ PACKAGESDIR:=$(wildcard $(FPCDIR) $(FPCDIR)/packages $(FPCDIR)/packages/base $(F
 override PACKAGE_NAME=regexpr
 override PACKAGE_NAME=regexpr
 override PACKAGE_VERSION=2.0.0
 override PACKAGE_VERSION=2.0.0
 ifeq ($(FULL_TARGET),i386-linux)
 ifeq ($(FULL_TARGET),i386-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),i386-go32v2)
 ifeq ($(FULL_TARGET),i386-go32v2)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),i386-win32)
 ifeq ($(FULL_TARGET),i386-win32)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),i386-os2)
 ifeq ($(FULL_TARGET),i386-os2)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),i386-freebsd)
 ifeq ($(FULL_TARGET),i386-freebsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),i386-beos)
 ifeq ($(FULL_TARGET),i386-beos)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
+endif
+ifeq ($(FULL_TARGET),i386-haiku)
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),i386-netbsd)
 ifeq ($(FULL_TARGET),i386-netbsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),i386-solaris)
 ifeq ($(FULL_TARGET),i386-solaris)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),i386-qnx)
 ifeq ($(FULL_TARGET),i386-qnx)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),i386-netware)
 ifeq ($(FULL_TARGET),i386-netware)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),i386-openbsd)
 ifeq ($(FULL_TARGET),i386-openbsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),i386-wdosx)
 ifeq ($(FULL_TARGET),i386-wdosx)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),i386-darwin)
 ifeq ($(FULL_TARGET),i386-darwin)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),i386-emx)
 ifeq ($(FULL_TARGET),i386-emx)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),i386-watcom)
 ifeq ($(FULL_TARGET),i386-watcom)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),i386-netwlibc)
 ifeq ($(FULL_TARGET),i386-netwlibc)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),i386-wince)
 ifeq ($(FULL_TARGET),i386-wince)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),i386-embedded)
 ifeq ($(FULL_TARGET),i386-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),i386-symbian)
 ifeq ($(FULL_TARGET),i386-symbian)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),m68k-linux)
 ifeq ($(FULL_TARGET),m68k-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),m68k-freebsd)
 ifeq ($(FULL_TARGET),m68k-freebsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),m68k-netbsd)
 ifeq ($(FULL_TARGET),m68k-netbsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),m68k-amiga)
 ifeq ($(FULL_TARGET),m68k-amiga)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),m68k-atari)
 ifeq ($(FULL_TARGET),m68k-atari)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),m68k-openbsd)
 ifeq ($(FULL_TARGET),m68k-openbsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),m68k-palmos)
 ifeq ($(FULL_TARGET),m68k-palmos)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),m68k-embedded)
 ifeq ($(FULL_TARGET),m68k-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),powerpc-linux)
 ifeq ($(FULL_TARGET),powerpc-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),powerpc-netbsd)
 ifeq ($(FULL_TARGET),powerpc-netbsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),powerpc-amiga)
 ifeq ($(FULL_TARGET),powerpc-amiga)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),powerpc-macos)
 ifeq ($(FULL_TARGET),powerpc-macos)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),powerpc-darwin)
 ifeq ($(FULL_TARGET),powerpc-darwin)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),powerpc-morphos)
 ifeq ($(FULL_TARGET),powerpc-morphos)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),powerpc-embedded)
 ifeq ($(FULL_TARGET),powerpc-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),sparc-linux)
 ifeq ($(FULL_TARGET),sparc-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),sparc-netbsd)
 ifeq ($(FULL_TARGET),sparc-netbsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),sparc-solaris)
 ifeq ($(FULL_TARGET),sparc-solaris)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),sparc-embedded)
 ifeq ($(FULL_TARGET),sparc-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),x86_64-linux)
 ifeq ($(FULL_TARGET),x86_64-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),x86_64-freebsd)
 ifeq ($(FULL_TARGET),x86_64-freebsd)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),x86_64-darwin)
 ifeq ($(FULL_TARGET),x86_64-darwin)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),x86_64-win64)
 ifeq ($(FULL_TARGET),x86_64-win64)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 ifeq ($(FULL_TARGET),x86_64-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),arm-linux)
 ifeq ($(FULL_TARGET),arm-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),arm-palmos)
 ifeq ($(FULL_TARGET),arm-palmos)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
+endif
+ifeq ($(FULL_TARGET),arm-darwin)
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),arm-wince)
 ifeq ($(FULL_TARGET),arm-wince)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),arm-gba)
 ifeq ($(FULL_TARGET),arm-gba)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),arm-nds)
 ifeq ($(FULL_TARGET),arm-nds)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),arm-embedded)
 ifeq ($(FULL_TARGET),arm-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),arm-symbian)
 ifeq ($(FULL_TARGET),arm-symbian)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),powerpc64-linux)
 ifeq ($(FULL_TARGET),powerpc64-linux)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),powerpc64-darwin)
 ifeq ($(FULL_TARGET),powerpc64-darwin)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),powerpc64-embedded)
 ifeq ($(FULL_TARGET),powerpc64-embedded)
-override TARGET_UNITS+=regexpr
+override TARGET_UNITS+=regex regexpr
+endif
+ifeq ($(FULL_TARGET),avr-embedded)
+override TARGET_UNITS+=regex regexpr
+endif
+ifeq ($(FULL_TARGET),armeb-linux)
+override TARGET_UNITS+=regex regexpr
+endif
+ifeq ($(FULL_TARGET),armeb-embedded)
+override TARGET_UNITS+=regex regexpr
 endif
 endif
 ifeq ($(FULL_TARGET),i386-linux)
 ifeq ($(FULL_TARGET),i386-linux)
 override TARGET_EXAMPLES+=testreg1
 override TARGET_EXAMPLES+=testreg1
@@ -419,6 +451,9 @@ endif
 ifeq ($(FULL_TARGET),i386-beos)
 ifeq ($(FULL_TARGET),i386-beos)
 override TARGET_EXAMPLES+=testreg1
 override TARGET_EXAMPLES+=testreg1
 endif
 endif
+ifeq ($(FULL_TARGET),i386-haiku)
+override TARGET_EXAMPLES+=testreg1
+endif
 ifeq ($(FULL_TARGET),i386-netbsd)
 ifeq ($(FULL_TARGET),i386-netbsd)
 override TARGET_EXAMPLES+=testreg1
 override TARGET_EXAMPLES+=testreg1
 endif
 endif
@@ -536,6 +571,9 @@ endif
 ifeq ($(FULL_TARGET),arm-palmos)
 ifeq ($(FULL_TARGET),arm-palmos)
 override TARGET_EXAMPLES+=testreg1
 override TARGET_EXAMPLES+=testreg1
 endif
 endif
+ifeq ($(FULL_TARGET),arm-darwin)
+override TARGET_EXAMPLES+=testreg1
+endif
 ifeq ($(FULL_TARGET),arm-wince)
 ifeq ($(FULL_TARGET),arm-wince)
 override TARGET_EXAMPLES+=testreg1
 override TARGET_EXAMPLES+=testreg1
 endif
 endif
@@ -560,6 +598,15 @@ endif
 ifeq ($(FULL_TARGET),powerpc64-embedded)
 ifeq ($(FULL_TARGET),powerpc64-embedded)
 override TARGET_EXAMPLES+=testreg1
 override TARGET_EXAMPLES+=testreg1
 endif
 endif
+ifeq ($(FULL_TARGET),avr-embedded)
+override TARGET_EXAMPLES+=testreg1
+endif
+ifeq ($(FULL_TARGET),armeb-linux)
+override TARGET_EXAMPLES+=testreg1
+endif
+ifeq ($(FULL_TARGET),armeb-embedded)
+override TARGET_EXAMPLES+=testreg1
+endif
 override INSTALL_FPCPACKAGE=y
 override INSTALL_FPCPACKAGE=y
 ifeq ($(FULL_TARGET),i386-linux)
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_OPTIONS+=-S2
 override COMPILER_OPTIONS+=-S2
@@ -579,6 +626,9 @@ endif
 ifeq ($(FULL_TARGET),i386-beos)
 ifeq ($(FULL_TARGET),i386-beos)
 override COMPILER_OPTIONS+=-S2
 override COMPILER_OPTIONS+=-S2
 endif
 endif
+ifeq ($(FULL_TARGET),i386-haiku)
+override COMPILER_OPTIONS+=-S2
+endif
 ifeq ($(FULL_TARGET),i386-netbsd)
 ifeq ($(FULL_TARGET),i386-netbsd)
 override COMPILER_OPTIONS+=-S2
 override COMPILER_OPTIONS+=-S2
 endif
 endif
@@ -696,6 +746,9 @@ endif
 ifeq ($(FULL_TARGET),arm-palmos)
 ifeq ($(FULL_TARGET),arm-palmos)
 override COMPILER_OPTIONS+=-S2
 override COMPILER_OPTIONS+=-S2
 endif
 endif
+ifeq ($(FULL_TARGET),arm-darwin)
+override COMPILER_OPTIONS+=-S2
+endif
 ifeq ($(FULL_TARGET),arm-wince)
 ifeq ($(FULL_TARGET),arm-wince)
 override COMPILER_OPTIONS+=-S2
 override COMPILER_OPTIONS+=-S2
 endif
 endif
@@ -720,6 +773,15 @@ endif
 ifeq ($(FULL_TARGET),powerpc64-embedded)
 ifeq ($(FULL_TARGET),powerpc64-embedded)
 override COMPILER_OPTIONS+=-S2
 override COMPILER_OPTIONS+=-S2
 endif
 endif
+ifeq ($(FULL_TARGET),avr-embedded)
+override COMPILER_OPTIONS+=-S2
+endif
+ifeq ($(FULL_TARGET),armeb-linux)
+override COMPILER_OPTIONS+=-S2
+endif
+ifeq ($(FULL_TARGET),armeb-embedded)
+override COMPILER_OPTIONS+=-S2
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_INCLUDEDIR+=src
 override COMPILER_INCLUDEDIR+=src
 endif
 endif
@@ -738,6 +800,9 @@ endif
 ifeq ($(FULL_TARGET),i386-beos)
 ifeq ($(FULL_TARGET),i386-beos)
 override COMPILER_INCLUDEDIR+=src
 override COMPILER_INCLUDEDIR+=src
 endif
 endif
+ifeq ($(FULL_TARGET),i386-haiku)
+override COMPILER_INCLUDEDIR+=src
+endif
 ifeq ($(FULL_TARGET),i386-netbsd)
 ifeq ($(FULL_TARGET),i386-netbsd)
 override COMPILER_INCLUDEDIR+=src
 override COMPILER_INCLUDEDIR+=src
 endif
 endif
@@ -855,6 +920,9 @@ endif
 ifeq ($(FULL_TARGET),arm-palmos)
 ifeq ($(FULL_TARGET),arm-palmos)
 override COMPILER_INCLUDEDIR+=src
 override COMPILER_INCLUDEDIR+=src
 endif
 endif
+ifeq ($(FULL_TARGET),arm-darwin)
+override COMPILER_INCLUDEDIR+=src
+endif
 ifeq ($(FULL_TARGET),arm-wince)
 ifeq ($(FULL_TARGET),arm-wince)
 override COMPILER_INCLUDEDIR+=src
 override COMPILER_INCLUDEDIR+=src
 endif
 endif
@@ -879,6 +947,15 @@ endif
 ifeq ($(FULL_TARGET),powerpc64-embedded)
 ifeq ($(FULL_TARGET),powerpc64-embedded)
 override COMPILER_INCLUDEDIR+=src
 override COMPILER_INCLUDEDIR+=src
 endif
 endif
+ifeq ($(FULL_TARGET),avr-embedded)
+override COMPILER_INCLUDEDIR+=src
+endif
+ifeq ($(FULL_TARGET),armeb-linux)
+override COMPILER_INCLUDEDIR+=src
+endif
+ifeq ($(FULL_TARGET),armeb-embedded)
+override COMPILER_INCLUDEDIR+=src
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_SOURCEDIR+=src tests examples
 override COMPILER_SOURCEDIR+=src tests examples
 endif
 endif
@@ -897,6 +974,9 @@ endif
 ifeq ($(FULL_TARGET),i386-beos)
 ifeq ($(FULL_TARGET),i386-beos)
 override COMPILER_SOURCEDIR+=src tests examples
 override COMPILER_SOURCEDIR+=src tests examples
 endif
 endif
+ifeq ($(FULL_TARGET),i386-haiku)
+override COMPILER_SOURCEDIR+=src tests examples
+endif
 ifeq ($(FULL_TARGET),i386-netbsd)
 ifeq ($(FULL_TARGET),i386-netbsd)
 override COMPILER_SOURCEDIR+=src tests examples
 override COMPILER_SOURCEDIR+=src tests examples
 endif
 endif
@@ -1014,6 +1094,9 @@ endif
 ifeq ($(FULL_TARGET),arm-palmos)
 ifeq ($(FULL_TARGET),arm-palmos)
 override COMPILER_SOURCEDIR+=src tests examples
 override COMPILER_SOURCEDIR+=src tests examples
 endif
 endif
+ifeq ($(FULL_TARGET),arm-darwin)
+override COMPILER_SOURCEDIR+=src tests examples
+endif
 ifeq ($(FULL_TARGET),arm-wince)
 ifeq ($(FULL_TARGET),arm-wince)
 override COMPILER_SOURCEDIR+=src tests examples
 override COMPILER_SOURCEDIR+=src tests examples
 endif
 endif
@@ -1038,6 +1121,15 @@ endif
 ifeq ($(FULL_TARGET),powerpc64-embedded)
 ifeq ($(FULL_TARGET),powerpc64-embedded)
 override COMPILER_SOURCEDIR+=src tests examples
 override COMPILER_SOURCEDIR+=src tests examples
 endif
 endif
+ifeq ($(FULL_TARGET),avr-embedded)
+override COMPILER_SOURCEDIR+=src tests examples
+endif
+ifeq ($(FULL_TARGET),armeb-linux)
+override COMPILER_SOURCEDIR+=src tests examples
+endif
+ifeq ($(FULL_TARGET),armeb-embedded)
+override COMPILER_SOURCEDIR+=src tests examples
+endif
 ifdef REQUIRE_UNITSDIR
 ifdef REQUIRE_UNITSDIR
 override UNITSDIR+=$(REQUIRE_UNITSDIR)
 override UNITSDIR+=$(REQUIRE_UNITSDIR)
 endif
 endif
@@ -1327,6 +1419,11 @@ BATCHEXT=.sh
 EXEEXT=
 EXEEXT=
 SHORTSUFFIX=be
 SHORTSUFFIX=be
 endif
 endif
+ifeq ($(OS_TARGET),haiku)
+BATCHEXT=.sh
+EXEEXT=
+SHORTSUFFIX=hai
+endif
 ifeq ($(OS_TARGET),solaris)
 ifeq ($(OS_TARGET),solaris)
 BATCHEXT=.sh
 BATCHEXT=.sh
 EXEEXT=
 EXEEXT=
@@ -1804,6 +1901,9 @@ endif
 ifeq ($(FULL_TARGET),i386-beos)
 ifeq ($(FULL_TARGET),i386-beos)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_RTL=1
 endif
 endif
+ifeq ($(FULL_TARGET),i386-haiku)
+REQUIRE_PACKAGES_RTL=1
+endif
 ifeq ($(FULL_TARGET),i386-netbsd)
 ifeq ($(FULL_TARGET),i386-netbsd)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_RTL=1
 endif
 endif
@@ -1921,6 +2021,9 @@ endif
 ifeq ($(FULL_TARGET),arm-palmos)
 ifeq ($(FULL_TARGET),arm-palmos)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_RTL=1
 endif
 endif
+ifeq ($(FULL_TARGET),arm-darwin)
+REQUIRE_PACKAGES_RTL=1
+endif
 ifeq ($(FULL_TARGET),arm-wince)
 ifeq ($(FULL_TARGET),arm-wince)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_RTL=1
 endif
 endif
@@ -1945,6 +2048,15 @@ endif
 ifeq ($(FULL_TARGET),powerpc64-embedded)
 ifeq ($(FULL_TARGET),powerpc64-embedded)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_RTL=1
 endif
 endif
+ifeq ($(FULL_TARGET),avr-embedded)
+REQUIRE_PACKAGES_RTL=1
+endif
+ifeq ($(FULL_TARGET),armeb-linux)
+REQUIRE_PACKAGES_RTL=1
+endif
+ifeq ($(FULL_TARGET),armeb-embedded)
+REQUIRE_PACKAGES_RTL=1
+endif
 ifdef REQUIRE_PACKAGES_RTL
 ifdef REQUIRE_PACKAGES_RTL
 PACKAGEDIR_RTL:=$(firstword $(subst /Makefile.fpc,,$(strip $(wildcard $(addsuffix /rtl/Makefile.fpc,$(PACKAGESDIR))))))
 PACKAGEDIR_RTL:=$(firstword $(subst /Makefile.fpc,,$(strip $(wildcard $(addsuffix /rtl/Makefile.fpc,$(PACKAGESDIR))))))
 ifneq ($(PACKAGEDIR_RTL),)
 ifneq ($(PACKAGEDIR_RTL),)
@@ -1972,13 +2084,13 @@ override COMPILER_UNITDIR+=$(UNITDIR_RTL)
 endif
 endif
 endif
 endif
 ifndef NOCPUDEF
 ifndef NOCPUDEF
-override FPCOPTDEF=$(CPU_TARGET)
+override FPCOPTDEF=$(ARCH)
 endif
 endif
 ifneq ($(OS_TARGET),$(OS_SOURCE))
 ifneq ($(OS_TARGET),$(OS_SOURCE))
 override FPCOPT+=-T$(OS_TARGET)
 override FPCOPT+=-T$(OS_TARGET)
 endif
 endif
 ifneq ($(CPU_TARGET),$(CPU_SOURCE))
 ifneq ($(CPU_TARGET),$(CPU_SOURCE))
-override FPCOPT+=-P$(CPU_TARGET)
+override FPCOPT+=-P$(ARCH)
 endif
 endif
 ifeq ($(OS_SOURCE),openbsd)
 ifeq ($(OS_SOURCE),openbsd)
 override FPCOPT+=-FD$(NEW_BINUTILS_PATH)
 override FPCOPT+=-FD$(NEW_BINUTILS_PATH)

+ 1 - 1
packages/regexpr/Makefile.fpc

@@ -7,7 +7,7 @@ name=regexpr
 version=2.0.0
 version=2.0.0
 
 
 [target]
 [target]
-units=regexpr
+units=regex regexpr
 examples=testreg1
 examples=testreg1
 
 
 [compiler]
 [compiler]

+ 1207 - 0
packages/regexpr/src/old/regexpr.pp

@@ -0,0 +1,1207 @@
+{ $DEFINE DEBUG}
+
+{
+    This unit implements basic regular expression support
+
+    This file is part of the Free Pascal run time library.
+    Copyright (c) 2000-2006 by Florian Klaempfland Carl Eric Codere
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+{.$ define DEBUG}
+
+(*
+  - newline handling (uses all known formats of ASCII, #10,#13,#13#10 and #$85
+
+  TODO:
+     - correct backtracking, for example in (...)*
+     - full | support (currently requires to put all items with | operator
+        between parenthesis (in a group) to take care over order priority.
+          Therefore the following would work: (foo)|(nofoo) but not
+          foo|nofoo
+     - getting substrings and using substrings with \1 etc.
+     - do we treat several multiline characters in a row as a single
+        newline character for $ and ^?
+*)
+
+{$IFDEF FPC}
+{$mode objfpc}
+{$ENDIF}
+
+{** @abstract(Regular expression unit)
+
+    This unit implements a basic regular expression parser that mostly conforms
+    to the POSIX extended-regular expression syntax. It also supports standard
+    escape characters for patterns (as defined in PERL).
+}
+unit regexpr;
+
+  interface
+
+    { the following declarions are only in the interface because }
+    { some procedures return pregexprentry but programs which   }
+    { use this unit shouldn't access this data structures        }
+    type
+       tcharset = set of char;
+       tregexprentrytype = (ret_charset,ret_or,
+          ret_illegalend,ret_backtrace,ret_startline,
+          ret_endline,ret_pattern);
+
+       pregexprentry = ^tregexprentry;
+       tregexprentry = record
+          next,nextdestroy : pregexprentry;
+          case typ : tregexprentrytype of
+             ret_charset : (chars : tcharset; elsepath : pregexprentry);
+             {** This is a complete pattern path ()+ , ()* or ()?, n,m }
+             ret_pattern: (pattern: pregexprentry; minoccurs: integer; maxoccurs: integer; alternative : pregexprentry);
+       end;
+
+       tregexprflag = (
+         ref_singleline,
+         {** This indicates that a start of line is either the
+             start of the pattern or a linebreak. }
+         ref_multiline,
+         {** The match will be done in a case-insensitive way
+              according to US-ASCII character set. }
+         ref_caseinsensitive);
+       tregexprflags = set of tregexprflag;
+
+       TRegExprEngine = record
+          Data : pregexprentry;
+          DestroyList : pregexprentry;
+          Flags : TRegExprFlags;
+       end;
+
+     const
+        cs_allchars : tcharset = [#0..#255];
+        cs_wordchars : tcharset = ['A'..'Z','a'..'z','_','0'..'9'];
+        cs_newline : tcharset = [#10];
+        cs_digits : tcharset = ['0'..'9'];
+        cs_whitespace : tcharset = [' ',#9];
+
+     var
+        { these are initilized in the init section of the unit }
+        cs_nonwordchars : tcharset;
+        cs_nondigits : tcharset;
+        cs_nonwhitespace : tcharset;
+
+     { the following procedures can be used by units basing }
+     { on the regexpr unit                                  }
+
+     {** From a regular expression, compile an encoded version of the regular expression.
+
+         @param(regexpr Regular expression to compile)
+         @param(flags Flags relating to the type of parsing that will occur)
+         @param(RegExprEngine The actual encoded version of the regular expression)
+         @returns(true if success, otherwise syntax error in compiling regular expression)
+     }
+     function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags;var RegExprEngine: TRegExprEngine): boolean;
+
+{$IFDEF FPC}
+    {** Backward compatibility routine }
+     function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags): TREGExprEngine;
+{$ENDIF}
+
+     {** Frees all up resources used for the encoded regular expression }
+     procedure DestroyRegExprEngine(var regexpr : TRegExprEngine);
+
+     {** @abstract(Matches a regular expression)
+
+        @param(RegExprEngine The actual compiled regular expression to match against)
+        @param(p The text to search for for a match)
+        @param(index zero-based index to the start of the match -1 if no match in p)
+        @param(len length of the match)
+        @returns(true if there was a match, otherwise false)
+     }
+     function RegExprPos(RegExprEngine : TRegExprEngine;p : pchar;var index,len : longint) : boolean;
+
+     function RegExprReplaceAll(RegExprEngine : TRegExprEngine;const src,newstr : ansistring;var dest : ansistring) : sizeint;
+
+     { This function Escape known regex chars and place the result on Return. If something went wrong the
+       function will return false. }
+     function RegExprEscapeStr (const S : string) : string;
+
+  implementation
+
+{$ifdef DEBUG}
+     procedure writecharset(c : tcharset);
+
+       var
+          b : byte;
+
+       begin
+          for b:=20 to 255 do
+            if chr(b) in c then
+              write(chr(b));
+          writeln;
+       end;
+
+
+    const
+
+      typ2str : array[tregexprentrytype] of string =
+      (
+        'ret_charset',
+        'ret_or',
+        'ret_illegalend',
+        'ret_backtrace',
+        'ret_startline',
+        'ret_endline',
+        'ret_pattern'
+      );
+
+
+     { Dumps all the next elements of a tree }
+     procedure dumptree(space: string; regentry: pregexprentry);
+      begin
+        while assigned(regentry) do
+          begin
+            WriteLn(space+'------- Node Type ',typ2str[regentry^.typ]);
+            if (regentry^.typ = ret_charset) then
+              WriteCharSet(regentry^.chars);
+            { dump embedded pattern information }
+            if regentry^.typ = ret_pattern then
+               begin
+                 dumptree(space+#9,regentry^.pattern);
+                 WriteLn(space+#9,' --- Alternative nodes ');
+                 if assigned(regentry^.alternative) then
+                   dumptree(space+#9#9,regentry^.alternative);
+               end;
+            if regentry^.typ = ret_startline then
+               dumptree(space+#9,regentry^.pattern);
+            regentry:=regentry^.next;
+          end;
+      end;
+{$endif DEBUG}
+
+
+     {** Determines the length of a pattern, including sub-patterns.
+
+         It goes through the nodes and returns the pattern length
+         between the two, using minOccurs as required.
+
+         Called recursively.
+     }
+     function patlength(hp1: pregexprentry): integer;
+       var
+        count: integer;
+        hp: pregexprentry;
+       begin
+        count:=0;
+        if hp1^.typ=ret_pattern then
+            hp:=hp1^.pattern
+        else
+            hp:=hp1;
+        { now go through all chars and get the length
+          does not currently take care of embedded patterns
+        }
+        while assigned(hp) do
+          begin
+            if hp^.typ = ret_pattern then
+              begin
+                inc(count,patlength(hp));
+              end
+            else
+            if hp^.typ = ret_charset then
+               inc(count);
+            hp:=hp^.next;
+          end;
+        if hp1^.typ=ret_pattern then
+          begin
+            count:=hp1^.minOccurs*count;
+          end;
+         patlength:=count;
+       end;
+
+     function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags; var RegExprEngine:TRegExprEngine) : boolean;
+
+       var
+          first : pregexprentry;
+
+       procedure doregister(p : pregexprentry);
+
+         begin
+           p^.nextdestroy:=first;
+           first:=p;
+         end;
+
+       var
+          currentpos : pchar;
+          error : boolean;
+
+       procedure readchars(var chars: tcharset);
+
+         var
+            c1 : char;
+
+         begin
+            chars:=[];
+            case currentpos^ of
+               #0:
+                  exit;
+               '.':
+                  begin
+                     inc(currentpos);
+                     chars:=cs_allchars-cs_newline;
+                  end;
+               '\':
+                  begin
+                     inc(currentpos);
+                     case currentpos^ of
+                        #0:
+                          begin
+                             error:=true;
+                             exit;
+                          end;
+                        't':
+                           begin
+                              inc(currentpos);
+                              chars:=[#9];
+                           end;
+                        'n':
+                           begin
+                              inc(currentpos);
+                              chars:=[#10];
+                           end;
+                        'r':
+                           begin
+                              inc(currentpos);
+                              chars:=[#13];
+                           end;
+                        'd':
+                          begin
+                             inc(currentpos);
+                             chars:=cs_digits;
+                          end;
+                        'D':
+                          begin
+                             inc(currentpos);
+                             chars:=cs_nondigits;
+                          end;
+                        's':
+                          begin
+                             inc(currentpos);
+                             chars:=cs_whitespace;
+                          end;
+                        'S':
+                          begin
+                             inc(currentpos);
+                             chars:=cs_nonwhitespace;
+                          end;
+                        'w':
+                           begin
+                              inc(currentpos);
+                              chars:=cs_wordchars;
+                           end;
+                        'W':
+                           begin
+                              inc(currentpos);
+                              chars:=cs_nonwordchars;
+                           end;
+                        'f' :
+                            begin
+                              inc(currentpos);
+                              chars:= [#12];
+                            end;
+                        'a' :
+                            begin
+                              inc(currentpos);
+                              chars:= [#7];
+                            end;
+                         else
+                           begin { Some basic escaping...}
+                              chars := [currentpos^];
+                              inc (currentpos);
+                              {error:=true;
+                              exit;}
+                           end;
+                     end;
+                  end;
+               else
+                 begin
+                    if ref_caseinsensitive in flags then
+                       c1:=upcase(currentpos^)
+                    else
+                       c1:=currentpos^;
+
+                    inc(currentpos);
+                    if currentpos^='-' then
+                      begin
+                         inc(currentpos);
+                         if currentpos^=#0 then
+                           begin
+                              error:=true;
+                              exit;
+                           end;
+                         if ref_caseinsensitive in flags then
+                           chars:=[c1..upcase(currentpos^)]
+                         else
+                           chars:=[c1..currentpos^];
+                         inc(currentpos);
+                      end
+                    else
+                      chars:=[c1];
+                 end;
+            end;
+         end;
+
+
+       procedure readcharset(var charset: tcharset);
+
+         var
+           chars: tcharset;
+         begin
+            charset:=[];
+            case currentpos^ of
+               #0:
+                  exit;
+               '[':
+                  begin
+                     inc(currentpos);
+                     while currentpos^<>']' do
+                       begin
+                          if currentpos^='^' then
+                            begin
+                               inc(currentpos);
+                               readchars(chars);
+                               charset:=charset+(cs_allchars-chars);
+                            end
+                          else
+                            begin
+                              readchars(chars);
+                              charset:=charset+chars;
+                            end;
+                          if error or (currentpos^=#0) then
+                            begin
+                               error:=true;
+                               exit;
+                            end;
+                       end;
+                     inc(currentpos);
+                  end;
+               '^':
+                  begin
+                     inc(currentpos);
+                     readchars(chars);
+                     charset:=cs_allchars-chars;
+                  end;
+               else
+                  begin
+                    readchars(chars);
+                    charset:=chars;
+                  end;
+            end;
+         end;
+
+
+       (* takes care of parsing the {n}, {n,} and {n,m} regular expression
+          elements. In case of error, sets error to true and returns false,
+          otherwise returns true and set minoccurs and maxoccurs accordingly
+          (-1 if not present). *)
+       function parseoccurences(var currentpos: pchar; var minoccurs,maxoccurs: integer): boolean;
+         var
+          minOccursString: string;
+          maxOccursString: string;
+         begin
+           parseoccurences:=false;
+           minOccurs:=-1;
+           maxOccurs:=-1;
+           inc(currentpos);
+           minOccursString:='';
+           if currentPos^ = #0 then
+             begin
+               error:=true;
+               exit;
+             end;
+            while (currentpos^<>#0) and (currentpos^ in ['0'..'9']) do
+                begin
+                   minOccursString:=minOccursString+currentPos^;
+                   inc(currentpos);
+                end;
+            if length(minOccursString) = 0 then
+                begin
+                  error:=true;
+                  exit;
+                end;
+            Val(minOccursString,minOccurs);
+            { possible cases here: commad or end bracket }
+            if currentpos^= '}' then
+              begin
+                inc(currentpos);
+                maxOccurs:=minOccurs;
+                parseoccurences:=true;
+                exit;
+              end;
+            if currentpos^= ',' then
+              begin
+                maxOccursString:='';
+                inc(currentpos);
+                while (currentpos^<>#0) and (currentpos^ in ['0'..'9']) do
+                begin
+                   maxOccursString:=maxOccursString+currentPos^;
+                   inc(currentpos);
+                end;
+                if currentpos^= '}' then
+                 begin
+                   { If the length of the string is zero, then there is
+                     no upper bound. }
+                   if length(maxOccursString) > 0 then
+                      Val(maxOccursString,maxOccurs)
+                   else
+                      maxOccurs:=high(integer);
+                   inc(currentpos);
+                   parseoccurences:=true;
+                   exit;
+                 end;
+              end;
+              error:=true;
+         end;
+
+
+       function parseregexpr(next,elsepath : pregexprentry) : pregexprentry;
+
+         var
+            hp : pregexprentry;
+            minOccurs,maxOccurs: integer;
+            hp3: pregexprentry;
+            cs : tcharset;
+            chaining : ^pregexprentry;
+
+         begin
+            chaining:=nil;
+            parseregexpr:=nil;
+            elsepath:=nil;
+            if error then
+              exit;
+            { this dummy allows us to redirect the elsepath later }
+{            new(ep);
+            doregister(ep);
+            ep^.typ:=ret_charset;
+            ep^.chars:=[];
+            ep^.elsepath:=elsepath;
+            elsepath:=ep;}
+            while true do
+              begin
+                 if error then
+                   exit;
+                 case currentpos^ of
+                    '(':
+                       begin
+                          inc(currentpos);
+                          hp:=parseregexpr(nil,nil);
+                          { Special characters after the bracket }
+                           if error then
+                              exit;
+                          if currentpos^<>')' then
+                            begin
+                               error:=true;
+                               exit;
+                            end;
+                          inc(currentpos);
+                            case currentpos^ of
+                            '*':
+                               begin
+                                  inc(currentpos);
+                                  new(hp3);
+                                  doregister(hp3);
+                                  hp3^.typ:=ret_pattern;
+                                  hp3^.alternative:=nil;
+                                  hp3^.pattern:=hp;
+                                  hp3^.elsepath:=elsepath;
+                                  hp3^.minoccurs:=0;
+                                  hp3^.maxoccurs:=high(integer);
+                                  hp3^.next:=nil;
+                                  if assigned(chaining) then
+                                    chaining^:=hp3
+                                  else
+                                    parseregexpr:=hp3;
+                                  chaining:=@hp3^.next;
+                               end;
+                            '+':
+                               begin
+                                  inc(currentpos);
+                                  new(hp3);
+                                  doregister(hp3);
+                                  hp3^.typ:=ret_pattern;
+                                  hp3^.alternative:=nil;
+                                  hp3^.pattern:=hp;
+                                  hp3^.elsepath:=elsepath;
+                                  hp3^.minoccurs:=1;
+                                  hp3^.maxoccurs:=high(integer);
+                                  hp3^.next:=nil;
+                                  if assigned(chaining) then
+                                    chaining^:=hp3
+                                  else
+                                    parseregexpr:=hp3;
+                                  chaining:=@hp3^.next;
+                               end;
+
+                            '?':
+                               begin
+                                  inc(currentpos);
+                                  new(hp3);
+                                  doregister(hp3);
+                                  hp3^.typ:=ret_pattern;
+                                  hp3^.alternative:=nil;
+                                  hp3^.pattern:=hp;
+                                  hp3^.elsepath:=elsepath;
+                                  hp3^.minoccurs:=0;
+                                  hp3^.maxoccurs:=1;
+                                  hp3^.next:=nil;
+                                  if assigned(chaining) then
+                                    chaining^:=hp3
+                                  else
+                                    parseregexpr:=hp3;
+                                  chaining:=@hp3^.next;
+                               end;
+                            '{':
+                               begin
+                                 if not parseOccurences(currentPos,minOccurs,maxOccurs) then
+                                   exit;
+                                  inc(currentpos);
+                                  new(hp3);
+                                  doregister(hp3);
+                                  hp3^.typ:=ret_pattern;
+                                  hp3^.alternative:=nil;
+                                  hp3^.pattern:=hp;
+                                  hp3^.elsepath:=elsepath;
+                                  hp3^.minoccurs:=minOccurs;
+                                  hp3^.maxoccurs:=maxOccurs;
+                                  hp3^.next:=nil;
+                                  if assigned(chaining) then
+                                    chaining^:=hp3
+                                  else
+                                    parseregexpr:=hp3;
+                                  chaining:=@hp3^.next;
+                               end;
+                            else
+                              begin
+                                { go to end of this list - always the
+                                  last next used }
+(*
+                                hp3:=hp;
+                                while assigned(hp3^.next) do
+                                  begin
+                                    hp3:=hp3^.next;
+                                  end;
+                                if assigned(chaining) then
+                                   chaining^:=hp
+                                else
+                                   parseregexpr:=hp;
+                                chaining:=@hp3^.next;*)
+                                  new(hp3);
+                                  doregister(hp3);
+                                  hp3^.typ:=ret_pattern;
+                                  hp3^.alternative:=nil;
+                                  hp3^.pattern:=hp;
+                                  hp3^.elsepath:=elsepath;
+                                  hp3^.minoccurs:=1;
+                                  hp3^.maxoccurs:=1;
+                                  hp3^.next:=nil;
+                                  if assigned(chaining) then
+                                    chaining^:=hp3
+                                  else
+                                    parseregexpr:=hp3;
+                                  chaining:=@hp3^.next;
+
+                              end;
+                          end;
+                       end;
+{ This is only partially implemented currently, as the terms before
+  the | character must be grouped together with parenthesis, which
+  is also compatible with other regular expressions.
+}
+                    '|':
+                       begin
+{$ifdef DEBUG}
+                          writeln('Creating or entry');
+{$endif DEBUG}
+                          if (not assigned (hp3)) then
+                            begin
+                              error:=true;
+                              exit;
+                            end;
+                          if (hp3^.typ <> ret_pattern) then
+                            begin
+                              error:=true;
+                              exit;
+                            end;
+                          while currentpos^='|' do
+                            begin
+                              inc(currentpos);
+                              if currentpos^=#0 then
+                                begin
+                                   error:=true;
+                                   exit;
+                                end;
+                              { always put the longest pattern first, so
+                                swap the trees as necessary.
+                              }
+                              hp := parseregexpr (next, elsepath);
+                              if patlength(hp) > patlength(hp3^.pattern) then
+                                begin
+                                  hp3^.alternative:=hp3^.pattern;
+                                  hp3^.pattern:=hp;
+                                end
+                              else
+                                 hp3^.alternative:=hp;
+                            end;
+                       end;
+                    ')':
+                       exit;
+                    '^':
+                       begin
+                          inc(currentpos);
+                          hp:=parseregexpr(nil,nil);
+                          { Special characters after the bracket }
+                           if error then
+                              exit;
+                           new(hp3);
+                           doregister(hp3);
+                           hp3^.typ:=ret_startline;
+                           hp3^.pattern:=hp;
+                           hp3^.elsepath:=elsepath;
+                           hp3^.next:=nil;
+                           if assigned(chaining) then
+                              chaining^:=hp3
+                           else
+                              parseregexpr:=hp3;
+                           chaining:=@hp3^.next;
+                       end;
+                    '$':
+                       begin
+                          inc(currentpos);
+                          new(hp);
+                          doregister(hp);
+                          hp^.typ:=ret_endline;
+                          hp^.elsepath:=elsepath;
+                          hp^.next:=nil;
+                          if assigned(chaining) then
+                            chaining^:=hp
+                          else
+                            parseregexpr:=hp;
+                          chaining:=@hp^.next;
+                       end;
+                    #0:
+                       exit;
+                    else
+                      begin
+                         readcharset(cs);
+                         if error then
+                           exit;
+                         case currentpos^ of
+                            '*':
+                               begin
+                                  inc(currentpos);
+                                  new(hp);
+                                  doregister(hp);
+                                  hp^.typ:=ret_charset;
+                                  hp^.chars:=cs;
+                                  hp^.elsepath:=nil;
+                                  hp^.next:=nil;
+                                  new(hp3);
+                                  doregister(hp3);
+                                  hp3^.typ:=ret_pattern;
+                                  hp3^.alternative:=nil;
+                                  hp3^.pattern:=hp;
+                                  hp3^.elsepath:=elsepath;
+                                  hp3^.minoccurs:=0;
+                                  hp3^.maxoccurs:=high(integer);
+                                  hp3^.next:=nil;
+                                  if assigned(chaining) then
+                                    chaining^:=hp3
+                                  else
+                                    parseregexpr:=hp3;
+                                  chaining:=@hp3^.next;
+                               end;
+                            '+':
+                               begin
+                                  inc(currentpos);
+                                  new(hp);
+                                  doregister(hp);
+                                  hp^.typ:=ret_charset;
+                                  hp^.chars:=cs;
+                                  hp^.elsepath:=nil;
+                                  hp^.next:=nil;
+
+                                  new(hp3);
+                                  doregister(hp3);
+                                  hp3^.typ:=ret_pattern;
+                                  hp3^.alternative:=nil;
+                                  hp3^.pattern:=hp;
+                                  hp3^.elsepath:=elsepath;
+                                  hp3^.minoccurs:=1;
+                                  hp3^.maxoccurs:=high(integer);
+                                  hp3^.next:=nil;
+                                  if assigned(chaining) then
+                                    chaining^:=hp3
+                                  else
+                                    parseregexpr:=hp3;
+                                  chaining:=@hp3^.next;
+                               end;
+                            '?':
+                               begin
+                                  inc(currentpos);
+                                  new(hp);
+                                  doregister(hp);
+                                  hp^.typ:=ret_charset;
+                                  hp^.chars:=cs;
+                                  hp^.elsepath:=nil;
+                                  hp^.next:=nil;
+
+                                  new(hp3);
+                                  doregister(hp3);
+                                  hp3^.typ:=ret_pattern;
+                                  hp3^.pattern:=hp;
+                                  hp3^.alternative:=nil;
+                                  hp3^.elsepath:=elsepath;
+                                  hp3^.minoccurs:=0;
+                                  hp3^.maxoccurs:=1;
+                                  hp3^.next:=nil;
+                                  if assigned(chaining) then
+                                    chaining^:=hp3
+                                  else
+                                    parseregexpr:=hp3;
+                                  chaining:=@hp3^.next;
+                               end;
+                            '{':
+                               begin
+                                 if not parseOccurences(currentPos,minOccurs,maxOccurs) then
+                                   exit;
+                                  new(hp);
+                                  doregister(hp);
+                                  hp^.typ:=ret_charset;
+                                  hp^.chars:=cs;
+                                  hp^.elsepath:=nil;
+                                  hp^.next:=nil;
+
+                                  new(hp3);
+                                  doregister(hp3);
+                                  hp3^.typ:=ret_pattern;
+                                  hp3^.alternative:=nil;
+                                  hp3^.pattern:=hp;
+                                  hp3^.elsepath:=elsepath;
+                                  hp3^.minoccurs:=minOccurs;
+                                  hp3^.maxoccurs:=maxOccurs;
+                                  hp3^.next:=nil;
+                                  if assigned(chaining) then
+                                    chaining^:=hp3
+                                  else
+                                    parseregexpr:=hp3;
+                                  chaining:=@hp3^.next;
+
+                                end;
+                            else
+                               { Normal character }
+                               begin
+                                  new(hp);
+                                  doregister(hp);
+                                  hp^.typ:=ret_charset;
+                                  hp^.chars:=cs;
+                                  hp^.elsepath:=elsepath;
+                                  hp^.next:=next;
+                                  if assigned(chaining) then
+                                    chaining^:=hp
+                                  else
+                                    parseregexpr:=hp;
+                                  chaining:=@hp^.next;
+                                  continue;
+                               end;
+                           { This was a pattern }
+                         end; { END CASE }
+                      end;
+                 end;
+              end;
+         end;
+
+       var
+          endp : pregexprentry;
+
+       begin
+          GenerateRegExprEngine:=false;
+          RegExprEngine.Data:=nil;
+          RegExprEngine.DestroyList:=nil;
+          if regexpr=nil then
+            exit;
+          first:=nil;
+          if (ref_singleline in flags) and (ref_multiline in flags) then
+            exit;
+          currentpos:=regexpr;
+          GenerateRegExprEngine:=true;
+          error:=false;
+          new(endp);
+          doregister(endp);
+          endp^.typ:=ret_illegalend;
+          RegExprEngine.flags:=flags;
+          RegExprEngine.Data:=parseregexpr(nil,endp);
+{$IFDEF DEBUG}
+          writeln('========== Generating tree ============');
+          dumptree('',RegExprEngine.Data);
+{$ENDIF}
+          RegExprEngine.DestroyList:=first;
+          if error or (currentpos^<>#0) then
+            begin
+              GenerateRegExprEngine:=false;
+              DestroyRegExprEngine(RegExprEngine);
+            end;
+       end;
+
+
+{$IFDEF FPC}
+    function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags): TREGExprEngine;
+    var
+     r: TRegExprEngine;
+    begin
+      GenerateRegExprEngine(regexpr,flags,r);
+      GenerateRegExprEngine:=r;
+    end;
+{$ENDIF}
+
+    procedure DestroyRegExprEngine(var regexpr : TRegExprEngine);
+
+       var
+          hp : pregexprentry;
+
+       begin
+          hp:=regexpr.DestroyList;
+          while assigned(hp) do
+            begin
+               regexpr.DestroyList:=hp^.nextdestroy;
+               dispose(hp);
+               hp:=regexpr.DestroyList;
+            end;
+          regexpr.Data:=nil;
+          regexpr.DestroyList:=nil;
+       end;
+
+     function RegExprPos(regexprengine : TRegExprEngine;p : pchar;var index,len : longint) : boolean;
+
+       var
+          lastpos : pchar;
+          firstpos: pchar;
+
+       { Does the actual search of the data - return true if the term was found }
+       function dosearch(regexprentry : pregexprentry;pos : pchar) : boolean;
+       var
+          found: boolean;
+          checkvalue: boolean;
+          savedpos: pchar;
+          counter: word;
+
+         begin
+            dosearch:=false;
+            while true do
+              begin
+                 {$IFDEF Debug}
+                 writeln('Entering ',typ2str[regexprentry^.typ]);
+                 writeln('Pattern length ',patlength(regexprentry));
+                 {$ENDIF Debug}
+                 case regexprentry^.typ of
+                    ret_endline:
+                      begin
+                         { automatically a match! }
+                         if pos^ = #0 then
+                            begin
+                              dosearch:=true;
+                              exit;
+                            end;
+                         if ref_multiline in regexprengine.flags then
+                            begin
+                              { Supports DOS/Commodore/UNIX/IBM Mainframe line formats }
+                              { avoid reading invalid memory also }
+                                  if (pos^=#13) and ((pos+1)^=#10) then
+                                    begin
+                                      regexprentry:=regexprentry^.next;
+                                    end
+                                  else
+                                  if (pos^=#$85) or (pos^=#10) or ((pos^=#13) and ((pos-1) >= firstpos) and ((pos-1)^ <> #10)) then
+                                    begin
+                                       regexprentry:=regexprentry^.next;
+                                    end
+                                  else
+                                    begin
+                                       dosearch:=false;
+                                       exit;
+                                    end;
+                             end
+                           else
+                             exit;
+                      end;
+                    ret_pattern:
+                      begin
+                         found:=false;
+                         { Take care of occurences here }
+                         savedpos:=pos;
+                         counter:=0;
+                         repeat
+                           found:=dosearch(regexprentry^.pattern,pos);
+                           if not found then
+                            break;
+                           pos:=lastpos;
+                           inc(counter);
+                         until (not found) or (counter >= regexprentry^.maxoccurs) or (pos^= #0);
+
+                         if counter = 0 then
+                           begin
+                             { If there was no occurence and the minimum occurence is > 0 then
+                               problem.
+                             }
+                             if (regexprentry^.minoccurs > 0) then
+                              begin
+                                dosearch:=false;
+                                { verify alternative path as required }
+                                if assigned(regexprentry^.alternative) then
+                                  begin
+                                     dosearch:=dosearch(regexprentry^.alternative,savedpos);
+                                     exit;
+                                  end;
+                                exit;
+                              end;
+                             dosearch:=true;
+                             lastpos:=savedpos;
+                           end
+                         else
+                           { found }
+                           begin
+                              { Possible choices :
+                                 - found and (counter >= minOccurences) and (counter =< maxOccurences) = true
+                                 - found and (counter < minOccurences) or (counter > maxOccurences) = false
+                              }
+                              if (counter < regexprentry^.minoccurs) or (counter > regexprentry^.maxoccurs) then
+                                begin
+                                  dosearch:=false;
+                                  exit;
+                                end;
+                              dosearch:=true;
+                              { if all matches were found, and the current position
+                                points to zero (processed all characters) }
+                              if pos^=#0 then
+                                begin
+                                  dosearch:=true;
+                                  exit;
+                                end;
+                           end;
+                         { If we are that means the matches were valid, go to next element to match
+                         }
+                         regexprentry:=regexprentry^.next;
+                         if (counter = 0) and not assigned(regexprentry) then
+                           exit;
+                      end;
+                    ret_startline:
+                      begin
+                         checkvalue:=pos=firstpos;
+                         if ref_multiline in regexprengine.flags then
+                           begin
+                             { Supports DOS/Commodore/UNIX/IBM Mainframe line formats }
+                             { avoid reading invalid memory also }
+                             if
+                                 (
+                                   ((pos-1) >= firstpos) and ((pos-1)^=#$85)
+                                  )
+                              or
+                                 (
+                                   ((pos-1) >= firstpos) and ((pos-1)^=#10)
+                                  )
+                              or
+                                (
+                                 ((pos-1) >= firstpos) and ((pos-1)^=#13) and
+                                 ((pos)^ <> #10)
+                                )
+                             then
+                               begin
+                                 checkvalue:=true;
+                               end;
+                           end;
+                          if checkvalue then
+                            begin
+                              dosearch:=dosearch(regexprentry^.pattern,pos);
+                              regexprentry:=regexprentry^.next;
+                              if not dosearch then
+                                exit;
+                              pos:=lastpos;
+                            end
+                          else
+                            begin
+                              dosearch:=false;
+                              exit;
+                            end;
+                      end;
+                    ret_charset:
+                      begin
+                         if (pos^ in regexprentry^.chars) or
+                           ((ref_caseinsensitive in regexprengine.flags) and
+                            (upcase(pos^) in regexprentry^.chars)) then
+                           begin
+{$ifdef DEBUG}
+                              writeln('Found matching: ',pos^);
+{$endif DEBUG}
+                              regexprentry:=regexprentry^.next;
+                              inc(pos);
+                           end
+                         else
+                           begin
+{$ifdef DEBUG}
+                              writeln('Found unmatching: ',pos^);
+{$endif DEBUG}
+                              exit;
+                           end;
+                      end;
+                    ret_backtrace:
+                      begin
+{$ifdef DEBUG}
+                         writeln('Starting backtrace');
+{$endif DEBUG}
+                         if dosearch(regexprentry^.next,pos) then
+                           begin
+                              dosearch:=true;
+                              exit;
+                           end
+                         else if dosearch(regexprentry^.elsepath,pos) then
+                           begin
+                              dosearch:=true;
+                              exit;
+                           end
+                         else
+                           exit;
+                      end;
+                 end;
+                 lastpos:=pos;
+                 if regexprentry=nil then
+                   begin
+                      dosearch:=true;
+                      exit;
+                   end;
+                 if regexprentry^.typ=ret_illegalend then
+                   exit;
+                 { end of string, and we were expecting an end of string }
+                 if (pos^=#0) and (regexprentry^.typ = ret_endline) and
+                    (not assigned(regexprentry^.next)) then
+                   begin
+                     dosearch:=true;
+                     exit;
+                   end;
+                 if pos^=#0 then
+                   exit;
+              end;
+         end;
+
+       begin
+          RegExprPos:=false;
+          index:=0;
+          len:=0;
+          firstpos:=p;
+          if regexprengine.Data=nil then
+            exit;
+          while p^<>#0 do
+            begin
+               if dosearch(regexprengine.Data,p) then
+                 begin
+                    len:=lastpos-p;
+                    RegExprPos:=true;
+                    exit;
+                 end
+               else
+                 begin
+                    inc(p);
+                    inc(index);
+                 end;
+            end;
+          index:=-1;
+       end;
+
+
+  function RegExprReplaceAll(RegExprEngine : TRegExprEngine;const src,newstr : ansistring;var dest : ansistring) : sizeint;
+    var
+      index,len : longint;
+      pos,lastpos : pchar;
+      first : boolean;
+      oldlength : PtrInt;
+    begin
+      pos:=pchar(src);
+      lastpos:=nil;
+      first:=true;
+      Result:=0;
+      { estimate some length }
+      SetLength(dest,length(src)+((length(src) div 10)*length(newstr)));
+      while RegExprPos(RegExprEngine,pos,index,len) do
+        begin
+          inc(pos,index);
+          if (lastpos = nil) or (pos>lastpos) then
+            begin
+              if lastpos = nil then lastpos := pchar(src);
+              { copy skipped part }
+
+              { because we cheat with SetLength a SetLength(...,0) isn't what we want
+                so we've to trick at the first SetLength call
+              }
+              if first then
+                begin
+                  SetLength(dest,(pos-lastpos));
+                  { cast dest here because it is already unified }
+                  move(lastpos^,char(dest[1]),pos-lastpos);
+                end
+              else
+                begin
+                  oldlength:=Length(dest);
+                  SetLength(dest,oldlength+(pos-lastpos));
+                  move(lastpos^,char(dest[oldlength+1]),pos-lastpos);
+                end;
+              first:=false;
+            end;
+          { found }
+          inc(Result);
+          dest:=dest+newstr;
+          inc(pos,len);
+          lastpos:=pos;
+        end;
+      { copy remainder }
+      len:=strlen(pos);
+      if first then
+        begin
+          SetLength(dest,len);
+          move(pos^,char(dest[length(dest)+1]),len);
+        end
+      else
+        begin
+          oldlength:=Length(dest);
+          SetLength(dest,oldlength+len);
+          move(pos^,char(dest[oldlength+1]),len);
+        end
+    end;
+
+
+  function RegExprEscapeStr (const S : string) : string;
+    var
+     i, len   : integer;
+     s1: string;
+    begin
+      RegExprEscapeStr:= '';
+      s1:='';
+      if (S = '') then
+       exit;
+
+      len := Length (S);
+
+      for i := 1 to len do
+        begin
+          if (S [i] in ['(','|', '.', '*', '?', '^', '$', '-', '[', '{', '}', ']', ')', '\']) then
+            begin
+              s1 := s1 + '\';
+            end;
+
+          s1 := s1 + S[i];
+        end;
+      RegExprEscapeStr:=s1;
+    end;
+
+begin
+   cs_nonwordchars:=cs_allchars-cs_wordchars;
+   cs_nondigits:=cs_allchars-cs_digits;
+   cs_nonwhitespace:=cs_allchars-cs_whitespace;
+end.

+ 1230 - 0
packages/regexpr/src/regex.pp

@@ -0,0 +1,1230 @@
+{
+    This file is part of the Free Pascal packages library.
+    Copyright (c) 2008 by Joost van der Sluis, member of the
+    Free Pascal development team
+    
+    Regexpression parser
+    
+    This code is based on the examples in the book
+    'Tomes of Delphi: Algorithms and Data Structures' by Julian M Bucknall
+    The code is used with his permission. For an excellent explanation of
+    this unit, see the book...
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+unit Regex;
+
+{$mode Delphi}{$H+}
+{$INLINE ON}
+
+interface
+
+{Notes:
+  these classes parse regular expressions that follow this grammar:
+
+    <anchorexpr> ::= <expr> |
+                     '^' <expr> |
+                     <expr> '$' |
+                     '^' <expr> '$'
+    <expr> ::= <term> |
+               <term> '|' <expr>                 - alternation
+    <term> ::= <factor> |
+               <factor><term>                    - concatenation
+    <factor> ::= <atom> |
+                 <atom> '?' |                    - zero or one
+                 <atom> '*' |                    - zero or more
+                 <atom> 'n,m' |                  - min n, max m (added by Joost)
+                 <atom> '+'                      - one or more
+    <atom> ::= <char> |
+               '.' |                             - any char
+               '(' <expr> ') |                   - parentheses
+               '[' <charclass> ']' |             - normal class
+               '[^' <charclass> ']'              - negated class
+    <charclass> ::= <charrange> |
+                    <charrange><charclass>
+    <charrange> ::= <ccchar> |
+                    <ccchar> '-' <ccchar>
+    <char> ::= <any character except metacharacters> |
+               '\' <any character at all>
+    <ccchar> ::= <any character except '-' and ']'> |
+                 '\' <any character at all>
+
+  This means that parentheses have maximum precedence, followed
+  by square brackets, followed by the closure operators,
+  followed by concatenation, finally followed by alternation.
+}
+
+uses
+  SysUtils,
+  Classes;
+
+type
+  TUpcaseFunc = function(aCh : AnsiChar) : AnsiChar;
+
+  TNFAMatchType = (  {types of matching performed...}
+     mtNone,           {..no match (an epsilon no-cost move)}
+     mtAnyChar,        {..any character}
+     mtChar,           {..a particular character}
+     mtClass,          {..a character class}
+     mtNegClass,       {..a negated character class}
+     mtTerminal,       {..the final state--no matching}
+     mtUnused);        {..an unused state--no matching}
+
+  TRegexError = (    {error codes for invalid regex strings}
+     recNone,          {..no error}
+     recSuddenEnd,     {..unexpected end of string}
+     recMetaChar,      {..read metacharacter, but needed normal char}
+     recNoCloseParen,  {..expected close paren, but not there}
+     recExtraChars     {..not at end of string after parsing regex}
+     );
+
+  TRegexType = (
+     rtRegEx,
+     rtChars,
+     rtSingleChar
+     );
+
+  PCharSet = ^TCharSet;
+  TCharSet = set of Char;
+
+  { TtdRegexEngine }
+
+  TNFAState = record
+    sdNextState1: integer;
+    sdNextState2: integer;
+    sdClass     : PCharSet;
+    sdMatchType : TNFAMatchType;
+    sdChar      : AnsiChar;
+  end;
+
+
+  { TRegexEngine }
+
+  TRegexEngine = class
+    private
+      FAnchorEnd  : boolean;
+      FAnchorStart: boolean;
+      FErrorCode  : TRegexError;
+      FIgnoreCase : boolean;
+      FMultiLine  : boolean;
+      FPosn       : PAnsiChar;
+      FRegexStr   : string;
+      FStartState : integer;
+      FStateTable : Array of TNFAState;
+      FStateCount : integer;
+      FUpcase     : TUpcaseFunc;
+
+      // The deque (double-ended queue)
+      FList : array of integer;
+      FCapacity : integer;
+      FHead : integer;
+      FTail : integer;
+      
+      FRegexType : TRegexType;
+    protected
+      procedure DequeEnqueue(aValue : integer);
+      procedure DequePush(aValue : integer);
+      function DequePop : integer;
+      procedure DequeGrow;
+
+      procedure rcSetIgnoreCase(aValue : boolean); virtual;
+      procedure rcSetRegexStr(const aRegexStr : string); virtual;
+      procedure rcSetUpcase(aValue : TUpcaseFunc); virtual;
+      procedure rcSetMultiLine(aValue : Boolean); virtual;
+
+      procedure rcClear; virtual;
+      procedure rcError(aIndex      : integer); virtual;
+      procedure rcLevel1Optimize; virtual;
+      function rcMatchSubString(const S   : string;
+                                StartPosn : integer;
+                                var Len   : integer) : boolean; virtual;
+      function rcAddState(aMatchType : TNFAMatchType;
+                          aChar      : AnsiChar;
+                          aCharClass : PCharSet;
+                          aNextState1: integer;
+                          aNextState2: integer) : integer;
+      function rcSetState(aState     : integer;
+                          aNextState1: integer;
+                          aNextState2: integer) : integer;
+
+      function rcParseAnchorExpr : integer; virtual;
+      function rcParseAtom : integer; virtual;
+      function rcParseCCChar(out EscapeChar : Boolean) : AnsiChar; virtual;
+      function rcParseChar : integer; virtual;
+      function rcParseCharClass(aClass : PCharSet) : boolean; virtual;
+      function rcParseCharRange(aClass : PCharSet) : boolean; virtual;
+      function rcParseExpr : integer; virtual;
+      function rcParseFactor : integer; virtual;
+      function rcParseTerm : integer; virtual;
+      Function rcReturnEscapeChar : AnsiChar; virtual;
+    public
+      procedure WriteTable;
+      constructor Create(const aRegexStr : string);
+      destructor Destroy; override;
+
+      function Parse(var aErrorPos : integer;
+                     var aErrorCode: TRegexError) : boolean; virtual;
+      function MatchString(const S : string; out MatchPos : integer; var Offset : integer) : boolean; virtual;
+      function ReplaceAllString(const src, newstr: ansistring; out DestStr : string): Integer;
+
+
+      property IgnoreCase : boolean
+                  read FIgnoreCase write rcSetIgnoreCase;
+      property MultiLine : boolean
+                  read FMultiLine write rcSetMultiLine;
+      property RegexString : string
+                  read FRegexStr write rcSetRegexStr;
+      property Upcase : TUpcaseFunc
+                  read FUpcase write rcSetUpcase;
+  end;
+
+
+Resourcestring
+  eRegexParseError = 'Error at %d when parsing regular expression';
+
+implementation
+
+uses strutils;
+
+const
+  MetaCharacters : set of AnsiChar =
+                   ['[', ']', '(', ')', '|', '*', '+', '?', '-', '.',
+                    '^', '$', '{', '}'];
+  newline : TCharSet = [#10,#13,#$85];
+  {some handy constants}
+  UnusedState = -1;
+  NewFinalState = -2;
+  CreateNewState = -3;
+  ErrorState = -4;
+  MustScan = -5;
+
+  cs_allchars : tcharset = [#0..#255];
+  cs_wordchars : tcharset = ['A'..'Z','a'..'z','_','0'..'9'];
+  cs_newline : tcharset = [#10];
+  cs_digits : tcharset = ['0'..'9'];
+  cs_whitespace : tcharset = [' ',#9];
+
+
+{===Helper routines==================================================}
+function SystemUpcase(aCh : AnsiChar) : AnsiChar; far;
+begin
+  Result := System.Upcase(aCh);
+end;
+{====================================================================}
+
+
+{===TRegexEngine===================================================}
+constructor TRegexEngine.Create(const aRegexStr : string);
+begin
+  inherited Create;
+  FRegexStr := aRegexStr;
+  FIgnoreCase := false;
+  FUpcase := SystemUpcase;
+  SetLength(FStateTable,64);
+  FStateCount:=0;
+  FCapacity:=64;
+  setlength(FList,FCapacity);
+  {let's help out the user of the deque by putting the head and
+   tail pointers in the middle: it's probably more efficient}
+  FHead := FCapacity div 2;
+  FTail := FHead;
+
+  MultiLine:=False;
+end;
+{--------}
+destructor TRegexEngine.Destroy;
+begin
+  if (FStateTable <> nil) then
+    rcClear;
+  inherited Destroy;
+end;
+{--------}
+function TRegexEngine.MatchString(const S : string; out MatchPos : integer; var Offset : integer): boolean;
+var
+  i : integer;
+  ErrorPos  : integer;
+  ErrorCode : TRegexError;
+  pc : pchar;
+  x:integer;
+begin
+  {if the regex string hasn't been parsed yet, do so}
+  if (FStateCount = 0) then begin
+    if not Parse(ErrorPos, ErrorCode) then
+      rcError(ErrorPos);
+  end;
+
+  case FRegexType of
+    rtSingleChar :
+      begin
+      MatchPos := PosEx(char(FRegexStr[1]),s,Offset);
+      Offset := MatchPos+1;
+      Result := (MatchPos>0);
+      end;
+    rtChars :
+      begin
+      MatchPos := PosEx(FRegexStr,s,Offset);
+      Offset := MatchPos+length(FRegexStr);
+      Result := (MatchPos>0);
+      end
+    else
+      begin
+      {now try and see if the string matches (empty strings don't)}
+      Result := False;
+      MatchPos := 0;
+      if (S <> '') then
+        {if the regex specified a start anchor then we
+         need to check the string starting at the first position}
+        if FAnchorStart then begin
+          if rcMatchSubString(S, 1, Offset) then
+            begin
+            MatchPos:=1;
+            Result := True;
+            end
+          {If the first position did not match ang MultiLine is false, the string
+           doesn't match. If MultiLine is true, start at every position after a
+           newline }
+          else if FMultiLine then begin
+            for i := Offset to length(S)-1 do
+              if S[i] in newline then
+               if rcMatchSubString(S, i+1, Offset) then begin
+                MatchPos := i+1;
+                Result := True;
+                Break;
+              end;
+          end
+        end
+        {otherwise we try and match the string at every position and
+         return at the first success}
+        else begin
+          for i := Offset to length(S) do
+            if rcMatchSubString(S, i, Offset) then begin
+              MatchPos:=i;
+              Result := True;
+              Break;
+            end;
+        end;
+      end;
+    end; {case}
+end;
+
+function TRegexEngine.ReplaceAllString(const src, newstr: ansistring; out DestStr : string): Integer;
+
+type TReplRec = record
+                  Pos : integer;
+                  Len : integer;
+                end;
+
+var ofs         : Integer;
+    size_newstr,
+    size, pos   : Integer;
+    ReplArr     : array of TReplRec;
+    racount     : integer;
+    MatchPos    : integer;
+    DestSize    : integer;
+    LastPos     : integer;
+    MoveLen     : integer;
+    i           : integer;
+
+begin
+  setlength(ReplArr,64);
+
+  racount := 0;
+  DestSize:=length(src);
+  size_newstr := length(newstr);
+  Ofs := 1;
+  while MatchString(src,MatchPos,Ofs) do
+    begin
+    if racount = length(ReplArr) then
+      setlength(ReplArr,racount+racount div 2);
+    ReplArr[racount].Pos := MatchPos;
+    ReplArr[racount].Len := ofs;
+    DestSize:=DestSize-ofs+MatchPos+size_newstr;
+    inc(racount);
+    end;
+
+  SetLength(DestStr, SizeOf(Char)*DestSize);
+  MatchPos:=1; LastPos:=1;
+
+  if size_newstr<>0 then for i := 0 to racount -1 do
+    begin
+    MoveLen := ReplArr[i].Pos-LastPos;
+    move(src[LastPos],DestStr[MatchPos],MoveLen);
+    MatchPos:=MatchPos+MoveLen;
+    LastPos := ReplArr[i].Len;
+    move(newstr[1],DestStr[MatchPos],size_newstr);
+    Matchpos := MatchPos+size_newstr;
+    end
+  else for i := 0 to racount -1 do
+    begin
+    MoveLen := ReplArr[i].Pos-LastPos;
+    move(src[LastPos],DestStr[MatchPos],MoveLen);
+    MatchPos:=MatchPos+MoveLen;
+    LastPos := ReplArr[i].Len;
+    end;
+
+  move(src[LastPos],DestStr[MatchPos],length(src)-LastPos+1);
+  Result := racount;
+end;
+
+{--------}
+function TRegexEngine.Parse(var aErrorPos : integer;
+                              var aErrorCode: TRegexError)
+                                                            : boolean;
+begin
+  {clear the current transition table}
+  rcClear;
+  {empty regex strings are not allowed}
+  if (FRegexStr = '') then begin
+    Result := false;
+    aErrorPos := 1;
+    aErrorCode := recSuddenEnd;
+
+    Exit;
+  end;
+  {parse the regex string}
+  if not IgnoreCase then
+    begin
+    if length(FRegexStr)=1 then
+      FRegexType:=rtSingleChar
+    else
+      FRegexType:=rtChars
+    end
+  else
+    FRegexType:=rtRegEx;
+
+  FPosn := PAnsiChar(FRegexStr);
+  FStartState := rcParseAnchorExpr;
+  {if an error occurred or we're not at the end of the regex string,
+   clear the transition table, return false and the error position}
+  if (FStartState = ErrorState) or (FPosn^ <> #0) then begin
+    if (FStartState <> ErrorState) and (FPosn^ <> #0) then
+      FErrorCode := recExtraChars;
+    rcClear;
+    Result := false;
+    aErrorPos := succ(FPosn - PAnsiChar(FRegexStr));
+    aErrorCode := FErrorCode;
+  end
+  {otherwise add a terminal state, optimize, return true}
+  else begin
+    rcAddState(mtTerminal, #0, nil, UnusedState, UnusedState);
+    rcLevel1Optimize;
+    if FAnchorStart or FAnchorEnd then FRegexType:= rtRegEx;
+    Result := true;
+    aErrorPos := 0;
+    aErrorCode := recNone;
+  end;
+end;
+{--------}
+function TRegexEngine.rcAddState(aMatchType : TNFAMatchType;
+                                   aChar      : AnsiChar;
+                                   aCharClass : PCharSet;
+                                   aNextState1: integer;
+                                   aNextState2: integer) : integer;
+begin
+  {set up the fields in the state record}
+  with FStateTable[FStateCount] do
+    begin
+    if (aNextState1 = NewFinalState) then
+      sdNextState1 := FStateCount+1
+    else
+      sdNextState1 := aNextState1;
+    sdNextState2 := aNextState2;
+    sdMatchType := aMatchType;
+    if (aMatchType = mtChar) then
+      sdChar := aChar
+    else if (aMatchType = mtClass) or (aMatchType = mtNegClass) then
+      sdClass := aCharClass;
+    end;
+  Result := FStateCount;
+  inc(FStateCount);
+  if FStateCount=length(FStateTable) then
+    setlength(FStateTable,(FStateCount * 3) div 2);
+
+  if not (aMatchType in [mtChar,mtTerminal]) then FRegexType := rtRegEx;
+end;
+{--------}
+procedure TRegexEngine.rcClear;
+var
+  i : integer;
+begin
+  {free all items in the state transition table}
+  for i := 0 to FStateCount-1 do begin
+    with FStateTable[i] do begin
+      if (sdMatchType = mtClass) or
+         (sdMatchType = mtNegClass) then
+        if (sdClass <> nil) then
+          FreeMem(sdClass, sizeof(TCharSet));
+    end;
+  end;
+  {clear the state transition table}
+  FStateCount:=0;
+  FAnchorStart := false;
+  FAnchorEnd := false;
+end;
+{--------}
+procedure TRegexEngine.rcError(aIndex      : integer);
+begin
+  raise Exception.Create(Format(eRegexParseError,[aIndex]));
+end;
+{--------}
+procedure TRegexEngine.rcLevel1Optimize;
+var
+  i : integer;
+  Walker : integer;
+begin
+  {level 1 optimization removes all states that have only a single
+   no-cost move to another state}
+
+  {cycle through all the state records, except for the last one}
+  for i := 0 to FStateCount - 2 do begin
+    {get this state}
+    with FStateTable[i] do begin
+      {walk the chain pointed to by the first next state, unlinking
+       the states that are simple single no-cost moves}
+      Walker := sdNextState1;
+      while (FStateTable[walker].sdMatchType = mtNone) and
+            (FStateTable[walker].sdNextState2 = UnusedState) do begin
+        sdNextState1 := FStateTable[walker].sdNextState1;
+        Walker := sdNextState1;
+      end;
+      {walk the chain pointed to by the second next state, unlinking
+       the states that are simple single no-cost moves}
+      if (sdNextState2 <> UnusedState) then begin
+        Walker := sdNextState2;
+        while (FStateTable[walker].sdMatchType = mtNone) and
+              (FStateTable[walker].sdNextState2 = UnusedState) do begin
+          sdNextState2 := FStateTable[walker].sdNextState1;
+          Walker := sdNextState2;
+        end;
+      end;
+    end;
+  end;
+
+  {cycle through all the state records, except for the last one,
+   marking unused ones--not strictly necessary but good for debugging}
+  for i := 0 to FStateCount - 2 do begin
+    with FStateTable[i] do begin
+      if (sdMatchType = mtNone) and
+         (sdNextState2 = UnusedState) then
+        sdMatchType := mtUnused;
+    end;
+  end;
+end;
+{--------}
+function TRegexEngine.rcMatchSubString(const s   : string;
+                                         StartPosn : integer;
+                                         var Len   : integer)
+                                                            : boolean;
+var
+  Ch     : AnsiChar;
+  State  : integer;
+  StrInx : integer;
+begin
+  {assume we fail to match}
+  Result := false;
+  Len := StartPosn;
+  {clear the deque}
+  FHead := FCapacity div 2;
+  FTail := FHead;
+
+  
+  {enqueue the special value to start scanning}
+  DequeEnqueue(MustScan);
+  {enqueue the first state}
+  DequeEnqueue(FStartState);
+  {prepare the string index}
+  StrInx := StartPosn;
+  {loop until the deque is empty or we run out of string}
+  repeat
+    {pop the top state from the deque}
+    State := DequePop;
+    {process the "must scan" state first}
+    if (State = MustScan) then begin
+      {if the deque is empty at this point, we might as well give up
+       since there are no states left to process new characters}
+      if (FHead <> FTail) then begin
+        {if we haven't run out of string, get the character, and
+         enqueue the "must scan" state again}
+          if IgnoreCase then
+            Ch := Upcase(s[StrInx])
+          else
+            Ch := s[StrInx];
+          DequeEnqueue(MustScan);
+        inc(StrInx);
+      end;
+    end
+    {otherwise, process the state}
+    else with FStateTable[State] do begin
+      case sdMatchType of
+        mtChar :
+          begin
+            {for a match of a character, enqueue the next state}
+            if (Ch = sdChar) then
+              DequeEnqueue(sdNextState1);
+          end;
+        mtAnyChar :
+          begin
+            {for a match of any character, enqueue the next state}
+            if not (Ch in newline) then
+              DequeEnqueue(sdNextState1);
+          end;
+        mtClass :
+          begin
+            {for a match within a class, enqueue the next state}
+            if (Ch in sdClass^) then
+              DequeEnqueue(sdNextState1);
+          end;
+        mtNegClass :
+          begin
+            {for a match not within a class, enqueue the next state}
+            if not (Ch in sdClass^) then
+              DequeEnqueue(sdNextState1);
+          end;
+        mtTerminal :
+          begin
+            {for a terminal state, the string successfully matched
+             if the regex had no end anchor, or we're at the end
+             of the string or line}
+            if (not FAnchorEnd) or (ch=#0) or (FMultiLine and (ch in newline)) then begin
+              Result := true;
+              Len := StrInx-1;
+//                Exit;
+            end;
+          end;
+        mtNone :
+          begin
+            {for free moves, push the next states onto the deque}
+            Assert(sdNextState2 <> UnusedState,
+                   'optimization should remove all states with one no-cost move');
+            DequePush(sdNextState2);
+            DequePush(sdNextState1);
+          end;
+        mtUnused :
+          begin
+            Assert(false, 'unused states shouldn''t be seen');
+          end;
+      end;
+    end;
+  until (FHead = FTail) or (ch = #0); // deque empty or end of string
+  {if we reach this point we've either exhausted the deque or we've
+   run out of string; if the former, the substring did not match
+   since there are no more states. If the latter, we need to check
+   the states left on the deque to see if one is the terminating
+   state; if so the string matched the regular expression defined by
+   the transition table}
+  while (FHead <> FTail) do begin
+    State := DequePop;
+    with FStateTable[State] do begin
+      case sdMatchType of
+        mtNone :
+          begin
+            {for free moves, push the next states onto the deque}
+            Assert(sdNextState2 <> UnusedState,
+                   'optimization should remove all states with one no-cost move');
+            DequePush(sdNextState2);
+            DequePush(sdNextState1);
+          end;
+        mtTerminal :
+          begin
+            {for a terminal state, the string successfully matched
+             if the regex had no end anchor, or we're at the end
+             of the string or line}
+            if (not FAnchorEnd) or (ch=#0) or (FMultiLine and (ch in newline)) then begin
+              Result := true;
+              Len := StrInx -1;
+              Exit;
+            end;
+          end;
+      end;{case}
+    end;
+  end;
+end;
+{--------}
+function TRegexEngine.rcParseAnchorExpr : integer;
+begin
+  {check for an initial '^'}
+  if (FPosn^ = '^') then begin
+    FAnchorStart := true;
+    inc(FPosn);
+  end;
+
+  {parse an expression}
+  Result := rcParseExpr;
+
+  {if we were successful, check for the final '$'}
+  if (Result <> ErrorState) then begin
+    if (FPosn^ = '$') then begin
+      FAnchorEnd := true;
+      inc(FPosn);
+    end;
+  end;
+end;
+{--------}
+function TRegexEngine.rcParseAtom : integer;
+var
+  MatchType : TNFAMatchType;
+  CharClass : PCharSet;
+begin
+  case FPosn^ of
+    '(' :
+      begin
+        {move past the open parenthesis}
+        inc(FPosn);
+
+        {parse a complete regex between the parentheses}
+        Result := rcParseExpr;
+        if (Result = ErrorState) then
+          Exit;
+        {if the current character is not a close parenthesis,
+         there's an error}
+        if (FPosn^ <> ')') then begin
+          FErrorCode := recNoCloseParen;
+          Result := ErrorState;
+          Exit;
+        end;
+        {move past the close parenthesis}
+        inc(FPosn);
+      end;
+    '[' :
+      begin
+        {move past the open square bracket}
+        inc(FPosn);
+
+        {if the first character in the class is a '^' then the
+         class if negated, otherwise it's a normal one}
+        if (FPosn^ = '^') then begin
+          inc(FPosn);
+          MatchType := mtNegClass;
+        end
+        else begin
+          MatchType := mtClass;
+        end;
+        {allocate the class character set and parse the character
+         class; this will return either with an error, or when the
+         closing square bracket is encountered}
+        New(CharClass);
+        CharClass^ := [];
+        if not rcParseCharClass(CharClass) then begin
+          Dispose(CharClass);
+          Result := ErrorState;
+          Exit;
+        end;
+        {move past the closing square bracket}
+        Assert(FPosn^ = ']',
+               'the rcParseCharClass terminated without finding a "]"');
+        inc(FPosn);
+
+        {add a new state for the character class}
+        Result := rcAddState(MatchType, #0, CharClass,
+                             NewFinalState, UnusedState);
+      end;
+    '.' :
+      begin
+        {move past the period metacharacter}
+        inc(FPosn);
+
+        {add a new state for the 'any character' token}
+        Result := rcAddState(mtAnyChar, #0, nil,
+                             NewFinalState, UnusedState);
+      end;
+  else
+    {otherwise parse a single character}
+    Result := rcParseChar;
+  end;{case}
+end;
+{--------}
+function TRegexEngine.rcParseCCChar(out EscapeChar : Boolean) : AnsiChar;
+begin
+  EscapeChar:=False;
+  {if we hit the end of the string, it's an error}
+  if (FPosn^ = #0) then begin
+    FErrorCode := recSuddenEnd;
+    Result := #0;
+    Exit;
+  end;
+  {if the current char is a metacharacter (at least in terms of a
+   character class), it's an error}
+  if FPosn^ in [']', '-'] then begin
+    FErrorCode := recMetaChar;
+    Result := #0;
+    Exit;
+  end;
+  {otherwise return the character and advance past it}
+  if (FPosn^ = '\') then
+    {..it's an escaped character: get the next character instead}
+    begin
+    inc(FPosn);
+    EscapeChar:=True;
+    Result := rcReturnEscapeChar;
+    end
+  else
+    Result := FPosn^;
+  inc(FPosn);
+end;
+{--------}
+function TRegexEngine.rcParseChar : integer;
+var
+  Ch : AnsiChar;
+begin
+  {if we hit the end of the string, it's an error}
+  if (FPosn^ = #0) then begin
+    Result := ErrorState;
+    FErrorCode := recSuddenEnd;
+    Exit;
+  end;
+  {if the current char is one of the metacharacters, it's an error}
+  if FPosn^ in MetaCharacters then begin
+    Result := ErrorState;
+    FErrorCode := recMetaChar;
+    Exit;
+  end;
+  {otherwise add a state for the character}
+  {..if it's an escaped character: get the next character instead}
+  if (FPosn^ = '\') then
+    begin
+    inc(FPosn);
+    ch := rcReturnEscapeChar;
+    end
+  else
+    ch :=FPosn^;
+  if IgnoreCase then
+    Ch := Upcase(ch);
+  Result := rcAddState(mtChar, Ch, nil, NewFinalState, UnusedState);
+  inc(FPosn);
+end;
+{--------}
+function TRegexEngine.rcParseCharClass(aClass : PCharSet) : boolean;
+begin
+  {assume we can't parse a character class properly}
+  Result := false;
+  {parse a character range; if we can't there was an error and the
+   caller will take care of it}
+  if not rcParseCharRange(aClass) then
+    Exit;
+  {if the current character was not the right bracket, parse another
+   character class (note: we're removing the tail recursion here)}
+  while (FPosn^ <> ']') do begin
+    if not rcParseCharRange(aClass) then
+      Exit;
+  end;
+  {if we reach here we were successful}
+  Result := true;
+end;
+{--------}
+function TRegexEngine.rcParseCharRange(aClass : PCharSet) : boolean;
+var
+  StartChar : AnsiChar;
+  EndChar   : AnsiChar;
+  Ch        : AnsiChar;
+  EscChar   : Boolean;
+begin
+  {assume we can't parse a character range properly}
+  Result := false;
+  {parse a single character; if it's null there was an error}
+  StartChar := rcParseCCChar(EscChar);
+  if (StartChar = #0) then
+    Exit;
+  if EscChar then
+    begin
+    case StartChar of
+      'd' : aClass^ := aClass^ + cs_digits;
+      'D' : aClass^ := aClass^ + cs_allchars-cs_digits;
+      's' : aClass^ := aClass^ + cs_whitespace;
+      'S' : aClass^ := aClass^ + cs_allchars-cs_whitespace;
+      'w' : aClass^ := aClass^ + cs_wordchars;
+      'W' : aClass^ := aClass^ + cs_allchars-cs_wordchars
+    else
+      EscChar := False;
+    end;
+    if EscChar then
+      begin
+      Result := True;
+      Exit;
+      end;
+    end;
+  {if the current character is not a dash, the range consisted of a
+   single character}
+  if (FPosn^ <> '-') then begin
+    if IgnoreCase then
+      Include(aClass^, Upcase(StartChar))
+    else
+      Include(aClass^, StartChar)
+  end
+  {otherwise it's a real range, so get the character at the end of the
+   range; if that's null, there was an error}
+  else begin
+    inc(FPosn); {move past the '-'}
+    EndChar := rcParseCCChar(EscChar);
+    if (EndChar = #0) then
+      Exit;
+    {build the range as a character set}
+    if (StartChar > EndChar) then begin
+      Ch := StartChar;
+      StartChar := EndChar;
+      EndChar := Ch;
+    end;
+    for Ch := StartChar to EndChar do begin
+      Include(aClass^, Ch);
+      if IgnoreCase then
+        Include(aClass^, Upcase(Ch));
+    end;
+  end;
+  {if we reach here we were successful}
+  Result := true;
+end;
+{--------}
+function TRegexEngine.rcParseExpr : integer;
+var
+  StartState1 : integer;
+  StartState2 : integer;
+  EndState1   : integer;
+  OverallStartState : integer;
+begin
+  {assume the worst}
+  Result := ErrorState;
+  {parse an initial term}
+  StartState1 := rcParseTerm;
+  if (StartState1 = ErrorState) then
+    Exit;
+  {if the current character is *not* a pipe character, no alternation
+   is present so return the start state of the initial term as our
+   start state}
+  if (FPosn^ <> '|') then
+    Result := StartState1
+  {otherwise, we need to parse another expr and join the two together
+   in the transition table}
+  else begin
+
+    {advance past the pipe}
+    inc(FPosn);
+    {the initial term's end state does not exist yet (although there
+     is a state in the term that points to it), so create it}
+    EndState1 := rcAddState(mtNone, #0, nil, UnusedState, UnusedState);
+    {for the OR construction we need a new initial state: it will
+     point to the initial term and the second just-about-to-be-parsed
+     expr}
+    OverallStartState := rcAddState(mtNone, #0, nil,
+                                    UnusedState, UnusedState);
+    {parse another expr}
+    StartState2 := rcParseExpr;
+    if (StartState2 = ErrorState) then
+      Exit;
+    {alter the state state for the overall expr so that the second
+     link points to the start of the second expr}
+    Result := rcSetState(OverallStartState, StartState1, StartState2);
+    {now set the end state for the initial term to point to the final
+     end state for the second expr and the overall expr}
+    rcSetState(EndState1, FStateCount, UnusedState);
+  end;
+end;
+{--------}
+function TRegexEngine.rcParseFactor : integer;
+var
+  StartStateAtom : integer;
+  EndStateAtom   : integer;
+  TempEndStateAtom : integer;
+  Int            : string;
+  n,m,nState     : integer;
+  i              : integer;
+begin
+  {assume the worst}
+  Result := ErrorState;
+  {first parse an atom}
+  StartStateAtom := rcParseAtom;
+  if (StartStateAtom = ErrorState) then
+    Exit;
+  {check for a closure operator}
+  case FPosn^ of
+    '?' : begin
+            {move past the ? operator}
+            inc(FPosn);
+            {the atom's end state doesn't exist yet, so create one}
+            EndStateAtom := rcAddState(mtNone, #0, nil,
+                                       UnusedState, UnusedState);
+            {create a new start state for the overall regex}
+            Result := rcAddState(mtNone, #0, nil,
+                                 StartStateAtom, EndStateAtom);
+            {make sure the new end state points to the next unused
+             state}
+            rcSetState(EndStateAtom, FStateCount, UnusedState);
+          end;
+    '*' : begin
+            {move past the * operator}
+            inc(FPosn);
+            {the atom's end state doesn't exist yet, so create one;
+             it'll be the start of the overall regex subexpression}
+            Result := rcAddState(mtNone, #0, nil,
+                                 NewFinalState, StartStateAtom);
+          end;
+    '+' : begin
+            {move past the + operator}
+            inc(FPosn);
+            {the atom's end state doesn't exist yet, so create one}
+            rcAddState(mtNone, #0, nil, NewFinalState, StartStateAtom);
+            {the start of the overall regex subexpression will be the
+             atom's start state}
+            Result := StartStateAtom;
+          end;
+    '{' : begin // {n,m}
+            {move past the brace }
+            inc(FPosn);
+
+            {Parse the value of n}
+            Int := '';
+            while not (FPosn^ in [',','}',#0]) do
+              begin
+              int := int+FPosn^;
+              inc(FPosn);
+              end;
+            if FPosn^ = #0 then exit; // No end-brace or comma -> invalid regex
+            if int <> '' then
+              n := StrToIntDef(Int,-2)
+            else
+              n := -1; // if n is 'empty', set it to -1
+            if n = -2 then exit; // Invalid value for n -> invalid RegEx
+
+            if FPosn^ <> '}' then
+              begin
+
+              {move past the , }
+              inc(FPosn);
+              {Parse the value of m}
+              Int := '';
+              while not (FPosn^ in ['}',#0]) do
+                begin
+                int := int+FPosn^;
+                inc(FPosn);
+                end;
+              if FPosn^ <> '}' then exit; // No end-brace -> invalid regex
+              if int <> '' then m := StrToIntDef(Int,-2)
+              else m := -1;
+              if m = -2 then exit; // Invalid RegEx
+              end
+            else
+              m := -3;
+
+            {move past the brace }
+            inc(FPosn);
+
+            if (n=0) and (m=-1) then
+            {the atom's end state doesn't exist yet, so create one;
+             it'll be the start of the overall regex subexpression}
+              Result := rcAddState(mtNone, #0, nil, NewFinalState, StartStateAtom)
+            else
+              begin
+              EndStateAtom := FStateCount-1;
+              TempEndStateAtom:=StartStateAtom;
+              for i := 1 to n-1 do
+                begin
+                TempEndStateAtom:=FStateCount;
+                for nState:=StartStateAtom to EndStateAtom do
+                  begin
+                  FStateTable[FStateCount]:=FStateTable[nState];
+                  if FStateTable[FStateCount].sdNextState1 in [StartStateAtom..EndStateAtom+1] then
+                    FStateTable[FStateCount].sdNextState1 := i+FStateTable[FStateCount].sdNextState1+ (EndStateAtom-StartStateAtom) *i;
+                  if FStateTable[FStateCount].sdNextState2 in [StartStateAtom..EndStateAtom+1] then
+                    FStateTable[FStateCount].sdNextState2 := i+FStateTable[FStateCount].sdNextState2 + (EndStateAtom-StartStateAtom) *i;
+                  inc(FStateCount);
+
+                  if FStateCount=length(FStateTable) then
+                    setlength(FStateTable,(FStateCount * 3) div 2);
+                  end;
+                end;
+
+            for i := n to m-1 do
+              begin
+              rcAddState(mtNone, #0, nil, NewFinalState, EndStateAtom+(EndStateAtom-StartStateAtom+1) * (m-1) + (m-n)+1);
+
+              TempEndStateAtom:=FStateCount;
+              for nState:=StartStateAtom to EndStateAtom do
+                begin
+                FStateTable[FStateCount]:=FStateTable[nState];
+                if FStateTable[FStateCount].sdNextState1 in [StartStateAtom..EndStateAtom+1] then
+                  FStateTable[FStateCount].sdNextState1 := i+FStateTable[FStateCount].sdNextState1+ (EndStateAtom-StartStateAtom) * i+(i-n+1);
+                if FStateTable[FStateCount].sdNextState2 in [StartStateAtom..EndStateAtom+1] then
+                  FStateTable[FStateCount].sdNextState2 := i+FStateTable[FStateCount].sdNextState2 + (EndStateAtom-StartStateAtom) * i+(i-n+1);
+                inc(FStateCount);
+
+                if FStateCount=length(FStateTable) then
+                  setlength(FStateTable,(FStateCount * 3) div 2);
+                end;
+              end;
+
+              if m = -1 then
+                rcAddState(mtNone, #0, nil, NewFinalState, TempEndStateAtom);
+
+              Result := StartStateAtom;
+              end;
+          end;
+
+  else
+    Result := StartStateAtom;
+  end;{case}
+end;
+{--------}
+function TRegexEngine.rcParseTerm : integer;
+var
+  StartState2 : integer;
+  EndState1   : integer;
+begin
+  {parse an initial factor, the state number returned will also be our
+   return state number}
+  Result := rcParseFactor;
+  if (Result = ErrorState) then
+    Exit;
+  {Note: we have to "break the grammar" here. We've parsed a regular
+         subexpression and we're possibly following on with another
+         regular subexpression. There's no nice operator to key off
+         for concatenation: we just have to know that for
+         concatenating two subexpressions, the current character will
+         be
+           - an open parenthesis
+           - an open square bracket
+           - an any char operator
+           - a character that's not a metacharacter
+         i.e., the three possibilities for the start of an "atom" in
+         our grammar}
+  if (FPosn^ = '(') or
+     (FPosn^ = '[') or
+     (FPosn^ = '.') or
+     ((FPosn^ <> #0) and not (FPosn^ in MetaCharacters)) then begin
+
+    {the initial factor's end state does not exist yet (although there
+     is a state in the term that points to it), so create it}
+    EndState1 := rcAddState(mtNone, #0, nil, UnusedState, UnusedState);
+    {parse another term}
+    StartState2 := rcParseTerm;
+    if (StartState2 = ErrorState) then begin
+      Result := ErrorState;
+      Exit;
+    end;
+    {join the first factor to the second term}
+    rcSetState(EndState1, StartState2, UnusedState);
+  end;
+end;
+
+procedure TRegexEngine.WriteTable;
+var i : integer;
+begin
+  for i := 0 to FStateCount-1 do with FStateTable[i] do
+//  FPC version 2.2.1 can not write enum-types. Disabled in fixes_2_2 only
+//    writeln('s:',i,' mt:',sdMatchType ,' ns1:',sdNextState1,' ns2:',sdNextState2,' char:',sdChar);
+    writeln('s:',i,' ns1:',sdNextState1,' ns2:',sdNextState2,' char:',sdChar);
+end;
+
+procedure TRegexEngine.DequeEnqueue(aValue: integer);
+begin
+  FList[FTail] := aValue;
+  inc(FTail);
+  if (FTail = FCapacity) then
+    FTail := 0
+  else if (FTail = FHead) then
+    DequeGrow;
+end;
+
+procedure TRegexEngine.DequePush(aValue: integer);
+begin
+  if (FHead = 0) then
+    FHead := FCapacity;
+  dec(FHead);
+  FList[FHead] := aValue;
+  if (FTail = FHead) then
+    DequeGrow;
+end;
+
+function TRegexEngine.DequePop: integer;
+begin
+  Result := FList[FHead];
+  inc(FHead);
+  if (FHead = FCapacity) then
+    FHead := 0;
+end;
+
+procedure TRegexEngine.DequeGrow;
+var
+  OldCount : integer;
+  i, j     : integer;
+begin
+  {grow the list by 50%}
+  OldCount := FCapacity;
+  FCapacity:=(OldCount * 3) div 2;
+  SetLength(FList,FCapacity);
+  {expand the data into the increased space, maintaining the deque}
+  if (FHead = 0) then
+    FTail := OldCount
+  else begin
+    j := FCapacity;
+    for i := pred(OldCount) downto FHead do begin
+      dec(j);
+      FList[j] := FList[i]
+    end;
+    FHead := j;
+  end;
+end;
+
+function TRegexEngine.rcReturnEscapeChar: AnsiChar;
+begin
+  case FPosn^ of
+    't' : Result := #9;
+    'n' : Result := #10;
+    'r' : Result := #13;
+    'f' : Result := #12;
+    'a' : Result := #7;
+  else
+    Result := FPosn^;
+  end;
+end;
+
+{--------}
+procedure TRegexEngine.rcSetIgnoreCase(aValue : boolean);
+begin
+  if (aValue <> FIgnoreCase) then begin
+    rcClear;
+    FIgnoreCase := aValue;
+  end;
+end;
+{--------}
+procedure TRegexEngine.rcSetRegexStr(const aRegexStr : string);
+begin
+  if (aRegexStr <> FRegexStr) then begin
+    rcClear;
+    FRegexStr := aRegexStr;
+  end;
+end;
+{--------}
+function TRegexEngine.rcSetState(aState     : integer;
+                                   aNextState1: integer;
+                                   aNextState2: integer) : integer;
+begin
+  Assert((0 <= aState) and (aState < FStateCount),
+         'trying to change an invalid state');
+
+  {get the state record and change the transition information}
+  FStateTable[aState].sdNextState1 := aNextState1;
+  FStateTable[aState].sdNextState2 := aNextState2;
+  Result := aState;
+end;
+{--------}
+procedure TRegexEngine.rcSetUpcase(aValue : TUpcaseFunc);
+begin
+  if not Assigned(aValue) then
+    FUpcase := SystemUpcase
+  else
+    FUpcase := aValue;
+end;
+
+procedure TRegexEngine.rcSetMultiLine(aValue: Boolean);
+begin
+  FMultiLine:=aValue;
+end;
+
+{====================================================================}
+
+end.

+ 80 - 1170
packages/regexpr/src/regexpr.pp

@@ -1,10 +1,9 @@
-{ $DEFINE DEBUG}
-
 {
 {
-    This unit implements basic regular expression support
+    This file is part of the Free Pascal packages library.
+    Copyright (c) 2008 by Joost van der Sluis, member of the
+    Free Pascal development team
 
 
-    This file is part of the Free Pascal run time library.
-    Copyright (c) 2000-2006 by Florian Klaempfland Carl Eric Codere
+    Compatibility unit for the old regexpr unit.
 
 
     See the file COPYING.FPC, included in this distribution,
     See the file COPYING.FPC, included in this distribution,
     for details about the copyright.
     for details about the copyright.
@@ -14,1194 +13,105 @@
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 
 
  **********************************************************************}
  **********************************************************************}
-{.$ define DEBUG}
-
-(*
-  - newline handling (uses all known formats of ASCII, #10,#13,#13#10 and #$85
-
-  TODO:
-     - correct backtracking, for example in (...)*
-     - full | support (currently requires to put all items with | operator
-        between parenthesis (in a group) to take care over order priority.
-          Therefore the following would work: (foo)|(nofoo) but not
-          foo|nofoo
-     - getting substrings and using substrings with \1 etc.
-     - do we treat several multiline characters in a row as a single
-        newline character for $ and ^?
-*)
-
-{$IFDEF FPC}
-{$mode objfpc}
-{$ENDIF}
-
-{** @abstract(Regular expression unit)
-
-    This unit implements a basic regular expression parser that mostly conforms
-    to the POSIX extended-regular expression syntax. It also supports standard
-    escape characters for patterns (as defined in PERL).
-}
-unit regexpr;
-
-  interface
-
-    { the following declarions are only in the interface because }
-    { some procedures return pregexprentry but programs which   }
-    { use this unit shouldn't access this data structures        }
-    type
-       tcharset = set of char;
-       tregexprentrytype = (ret_charset,ret_or,
-          ret_illegalend,ret_backtrace,ret_startline,
-          ret_endline,ret_pattern);
-
-       pregexprentry = ^tregexprentry;
-       tregexprentry = record
-          next,nextdestroy : pregexprentry;
-          case typ : tregexprentrytype of
-             ret_charset : (chars : tcharset; elsepath : pregexprentry);
-             {** This is a complete pattern path ()+ , ()* or ()?, n,m }
-             ret_pattern: (pattern: pregexprentry; minoccurs: integer; maxoccurs: integer; alternative : pregexprentry);
-       end;
-
-       tregexprflag = (
-         ref_singleline,
-         {** This indicates that a start of line is either the
-             start of the pattern or a linebreak. }
-         ref_multiline,
-         {** The match will be done in a case-insensitive way
-              according to US-ASCII character set. }
-         ref_caseinsensitive);
-       tregexprflags = set of tregexprflag;
-
-       TRegExprEngine = record
-          Data : pregexprentry;
-          DestroyList : pregexprentry;
-          Flags : TRegExprFlags;
-       end;
-
-     const
-        cs_allchars : tcharset = [#0..#255];
-        cs_wordchars : tcharset = ['A'..'Z','a'..'z','_','0'..'9'];
-        cs_newline : tcharset = [#10];
-        cs_digits : tcharset = ['0'..'9'];
-        cs_whitespace : tcharset = [' ',#9];
-
-     var
-        { these are initilized in the init section of the unit }
-        cs_nonwordchars : tcharset;
-        cs_nondigits : tcharset;
-        cs_nonwhitespace : tcharset;
-
-     { the following procedures can be used by units basing }
-     { on the regexpr unit                                  }
-
-     {** From a regular expression, compile an encoded version of the regular expression.
-
-         @param(regexpr Regular expression to compile)
-         @param(flags Flags relating to the type of parsing that will occur)
-         @param(RegExprEngine The actual encoded version of the regular expression)
-         @returns(true if success, otherwise syntax error in compiling regular expression)
-     }
-     function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags;var RegExprEngine: TRegExprEngine): boolean;
-
-{$IFDEF FPC}
-    {** Backward compatibility routine }
-     function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags): TREGExprEngine;
-{$ENDIF}
-
-     {** Frees all up resources used for the encoded regular expression }
-     procedure DestroyRegExprEngine(var regexpr : TRegExprEngine);
-
-     {** @abstract(Matches a regular expression)
-
-        @param(RegExprEngine The actual compiled regular expression to match against)
-        @param(p The text to search for for a match)
-        @param(index zero-based index to the start of the match -1 if no match in p)
-        @param(len length of the match)
-        @returns(true if there was a match, otherwise false)
-     }
-     function RegExprPos(RegExprEngine : TRegExprEngine;p : pchar;var index,len : longint) : boolean;
-
-     function RegExprReplaceAll(RegExprEngine : TRegExprEngine;const src,newstr : ansistring;var dest : ansistring) : sizeint;
-
-     { This function Escape known regex chars and place the result on Return. If something went wrong the
-       function will return false. }
-     function RegExprEscapeStr (const S : string) : string;
-
-  implementation
-
-{$ifdef DEBUG}
-     procedure writecharset(c : tcharset);
-
-       var
-          b : byte;
-
-       begin
-          for b:=20 to 255 do
-            if chr(b) in c then
-              write(chr(b));
-          writeln;
-       end;
-
-
-    const
-
-      typ2str : array[tregexprentrytype] of string =
-      (
-        'ret_charset',
-        'ret_or',
-        'ret_illegalend',
-        'ret_backtrace',
-        'ret_startline',
-        'ret_endline',
-        'ret_pattern'
-      );
-
-
-     { Dumps all the next elements of a tree }
-     procedure dumptree(space: string; regentry: pregexprentry);
-      begin
-        while assigned(regentry) do
-          begin
-            WriteLn(space+'------- Node Type ',typ2str[regentry^.typ]);
-            if (regentry^.typ = ret_charset) then
-              WriteCharSet(regentry^.chars);
-            { dump embedded pattern information }
-            if regentry^.typ = ret_pattern then
-               begin
-                 dumptree(space+#9,regentry^.pattern);
-                 WriteLn(space+#9,' --- Alternative nodes ');
-                 if assigned(regentry^.alternative) then
-                   dumptree(space+#9#9,regentry^.alternative);
-               end;
-            if regentry^.typ = ret_startline then
-               dumptree(space+#9,regentry^.pattern);
-            regentry:=regentry^.next;
-          end;
-      end;
-{$endif DEBUG}
-
-
-     {** Determines the length of a pattern, including sub-patterns.
-
-         It goes through the nodes and returns the pattern length
-         between the two, using minOccurs as required.
-
-         Called recursively.
-     }
-     function patlength(hp1: pregexprentry): integer;
-       var
-        count: integer;
-        hp: pregexprentry;
-       begin
-        count:=0;
-        if hp1^.typ=ret_pattern then
-            hp:=hp1^.pattern
-        else
-            hp:=hp1;
-        { now go through all chars and get the length
-          does not currently take care of embedded patterns
-        }
-        while assigned(hp) do
-          begin
-            if hp^.typ = ret_pattern then
-              begin
-                inc(count,patlength(hp));
-              end
-            else
-            if hp^.typ = ret_charset then
-               inc(count);
-            hp:=hp^.next;
-          end;
-        if hp1^.typ=ret_pattern then
-          begin
-            count:=hp1^.minOccurs*count;
-          end;
-         patlength:=count;
-       end;
-
-     function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags; var RegExprEngine:TRegExprEngine) : boolean;
-
-       var
-          first : pregexprentry;
-
-       procedure doregister(p : pregexprentry);
 
 
-         begin
-           p^.nextdestroy:=first;
-           first:=p;
-         end;
+unit RegExpr;
 
 
-       var
-          currentpos : pchar;
-          error : boolean;
+{$mode objfpc}{$H+}
 
 
-       procedure readchars(var chars: tcharset);
+interface
 
 
-         var
-            c1 : char;
+uses
+  Regex;
 
 
-         begin
-            chars:=[];
-            case currentpos^ of
-               #0:
-                  exit;
-               '.':
-                  begin
-                     inc(currentpos);
-                     chars:=cs_allchars-cs_newline;
-                  end;
-               '\':
-                  begin
-                     inc(currentpos);
-                     case currentpos^ of
-                        #0:
-                          begin
-                             error:=true;
-                             exit;
-                          end;
-                        't':
-                           begin
-                              inc(currentpos);
-                              chars:=[#9];
-                           end;
-                        'n':
-                           begin
-                              inc(currentpos);
-                              chars:=[#10];
-                           end;
-                        'r':
-                           begin
-                              inc(currentpos);
-                              chars:=[#13];
-                           end;
-                        'd':
-                          begin
-                             inc(currentpos);
-                             chars:=cs_digits;
-                          end;
-                        'D':
-                          begin
-                             inc(currentpos);
-                             chars:=cs_nondigits;
-                          end;
-                        's':
-                          begin
-                             inc(currentpos);
-                             chars:=cs_whitespace;
-                          end;
-                        'S':
-                          begin
-                             inc(currentpos);
-                             chars:=cs_nonwhitespace;
-                          end;
-                        'w':
-                           begin
-                              inc(currentpos);
-                              chars:=cs_wordchars;
-                           end;
-                        'W':
-                           begin
-                              inc(currentpos);
-                              chars:=cs_nonwordchars;
-                           end;
-                        'f' :
-                            begin
-                              inc(currentpos);
-                              chars:= [#12];
-                            end;
-                        'a' :
-                            begin
-                              inc(currentpos);
-                              chars:= [#7];
-                            end;
-                         else
-                           begin { Some basic escaping...}
-                              chars := [currentpos^];
-                              inc (currentpos);
-                              {error:=true;
-                              exit;}
-                           end;
-                     end;
-                  end;
-               else
-                 begin
-                    if ref_caseinsensitive in flags then
-                       c1:=upcase(currentpos^)
-                    else
-                       c1:=currentpos^;
+type
+   tregexprflag = (
+     ref_singleline,
+     {** This indicates that a start of line is either the
+         start of the pattern or a linebreak. }
+     ref_multiline,
+     {** The match will be done in a case-insensitive way
+          according to US-ASCII character set. }
+     ref_caseinsensitive);
+   tregexprflags = set of tregexprflag;
 
 
-                    inc(currentpos);
-                    if currentpos^='-' then
-                      begin
-                         inc(currentpos);
-                         if currentpos^=#0 then
-                           begin
-                              error:=true;
-                              exit;
-                           end;
-                         if ref_caseinsensitive in flags then
-                           chars:=[c1..upcase(currentpos^)]
-                         else
-                           chars:=[c1..currentpos^];
-                         inc(currentpos);
-                      end
-                    else
-                      chars:=[c1];
-                 end;
-            end;
-         end;
+   TRegExprEngine = TRegexEngine;
 
 
+function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags;var RegExprEngine: TRegExprEngine): boolean;
+function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags): TREGExprEngine;
+procedure DestroyRegExprEngine(var regexpr : TRegExprEngine);
 
 
-       procedure readcharset(var charset: tcharset);
+function RegExprPos(RegExprEngine : TRegExprEngine;p : pchar;var index,len : longint) : boolean;
+function RegExprReplaceAll(RegExprEngine : TRegExprEngine;const src,newstr : ansistring;var dest : ansistring) : sizeint;
 
 
-         var
-           chars: tcharset;
-         begin
-            charset:=[];
-            case currentpos^ of
-               #0:
-                  exit;
-               '[':
-                  begin
-                     inc(currentpos);
-                     while currentpos^<>']' do
-                       begin
-                          if currentpos^='^' then
-                            begin
-                               inc(currentpos);
-                               readchars(chars);
-                               charset:=charset+(cs_allchars-chars);
-                            end
-                          else
-                            begin
-                              readchars(chars);
-                              charset:=charset+chars;
-                            end;
-                          if error or (currentpos^=#0) then
-                            begin
-                               error:=true;
-                               exit;
-                            end;
-                       end;
-                     inc(currentpos);
-                  end;
-               '^':
-                  begin
-                     inc(currentpos);
-                     readchars(chars);
-                     charset:=cs_allchars-chars;
-                  end;
-               else
-                  begin
-                    readchars(chars);
-                    charset:=chars;
-                  end;
-            end;
-         end;
+function RegExprEscapeStr (const S : string) : string;
 
 
+implementation
 
 
-       (* takes care of parsing the {n}, {n,} and {n,m} regular expression
-          elements. In case of error, sets error to true and returns false,
-          otherwise returns true and set minoccurs and maxoccurs accordingly
-          (-1 if not present). *)
-       function parseoccurences(var currentpos: pchar; var minoccurs,maxoccurs: integer): boolean;
-         var
-          minOccursString: string;
-          maxOccursString: string;
-         begin
-           parseoccurences:=false;
-           minOccurs:=-1;
-           maxOccurs:=-1;
-           inc(currentpos);
-           minOccursString:='';
-           if currentPos^ = #0 then
-             begin
-               error:=true;
-               exit;
-             end;
-            while (currentpos^<>#0) and (currentpos^ in ['0'..'9']) do
-                begin
-                   minOccursString:=minOccursString+currentPos^;
-                   inc(currentpos);
-                end;
-            if length(minOccursString) = 0 then
-                begin
-                  error:=true;
-                  exit;
-                end;
-            Val(minOccursString,minOccurs);
-            { possible cases here: commad or end bracket }
-            if currentpos^= '}' then
-              begin
-                inc(currentpos);
-                maxOccurs:=minOccurs;
-                parseoccurences:=true;
-                exit;
-              end;
-            if currentpos^= ',' then
-              begin
-                maxOccursString:='';
-                inc(currentpos);
-                while (currentpos^<>#0) and (currentpos^ in ['0'..'9']) do
-                begin
-                   maxOccursString:=maxOccursString+currentPos^;
-                   inc(currentpos);
-                end;
-                if currentpos^= '}' then
-                 begin
-                   { If the length of the string is zero, then there is
-                     no upper bound. }
-                   if length(maxOccursString) > 0 then
-                      Val(maxOccursString,maxOccurs)
-                   else
-                      maxOccurs:=high(integer);
-                   inc(currentpos);
-                   parseoccurences:=true;
-                   exit;
-                 end;
-              end;
-              error:=true;
-         end;
+function GenerateRegExprEngine(regexpr: pchar; flags: tregexprflags;
+  var RegExprEngine: TRegExprEngine): boolean;
+var ErrorPos  : Integer;
+    ErrorCode : TRegexError;
 
 
+begin
+  RegExprEngine := TRegExprEngine.Create(regexpr);
+  if ref_multiline in flags then RegExprEngine.MultiLine:=True;
+  if ref_caseinsensitive in flags then RegExprEngine.IgnoreCase:=True;
+  Result := RegExprEngine.Parse(ErrorPos,ErrorCode);
+end;
 
 
-       function parseregexpr(next,elsepath : pregexprentry) : pregexprentry;
-
-         var
-            hp : pregexprentry;
-            minOccurs,maxOccurs: integer;
-            hp3: pregexprentry;
-            cs : tcharset;
-            chaining : ^pregexprentry;
-
-         begin
-            chaining:=nil;
-            parseregexpr:=nil;
-            elsepath:=nil;
-            if error then
-              exit;
-            { this dummy allows us to redirect the elsepath later }
-{            new(ep);
-            doregister(ep);
-            ep^.typ:=ret_charset;
-            ep^.chars:=[];
-            ep^.elsepath:=elsepath;
-            elsepath:=ep;}
-            while true do
-              begin
-                 if error then
-                   exit;
-                 case currentpos^ of
-                    '(':
-                       begin
-                          inc(currentpos);
-                          hp:=parseregexpr(nil,nil);
-                          { Special characters after the bracket }
-                           if error then
-                              exit;
-                          if currentpos^<>')' then
-                            begin
-                               error:=true;
-                               exit;
-                            end;
-                          inc(currentpos);
-                            case currentpos^ of
-                            '*':
-                               begin
-                                  inc(currentpos);
-                                  new(hp3);
-                                  doregister(hp3);
-                                  hp3^.typ:=ret_pattern;
-                                  hp3^.alternative:=nil;
-                                  hp3^.pattern:=hp;
-                                  hp3^.elsepath:=elsepath;
-                                  hp3^.minoccurs:=0;
-                                  hp3^.maxoccurs:=high(integer);
-                                  hp3^.next:=nil;
-                                  if assigned(chaining) then
-                                    chaining^:=hp3
-                                  else
-                                    parseregexpr:=hp3;
-                                  chaining:=@hp3^.next;
-                               end;
-                            '+':
-                               begin
-                                  inc(currentpos);
-                                  new(hp3);
-                                  doregister(hp3);
-                                  hp3^.typ:=ret_pattern;
-                                  hp3^.alternative:=nil;
-                                  hp3^.pattern:=hp;
-                                  hp3^.elsepath:=elsepath;
-                                  hp3^.minoccurs:=1;
-                                  hp3^.maxoccurs:=high(integer);
-                                  hp3^.next:=nil;
-                                  if assigned(chaining) then
-                                    chaining^:=hp3
-                                  else
-                                    parseregexpr:=hp3;
-                                  chaining:=@hp3^.next;
-                               end;
-
-                            '?':
-                               begin
-                                  inc(currentpos);
-                                  new(hp3);
-                                  doregister(hp3);
-                                  hp3^.typ:=ret_pattern;
-                                  hp3^.alternative:=nil;
-                                  hp3^.pattern:=hp;
-                                  hp3^.elsepath:=elsepath;
-                                  hp3^.minoccurs:=0;
-                                  hp3^.maxoccurs:=1;
-                                  hp3^.next:=nil;
-                                  if assigned(chaining) then
-                                    chaining^:=hp3
-                                  else
-                                    parseregexpr:=hp3;
-                                  chaining:=@hp3^.next;
-                               end;
-                            '{':
-                               begin
-                                 if not parseOccurences(currentPos,minOccurs,maxOccurs) then
-                                   exit;
-                                  inc(currentpos);
-                                  new(hp3);
-                                  doregister(hp3);
-                                  hp3^.typ:=ret_pattern;
-                                  hp3^.alternative:=nil;
-                                  hp3^.pattern:=hp;
-                                  hp3^.elsepath:=elsepath;
-                                  hp3^.minoccurs:=minOccurs;
-                                  hp3^.maxoccurs:=maxOccurs;
-                                  hp3^.next:=nil;
-                                  if assigned(chaining) then
-                                    chaining^:=hp3
-                                  else
-                                    parseregexpr:=hp3;
-                                  chaining:=@hp3^.next;
-                               end;
-                            else
-                              begin
-                                { go to end of this list - always the
-                                  last next used }
-(*
-                                hp3:=hp;
-                                while assigned(hp3^.next) do
-                                  begin
-                                    hp3:=hp3^.next;
-                                  end;
-                                if assigned(chaining) then
-                                   chaining^:=hp
-                                else
-                                   parseregexpr:=hp;
-                                chaining:=@hp3^.next;*)
-                                  new(hp3);
-                                  doregister(hp3);
-                                  hp3^.typ:=ret_pattern;
-                                  hp3^.alternative:=nil;
-                                  hp3^.pattern:=hp;
-                                  hp3^.elsepath:=elsepath;
-                                  hp3^.minoccurs:=1;
-                                  hp3^.maxoccurs:=1;
-                                  hp3^.next:=nil;
-                                  if assigned(chaining) then
-                                    chaining^:=hp3
-                                  else
-                                    parseregexpr:=hp3;
-                                  chaining:=@hp3^.next;
-
-                              end;
-                          end;
-                       end;
-{ This is only partially implemented currently, as the terms before
-  the | character must be grouped together with parenthesis, which
-  is also compatible with other regular expressions.
-}
-                    '|':
-                       begin
-{$ifdef DEBUG}
-                          writeln('Creating or entry');
-{$endif DEBUG}
-                          if (not assigned (hp3)) then
-                            begin
-                              error:=true;
-                              exit;
-                            end;
-                          if (hp3^.typ <> ret_pattern) then
-                            begin
-                              error:=true;
-                              exit;
-                            end;
-                          while currentpos^='|' do
-                            begin
-                              inc(currentpos);
-                              if currentpos^=#0 then
-                                begin
-                                   error:=true;
-                                   exit;
-                                end;
-                              { always put the longest pattern first, so
-                                swap the trees as necessary.
-                              }
-                              hp := parseregexpr (next, elsepath);
-                              if patlength(hp) > patlength(hp3^.pattern) then
-                                begin
-                                  hp3^.alternative:=hp3^.pattern;
-                                  hp3^.pattern:=hp;
-                                end
-                              else
-                                 hp3^.alternative:=hp;
-                            end;
-                       end;
-                    ')':
-                       exit;
-                    '^':
-                       begin
-                          inc(currentpos);
-                          hp:=parseregexpr(nil,nil);
-                          { Special characters after the bracket }
-                           if error then
-                              exit;
-                           new(hp3);
-                           doregister(hp3);
-                           hp3^.typ:=ret_startline;
-                           hp3^.pattern:=hp;
-                           hp3^.elsepath:=elsepath;
-                           hp3^.next:=nil;
-                           if assigned(chaining) then
-                              chaining^:=hp3
-                           else
-                              parseregexpr:=hp3;
-                           chaining:=@hp3^.next;
-                       end;
-                    '$':
-                       begin
-                          inc(currentpos);
-                          new(hp);
-                          doregister(hp);
-                          hp^.typ:=ret_endline;
-                          hp^.elsepath:=elsepath;
-                          hp^.next:=nil;
-                          if assigned(chaining) then
-                            chaining^:=hp
-                          else
-                            parseregexpr:=hp;
-                          chaining:=@hp^.next;
-                       end;
-                    #0:
-                       exit;
-                    else
-                      begin
-                         readcharset(cs);
-                         if error then
-                           exit;
-                         case currentpos^ of
-                            '*':
-                               begin
-                                  inc(currentpos);
-                                  new(hp);
-                                  doregister(hp);
-                                  hp^.typ:=ret_charset;
-                                  hp^.chars:=cs;
-                                  hp^.elsepath:=nil;
-                                  hp^.next:=nil;
-                                  new(hp3);
-                                  doregister(hp3);
-                                  hp3^.typ:=ret_pattern;
-                                  hp3^.alternative:=nil;
-                                  hp3^.pattern:=hp;
-                                  hp3^.elsepath:=elsepath;
-                                  hp3^.minoccurs:=0;
-                                  hp3^.maxoccurs:=high(integer);
-                                  hp3^.next:=nil;
-                                  if assigned(chaining) then
-                                    chaining^:=hp3
-                                  else
-                                    parseregexpr:=hp3;
-                                  chaining:=@hp3^.next;
-                               end;
-                            '+':
-                               begin
-                                  inc(currentpos);
-                                  new(hp);
-                                  doregister(hp);
-                                  hp^.typ:=ret_charset;
-                                  hp^.chars:=cs;
-                                  hp^.elsepath:=nil;
-                                  hp^.next:=nil;
-
-                                  new(hp3);
-                                  doregister(hp3);
-                                  hp3^.typ:=ret_pattern;
-                                  hp3^.alternative:=nil;
-                                  hp3^.pattern:=hp;
-                                  hp3^.elsepath:=elsepath;
-                                  hp3^.minoccurs:=1;
-                                  hp3^.maxoccurs:=high(integer);
-                                  hp3^.next:=nil;
-                                  if assigned(chaining) then
-                                    chaining^:=hp3
-                                  else
-                                    parseregexpr:=hp3;
-                                  chaining:=@hp3^.next;
-                               end;
-                            '?':
-                               begin
-                                  inc(currentpos);
-                                  new(hp);
-                                  doregister(hp);
-                                  hp^.typ:=ret_charset;
-                                  hp^.chars:=cs;
-                                  hp^.elsepath:=nil;
-                                  hp^.next:=nil;
-
-                                  new(hp3);
-                                  doregister(hp3);
-                                  hp3^.typ:=ret_pattern;
-                                  hp3^.pattern:=hp;
-                                  hp3^.alternative:=nil;
-                                  hp3^.elsepath:=elsepath;
-                                  hp3^.minoccurs:=0;
-                                  hp3^.maxoccurs:=1;
-                                  hp3^.next:=nil;
-                                  if assigned(chaining) then
-                                    chaining^:=hp3
-                                  else
-                                    parseregexpr:=hp3;
-                                  chaining:=@hp3^.next;
-                               end;
-                            '{':
-                               begin
-                                 if not parseOccurences(currentPos,minOccurs,maxOccurs) then
-                                   exit;
-                                  new(hp);
-                                  doregister(hp);
-                                  hp^.typ:=ret_charset;
-                                  hp^.chars:=cs;
-                                  hp^.elsepath:=nil;
-                                  hp^.next:=nil;
-
-                                  new(hp3);
-                                  doregister(hp3);
-                                  hp3^.typ:=ret_pattern;
-                                  hp3^.alternative:=nil;
-                                  hp3^.pattern:=hp;
-                                  hp3^.elsepath:=elsepath;
-                                  hp3^.minoccurs:=minOccurs;
-                                  hp3^.maxoccurs:=maxOccurs;
-                                  hp3^.next:=nil;
-                                  if assigned(chaining) then
-                                    chaining^:=hp3
-                                  else
-                                    parseregexpr:=hp3;
-                                  chaining:=@hp3^.next;
-
-                                end;
-                            else
-                               { Normal character }
-                               begin
-                                  new(hp);
-                                  doregister(hp);
-                                  hp^.typ:=ret_charset;
-                                  hp^.chars:=cs;
-                                  hp^.elsepath:=elsepath;
-                                  hp^.next:=next;
-                                  if assigned(chaining) then
-                                    chaining^:=hp
-                                  else
-                                    parseregexpr:=hp;
-                                  chaining:=@hp^.next;
-                                  continue;
-                               end;
-                           { This was a pattern }
-                         end; { END CASE }
-                      end;
-                 end;
-              end;
-         end;
-
-       var
-          endp : pregexprentry;
-
-       begin
-          GenerateRegExprEngine:=false;
-          RegExprEngine.Data:=nil;
-          RegExprEngine.DestroyList:=nil;
-          if regexpr=nil then
-            exit;
-          first:=nil;
-          if (ref_singleline in flags) and (ref_multiline in flags) then
-            exit;
-          currentpos:=regexpr;
-          GenerateRegExprEngine:=true;
-          error:=false;
-          new(endp);
-          doregister(endp);
-          endp^.typ:=ret_illegalend;
-          RegExprEngine.flags:=flags;
-          RegExprEngine.Data:=parseregexpr(nil,endp);
-{$IFDEF DEBUG}
-          writeln('========== Generating tree ============');
-          dumptree('',RegExprEngine.Data);
-{$ENDIF}
-          RegExprEngine.DestroyList:=first;
-          if error or (currentpos^<>#0) then
-            begin
-              GenerateRegExprEngine:=false;
-              DestroyRegExprEngine(RegExprEngine);
-            end;
-       end;
-
-
-{$IFDEF FPC}
-    function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags): TREGExprEngine;
-    var
-     r: TRegExprEngine;
-    begin
-      GenerateRegExprEngine(regexpr,flags,r);
-      GenerateRegExprEngine:=r;
-    end;
-{$ENDIF}
-
-    procedure DestroyRegExprEngine(var regexpr : TRegExprEngine);
-
-       var
-          hp : pregexprentry;
-
-       begin
-          hp:=regexpr.DestroyList;
-          while assigned(hp) do
-            begin
-               regexpr.DestroyList:=hp^.nextdestroy;
-               dispose(hp);
-               hp:=regexpr.DestroyList;
-            end;
-          regexpr.Data:=nil;
-          regexpr.DestroyList:=nil;
-       end;
-
-     function RegExprPos(regexprengine : TRegExprEngine;p : pchar;var index,len : longint) : boolean;
-
-       var
-          lastpos : pchar;
-          firstpos: pchar;
+function GenerateRegExprEngine(regexpr: pchar; flags: tregexprflags
+  ): TREGExprEngine;
+  
+var r: TRegExprEngine;
 
 
-       { Does the actual search of the data - return true if the term was found }
-       function dosearch(regexprentry : pregexprentry;pos : pchar) : boolean;
-       var
-          found: boolean;
-          checkvalue: boolean;
-          savedpos: pchar;
-          counter: word;
+begin
+  GenerateRegExprEngine(regexpr,flags,r);
+  GenerateRegExprEngine:=r;
+end;
 
 
-         begin
-            dosearch:=false;
-            while true do
-              begin
-                 {$IFDEF Debug}
-                 writeln('Entering ',typ2str[regexprentry^.typ]);
-                 writeln('Pattern length ',patlength(regexprentry));
-                 {$ENDIF Debug}
-                 case regexprentry^.typ of
-                    ret_endline:
-                      begin
-                         { automatically a match! }
-                         if pos^ = #0 then
-                            begin
-                              dosearch:=true;
-                              exit;
-                            end;
-                         if ref_multiline in regexprengine.flags then
-                            begin
-                              { Supports DOS/Commodore/UNIX/IBM Mainframe line formats }
-                              { avoid reading invalid memory also }
-                                  if (pos^=#13) and ((pos+1)^=#10) then
-                                    begin
-                                      regexprentry:=regexprentry^.next;
-                                    end
-                                  else
-                                  if (pos^=#$85) or (pos^=#10) or ((pos^=#13) and ((pos-1) >= firstpos) and ((pos-1)^ <> #10)) then
-                                    begin
-                                       regexprentry:=regexprentry^.next;
-                                    end
-                                  else
-                                    begin
-                                       dosearch:=false;
-                                       exit;
-                                    end;
-                             end
-                           else
-                             exit;
-                      end;
-                    ret_pattern:
-                      begin
-                         found:=false;
-                         { Take care of occurences here }
-                         savedpos:=pos;
-                         counter:=0;
-                         repeat
-                           found:=dosearch(regexprentry^.pattern,pos);
-                           if not found then
-                            break;
-                           pos:=lastpos;
-                           inc(counter);
-                         until (not found) or (counter >= regexprentry^.maxoccurs) or (pos^= #0);
+procedure DestroyRegExprEngine(var regexpr: TRegExprEngine);
+begin
+  regexpr.Free;
+end;
 
 
-                         if counter = 0 then
-                           begin
-                             { If there was no occurence and the minimum occurence is > 0 then
-                               problem.
-                             }
-                             if (regexprentry^.minoccurs > 0) then
-                              begin
-                                dosearch:=false;
-                                { verify alternative path as required }
-                                if assigned(regexprentry^.alternative) then
-                                  begin
-                                     dosearch:=dosearch(regexprentry^.alternative,savedpos);
-                                     exit;
-                                  end;
-                                exit;
-                              end;
-                             dosearch:=true;
-                             lastpos:=savedpos;
-                           end
-                         else
-                           { found }
-                           begin
-                              { Possible choices :
-                                 - found and (counter >= minOccurences) and (counter =< maxOccurences) = true
-                                 - found and (counter < minOccurences) or (counter > maxOccurences) = false
-                              }
-                              if (counter < regexprentry^.minoccurs) or (counter > regexprentry^.maxoccurs) then
-                                begin
-                                  dosearch:=false;
-                                  exit;
-                                end;
-                              dosearch:=true;
-                              { if all matches were found, and the current position
-                                points to zero (processed all characters) }
-                              if pos^=#0 then
-                                begin
-                                  dosearch:=true;
-                                  exit;
-                                end;
-                           end;
-                         { If we are that means the matches were valid, go to next element to match
-                         }
-                         regexprentry:=regexprentry^.next;
-                         if (counter = 0) and not assigned(regexprentry) then
-                           exit;
-                      end;
-                    ret_startline:
-                      begin
-                         checkvalue:=pos=firstpos;
-                         if ref_multiline in regexprengine.flags then
-                           begin
-                             { Supports DOS/Commodore/UNIX/IBM Mainframe line formats }
-                             { avoid reading invalid memory also }
-                             if
-                                 (
-                                   ((pos-1) >= firstpos) and ((pos-1)^=#$85)
-                                  )
-                              or
-                                 (
-                                   ((pos-1) >= firstpos) and ((pos-1)^=#10)
-                                  )
-                              or
-                                (
-                                 ((pos-1) >= firstpos) and ((pos-1)^=#13) and
-                                 ((pos)^ <> #10)
-                                )
-                             then
-                               begin
-                                 checkvalue:=true;
-                               end;
-                           end;
-                          if checkvalue then
-                            begin
-                              dosearch:=dosearch(regexprentry^.pattern,pos);
-                              regexprentry:=regexprentry^.next;
-                              if not dosearch then
-                                exit;
-                              pos:=lastpos;
-                            end
-                          else
-                            begin
-                              dosearch:=false;
-                              exit;
-                            end;
-                      end;
-                    ret_charset:
-                      begin
-                         if (pos^ in regexprentry^.chars) or
-                           ((ref_caseinsensitive in regexprengine.flags) and
-                            (upcase(pos^) in regexprentry^.chars)) then
-                           begin
-{$ifdef DEBUG}
-                              writeln('Found matching: ',pos^);
-{$endif DEBUG}
-                              regexprentry:=regexprentry^.next;
-                              inc(pos);
-                           end
-                         else
-                           begin
-{$ifdef DEBUG}
-                              writeln('Found unmatching: ',pos^);
-{$endif DEBUG}
-                              exit;
-                           end;
-                      end;
-                    ret_backtrace:
-                      begin
-{$ifdef DEBUG}
-                         writeln('Starting backtrace');
-{$endif DEBUG}
-                         if dosearch(regexprentry^.next,pos) then
-                           begin
-                              dosearch:=true;
-                              exit;
-                           end
-                         else if dosearch(regexprentry^.elsepath,pos) then
-                           begin
-                              dosearch:=true;
-                              exit;
-                           end
-                         else
-                           exit;
-                      end;
-                 end;
-                 lastpos:=pos;
-                 if regexprentry=nil then
-                   begin
-                      dosearch:=true;
-                      exit;
-                   end;
-                 if regexprentry^.typ=ret_illegalend then
-                   exit;
-                 { end of string, and we were expecting an end of string }
-                 if (pos^=#0) and (regexprentry^.typ = ret_endline) and
-                    (not assigned(regexprentry^.next)) then
-                   begin
-                     dosearch:=true;
-                     exit;
-                   end;
-                 if pos^=#0 then
-                   exit;
-              end;
-         end;
+function RegExprPos(RegExprEngine: TRegExprEngine; p: pchar; var index,
+  len: longint): boolean;
+begin
+  Len := 1;
+  Result := RegExprEngine.MatchString(p,index,len);
+  Len := Len - index;
+  Dec(Index);
+end;
+
+function RegExprReplaceAll(RegExprEngine: TRegExprEngine; const src,
+  newstr: ansistring; var dest: ansistring): sizeint;
+begin
+  result := RegExprEngine.ReplaceAllString(src,newstr,Dest);
+end;
 
 
-       begin
-          RegExprPos:=false;
-          index:=0;
-          len:=0;
-          firstpos:=p;
-          if regexprengine.Data=nil then
-            exit;
-          while p^<>#0 do
-            begin
-               if dosearch(regexprengine.Data,p) then
-                 begin
-                    len:=lastpos-p;
-                    RegExprPos:=true;
-                    exit;
-                 end
-               else
-                 begin
-                    inc(p);
-                    inc(index);
-                 end;
-            end;
-          index:=-1;
-       end;
+function RegExprEscapeStr(const S: string): string;
+var
+  i, len   : integer;
+  s1: string;
+begin
+  RegExprEscapeStr:= '';
+  s1:='';
+  if (S = '') then
+   exit;
 
 
+  len := Length (S);
 
 
-  function RegExprReplaceAll(RegExprEngine : TRegExprEngine;const src,newstr : ansistring;var dest : ansistring) : sizeint;
-    var
-      index,len : longint;
-      pos,lastpos : pchar;
-      first : boolean;
-      oldlength : PtrInt;
+  for i := 1 to len do
     begin
     begin
-      pos:=pchar(src);
-      lastpos:=nil;
-      first:=true;
-      Result:=0;
-      { estimate some length }
-      SetLength(dest,length(src)+((length(src) div 10)*length(newstr)));
-      while RegExprPos(RegExprEngine,pos,index,len) do
+      if (S [i] in ['(','|', '.', '*', '?', '^', '$', '-', '[', '{', '}', ']', ')', '\']) then
         begin
         begin
-          inc(pos,index);
-          if (lastpos = nil) or (pos>lastpos) then
-            begin
-              if lastpos = nil then lastpos := pchar(src);
-              { copy skipped part }
-
-              { because we cheat with SetLength a SetLength(...,0) isn't what we want
-                so we've to trick at the first SetLength call
-              }
-              if first then
-                begin
-                  SetLength(dest,(pos-lastpos));
-                  { cast dest here because it is already unified }
-                  move(lastpos^,char(dest[1]),pos-lastpos);
-                end
-              else
-                begin
-                  oldlength:=Length(dest);
-                  SetLength(dest,oldlength+(pos-lastpos));
-                  move(lastpos^,char(dest[oldlength+1]),pos-lastpos);
-                end;
-              first:=false;
-            end;
-          { found }
-          inc(Result);
-          dest:=dest+newstr;
-          inc(pos,len);
-          lastpos:=pos;
+          s1 := s1 + '\';
         end;
         end;
-      { copy remainder }
-      len:=strlen(pos);
-      if first then
-        begin
-          SetLength(dest,len);
-          move(pos^,char(dest[length(dest)+1]),len);
-        end
-      else
-        begin
-          oldlength:=Length(dest);
-          SetLength(dest,oldlength+len);
-          move(pos^,char(dest[oldlength+1]),len);
-        end
-    end;
 
 
-
-  function RegExprEscapeStr (const S : string) : string;
-    var
-     i, len   : integer;
-     s1: string;
-    begin
-      RegExprEscapeStr:= '';
-      s1:='';
-      if (S = '') then
-       exit;
-
-      len := Length (S);
-
-      for i := 1 to len do
-        begin
-          if (S [i] in ['(','|', '.', '*', '?', '^', '$', '-', '[', '{', '}', ']', ')', '\']) then
-            begin
-              s1 := s1 + '\';
-            end;
-
-          s1 := s1 + S[i];
-        end;
-      RegExprEscapeStr:=s1;
+      s1 := s1 + S[i];
     end;
     end;
+  RegExprEscapeStr:=s1;
+end;
 
 
-begin
-   cs_nonwordchars:=cs_allchars-cs_wordchars;
-   cs_nondigits:=cs_allchars-cs_digits;
-   cs_nonwhitespace:=cs_allchars-cs_whitespace;
 end.
 end.
+

+ 22 - 7
packages/regexpr/tests/testreg1.pp

@@ -1,6 +1,7 @@
 {$IFDEF FPC}
 {$IFDEF FPC}
 {$MODE OBJFPC}
 {$MODE OBJFPC}
 {$ENDIF}
 {$ENDIF}
+
 program testreg1;
 program testreg1;
 uses
 uses
    regexpr;
    regexpr;
@@ -318,6 +319,14 @@ begin
      do_error(705);
      do_error(705);
    DestroyregExprEngine(r);
    DestroyregExprEngine(r);
 
 
+   initok:=GenerateRegExprEngine('Cat(AZ){2,}Q',[],r);
+   if not initok then
+     do_error(705);
+   if not(RegExprPos(r,'BCatAZAZAZAZQDABCD',index,len)) or
+     (index<>1) or (len<>12) then
+     do_error(705);
+   DestroyregExprEngine(r);
+
    initok:=GenerateRegExprEngine('CatAZ{0,}',[],r);
    initok:=GenerateRegExprEngine('CatAZ{0,}',[],r);
    if not initok then
    if not initok then
      do_error(706);
      do_error(706);
@@ -364,6 +373,14 @@ begin
      do_error(725);
      do_error(725);
    DestroyregExprEngine(r);
    DestroyregExprEngine(r);
 
 
+   initok:=GenerateRegExprEngine('Cat(AZ){1,3}',[],r);
+   if not initok then
+     do_error(725);
+   if not(RegExprPos(r,'BCatAZAZDABCD',index,len)) or
+     (index<>1) or (len<>7) then
+     do_error(725);
+   DestroyregExprEngine(r);
+
    initok:=GenerateRegExprEngine('CatAz{1,5}',[],r);
    initok:=GenerateRegExprEngine('CatAz{1,5}',[],r);
    if not initok then
    if not initok then
      do_error(726);
      do_error(726);
@@ -650,11 +667,11 @@ begin
 
 
    { test real backtracking }
    { test real backtracking }
 
 
-(*   r:=GenerateRegExprEngine('nofoo|foo',[]);
+   r:=GenerateRegExprEngine('nofoo|foo',[]);
    if not(RegExprPos(r,'1234   foo1234XXXX',index,len)) or
    if not(RegExprPos(r,'1234   foo1234XXXX',index,len)) or
      (index<>7) or (len<>3) then
      (index<>7) or (len<>3) then
      do_error(1300);
      do_error(1300);
-   DestroyregExprEngine(r);*)
+   DestroyregExprEngine(r);
 
 
   GenerateRegExprEngine('abc\(123\)$',[],r);
   GenerateRegExprEngine('abc\(123\)$',[],r);
   if not (RegExprPos(r,'1234 abc(123)', index, len)) or
   if not (RegExprPos(r,'1234 abc(123)', index, len)) or
@@ -762,7 +779,6 @@ begin
      do_error(1505);
      do_error(1505);
    DestroyregExprEngine(r);
    DestroyregExprEngine(r);
 
 
-{
   initok:=GenerateRegExprEngine('\.localhost$',[],r);
   initok:=GenerateRegExprEngine('\.localhost$',[],r);
   if not initok then
   if not initok then
      do_error(1506);
      do_error(1506);
@@ -796,22 +812,21 @@ begin
   if not initok then
   if not initok then
      do_error(1500);
      do_error(1500);
    if not(RegExprPos(r,'1234   nofoo1234XXXX',index,len)) or
    if not(RegExprPos(r,'1234   nofoo1234XXXX',index,len)) or
-     (index<>8) or (len<>9) then
+     (index<>7) or (len<>9) then
      do_error(1500);
      do_error(1500);
    DestroyregExprEngine(r);
    DestroyregExprEngine(r);
 
 
    r:=GenerateRegExprEngine('(nofoo|foo|anotherfoo)1234',[]);
    r:=GenerateRegExprEngine('(nofoo|foo|anotherfoo)1234',[]);
    if not(RegExprPos(r,'1234   nofoo1234XXXX',index,len)) or
    if not(RegExprPos(r,'1234   nofoo1234XXXX',index,len)) or
-     (index<>8) or (len<>9) then
+     (index<>7) or (len<>9) then
      do_error(1009);
      do_error(1009);
    DestroyregExprEngine(r);
    DestroyregExprEngine(r);
 
 
    r:=GenerateRegExprEngine('nofoo1234|foo1234',[]);
    r:=GenerateRegExprEngine('nofoo1234|foo1234',[]);
-   if (r.data=nil) or not(RegExprPos(r,'1234   foo1234XXXX',index,len)) or
+   if {(r.data=nil) or} not(RegExprPos(r,'1234   foo1234XXXX',index,len)) or
      (index<>7) or (len<>7) then
      (index<>7) or (len<>7) then
      do_error(1010);
      do_error(1010);
    DestroyregExprEngine(r);
    DestroyregExprEngine(r);
-   }
 
 
   { *************************************************************************
   { *************************************************************************
                               replacement tests
                               replacement tests