Browse Source

* fixes from Ido Kanner
+ RegExprEscapeStr from Ido Kanner
* fixed memory leak

git-svn-id: trunk@953 -

florian 20 years ago
parent
commit
51d0c0ef2a
2 changed files with 100 additions and 12 deletions
  1. 55 9
      packages/base/regexpr/regexpr.pp
  2. 45 3
      packages/base/regexpr/testreg1.pp

+ 55 - 9
packages/base/regexpr/regexpr.pp

@@ -13,7 +13,7 @@
 
 
  **********************************************************************}
  **********************************************************************}
 { $define DEBUG}
 { $define DEBUG}
-{
+(*
   TODO:
   TODO:
      - correct backtracking, for example in (...)*
      - correct backtracking, for example in (...)*
      - | support
      - | support
@@ -22,7 +22,12 @@
      - newline handling in DOS?
      - newline handling in DOS?
      - locale-dependent upper/lowercase routines
      - locale-dependent upper/lowercase routines
      - extend the interface
      - extend the interface
-}
+     - support for number of matches:
+       {n}    Match exactly n times
+       {n,}   Match at least n times
+       {n,m}  Match at least n but not more than m times
+
+*)
 
 
 {$mode objfpc}
 {$mode objfpc}
 
 
@@ -78,6 +83,9 @@ unit regexpr;
 
 
      function RegExprPos(regexprengine : TRegExprEngine;p : pchar;var index,len : longint) : boolean;
      function RegExprPos(regexprengine : TRegExprEngine;p : pchar;var index,len : longint) : boolean;
 
 
+     { Escapes the known regex metacharacters in S by prefixing each with a backslash and returns the escaped string. }
+     function RegExprEscapeStr (const S : AnsiString) : AnsiString;
+
   implementation
   implementation
 
 
 {$ifdef DEBUG}
 {$ifdef DEBUG}
@@ -102,9 +110,8 @@ unit regexpr;
        procedure doregister(p : pregexprentry);
        procedure doregister(p : pregexprentry);
 
 
          begin
          begin
-            p^.nextdestroy:=first;
-            if not(assigned(first)) then
-              first:=p;
+           p^.nextdestroy:=first;
+           first:=p;
          end;
          end;
 
 
        var
        var
@@ -180,6 +187,16 @@ unit regexpr;
                               inc(currentpos);
                               inc(currentpos);
                               readchars:=cs_nonwordchars;
                               readchars:=cs_nonwordchars;
                            end;
                            end;
+                        'f' :
+                            begin
+                              inc(currentpos);
+                              readchars:= [#12];
+                            end;
+                        'a' :
+                            begin
+                              inc(currentpos);
+                              readchars:= [#7];
+                            end;
                          else
                          else
                            begin //Some basic escaping...
                            begin //Some basic escaping...
                               readchars := [currentpos^];
                               readchars := [currentpos^];
@@ -300,12 +317,15 @@ unit regexpr;
                             end;
                             end;
                           inc(currentpos);
                           inc(currentpos);
                        end;
                        end;
-{
-                    '|':
+
+(*                    '|':
                        begin
                        begin
 {$ifdef DEBUG}
 {$ifdef DEBUG}
                           writeln('Creating backtrace entry');
                           writeln('Creating backtrace entry');
 {$endif DEBUG}
 {$endif DEBUG}
+                          if (not assigned (hp2)) then
+                            new (hp2);
+
                           while currentpos^='|' do
                           while currentpos^='|' do
                             begin
                             begin
                               inc(currentpos);
                               inc(currentpos);
@@ -323,7 +343,7 @@ unit regexpr;
                               new(hp);
                               new(hp);
                               doregister(hp);
                               doregister(hp);
                               hp^.typ:=ret_backtrace;
                               hp^.typ:=ret_backtrace;
-                              hp^.elsepath:=parseregexpr();
+                              hp^.elsepath:= parseregexpr (next, elsepath);
                               hp^.next:=next;
                               hp^.next:=next;
                               if assigned(chaining) then
                               if assigned(chaining) then
                                 chaining^:=hp
                                 chaining^:=hp
@@ -333,7 +353,7 @@ unit regexpr;
                             end;
                             end;
                           exit;
                           exit;
                        end;
                        end;
-}
+*)
                     ')':
                     ')':
                        exit;
                        exit;
                     '^':
                     '^':
@@ -603,6 +623,32 @@ unit regexpr;
           index:=-1;
           index:=-1;
        end;
        end;
 
 
+
+  { Returns a copy of S in which every character that belongs to the
+    escape set below is prefixed with a backslash. All other characters
+    are copied unchanged. An empty S yields an empty result. }
+  function RegExprEscapeStr (const S : AnsiString) : AnsiString;
+    const
+      { characters that get a leading backslash }
+      EscapeChars = ['(','|', '.', '*', '?', '^', '$', '-', '[', '{', '}', ']', ')', '\'];
+    var
+     i, len, outpos : SizeInt;
+
+    begin
+      Result := '';
+      len := Length (S);
+      if len = 0 then
+       exit;
+
+      { Worst case every character needs an escape, so reserve twice the
+        input length and fill the buffer by index. Appending to the
+        presized string would leave the reserved bytes in front of the
+        escaped text. }
+      SetLength(Result,len*2);
+      outpos := 0;
+
+      for i := 1 to len do
+        begin
+          if S [i] in EscapeChars then
+            begin
+              inc(outpos);
+              Result [outpos] := '\';
+            end;
+
+          inc(outpos);
+          Result [outpos] := S [i];
+        end;
+
+      { trim the buffer to the number of characters actually written }
+      SetLength(Result,outpos);
+    end;
+
 begin
 begin
    cs_nonwordchars:=cs_allchars-cs_wordchars;
    cs_nonwordchars:=cs_allchars-cs_wordchars;
    cs_nondigits:=cs_allchars-cs_digits;
    cs_nondigits:=cs_allchars-cs_digits;

+ 45 - 3
packages/base/regexpr/testreg1.pp

@@ -1,10 +1,12 @@
+{$MODE OBJFPC}
 program testreg1;
 program testreg1;
 uses
 uses
    regexpr;
    regexpr;
 
 
 var
 var
-   r : tregexprengine;
+   r         : tregexprengine;
    index,len : longint;
    index,len : longint;
+   S         : String;
 
 
 procedure do_error(i : longint);
 procedure do_error(i : longint);
 
 
@@ -111,11 +113,51 @@ begin
 
 
    { test real backtracking }
    { test real backtracking }
 
 
-   r:=GenerateRegExprEngine('nofoo|foo',[]);
+(*   r:=GenerateRegExprEngine('nofoo|foo',[]);
    if not(RegExprPos(r,'1234   foo1234XXXX',index,len)) or
    if not(RegExprPos(r,'1234   foo1234XXXX',index,len)) or
      (index<>7) or (len<>3) then
      (index<>7) or (len<>3) then
      do_error(1300);
      do_error(1300);
-   DestroyregExprEngine(r);
+   DestroyregExprEngine(r);*)
+
+  r := GenerateRegExprEngine('abc\(123\)$',[]);
+  if not (RegExprPos(r,'1234 abc(123)', index, len)) or
+         (index <> 5) or (len <> 8) then
+    do_error (1400);
+  DestroyregExprEngine(r);
+
+  r := GenerateRegExprEngine('^\t$',[ref_singleline]);
+  if not (RegExprPos(r,#9, index, len)) or
+         (index <> 0) or (len <> 1) then
+    do_error (1401);
+  DestroyregExprEngine(r);
+
+  r := GenerateRegExprEngine('^\n$',[ref_singleline]);
+  if not (RegExprPos(r,#10, index, len)) or
+         (index <> 0) or (len <> 1) then
+    do_error (1402);
+  DestroyregExprEngine(r);
+
+  r := GenerateRegExprEngine('^\f$',[ref_singleline]);
+  if not (RegExprPos(r,#12, index, len)) or
+         (index <> 0) or (len <> 1) then
+    do_error (1403);
+  DestroyregExprEngine(r);
+
+  r := GenerateRegExprEngine('^\r$',[ref_singleline]);
+  if not (RegExprPos(r,#13, index, len)) or
+         (index <> 0) or (len <> 1) then
+    do_error (1404);
+  DestroyregExprEngine(r);
+
+  r := GenerateRegExprEngine('^\a$',[ref_singleline]);
+  if not (RegExprPos(r,#7, index, len)) or
+         (index <> 0) or (len <> 1) then
+    do_error (1405);
+  DestroyregExprEngine(r);
+
+   s := '^Hello World \.  [a-z] \D { } |() ?a*.*\\ 1 $';
+   writeln ('Before Escaping: ', s);
+   writeln ('Afther Escaping: ', RegExprEscapeStr(s));
 
 
    {
    {
    r:=GenerateRegExprEngine('(nofoo|foo)1234',[]);
    r:=GenerateRegExprEngine('(nofoo|foo)1234',[]);