Browse Source

* Fix bug #0034429, replace now can use escape sequences, and lineending can be set

git-svn-id: trunk@39993 -
michael 6 years ago
parent
commit
65433a005e

+ 3 - 0
.gitattributes

@@ -7349,6 +7349,9 @@ packages/regexpr/src/oldregexpr.pp svneol=native#text/pascal
 packages/regexpr/src/regex.pp svneol=native#text/plain
 packages/regexpr/src/regex.pp svneol=native#text/plain
 packages/regexpr/src/regexpr.pas svneol=native#text/pascal
 packages/regexpr/src/regexpr.pas svneol=native#text/pascal
 packages/regexpr/src/uregexpr.pp svneol=native#text/plain
 packages/regexpr/src/uregexpr.pp svneol=native#text/plain
+packages/regexpr/tests/tcregexp.pp svneol=native#text/plain
+packages/regexpr/tests/testregexpr.lpi svneol=native#text/plain
+packages/regexpr/tests/testregexpr.pp svneol=native#text/plain
 packages/rexx/Makefile svneol=native#text/plain
 packages/rexx/Makefile svneol=native#text/plain
 packages/rexx/Makefile.fpc svneol=native#text/plain
 packages/rexx/Makefile.fpc svneol=native#text/plain
 packages/rexx/Makefile.fpc.fpcmake svneol=native#text/plain
 packages/rexx/Makefile.fpc.fpcmake svneol=native#text/plain

+ 195 - 147
packages/regexpr/src/regexpr.pas

@@ -74,7 +74,6 @@ interface
 {$IFDEF UseSetOfChar}
 {$IFDEF UseSetOfChar}
  {$DEFINE UseFirstCharSet} // Fast skip between matches for r.e. that starts with determined set of chars
  {$DEFINE UseFirstCharSet} // Fast skip between matches for r.e. that starts with determined set of chars
 {$ENDIF}
 {$ENDIF}
-{$DEFINE UseOsLineEndOnReplace} // On Replace if replace-with has "\n", use System.LineEnding (#10 #13 or #13#10); else use #10
 {$IFNDEF UNICODE}
 {$IFNDEF UNICODE}
 {$UNDEF UnicodeWordDetection}
 {$UNDEF UnicodeWordDetection}
 {$ENDIF}
 {$ENDIF}
@@ -185,6 +184,7 @@ type
 
 
  TRegExpr = class
  TRegExpr = class
    private
    private
+     FUseOsLineEndOnReplace: Boolean;
     startp : array [0 .. NSUBEXP - 1] of PRegExprChar; // founded expr starting points
     startp : array [0 .. NSUBEXP - 1] of PRegExprChar; // founded expr starting points
     endp : array [0 .. NSUBEXP - 1] of PRegExprChar; // founded expr end points
     endp : array [0 .. NSUBEXP - 1] of PRegExprChar; // founded expr end points
 
 
@@ -260,6 +260,7 @@ type
     fLinePairedSeparatorAssigned : boolean;
     fLinePairedSeparatorAssigned : boolean;
     fLinePairedSeparatorHead,
     fLinePairedSeparatorHead,
     fLinePairedSeparatorTail : REChar;
     fLinePairedSeparatorTail : REChar;
+    FReplaceLineEnd: String;
     {$IFNDEF UniCode}
     {$IFNDEF UniCode}
     fLineSeparatorsSet : set of REChar;
     fLineSeparatorsSet : set of REChar;
     {$ENDIF}
     {$ENDIF}
@@ -300,6 +301,7 @@ type
     {==================== Compiler section ===================}
     {==================== Compiler section ===================}
     // compile a regular expression into internal code
     // compile a regular expression into internal code
     function CompileRegExpr (exp : PRegExprChar) : boolean;
     function CompileRegExpr (exp : PRegExprChar) : boolean;
+    procedure SetUseOsLineEndOnReplace(AValue: Boolean);
 
 
     // set the next-pointer at the end of a node chain
     // set the next-pointer at the end of a node chain
     procedure Tail (p : PRegExprChar; val : PRegExprChar);
     procedure Tail (p : PRegExprChar; val : PRegExprChar);
@@ -326,6 +328,10 @@ type
     // something followed by possible [*+?]
     // something followed by possible [*+?]
     function ParsePiece (var flagp : integer) : PRegExprChar;
     function ParsePiece (var flagp : integer) : PRegExprChar;
 
 
+    function HexDig (ch : REChar) : PtrInt;
+
+    function UnQuoteChar (var APtr : PRegExprChar) : REChar;
+
     // the lowest level
     // the lowest level
     function ParseAtom (var flagp : integer) : PRegExprChar;
     function ParseAtom (var flagp : integer) : PRegExprChar;
 
 
@@ -377,53 +383,6 @@ type
     class function VersionMajor : integer; //###0.944
     class function VersionMajor : integer; //###0.944
     class function VersionMinor : integer; //###0.944
     class function VersionMinor : integer; //###0.944
 
 
-    // Regular expression.
-    // For optimization, TRegExpr will automatically compiles it into 'P-code'
-    // (You can see it with help of Dump method) and stores in internal
-    // structures. Real [re]compilation occures only when it really needed -
-    // while calling Exec[Next], Substitute, Dump, etc
-    // and only if Expression or other P-code affected properties was changed
-    // after last [re]compilation.
-    // If any errors while [re]compilation occures, Error method is called
-    // (by default Error raises exception - see below)
-    property Expression : RegExprString read GetExpression write SetExpression;
-
-    // Set/get default values of r.e.syntax modifiers. Modifiers in
-    // r.e. (?ismx-ismx) will replace this default values.
-    // If you try to set unsupported modifier, Error will be called
-    // (by defaul Error raises exception ERegExpr).
-    property ModifierStr : RegExprString read GetModifierStr write SetModifierStr;
-
-    // Modifier /i - caseinsensitive, initialized from RegExprModifierI
-    property ModifierI : boolean index 1 read GetModifier write SetModifier;
-
-    // Modifier /r - use r.e.syntax extended for russian,
-    // (was property ExtSyntaxEnabled in previous versions)
-    // If true, then а-я  additional include russian letter 'ё',
-    // А-Я  additional include 'Ё', and а-Я include all russian symbols.
-    // You have to turn it off if it can interfere with you national alphabet.
-    // , initialized from RegExprModifierR
-    property ModifierR : boolean index 2 read GetModifier write SetModifier;
-
-    // Modifier /s - '.' works as any char (else as [^\n]),
-    // , initialized from RegExprModifierS
-    property ModifierS : boolean index 3 read GetModifier write SetModifier;
-
-    // Switching off modifier /g switchs all operators in
-    // non-greedy style, so if ModifierG = False, then
-    // all '*' works as '*?', all '+' as '+?' and so on.
-    // , initialized from RegExprModifierG
-    property ModifierG : boolean index 4 read GetModifier write SetModifier;
-
-    // Treat string as multiple lines. That is, change `^' and `$' from
-    // matching at only the very start or end of the string to the start
-    // or end of any line anywhere within the string.
-    // , initialized from RegExprModifierM
-    property ModifierM : boolean index 5 read GetModifier write SetModifier;
-
-    // Modifier /x - eXtended syntax, allow r.e. text formatting,
-    // see description in the help. Initialized from RegExprModifierX
-    property ModifierX : boolean index 6 read GetModifier write SetModifier;
 
 
     // match a programm against a string AInputString
     // match a programm against a string AInputString
     // !!! Exec store AInputString into InputString property
     // !!! Exec store AInputString into InputString property
@@ -449,10 +408,6 @@ type
     // (AOffset=1 - first char of InputString)
     // (AOffset=1 - first char of InputString)
     function ExecPos (AOffset: PtrInt {$IFDEF DefParam}= 1{$ENDIF}) : boolean;
     function ExecPos (AOffset: PtrInt {$IFDEF DefParam}= 1{$ENDIF}) : boolean;
 
 
-    // returns current input string (from last Exec call or last assign
-    // to this property).
-    // Any assignment to this property clear Match* properties !
-    property InputString : RegExprString read GetInputString write SetInputString;
 
 
     // Returns ATemplate with '$&' or '$0' replaced by whole r.e.
     // Returns ATemplate with '$&' or '$0' replaced by whole r.e.
     // occurence and '$n' replaced by occurence of subexpression #n.
     // occurence and '$n' replaced by occurence of subexpression #n.
@@ -474,6 +429,7 @@ type
       const AReplaceStr : RegExprString;
       const AReplaceStr : RegExprString;
       AUseSubstitution : boolean{$IFDEF DefParam}= False{$ENDIF}) //###0.946
       AUseSubstitution : boolean{$IFDEF DefParam}= False{$ENDIF}) //###0.946
      : RegExprString;  overload;
      : RegExprString;  overload;
+
     function Replace (Const AInputStr : RegExprString;
     function Replace (Const AInputStr : RegExprString;
       AReplaceFunc : TRegExprReplaceFunction)
       AReplaceFunc : TRegExprReplaceFunction)
      : RegExprString; overload;
      : RegExprString; overload;
@@ -493,6 +449,82 @@ type
       AReplaceFunc : TRegExprReplaceFunction):
       AReplaceFunc : TRegExprReplaceFunction):
       RegExprString;
       RegExprString;
 
 
+    // Returns ID of last error, 0 if no errors (unusable if
+    // Error method raises exception) and clear internal status
+    // into 0 (no errors).
+    function LastError : integer;
+
+    // Returns Error message for error with ID = AErrorID.
+    function ErrorMsg (AErrorID : integer) : RegExprString; virtual;
+
+
+
+    // Converts Ch into upper case if it in lower case or in lower
+    // if it in upper (uses current system local setings)
+    class function InvertCaseFunction  (const Ch : REChar) : REChar;
+
+    // [Re]compile r.e. Useful for example for GUI r.e. editors (to check
+    // all properties validity).
+    procedure Compile; //###0.941
+
+    {$IFDEF RegExpPCodeDump}
+    // dump a compiled regexp in vaguely comprehensible form
+    function Dump : RegExprString;
+    {$ENDIF}
+
+    // Regular expression.
+    // For optimization, TRegExpr will automatically compiles it into 'P-code'
+    // (You can see it with help of Dump method) and stores in internal
+    // structures. Real [re]compilation occures only when it really needed -
+    // while calling Exec[Next], Substitute, Dump, etc
+    // and only if Expression or other P-code affected properties was changed
+    // after last [re]compilation.
+    // If any errors while [re]compilation occures, Error method is called
+    // (by default Error raises exception - see below)
+    property Expression : RegExprString read GetExpression write SetExpression;
+
+    // Set/get default values of r.e.syntax modifiers. Modifiers in
+    // r.e. (?ismx-ismx) will replace this default values.
+    // If you try to set unsupported modifier, Error will be called
+    // (by defaul Error raises exception ERegExpr).
+    property ModifierStr : RegExprString read GetModifierStr write SetModifierStr;
+
+    // Modifier /i - caseinsensitive, initialized from RegExprModifierI
+    property ModifierI : boolean index 1 read GetModifier write SetModifier;
+
+    // Modifier /r - use r.e.syntax extended for russian,
+    // (was property ExtSyntaxEnabled in previous versions)
+    // If true, then а-я  additional include russian letter 'ё',
+    // А-Я  additional include 'Ё', and а-Я include all russian symbols.
+    // You have to turn it off if it can interfere with you national alphabet.
+    // , initialized from RegExprModifierR
+    property ModifierR : boolean index 2 read GetModifier write SetModifier;
+
+    // Modifier /s - '.' works as any char (else as [^\n]),
+    // , initialized from RegExprModifierS
+    property ModifierS : boolean index 3 read GetModifier write SetModifier;
+
+    // Switching off modifier /g switchs all operators in
+    // non-greedy style, so if ModifierG = False, then
+    // all '*' works as '*?', all '+' as '+?' and so on.
+    // , initialized from RegExprModifierG
+    property ModifierG : boolean index 4 read GetModifier write SetModifier;
+
+    // Treat string as multiple lines. That is, change `^' and `$' from
+    // matching at only the very start or end of the string to the start
+    // or end of any line anywhere within the string.
+    // , initialized from RegExprModifierM
+    property ModifierM : boolean index 5 read GetModifier write SetModifier;
+
+    // Modifier /x - eXtended syntax, allow r.e. text formatting,
+    // see description in the help. Initialized from RegExprModifierX
+
+    property ModifierX : boolean index 6 read GetModifier write SetModifier;
+    // returns current input string (from last Exec call or last assign
+    // to this property).
+    // Any assignment to this property clear Match* properties !
+    property InputString : RegExprString read GetInputString write SetInputString;
+
     // Number of subexpressions has been found in last Exec* call.
     // Number of subexpressions has been found in last Exec* call.
     // If there are no subexpr. but whole expr was found (Exec* returned True),
     // If there are no subexpr. but whole expr was found (Exec* returned True),
     // then SubExprMatchCount=0, if no subexpressions nor whole
     // then SubExprMatchCount=0, if no subexpressions nor whole
@@ -527,14 +559,6 @@ type
     // not found in input string.
     // not found in input string.
     property Match [Idx : integer] : RegExprString read GetMatch;
     property Match [Idx : integer] : RegExprString read GetMatch;
 
 
-    // Returns ID of last error, 0 if no errors (unusable if
-    // Error method raises exception) and clear internal status
-    // into 0 (no errors).
-    function LastError : integer;
-
-    // Returns Error message for error with ID = AErrorID.
-    function ErrorMsg (AErrorID : integer) : RegExprString; virtual;
-
     // Returns position in r.e. where compiler stopped.
     // Returns position in r.e. where compiler stopped.
     // Useful for error diagnostics
     // Useful for error diagnostics
     property CompilerErrorPos : PtrInt read GetCompilerErrorPos;
     property CompilerErrorPos : PtrInt read GetCompilerErrorPos;
@@ -558,22 +582,14 @@ type
     // must contain exactly two chars or no chars at all
     // must contain exactly two chars or no chars at all
     property LinePairedSeparator : RegExprString read GetLinePairedSeparator write SetLinePairedSeparator; //###0.941
     property LinePairedSeparator : RegExprString read GetLinePairedSeparator write SetLinePairedSeparator; //###0.941
 
 
-    // Converts Ch into upper case if it in lower case or in lower
-    // if it in upper (uses current system local setings)
-    class function InvertCaseFunction  (const Ch : REChar) : REChar;
 
 
     // Set this property if you want to override case-insensitive functionality.
     // Set this property if you want to override case-insensitive functionality.
     // Create set it to RegExprInvertCaseFunction (InvertCaseFunction by default)
     // Create set it to RegExprInvertCaseFunction (InvertCaseFunction by default)
     property InvertCase : TRegExprInvertCaseFunction read fInvertCase write fInvertCase; //##0.935
     property InvertCase : TRegExprInvertCaseFunction read fInvertCase write fInvertCase; //##0.935
 
 
-    // [Re]compile r.e. Useful for example for GUI r.e. editors (to check
-    // all properties validity).
-    procedure Compile; //###0.941
-
-    {$IFDEF RegExpPCodeDump}
-    // dump a compiled regexp in vaguely comprehensible form
-    function Dump : RegExprString;
-    {$ENDIF}
+    // Use OS line end on replace or not. Default is True for backwards compatibility.
+    // Set to false to use #10.
+    Property UseOsLineEndOnReplace : Boolean Read FUseOsLineEndOnReplace Write SetUseOsLineEndOnReplace;
   end;
   end;
 
 
  ERegExpr = class (Exception)
  ERegExpr = class (Exception)
@@ -1109,6 +1125,9 @@ constructor TRegExpr.Create;
 
 
   fLineSeparators := RegExprLineSeparators; //###0.941
   fLineSeparators := RegExprLineSeparators; //###0.941
   LinePairedSeparator := RegExprLinePairedSeparator; //###0.941
   LinePairedSeparator := RegExprLinePairedSeparator; //###0.941
+
+  FUseOsLineEndOnReplace:=True;
+  FReplaceLineEnd:=sLineBreak;
  end; { of constructor TRegExpr.Create
  end; { of constructor TRegExpr.Create
 --------------------------------------------------------------}
 --------------------------------------------------------------}
 
 
@@ -1724,6 +1743,16 @@ function TRegExpr.CompileRegExpr (exp : PRegExprChar) : boolean;
  end; { of function TRegExpr.CompileRegExpr
  end; { of function TRegExpr.CompileRegExpr
 --------------------------------------------------------------}
 --------------------------------------------------------------}
 
 
+procedure TRegExpr.SetUseOsLineEndOnReplace(AValue: Boolean);
+begin
+  if FUseOsLineEndOnReplace=AValue then Exit;
+  FUseOsLineEndOnReplace:=AValue;
+  if FUseOsLineEndOnReplace then
+    FReplaceLineEnd:=sLineBreak
+  else
+    FReplaceLineEnd:=#10;
+end;
+
 function TRegExpr.ParseReg (paren : integer; var flagp : integer) : PRegExprChar;
 function TRegExpr.ParseReg (paren : integer; var flagp : integer) : PRegExprChar;
 // regular expression, i.e. main body or parenthesized thing
 // regular expression, i.e. main body or parenthesized thing
 // Caller must absorb opening parenthesis.
 // Caller must absorb opening parenthesis.
@@ -2063,6 +2092,71 @@ function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;
  end; { of function TRegExpr.ParsePiece
  end; { of function TRegExpr.ParsePiece
 --------------------------------------------------------------}
 --------------------------------------------------------------}
 
 
+function TRegExpr.HexDig (ch : REChar) : PtrInt;
+
+ begin
+  Result := Ord(Ch);
+  Case Result of
+    Ord('A')..Ord('F') : Result:=10+Result-Ord('A');
+    Ord('a')..Ord('f') : Result:=10+Result-Ord('a');
+    Ord('0')..Ord('9') : Result:=Result-Ord('0');
+  else
+    Error (reeBadHexDigit);
+  end;
+ end;
+
+function TRegExpr.UnQuoteChar (var APtr : PRegExprChar) : REChar;
+ begin
+  case APtr^ of
+    't': Result := #$9;  // \t => tab (HT/TAB)
+    'n': Result := #$a;  // \n => newline (NL)
+    'r': Result := #$d;  // \r => carriage return (CR)
+    'f': Result := #$c;  // \f => form feed (FF)
+    'a': Result := #$7;  // \a => alarm (bell) (BEL)
+    'e': Result := #$1b; // \e => escape (ESC)
+    'x': begin // \x: hex char
+      Result := #0;
+      inc (APtr);
+      if APtr^ = #0 then begin
+        Error (reeNoHexCodeAfterBSlashX);
+        EXIT;
+       end;
+      if APtr^ = '{' then begin // \x{nnnn} //###0.936
+         REPEAT
+          inc (APtr);
+          if APtr^ = #0 then begin
+            Error (reeNoHexCodeAfterBSlashX);
+            EXIT;
+           end;
+          if APtr^ <> '}' then begin
+             if (Ord (Result)
+                 ShR (SizeOf (REChar) * 8 - 4)) and $F <> 0 then begin
+               Error (reeHexCodeAfterBSlashXTooBig);
+               EXIT;
+              end;
+             Result := REChar ((Ord (Result) ShL 4) or HexDig (APtr^));
+             // HexDig will cause Error if bad hex digit found
+            end
+           else BREAK;
+         UNTIL False;
+        end
+       else begin
+         Result := REChar (HexDig (APtr^));
+         // HexDig will cause Error if bad hex digit found
+         inc (APtr);
+         if APtr^ = #0 then begin
+           Error (reeNoHexCodeAfterBSlashX);
+           EXIT;
+          end;
+         Result := REChar ((Ord (Result) ShL 4) or HexDig (APtr^));
+         // HexDig will cause Error if bad hex digit found
+        end;
+     end;
+    else Result := APtr^;
+   end;
+ end;
+
+
 function TRegExpr.ParseAtom (var flagp : integer) : PRegExprChar;
 function TRegExpr.ParseAtom (var flagp : integer) : PRegExprChar;
 // the lowest level
 // the lowest level
 // Optimization:  gobbles an entire sequence of ordinary characters so that
 // Optimization:  gobbles an entire sequence of ordinary characters so that
@@ -2104,19 +2198,6 @@ function TRegExpr.ParseAtom (var flagp : integer) : PRegExprChar;
     do EmitC (s [i]);
     do EmitC (s [i]);
   end;
   end;
 
 
- function HexDig (ch : REChar) : PtrInt;
-
-  begin
-   Result := Ord(Ch);
-   Case Result of
-     Ord('A')..Ord('F') : Result:=10+Result-Ord('A');
-     Ord('a')..Ord('f') : Result:=10+Result-Ord('a');
-     Ord('0')..Ord('9') : Result:=Result-Ord('0');
-   else
-     Error (reeBadHexDigit);
-   end;
-  end;
-
  function EmitRange (AOpCode : REChar) : PRegExprChar;
  function EmitRange (AOpCode : REChar) : PRegExprChar;
   begin
   begin
    {$IFDEF UseSetOfChar}
    {$IFDEF UseSetOfChar}
@@ -2234,57 +2315,6 @@ function TRegExpr.ParseAtom (var flagp : integer) : PRegExprChar;
     do EmitRangeC (s [i]);
     do EmitRangeC (s [i]);
   end;
   end;
 
 
- function UnQuoteChar (var APtr : PRegExprChar) : REChar; //###0.934
-  begin
-   case APtr^ of
-     't': Result := #$9;  // \t => tab (HT/TAB)
-     'n': Result := #$a;  // \n => newline (NL)
-     'r': Result := #$d;  // \r => carriage return (CR)
-     'f': Result := #$c;  // \f => form feed (FF)
-     'a': Result := #$7;  // \a => alarm (bell) (BEL)
-     'e': Result := #$1b; // \e => escape (ESC)
-     'x': begin // \x: hex char
-       Result := #0;
-       inc (APtr);
-       if APtr^ = #0 then begin
-         Error (reeNoHexCodeAfterBSlashX);
-         EXIT;
-        end;
-       if APtr^ = '{' then begin // \x{nnnn} //###0.936
-          REPEAT
-           inc (APtr);
-           if APtr^ = #0 then begin
-             Error (reeNoHexCodeAfterBSlashX);
-             EXIT;
-            end;
-           if APtr^ <> '}' then begin
-              if (Ord (Result)
-                  ShR (SizeOf (REChar) * 8 - 4)) and $F <> 0 then begin
-                Error (reeHexCodeAfterBSlashXTooBig);
-                EXIT;
-               end;
-              Result := REChar ((Ord (Result) ShL 4) or HexDig (APtr^));
-              // HexDig will cause Error if bad hex digit found
-             end
-            else BREAK;
-          UNTIL False;
-         end
-        else begin
-          Result := REChar (HexDig (APtr^));
-          // HexDig will cause Error if bad hex digit found
-          inc (APtr);
-          if APtr^ = #0 then begin
-            Error (reeNoHexCodeAfterBSlashX);
-            EXIT;
-           end;
-          Result := REChar ((Ord (Result) ShL 4) or HexDig (APtr^));
-          // HexDig will cause Error if bad hex digit found
-         end;
-      end;
-     else Result := APtr^;
-    end;
-  end;
-
  begin
  begin
   Result := nil;
   Result := nil;
   flags:=0;
   flags:=0;
@@ -3569,7 +3599,6 @@ function TRegExpr.ExecPrim (AOffset: PtrInt) : boolean;
 --------------------------------------------------------------}
 --------------------------------------------------------------}
 
 
 
 
-
 function TRegExpr.ExecNext : boolean;
 function TRegExpr.ExecNext : boolean;
  var offset : PtrInt;
  var offset : PtrInt;
  begin
  begin
@@ -3700,13 +3729,11 @@ var
   n : PtrInt;
   n : PtrInt;
   Ch : REChar;
   Ch : REChar;
   Mode: TSubstMode;
   Mode: TSubstMode;
-  LineEnd: String = {$ifdef UseOsLineEndOnReplace} System.LineEnding {$else} Chr(10) {$endif};
+  QuotedChar: REChar;
 
 
   function ParseVarName (var APtr : PRegExprChar) : PtrInt;
   function ParseVarName (var APtr : PRegExprChar) : PtrInt;
   // extract name of variable (digits, may be enclosed with
   // extract name of variable (digits, may be enclosed with
   // curly braces) from APtr^, uses TemplateEnd !!!
   // curly braces) from APtr^, uses TemplateEnd !!!
-  const
-   Digits = ['0' .. '9'];
   var
   var
    p : PRegExprChar;
    p : PRegExprChar;
    Delimited : boolean;
    Delimited : boolean;
@@ -3767,8 +3794,18 @@ begin
         Ch := p^;
         Ch := p^;
         inc (p);
         inc (p);
         case Ch of
         case Ch of
-          'n' : inc(ResultLen, Length(LineEnd));
+          'n': inc(ResultLen, Length(FReplaceLineEnd));
           'u', 'l', 'U', 'L': {nothing};
           'u', 'l', 'U', 'L': {nothing};
+          'x': begin
+              inc(ResultLen);
+              if (p^ = '{') then begin // skip \x{....}
+                   while ((p^ <> '}') and (p < TemplateEnd)) do
+                     p := p + 1;
+                   p := p + 1;
+                  end
+                 else
+                   p := p + 2  // skip \x..
+            end;
           else inc(ResultLen);
           else inc(ResultLen);
         end;
         end;
       end
       end
@@ -3807,8 +3844,15 @@ begin
         inc (p);
         inc (p);
         case Ch of
         case Ch of
           'n' : begin
           'n' : begin
-              p0 := @LineEnd[1];
-              p1 := p0 + Length(LineEnd);
+              p0 := @FReplaceLineEnd[1];
+              p1 := p0 + Length(FReplaceLineEnd);
+            end;
+          'x', 't', 'r', 'f', 'a', 'e': begin
+             p := p - 1;  // UnquoteChar expects the escaped char under the pointer
+             QuotedChar := UnquoteChar(p);
+             p := p + 1;  // Skip after last part of the escaped sequence - UnquoteChar stops on the last symbol of it
+             p0 := @QuotedChar;
+             p1 := p0 + 1;
             end;
             end;
           'l' : begin
           'l' : begin
               Mode := smodeOneLower;
               Mode := smodeOneLower;
@@ -4001,6 +4045,12 @@ function TRegExpr.Dump : RegExprString;
 {$IFDEF UseSetOfChar} //###0.929
 {$IFDEF UseSetOfChar} //###0.929
   Ch : REChar;
   Ch : REChar;
 {$ENDIF}
 {$ENDIF}
+  function PrintableChar(AChar: REChar): string; inline;
+    begin
+      if AChar < ' '
+       then Result := '#' + IntToStr (Ord (AChar))
+       else Result := AChar;
+    end;
  begin
  begin
   if not IsProgrammOk //###0.929
   if not IsProgrammOk //###0.929
    then EXIT;
    then EXIT;
@@ -4025,7 +4075,7 @@ function TRegExpr.Dump : RegExprString;
         or (op = EXACTLY) or (op = EXACTLYCI) then begin
         or (op = EXACTLY) or (op = EXACTLYCI) then begin
          // Literal string, where present.
          // Literal string, where present.
          while s^ <> #0 do begin
          while s^ <> #0 do begin
-           Result := Result + s^;
+           Result := Result + PrintableChar(s^);
            inc (s);
            inc (s);
           end;
           end;
          inc (s);
          inc (s);
@@ -4044,9 +4094,7 @@ function TRegExpr.Dump : RegExprString;
      if op = ANYOFFULLSET then begin
      if op = ANYOFFULLSET then begin
        for Ch := #0 to #255 do
        for Ch := #0 to #255 do
         if Ch in PSetOfREChar (s)^ then
         if Ch in PSetOfREChar (s)^ then
-         if Ch < ' '
-          then Result := Result + '#' + IntToStr (Ord (Ch)) //###0.936
-          else Result := Result + Ch;
+          Result := Result + PrintableChar(Ch);
        inc (s, SizeOf (TSetOfREChar));
        inc (s, SizeOf (TSetOfREChar));
       end;
       end;
      {$ENDIF}
      {$ENDIF}
@@ -4080,7 +4128,7 @@ function TRegExpr.Dump : RegExprString;
    if Ch in FirstCharSet
    if Ch in FirstCharSet
     then begin
     then begin
       if Ch < ' '
       if Ch < ' '
-       then Result := Result + '#' + IntToStr(Ord(Ch)) //###0.948
+       then Result := Result + PrintableChar(Ch) //###0.948
        else Result := Result + Ch;
        else Result := Result + Ch;
     end;
     end;
   {$ENDIF}
   {$ENDIF}

+ 437 - 0
packages/regexpr/tests/tcregexp.pp

@@ -0,0 +1,437 @@
+unit tcregexp;
+
+{$mode objfpc}{$H+}
+
+{ $DEFINE DUMPTESTS} //define this to dump a
+
+interface
+
+uses
+  Classes, SysUtils, fpcunit, testregistry, regexpr;
+
+type
+
+  { TTestRegexpr }
+
+  TTestRegexpr= class(TTestCase)
+  private
+    FRE: TRegExpr;
+  protected
+    class function PrintableString(AString: string): string;
+    Procedure RunRETest(aIndex : Integer);
+    procedure SetUp; override;
+    procedure TearDown; override;
+    Property RE : TRegExpr read FRE;
+  published
+    procedure TestEmpty;
+    Procedure RunTest1;
+    Procedure RunTest2;
+    Procedure RunTest3;
+    Procedure RunTest4;
+    Procedure RunTest5;
+    Procedure RunTest6;
+    Procedure RunTest7;
+    Procedure RunTest8;
+    Procedure RunTest9;
+    Procedure RunTest10;
+    Procedure RunTest11;
+    Procedure RunTest12;
+    Procedure RunTest13;
+    Procedure RunTest14;
+    Procedure RunTest15;
+    Procedure RunTest16;
+    Procedure RunTest17;
+    Procedure RunTest18;
+    Procedure RunTest19;
+    Procedure RunTest20;
+    Procedure RunTest21;
+    Procedure RunTest22;
+    Procedure RunTest23;
+    Procedure RunTest24;
+    Procedure RunTest25;
+  end;
+
+implementation
+
+Type
+  TRegExTest = record
+    Expression: string;
+    InputText: string;
+    SubstitutionText: string;
+    ExpectedResult: string;
+    MatchStart: integer;
+  end;
+
+const
+  testCases: array [1..25] of TRegExTest = (
+    (
+    expression: '\nd';
+    inputText: 'abc'#13#10'def';
+    substitutionText: '\n\x{10}\r\\';
+    expectedResult: 'abc'#13#10#16#13'\ef'
+    ),
+    (
+    expression: '(\w*)';
+    inputText: 'name.ext';
+    substitutionText: '$1.new';
+    expectedResult: 'name.new.new.ext.new.new'
+    ),
+    (
+    expression: #$d'('#$a')';
+    inputText: 'word'#$d#$a;
+    substitutionText: '$1';
+    expectedResult: 'word'#$a
+    ),
+    (
+    expression: '(word)';
+    inputText: 'word';
+    substitutionText: '\U$1\\r';
+    expectedResult: 'WORD\r'
+    ),
+    (
+    expression: '(word)';
+    inputText: 'word';
+    substitutionText: '$1\n';
+    expectedResult: 'word'#$a
+    ),
+    (
+    expression: '[A-Z]';
+    inputText: '234578923457823659GHJK38';
+    substitutionText: '';
+    expectedResult: 'G';
+    matchStart: 19;
+    ),
+    (
+    expression: '[A-Z]*?';
+    inputText: '234578923457823659ARTZU38';
+    substitutionText: '';
+    expectedResult: '';
+    matchStart: 1
+    ),
+    (
+    expression: '[A-Z]+';
+    inputText: '234578923457823659ARTZU38';
+    substitutionText: '';
+    expectedResult: 'ARTZU';
+    matchStart: 19
+    ),
+    (
+    expression: '[A-Z][A-Z]*';
+    inputText: '234578923457823659ARTZU38';
+    substitutionText: '';
+    expectedResult: 'ARTZU';
+    matchStart: 19
+    ),
+    (
+    expression: '[A-Z][A-Z]?';
+    inputText: '234578923457823659ARTZU38';
+    substitutionText: '';
+    expectedResult: 'AR';
+    matchStart: 19
+    ),
+    (
+    expression: '[^\d]+';
+    inputText: '234578923457823659ARTZU38';
+    substitutionText: '';
+    expectedResult: 'ARTZU';
+    matchStart: 19
+    ),
+    (
+    expression: '[A-Z][A-Z]?[A-Z]';
+    inputText: '234578923457823659ARTZU38';
+    substitutionText: '';
+    expectedResult: 'ART';
+    matchStart: 19
+    ),
+    (
+    expression: '[A-Z][A-Z]*[0-9]';
+    inputText: '234578923457823659ARTZU38';
+    substitutionText: '';
+    expectedResult: 'ARTZU3';
+    matchStart: 19
+    ),
+    (
+    expression: '[A-Z]+[0-9]';
+    inputText: '234578923457823659ARTZU38';
+    substitutionText: '';
+    expectedResult: 'ARTZU3';
+    matchStart: 19
+    ),
+    (
+    expression: '(?i)[A-Z]';
+    inputText: '234578923457823659a38';
+    substitutionText: '';
+    expectedResult: 'a';
+    matchStart: 19
+    ),
+    (
+    expression: '(?i)[a-z]';
+    inputText: '234578923457823659A38';
+    substitutionText: '';
+    expectedResult: 'A';
+    matchStart: 19
+    ),
+    (
+    expression: '(foo)1234';
+    inputText: '1234   foo1234XXXX';
+    substitutionText: '';
+    expectedResult: 'foo1234';
+    matchStart: 8
+    ),
+    (
+    expression: '(((foo)))1234';
+    inputText: '1234   foo1234XXXX';
+    substitutionText: '';
+    expectedResult: 'foo1234';
+    matchStart: 8
+    ),
+    (
+    expression: '(foo)(1234)';
+    inputText: '1234   foo1234XXXX';
+    substitutionText: '';
+    expectedResult: 'foo1234';
+    matchStart: 8
+    ),
+    (
+    expression: 'nofoo|foo';
+    inputText: '1234   foo1234XXXX';
+    substitutionText: '';
+    expectedResult: 'foo';
+    matchStart: 8
+    ),
+    (
+    expression: '(nofoo|foo)1234';
+    inputText: '1234   nofoo1234XXXX';
+    substitutionText: '';
+    expectedResult: 'nofoo1234';
+    matchStart: 8
+    ),
+    (
+    expression: '(nofoo|foo|anotherfoo)1234';
+    inputText: '1234   nofoo1234XXXX';
+    substitutionText: '';
+    expectedResult: 'nofoo1234';
+    matchStart: 8
+    ),
+    (
+    expression: 'nofoo1234|foo1234';
+    inputText: '1234   foo1234XXXX';
+    substitutionText: '';
+    expectedResult: 'foo1234';
+    matchStart: 8
+    ),
+    (
+    expression: '(\w*)';
+    inputText: 'name.ext';
+    substitutionText: '';
+    expectedResult: 'name';
+    matchStart: 1
+    ),
+    (
+    expression: '\r(\n)';
+    inputText: #$d#$a;
+    substitutionText: '';
+    expectedResult: #$d#$a;
+    matchStart: 1
+    )
+  );
+
+
+procedure TTestRegexpr.TestEmpty;
+begin
+  AssertNotNull('Have RE',RE);
+  AssertFalse('UseOsLineEndOnReplace correcly set', RE.UseOsLineEndOnReplace);
+end;
+
+procedure TTestRegexpr.RunTest1;
+begin
+  RunRETest(1);
+end;
+
+procedure TTestRegexpr.RunTest2;
+begin
+  RunRETest(2);
+end;
+
+procedure TTestRegexpr.RunTest3;
+begin
+  RunRETest(3);
+end;
+
+procedure TTestRegexpr.RunTest4;
+begin
+  RunRETest(4);
+end;
+
+procedure TTestRegexpr.RunTest5;
+begin
+  RunRETest(5);
+end;
+
+procedure TTestRegexpr.RunTest6;
+begin
+  RunRETest(6);
+end;
+
+procedure TTestRegexpr.RunTest7;
+begin
+  RunRETest(7);
+end;
+
+procedure TTestRegexpr.RunTest8;
+begin
+  RunRETest(8);
+end;
+
+procedure TTestRegexpr.RunTest9;
+begin
+  RunRETest(9);
+end;
+
+procedure TTestRegexpr.RunTest10;
+begin
+  RunRETest(10);
+end;
+
+procedure TTestRegexpr.RunTest11;
+begin
+  RunRETest(11);
+end;
+
+procedure TTestRegexpr.RunTest12;
+begin
+  RunRETest(12);
+end;
+
+procedure TTestRegexpr.RunTest13;
+begin
+  RunRETest(13);
+end;
+
+procedure TTestRegexpr.RunTest14;
+begin
+  RunRETest(14);
+end;
+
+procedure TTestRegexpr.RunTest15;
+begin
+  RunRETest(15);
+end;
+
+procedure TTestRegexpr.RunTest16;
+begin
+  RunRETest(16);
+end;
+
+procedure TTestRegexpr.RunTest17;
+begin
+  RunRETest(17);
+end;
+
+procedure TTestRegexpr.RunTest18;
+begin
+  RunRETest(18);
+end;
+
+procedure TTestRegexpr.RunTest19;
+begin
+  RunRETest(19);
+end;
+
+procedure TTestRegexpr.RunTest20;
+begin
+  RunRETest(20);
+end;
+
+procedure TTestRegexpr.RunTest21;
+begin
+  RunRETest(21);
+end;
+
+procedure TTestRegexpr.RunTest22;
+begin
+  RunRETest(22);
+end;
+
+procedure TTestRegexpr.RunTest23;
+begin
+  RunRETest(23);
+end;
+
+procedure TTestRegexpr.RunTest24;
+begin
+  RunRETest(24);
+end;
+
+procedure TTestRegexpr.RunTest25;
+begin
+  RunRETest(25);
+end;
+
+
+Class function TTestRegexpr.PrintableString(AString: string): string;
+
+var
+    ch: Char;
+
+begin
+  Result := '';
+  for ch in AString do
+    if ch < #31 then
+      Result := Result + '#' + IntToStr(Ord(ch))
+    else
+      Result := Result + ch;
+end;
+
+procedure TTestRegexpr.RunRETest(aIndex: Integer);
+
+
+var
+  T: TRegExTest;
+  act : String;
+
+begin
+  T:=testCases[aIndex];
+  RE.Expression:=T.Expression;
+  RE.Compile;
+{$IFDEF DUMPTESTS}
+  Writeln('Test: ',TestName);
+  writeln('  Modifiers "', RE.ModifierStr, '"');
+  writeln('  Regular expression: ', T.Expression,' ,');
+  writeln('  compiled into p-code: ');
+  writeln('  ',RE.Dump);
+  writeln('  Input text: "', PrintableString(T.inputText), '"');
+  if (T.substitutionText <> '')  then
+    Writeln('  Substitution text: "', PrintableString(T.substitutionText), '"');
+{$ENDIF}
+  if (T.SubstitutionText <> '') then
+    begin
+    act:=RE.Replace(T.InputText,T.SubstitutionText,True);
+    AssertEquals('Replace failed', T.ExpectedResult,Act)
+    end
+  else
+    begin
+    RE.Exec(T.inputText);
+    AssertEquals('Search position',T.MatchStart,RE.MatchPos[0]);
+    AssertEquals('Matched text',T.ExpectedResult,RE.Match[0]);
+    end;
+end;
+
+procedure TTestRegexpr.SetUp;
+begin
+  Inherited;
+  FRE := TRegExpr.Create;
+  FRE.UseOsLineEndOnReplace:=False;
+end;
+
+procedure TTestRegexpr.TearDown;
+begin
+  FreeAndNil(FRE);
+  Inherited;
+end;
+
+initialization
+
+  RegisterTest(TTestRegexpr);
+end.
+

+ 63 - 0
packages/regexpr/tests/testregexpr.lpi

@@ -0,0 +1,63 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<CONFIG>
+  <ProjectOptions>
+    <Version Value="11"/>
+    <General>
+      <SessionStorage Value="InProjectDir"/>
+      <MainUnit Value="0"/>
+      <Title Value="testregexpr"/>
+      <UseAppBundle Value="False"/>
+      <ResourceType Value="res"/>
+    </General>
+    <BuildModes Count="1">
+      <Item1 Name="Default" Default="True"/>
+    </BuildModes>
+    <PublishOptions>
+      <Version Value="2"/>
+      <UseFileFilters Value="True"/>
+    </PublishOptions>
+    <RunParams>
+      <FormatVersion Value="2"/>
+      <Modes Count="0"/>
+    </RunParams>
+    <RequiredPackages Count="1">
+      <Item1>
+        <PackageName Value="FCL"/>
+      </Item1>
+    </RequiredPackages>
+    <Units Count="2">
+      <Unit0>
+        <Filename Value="testregexpr.pp"/>
+        <IsPartOfProject Value="True"/>
+      </Unit0>
+      <Unit1>
+        <Filename Value="tcregexp.pp"/>
+        <IsPartOfProject Value="True"/>
+      </Unit1>
+    </Units>
+  </ProjectOptions>
+  <CompilerOptions>
+    <Version Value="11"/>
+    <Target>
+      <Filename Value="testregexpr"/>
+    </Target>
+    <SearchPaths>
+      <IncludeFiles Value="$(ProjOutDir)"/>
+      <OtherUnitFiles Value="../src"/>
+      <UnitOutputDirectory Value="lib/$(TargetCPU)-$(TargetOS)"/>
+    </SearchPaths>
+  </CompilerOptions>
+  <Debugging>
+    <Exceptions Count="3">
+      <Item1>
+        <Name Value="EAbort"/>
+      </Item1>
+      <Item2>
+        <Name Value="ECodetoolError"/>
+      </Item2>
+      <Item3>
+        <Name Value="EFOpenError"/>
+      </Item3>
+    </Exceptions>
+  </Debugging>
+</CONFIG>

+ 28 - 0
packages/regexpr/tests/testregexpr.pp

@@ -0,0 +1,28 @@
+program testregexpr;
+
+{$mode objfpc}{$H+}
+
+uses
+  Classes, consoletestrunner, tcregexp;
+
+type
+
+  { TMyTestRunner }
+
+  TMyTestRunner = class(TTestRunner)
+  protected
+  // override the protected methods of TTestRunner to customize its behavior
+  end;
+
+var
+  Application: TMyTestRunner;
+
+begin
+  DefaultFormat:=fPlain;
+  DefaultRunAllTests:=True;
+  Application := TMyTestRunner.Create(nil);
+  Application.Initialize;
+  Application.Title:='testregexpr';
+  Application.Run;
+  Application.Free;
+end.