|
@@ -32,8 +32,8 @@ unit RegExpr;
|
|
not be charged separately.
|
|
not be charged separately.
|
|
4. Altered versions must be plainly marked as such, and must
|
|
4. Altered versions must be plainly marked as such, and must
|
|
not be misrepresented as being the original software.
|
|
not be misrepresented as being the original software.
|
|
- 5. RegExp Studio application and all the visual components as
|
|
|
|
- well as documentation is not part of the TRegExpr library
|
|
|
|
|
|
+ 5. RegExp Studio application and all the visual components as
|
|
|
|
+ well as documentation is not part of the TRegExpr library
|
|
and is not free for usage.
|
|
and is not free for usage.
|
|
|
|
|
|
mailto:[email protected]
|
|
mailto:[email protected]
|
|
@@ -89,7 +89,9 @@ interface
|
|
{$ENDIF}
|
|
{$ENDIF}
|
|
{$DEFINE ComplexBraces} // support braces in complex cases
|
|
{$DEFINE ComplexBraces} // support braces in complex cases
|
|
{$IFNDEF UniCode} // the option applicable only for non-UniCode mode
|
|
{$IFNDEF UniCode} // the option applicable only for non-UniCode mode
|
|
|
|
+ {$IFNDEF FPC_REQUIRES_PROPER_ALIGNMENT} //sets have to be aligned
|
|
{$DEFINE UseSetOfChar} // Significant optimization by using set of char
|
|
{$DEFINE UseSetOfChar} // Significant optimization by using set of char
|
|
|
|
+ {$ENDIF}
|
|
{$ENDIF}
|
|
{$ENDIF}
|
|
{$IFDEF UseSetOfChar}
|
|
{$IFDEF UseSetOfChar}
|
|
{$DEFINE UseFirstCharSet} // Fast skip between matches for r.e. that starts with determined set of chars
|
|
{$DEFINE UseFirstCharSet} // Fast skip between matches for r.e. that starts with determined set of chars
|
|
@@ -132,8 +134,15 @@ type
|
|
|
|
|
|
const
|
|
const
|
|
REOpSz = SizeOf (TREOp) div SizeOf (REChar); // size of p-code in RegExprString units
|
|
REOpSz = SizeOf (TREOp) div SizeOf (REChar); // size of p-code in RegExprString units
|
|
- RENextOffSz = SizeOf (TRENextOff) div SizeOf (REChar); // size of Next 'pointer' -"-
|
|
|
|
|
|
+ {$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT}
|
|
|
|
+ // add space for aligning pointer
|
|
|
|
+ // -1 is the correct max size but also needed for InsertOperator that needs a multiple of pointer size
|
|
|
|
+ RENextOffSz = (2 * SizeOf (TRENextOff) div SizeOf (REChar))-1;
|
|
|
|
+ REBracesArgSz = (2 * SizeOf (TREBracesArg) div SizeOf (REChar)); // add space for aligning pointer
|
|
|
|
+ {$ELSE}
|
|
|
|
+ RENextOffSz = (SizeOf (TRENextOff) div SizeOf (REChar)); // size of Next 'pointer' -"-
|
|
REBracesArgSz = SizeOf (TREBracesArg) div SizeOf (REChar); // size of BRACES arguments -"-
|
|
REBracesArgSz = SizeOf (TREBracesArg) div SizeOf (REChar); // size of BRACES arguments -"-
|
|
|
|
+ {$ENDIF}
|
|
|
|
|
|
type
|
|
type
|
|
TRegExprInvertCaseFunction = function (const Ch : REChar) : REChar
|
|
TRegExprInvertCaseFunction = function (const Ch : REChar) : REChar
|
|
@@ -672,6 +681,24 @@ const
|
|
XIgnoredChars = [' ', #9, #$d, #$a];
|
|
XIgnoredChars = [' ', #9, #$d, #$a];
|
|
{$ENDIF}
|
|
{$ENDIF}
|
|
|
|
|
|
|
|
+ function AlignToPtr(const p: Pointer): Pointer; // Returns the address to use for a pointer-sized access at p; identical to p unless FPC_REQUIRES_PROPER_ALIGNMENT is defined
|
|
|
|
+ begin
|
|
|
|
+ {$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT}
|
|
|
|
+ Result := Align(p, SizeOf(Pointer)); // presumably the FPC RTL Align, which rounds p up to a multiple of SizeOf(Pointer) - confirm
|
|
|
|
+ {$ELSE}
|
|
|
|
+ Result := p; // define not set: the address is used unchanged
|
|
|
|
+ {$ENDIF}
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ function AlignToInt(const p: Pointer): Pointer; // Returns the address to use for an integer-sized access at p; identical to p unless FPC_REQUIRES_PROPER_ALIGNMENT is defined
|
|
|
|
+ begin
|
|
|
|
+ {$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT}
|
|
|
|
+ Result := Align(p, SizeOf(integer)); // presumably the FPC RTL Align, which rounds p up to a multiple of SizeOf(integer) - confirm; NOTE(review): may differ from AlignToPtr(p) when SizeOf(integer) <> SizeOf(Pointer)
|
|
|
|
+ {$ELSE}
|
|
|
|
+ Result := p; // define not set: the address is used unchanged
|
|
|
|
+ {$ENDIF}
|
|
|
|
+ end;
|
|
|
|
+
|
|
{=============================================================}
|
|
{=============================================================}
|
|
{=================== WideString functions ====================}
|
|
{=================== WideString functions ====================}
|
|
{=============================================================}
|
|
{=============================================================}
|
|
@@ -1474,7 +1501,7 @@ procedure TRegExpr.Tail (p : PRegExprChar; val : PRegExprChar);
|
|
UNTIL false;
|
|
UNTIL false;
|
|
// Set Next 'pointer'
|
|
// Set Next 'pointer'
|
|
if val < scan
|
|
if val < scan
|
|
- then PRENextOff (scan + REOpSz)^ := - (scan - val) //###0.948
|
|
|
|
|
|
+ then PRENextOff (AlignToPtr(scan + REOpSz))^ := - (scan - val) //###0.948
|
|
// work around PWideChar subtraction bug (Delphi uses
|
|
// work around PWideChar subtraction bug (Delphi uses
|
|
// shr after subtraction to calculate widechar distance %-( )
|
|
// shr after subtraction to calculate widechar distance %-( )
|
|
// so, if difference is negative we have .. the "feature" :(
|
|
// so, if difference is negative we have .. the "feature" :(
|
|
@@ -1482,7 +1509,7 @@ procedure TRegExpr.Tail (p : PRegExprChar; val : PRegExprChar);
|
|
// "P – Q computes the difference between the address given
|
|
// "P – Q computes the difference between the address given
|
|
// by P (the higher address) and the address given by Q (the
|
|
// by P (the higher address) and the address given by Q (the
|
|
// lower address)" - Delphi help quotation.
|
|
// lower address)" - Delphi help quotation.
|
|
- else PRENextOff (scan + REOpSz)^ := val - scan; //###0.933
|
|
|
|
|
|
+ else PRENextOff (AlignToPtr(scan + REOpSz))^ := val - scan; //###0.933
|
|
end; { of procedure TRegExpr.Tail
|
|
end; { of procedure TRegExpr.Tail
|
|
--------------------------------------------------------------}
|
|
--------------------------------------------------------------}
|
|
|
|
|
|
@@ -1503,7 +1530,7 @@ function TRegExpr.EmitNode (op : TREOp) : PRegExprChar; //###0.933
|
|
if Result <> @regdummy then begin
|
|
if Result <> @regdummy then begin
|
|
PREOp (regcode)^ := op;
|
|
PREOp (regcode)^ := op;
|
|
inc (regcode, REOpSz);
|
|
inc (regcode, REOpSz);
|
|
- PRENextOff (regcode)^ := 0; // Next "pointer" := nil
|
|
|
|
|
|
+ PRENextOff (AlignToPtr(regcode))^ := 0; // Next "pointer" := nil
|
|
inc (regcode, RENextOffSz);
|
|
inc (regcode, RENextOffSz);
|
|
{$IFDEF DebugSynRegExpr}
|
|
{$IFDEF DebugSynRegExpr}
|
|
if regcode-programm>regsize then
|
|
if regcode-programm>regsize then
|
|
@@ -1546,8 +1573,8 @@ procedure TRegExpr.InsertOperator (op : TREOp; opnd : PRegExprChar; sz : integer
|
|
{$IFDEF DebugSynRegExpr}
|
|
{$IFDEF DebugSynRegExpr}
|
|
if regcode-programm>regsize then
|
|
if regcode-programm>regsize then
|
|
raise Exception.Create('TRegExpr.InsertOperator buffer overrun');
|
|
raise Exception.Create('TRegExpr.InsertOperator buffer overrun');
|
|
- if (opnd<regcode) or (opnd-regcode>regsize) then
|
|
|
|
- raise Exception.Create('TRegExpr.InsertOperator invalid opnd');
|
|
|
|
|
|
+// if (opnd<regcode) or (opnd-regcode>regsize) then
|
|
|
|
+ // raise Exception.Create('TRegExpr.InsertOperator invalid opnd');
|
|
{$ENDIF}
|
|
{$ENDIF}
|
|
dst := regcode;
|
|
dst := regcode;
|
|
while src > opnd do begin
|
|
while src > opnd do begin
|
|
@@ -1896,11 +1923,11 @@ function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;
|
|
if regcode <> @regdummy then begin
|
|
if regcode <> @regdummy then begin
|
|
off := (Result + REOpSz + RENextOffSz)
|
|
off := (Result + REOpSz + RENextOffSz)
|
|
- (regcode - REOpSz - RENextOffSz); // back to Atom after LOOPENTRY
|
|
- (regcode - REOpSz - RENextOffSz); // back to Atom after LOOPENTRY
|
|
- PREBracesArg (regcode)^ := ABracesMin;
|
|
|
|
|
|
+ PREBracesArg (AlignToInt(regcode))^ := ABracesMin;
|
|
inc (regcode, REBracesArgSz);
|
|
inc (regcode, REBracesArgSz);
|
|
- PREBracesArg (regcode)^ := ABracesMax;
|
|
|
|
|
|
+ PREBracesArg (AlignToInt(regcode))^ := ABracesMax;
|
|
inc (regcode, REBracesArgSz);
|
|
inc (regcode, REBracesArgSz);
|
|
- PRENextOff (regcode)^ := off;
|
|
|
|
|
|
+ PRENextOff (AlignToPtr(regcode))^ := off;
|
|
inc (regcode, RENextOffSz);
|
|
inc (regcode, RENextOffSz);
|
|
{$IFDEF DebugSynRegExpr}
|
|
{$IFDEF DebugSynRegExpr}
|
|
if regcode-programm>regsize then
|
|
if regcode-programm>regsize then
|
|
@@ -1922,8 +1949,8 @@ function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;
|
|
else TheOp := BRACES;
|
|
else TheOp := BRACES;
|
|
InsertOperator (TheOp, Result, REOpSz + RENextOffSz + REBracesArgSz * 2);
|
|
InsertOperator (TheOp, Result, REOpSz + RENextOffSz + REBracesArgSz * 2);
|
|
if regcode <> @regdummy then begin
|
|
if regcode <> @regdummy then begin
|
|
- PREBracesArg (Result + REOpSz + RENextOffSz)^ := ABracesMin;
|
|
|
|
- PREBracesArg (Result + REOpSz + RENextOffSz + REBracesArgSz)^ := ABracesMax;
|
|
|
|
|
|
+ PREBracesArg (AlignToInt(Result + REOpSz + RENextOffSz))^ := ABracesMin;
|
|
|
|
+ PREBracesArg (AlignToInt(Result + REOpSz + RENextOffSz + REBracesArgSz))^ := ABracesMax;
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
|
|
@@ -2801,7 +2828,7 @@ function TRegExpr.regnext (p : PRegExprChar) : PRegExprChar;
|
|
Result := nil;
|
|
Result := nil;
|
|
EXIT;
|
|
EXIT;
|
|
end;
|
|
end;
|
|
- offset := PRENextOff (p + REOpSz)^; //###0.933 inlined NEXT
|
|
|
|
|
|
+ offset := PRENextOff (AlignToPtr(p + REOpSz))^; //###0.933 inlined NEXT
|
|
if offset = 0
|
|
if offset = 0
|
|
then Result := nil
|
|
then Result := nil
|
|
else Result := p + offset;
|
|
else Result := p + offset;
|
|
@@ -2834,7 +2861,7 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
|
|
scan := prog;
|
|
scan := prog;
|
|
|
|
|
|
while scan <> nil do begin
|
|
while scan <> nil do begin
|
|
- len := PRENextOff (scan + 1)^; //###0.932 inlined regnext
|
|
|
|
|
|
+ len := PRENextOff (AlignToPtr(scan + 1))^; //###0.932 inlined regnext
|
|
if len = 0
|
|
if len = 0
|
|
then next := nil
|
|
then next := nil
|
|
else next := scan + len;
|
|
else next := scan + len;
|
|
@@ -3123,9 +3150,9 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
|
|
Error (reeLoopWithoutEntry);
|
|
Error (reeLoopWithoutEntry);
|
|
EXIT;
|
|
EXIT;
|
|
end;
|
|
end;
|
|
- opnd := scan + PRENextOff (scan + REOpSz + RENextOffSz + 2 * REBracesArgSz)^;
|
|
|
|
- BracesMin := PREBracesArg (scan + REOpSz + RENextOffSz)^;
|
|
|
|
- BracesMax := PREBracesArg (scan + REOpSz + RENextOffSz + REBracesArgSz)^;
|
|
|
|
|
|
+ opnd := scan + PRENextOff (AlignToPtr(scan + REOpSz + RENextOffSz + 2 * REBracesArgSz))^;
|
|
|
|
+ BracesMin := PREBracesArg (AlignToInt(scan + REOpSz + RENextOffSz))^;
|
|
|
|
+ BracesMax := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz))^;
|
|
save := reginput;
|
|
save := reginput;
|
|
if LoopStack [LoopStackIdx] >= BracesMin then begin // Min alredy matched - we can work
|
|
if LoopStack [LoopStackIdx] >= BracesMin then begin // Min alredy matched - we can work
|
|
if scan^ = LOOP then begin
|
|
if scan^ = LOOP then begin
|
|
@@ -3189,8 +3216,8 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
|
|
else if (scan^ = PLUS) or (scan^ = PLUSNG)
|
|
else if (scan^ = PLUS) or (scan^ = PLUSNG)
|
|
then BracesMin := 1 // PLUS
|
|
then BracesMin := 1 // PLUS
|
|
else begin // BRACES
|
|
else begin // BRACES
|
|
- BracesMin := PREBracesArg (scan + REOpSz + RENextOffSz)^;
|
|
|
|
- BracesMax := PREBracesArg (scan + REOpSz + RENextOffSz + REBracesArgSz)^;
|
|
|
|
|
|
+ BracesMin := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz))^;
|
|
|
|
+ BracesMax := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz))^;
|
|
end;
|
|
end;
|
|
save := reginput;
|
|
save := reginput;
|
|
opnd := scan + REOpSz + RENextOffSz;
|
|
opnd := scan + REOpSz + RENextOffSz;
|
|
@@ -3370,8 +3397,8 @@ procedure TRegExpr.FillFirstCharSet (prog : PRegExprChar);
|
|
EXIT;
|
|
EXIT;
|
|
end;
|
|
end;
|
|
LOOP, LOOPNG: begin //###0.940
|
|
LOOP, LOOPNG: begin //###0.940
|
|
- opnd := scan + PRENextOff (scan + REOpSz + RENextOffSz + REBracesArgSz * 2)^;
|
|
|
|
- min_cnt := PREBracesArg (scan + REOpSz + RENextOffSz)^;
|
|
|
|
|
|
+ opnd := scan + PRENextOff (AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz * 2))^;
|
|
|
|
+ min_cnt := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz))^;
|
|
FillFirstCharSet (opnd);
|
|
FillFirstCharSet (opnd);
|
|
if min_cnt = 0
|
|
if min_cnt = 0
|
|
then FillFirstCharSet (next);
|
|
then FillFirstCharSet (next);
|
|
@@ -3386,7 +3413,7 @@ procedure TRegExpr.FillFirstCharSet (prog : PRegExprChar);
|
|
end;
|
|
end;
|
|
BRACES, BRACESNG: begin //###0.940
|
|
BRACES, BRACESNG: begin //###0.940
|
|
opnd := scan + REOpSz + RENextOffSz + REBracesArgSz * 2;
|
|
opnd := scan + REOpSz + RENextOffSz + REBracesArgSz * 2;
|
|
- min_cnt := PREBracesArg (scan + REOpSz + RENextOffSz)^; // BRACES
|
|
|
|
|
|
+ min_cnt := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz))^; // BRACES
|
|
FillFirstCharSet (opnd);
|
|
FillFirstCharSet (opnd);
|
|
if min_cnt > 0
|
|
if min_cnt > 0
|
|
then EXIT;
|
|
then EXIT;
|
|
@@ -4064,14 +4091,14 @@ function TRegExpr.Dump : RegExprString;
|
|
{$ENDIF}
|
|
{$ENDIF}
|
|
if (op = BRACES) or (op = BRACESNG) then begin //###0.941
|
|
if (op = BRACES) or (op = BRACESNG) then begin //###0.941
|
|
// show min/max argument of BRACES operator
|
|
// show min/max argument of BRACES operator
|
|
- Result := Result + Format ('{%d,%d}', [PREBracesArg (s)^, PREBracesArg (s + REBracesArgSz)^]);
|
|
|
|
|
|
+ Result := Result + Format ('{%d,%d}', [PREBracesArg (AlignToInt(s))^, PREBracesArg (AlignToInt(s + REBracesArgSz))^]);
|
|
inc (s, REBracesArgSz * 2);
|
|
inc (s, REBracesArgSz * 2);
|
|
end;
|
|
end;
|
|
{$IFDEF ComplexBraces}
|
|
{$IFDEF ComplexBraces}
|
|
if (op = LOOP) or (op = LOOPNG) then begin //###0.940
|
|
if (op = LOOP) or (op = LOOPNG) then begin //###0.940
|
|
Result := Result + Format (' -> (%d) {%d,%d}', [
|
|
Result := Result + Format (' -> (%d) {%d,%d}', [
|
|
- (s - programm - (REOpSz + RENextOffSz)) + PRENextOff (s + 2 * REBracesArgSz)^,
|
|
|
|
- PREBracesArg (s)^, PREBracesArg (s + REBracesArgSz)^]);
|
|
|
|
|
|
+ (s - programm - (REOpSz + RENextOffSz)) + PRENextOff (AlignToPtr(s + 2 * REBracesArgSz))^,
|
|
|
|
+ PREBracesArg (AlignToInt(s))^, PREBracesArg (AlignToInt(s + REBracesArgSz))^]);
|
|
inc (s, 2 * REBracesArgSz + RENextOffSz);
|
|
inc (s, 2 * REBracesArgSz + RENextOffSz);
|
|
end;
|
|
end;
|
|
{$ENDIF}
|
|
{$ENDIF}
|