Browse Source

regexpr: Merges patch from bug #19109 fixes alignment issues

git-svn-id: trunk@18900 -
sekelsenmat 14 years ago
parent
commit
3e7a09df25
1 changed files with 53 additions and 26 deletions
  1. 53 26
      packages/regexpr/src/regexpr.pas

+ 53 - 26
packages/regexpr/src/regexpr.pas

@@ -32,8 +32,8 @@ unit RegExpr;
     not be charged seperatly.
     not be charged seperatly.
  4. Altered versions must be plainly marked as such, and must
  4. Altered versions must be plainly marked as such, and must
     not be misrepresented as being the original software.
     not be misrepresented as being the original software.
- 5. RegExp Studio application and all the visual components as 
-    well as documentation is not part of the TRegExpr library 
+ 5. RegExp Studio application and all the visual components as
+    well as documentation is not part of the TRegExpr library
     and is not free for usage.
     and is not free for usage.
 
 
                                     mailto:[email protected]
                                     mailto:[email protected]
@@ -89,7 +89,9 @@ interface
 {$ENDIF}
 {$ENDIF}
 {$DEFINE ComplexBraces} // support braces in complex cases
 {$DEFINE ComplexBraces} // support braces in complex cases
 {$IFNDEF UniCode} // the option applicable only for non-UniCode mode
 {$IFNDEF UniCode} // the option applicable only for non-UniCode mode
+ {$IFNDEF FPC_REQUIRES_PROPER_ALIGNMENT}  //sets have to be aligned
  {$DEFINE UseSetOfChar} // Significant optimization by using set of char
  {$DEFINE UseSetOfChar} // Significant optimization by using set of char
+ {$ENDIF}
 {$ENDIF}
 {$ENDIF}
 {$IFDEF UseSetOfChar}
 {$IFDEF UseSetOfChar}
  {$DEFINE UseFirstCharSet} // Fast skip between matches for r.e. that starts with determined set of chars
  {$DEFINE UseFirstCharSet} // Fast skip between matches for r.e. that starts with determined set of chars
@@ -132,8 +134,15 @@ type
 
 
 const
 const
  REOpSz = SizeOf (TREOp) div SizeOf (REChar); // size of p-code in RegExprString units
  REOpSz = SizeOf (TREOp) div SizeOf (REChar); // size of p-code in RegExprString units
- RENextOffSz = SizeOf (TRENextOff) div SizeOf (REChar); // size of Next 'pointer' -"-
+ {$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT}
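+ // Reserve extra room so the Next 'pointer' can be shifted up to a pointer-aligned address.
+ // The -1 gives the exact worst-case padding and (assuming REOpSz = 1 - TODO confirm) makes
+ // REOpSz + RENextOffSz a multiple of the pointer size, which InsertOperator requires.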
+ RENextOffSz = (2 * SizeOf (TRENextOff) div SizeOf (REChar))-1;
+ REBracesArgSz = (2 * SizeOf (TREBracesArg) div SizeOf (REChar)); // add space for aligning pointer
+ {$ELSE}
+ RENextOffSz = (SizeOf (TRENextOff) div SizeOf (REChar)); // size of Next 'pointer' -"-
  REBracesArgSz = SizeOf (TREBracesArg) div SizeOf (REChar); // size of BRACES arguments -"-
  REBracesArgSz = SizeOf (TREBracesArg) div SizeOf (REChar); // size of BRACES arguments -"-
+ {$ENDIF}
 
 
 type
 type
  TRegExprInvertCaseFunction = function (const Ch : REChar) : REChar
  TRegExprInvertCaseFunction = function (const Ch : REChar) : REChar
@@ -672,6 +681,24 @@ const
  XIgnoredChars = [' ', #9, #$d, #$a];
  XIgnoredChars = [' ', #9, #$d, #$a];
  {$ENDIF}
  {$ENDIF}
 
 
+ { Returns p aligned to a SizeOf(Pointer) boundary when the target defines
+   FPC_REQUIRES_PROPER_ALIGNMENT; on all other targets p is returned unchanged. }
+ function AlignToPtr(const p: Pointer): Pointer;
+ begin
+   // Default: no adjustment needed.
+   Result := p;
+ {$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT}
+   // Strict-alignment target: move to the pointer-size boundary.
+   Result := Align(Result, SizeOf(Pointer));
+ {$ENDIF}
+ end;
+
+ { Returns p aligned to a SizeOf(integer) boundary when the target defines
+   FPC_REQUIRES_PROPER_ALIGNMENT; on all other targets p is returned unchanged. }
+ function AlignToInt(const p: Pointer): Pointer;
+ begin
+   // Default: no adjustment needed.
+   Result := p;
+ {$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT}
+   // Strict-alignment target: move to the integer-size boundary.
+   Result := Align(Result, SizeOf(integer));
+ {$ENDIF}
+ end;
+
 {=============================================================}
 {=============================================================}
 {=================== WideString functions ====================}
 {=================== WideString functions ====================}
 {=============================================================}
 {=============================================================}
@@ -1474,7 +1501,7 @@ procedure TRegExpr.Tail (p : PRegExprChar; val : PRegExprChar);
   UNTIL false;
   UNTIL false;
   // Set Next 'pointer'
   // Set Next 'pointer'
   if val < scan
   if val < scan
-   then PRENextOff (scan + REOpSz)^ := - (scan - val) //###0.948
+   then PRENextOff (AlignToPtr(scan + REOpSz))^ := - (scan - val) //###0.948
    // work around PWideChar subtraction bug (Delphi uses
    // work around PWideChar subtraction bug (Delphi uses
    // shr after subtraction to calculate widechar distance %-( )
    // shr after subtraction to calculate widechar distance %-( )
    // so, if difference is negative we have .. the "feature" :(
    // so, if difference is negative we have .. the "feature" :(
@@ -1482,7 +1509,7 @@ procedure TRegExpr.Tail (p : PRegExprChar; val : PRegExprChar);
    // "P – Q computes the difference between the address given
    // "P – Q computes the difference between the address given
    // by P (the higher address) and the address given by Q (the
    // by P (the higher address) and the address given by Q (the
    // lower address)" - Delphi help quotation.
    // lower address)" - Delphi help quotation.
-   else PRENextOff (scan + REOpSz)^ := val - scan; //###0.933
+   else PRENextOff (AlignToPtr(scan + REOpSz))^ := val - scan; //###0.933
  end; { of procedure TRegExpr.Tail
  end; { of procedure TRegExpr.Tail
 --------------------------------------------------------------}
 --------------------------------------------------------------}
 
 
@@ -1503,7 +1530,7 @@ function TRegExpr.EmitNode (op : TREOp) : PRegExprChar; //###0.933
   if Result <> @regdummy then begin
   if Result <> @regdummy then begin
      PREOp (regcode)^ := op;
      PREOp (regcode)^ := op;
      inc (regcode, REOpSz);
      inc (regcode, REOpSz);
-     PRENextOff (regcode)^ := 0; // Next "pointer" := nil
+     PRENextOff (AlignToPtr(regcode))^ := 0; // Next "pointer" := nil
      inc (regcode, RENextOffSz);
      inc (regcode, RENextOffSz);
      {$IFDEF DebugSynRegExpr}
      {$IFDEF DebugSynRegExpr}
      if regcode-programm>regsize then
      if regcode-programm>regsize then
@@ -1546,8 +1573,8 @@ procedure TRegExpr.InsertOperator (op : TREOp; opnd : PRegExprChar; sz : integer
   {$IFDEF DebugSynRegExpr}
   {$IFDEF DebugSynRegExpr}
   if regcode-programm>regsize then
   if regcode-programm>regsize then
     raise Exception.Create('TRegExpr.InsertOperator buffer overrun');
     raise Exception.Create('TRegExpr.InsertOperator buffer overrun');
-  if (opnd<regcode) or (opnd-regcode>regsize) then
-    raise Exception.Create('TRegExpr.InsertOperator invalid opnd');
+//  if (opnd<regcode) or (opnd-regcode>regsize) then
+ //   raise Exception.Create('TRegExpr.InsertOperator invalid opnd');
   {$ENDIF}
   {$ENDIF}
   dst := regcode;
   dst := regcode;
   while src > opnd do begin
   while src > opnd do begin
@@ -1896,11 +1923,11 @@ function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;
    if regcode <> @regdummy then begin
    if regcode <> @regdummy then begin
       off := (Result + REOpSz + RENextOffSz)
       off := (Result + REOpSz + RENextOffSz)
        - (regcode - REOpSz - RENextOffSz); // back to Atom after LOOPENTRY
        - (regcode - REOpSz - RENextOffSz); // back to Atom after LOOPENTRY
-      PREBracesArg (regcode)^ := ABracesMin;
+      PREBracesArg (AlignToInt(regcode))^ := ABracesMin;
       inc (regcode, REBracesArgSz);
       inc (regcode, REBracesArgSz);
-      PREBracesArg (regcode)^ := ABracesMax;
+      PREBracesArg (AlignToInt(regcode))^ := ABracesMax;
       inc (regcode, REBracesArgSz);
       inc (regcode, REBracesArgSz);
-      PRENextOff (regcode)^ := off;
+      PRENextOff (AlignToPtr(regcode))^ := off;
       inc (regcode, RENextOffSz);
       inc (regcode, RENextOffSz);
       {$IFDEF DebugSynRegExpr}
       {$IFDEF DebugSynRegExpr}
       if regcode-programm>regsize then
       if regcode-programm>regsize then
@@ -1922,8 +1949,8 @@ function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;
     else TheOp := BRACES;
     else TheOp := BRACES;
    InsertOperator (TheOp, Result, REOpSz + RENextOffSz + REBracesArgSz * 2);
    InsertOperator (TheOp, Result, REOpSz + RENextOffSz + REBracesArgSz * 2);
    if regcode <> @regdummy then begin
    if regcode <> @regdummy then begin
-     PREBracesArg (Result + REOpSz + RENextOffSz)^ := ABracesMin;
-     PREBracesArg (Result + REOpSz + RENextOffSz + REBracesArgSz)^ := ABracesMax;
+     PREBracesArg (AlignToInt(Result + REOpSz + RENextOffSz))^ := ABracesMin;
+     PREBracesArg (AlignToInt(Result + REOpSz + RENextOffSz + REBracesArgSz))^ := ABracesMax;
     end;
     end;
   end;
   end;
 
 
@@ -2801,7 +2828,7 @@ function TRegExpr.regnext (p : PRegExprChar) : PRegExprChar;
     Result := nil;
     Result := nil;
     EXIT;
     EXIT;
    end;
    end;
-  offset := PRENextOff (p + REOpSz)^; //###0.933 inlined NEXT
+  offset := PRENextOff (AlignToPtr(p + REOpSz))^; //###0.933 inlined NEXT
   if offset = 0
   if offset = 0
    then Result := nil
    then Result := nil
    else Result := p + offset;
    else Result := p + offset;
@@ -2834,7 +2861,7 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
   scan := prog;
   scan := prog;
 
 
   while scan <> nil do begin
   while scan <> nil do begin
-     len := PRENextOff (scan + 1)^; //###0.932 inlined regnext
+     len := PRENextOff (AlignToPtr(scan + 1))^; //###0.932 inlined regnext
      if len = 0
      if len = 0
       then next := nil
       then next := nil
       else next := scan + len;
       else next := scan + len;
@@ -3123,9 +3150,9 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
              Error (reeLoopWithoutEntry);
              Error (reeLoopWithoutEntry);
              EXIT;
              EXIT;
             end;
             end;
-           opnd := scan + PRENextOff (scan + REOpSz + RENextOffSz + 2 * REBracesArgSz)^;
-           BracesMin := PREBracesArg (scan + REOpSz + RENextOffSz)^;
-           BracesMax := PREBracesArg (scan + REOpSz + RENextOffSz + REBracesArgSz)^;
+           // Operand layout after the node header: BracesMin, BracesMax, then the back-offset.
+           // The back-offset is stored pointer-aligned; both brace arguments are stored
+           // integer-aligned by ParsePiece, so they must be read back with AlignToInt
+           // (AlignToPtr may resolve to a different address when SizeOf(Pointer) <> SizeOf(integer)).
+           opnd := scan + PRENextOff (AlignToPtr(scan + REOpSz + RENextOffSz + 2 * REBracesArgSz))^;
+           BracesMin := PREBracesArg (AlignToInt(scan + REOpSz + RENextOffSz))^;
+           BracesMax := PREBracesArg (AlignToInt(scan + REOpSz + RENextOffSz + REBracesArgSz))^;
            save := reginput;
            save := reginput;
            if LoopStack [LoopStackIdx] >= BracesMin then begin // Min alredy matched - we can work
            if LoopStack [LoopStackIdx] >= BracesMin then begin // Min alredy matched - we can work
               if scan^ = LOOP then begin
               if scan^ = LOOP then begin
@@ -3189,8 +3216,8 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
             else if (scan^ = PLUS) or (scan^ = PLUSNG)
             else if (scan^ = PLUS) or (scan^ = PLUSNG)
              then BracesMin := 1 // PLUS
              then BracesMin := 1 // PLUS
              else begin // BRACES
              else begin // BRACES
-               BracesMin := PREBracesArg (scan + REOpSz + RENextOffSz)^;
-               BracesMax := PREBracesArg (scan + REOpSz + RENextOffSz + REBracesArgSz)^;
+               // ParsePiece stores both brace arguments at integer-aligned addresses,
+               // so read them with AlignToInt; AlignToPtr can pick a different address
+               // on targets where SizeOf(Pointer) <> SizeOf(integer).
+               BracesMin := PREBracesArg (AlignToInt(scan + REOpSz + RENextOffSz))^;
+               BracesMax := PREBracesArg (AlignToInt(scan + REOpSz + RENextOffSz + REBracesArgSz))^;
               end;
               end;
            save := reginput;
            save := reginput;
            opnd := scan + REOpSz + RENextOffSz;
            opnd := scan + REOpSz + RENextOffSz;
@@ -3370,8 +3397,8 @@ procedure TRegExpr.FillFirstCharSet (prog : PRegExprChar);
            EXIT;
            EXIT;
           end;
           end;
          LOOP, LOOPNG: begin //###0.940
          LOOP, LOOPNG: begin //###0.940
-           opnd := scan + PRENextOff (scan + REOpSz + RENextOffSz + REBracesArgSz * 2)^;
-           min_cnt := PREBracesArg (scan + REOpSz + RENextOffSz)^;
+           // The back-offset is stored pointer-aligned, but the minimum count is a brace
+           // argument that ParsePiece stores integer-aligned - read it with AlignToInt.
+           opnd := scan + PRENextOff (AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz * 2))^;
+           min_cnt := PREBracesArg (AlignToInt(scan + REOpSz + RENextOffSz))^;
            FillFirstCharSet (opnd);
            FillFirstCharSet (opnd);
            if min_cnt = 0
            if min_cnt = 0
             then FillFirstCharSet (next);
             then FillFirstCharSet (next);
@@ -3386,7 +3413,7 @@ procedure TRegExpr.FillFirstCharSet (prog : PRegExprChar);
           end;
           end;
          BRACES, BRACESNG: begin //###0.940
          BRACES, BRACESNG: begin //###0.940
            opnd := scan + REOpSz + RENextOffSz + REBracesArgSz * 2;
            opnd := scan + REOpSz + RENextOffSz + REBracesArgSz * 2;
-           min_cnt := PREBracesArg (scan + REOpSz + RENextOffSz)^; // BRACES
+           // Brace arguments are stored integer-aligned by ParsePiece, so use AlignToInt here.
+           min_cnt := PREBracesArg (AlignToInt(scan + REOpSz + RENextOffSz))^; // BRACES
            FillFirstCharSet (opnd);
            FillFirstCharSet (opnd);
            if min_cnt > 0
            if min_cnt > 0
             then EXIT;
             then EXIT;
@@ -4064,14 +4091,14 @@ function TRegExpr.Dump : RegExprString;
      {$ENDIF}
      {$ENDIF}
      if (op = BRACES) or (op = BRACESNG) then begin //###0.941
      if (op = BRACES) or (op = BRACESNG) then begin //###0.941
        // show min/max argument of BRACES operator
        // show min/max argument of BRACES operator
-       Result := Result + Format ('{%d,%d}', [PREBracesArg (s)^, PREBracesArg (s + REBracesArgSz)^]);
+       Result := Result + Format ('{%d,%d}', [PREBracesArg (AlignToInt(s))^, PREBracesArg (AlignToInt(s + REBracesArgSz))^]);
        inc (s, REBracesArgSz * 2);
        inc (s, REBracesArgSz * 2);
       end;
       end;
      {$IFDEF ComplexBraces}
      {$IFDEF ComplexBraces}
      if (op = LOOP) or (op = LOOPNG) then begin //###0.940
      if (op = LOOP) or (op = LOOPNG) then begin //###0.940
        Result := Result + Format (' -> (%d) {%d,%d}', [
        Result := Result + Format (' -> (%d) {%d,%d}', [
-        (s - programm - (REOpSz + RENextOffSz)) + PRENextOff (s + 2 * REBracesArgSz)^,
-        PREBracesArg (s)^, PREBracesArg (s + REBracesArgSz)^]);
+        (s - programm - (REOpSz + RENextOffSz)) + PRENextOff (AlignToPtr(s + 2 * REBracesArgSz))^,
+        PREBracesArg (AlignToInt(s))^, PREBracesArg (AlignToInt(s + REBracesArgSz))^]);
        inc (s, 2 * REBracesArgSz + RENextOffSz);
        inc (s, 2 * REBracesArgSz + RENextOffSz);
       end;
       end;
      {$ENDIF}
      {$ENDIF}