J. Gareth "Curious Kit" Moreton 3 years ago
parent
commit
9b4033fa80
7 changed files with 1204 additions and 4 deletions
  1. 67 0
      compiler/cgutils.pas
  2. 328 2
      compiler/x86/nx86add.pas
  3. 157 2
      tests/bench/bdiv.pp
  4. 156 0
      tests/bench/bdiv_s32.inc
  5. 124 0
      tests/bench/bdiv_s64.inc
  6. 204 0
      tests/bench/bdiv_u32.inc
  7. 168 0
      tests/bench/bdiv_u64.inc

+ 67 - 0
compiler/cgutils.pas

@@ -218,6 +218,12 @@ unit cgutils;
     procedure calc_divconst_magic_signed(N: byte; d: aInt; out magic_m: aInt; out magic_s: byte);
     procedure calc_divconst_magic_signed(N: byte; d: aInt; out magic_m: aInt; out magic_s: byte);
     procedure calc_divconst_magic_unsigned(N: byte; d: aWord; out magic_m: aWord; out magic_add: boolean; out magic_shift: byte);
     procedure calc_divconst_magic_unsigned(N: byte; d: aWord; out magic_m: aWord; out magic_add: boolean; out magic_shift: byte);
 
 
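+    { Procedure for calculating the multiplicative inverse, or reciprocal, of
+      a divisor mod 2^N.  Any factors of 2 are first removed from d and their
+      count is returned in shift; reciprocal is then a number r such that
+      (d shr shift) * r = 1 (mod 2^N).
+
+      WARNING: d must not be a power of 2 (including 2^0 = 1) }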
+    procedure calc_mul_inverse(N: byte; d: aWord; out reciprocal: aWord; out shift: Byte);
+
 implementation
 implementation
 
 
 uses
 uses
@@ -491,6 +497,67 @@ uses
         magic_m:=(q2+1) and mask;        { resulting magic number }
         magic_m:=(q2+1) and mask;        { resulting magic number }
         magic_shift:=p-N;     { resulting shift }
         magic_shift:=p-N;     { resulting shift }
       end;
       end;
+
+
+    procedure calc_mul_inverse(N: byte; d: aWord; out reciprocal: aWord; out shift: Byte);
+      var
+        mask, oldr, newd, swap_r, swap_d, q: aWord;
+      begin
+        { Splits d into k * 2^shift with k odd, then returns in reciprocal the
+          multiplicative inverse of k modulo 2^N, i.e. k * reciprocal = 1 (mod 2^N).
+
+          N          - bit width of the modulus; may equal the full width of aWord
+          d          - divisor; zero triggers InternalError(2021091001)
+          reciprocal - receives the inverse of the odd part of d
+          shift      - receives the number of trailing zero bits removed from d
+
+          If d is a power of 2 (odd part equal to 1), reciprocal is simply 1. }
+{$push}
+{$warnings off }
+        { 2^N cannot be represented when N is the full word size, so 0 is used
+          instead; mask:=newd-1 then relies on wrap-around to give all ones }
+        if N=(SizeOf(aWord) * 8) then
+          newd:=0
+        else
+          newd:=aWord(1) shl N; { Used later }
+        mask:=newd-1;
+        oldr:=mask;
+{$pop}
+
+        { Trim off powers of 2 so d is an odd number }
+{$if defined(cpu64bitalu)}
+        shift:=BsfQWord(d);
+{$elseif defined(cpu32bitalu)}
+        shift:=BsfDWord(d);
+{$elseif defined(cpu16bitalu)}
+        shift:=BsfWord(d);
+{$elseif defined(cpu8bitalu)}
+        shift:=BsfByte(d);
+{$else}
+{$error ALU not defined}
+{$endif}
+        if shift = 255 then
+          { This is a divide by zero that should have been caught earlier }
+          InternalError(2021091001);
+
+        d := d shr shift;
+
+        if d = 1 then
+          begin
+            { The odd part of a power of 2 is 1, whose inverse is 1.  The loop
+              below would run once with q = 2^N - 1 and leave reciprocal at 0,
+              so answer directly instead }
+            reciprocal:=1;
+            Exit;
+          end;
+
+        { Calculate reciprocal using the Extended Euclidean Algorithm as
+          described on page 244 of Hacker's Delight, Second Edition.
+
+          x1 = oldr
+          x2 = reciprocal
+          x3 = swap_r
+
+          v1 = newd
+          v2 = d
+          v3 = swap_d
+        }
+        newd:=newd-d; { -d }
+        reciprocal:=1;
+
+        repeat
+          q := newd div d;
+
+          { Remainder step: (newd, d) becomes (d, newd mod d) }
+          swap_d:=(newd-(q*d)) and mask;
+          newd:=d;
+          d:=swap_d;
+
+          { Apply the same step to the coefficient pair, reduced mod 2^N }
+          swap_r:=(oldr-(q*reciprocal)) and mask;
+          oldr:=reciprocal;
+          reciprocal:=swap_r;
+        until d<=1;
+      end;
 {$pop}
 {$pop}
 
 
 end.
 end.

+ 328 - 2
compiler/x86/nx86add.pas

@@ -48,6 +48,7 @@ unit nx86add;
         procedure second_addfloatavx;
         procedure second_addfloatavx;
       public
       public
         function pass_1 : tnode;override;
         function pass_1 : tnode;override;
+        function simplify(forinline : boolean) : tnode; override;
         function use_fma : boolean;override;
         function use_fma : boolean;override;
         procedure second_addfloat;override;
         procedure second_addfloat;override;
 {$ifndef i8086}
 {$ifndef i8086}
@@ -78,8 +79,8 @@ unit nx86add;
       symconst,symdef,
       symconst,symdef,
       cgobj,hlcgobj,cgx86,cga,cgutils,
       cgobj,hlcgobj,cgx86,cga,cgutils,
       tgobj,ncgutil,
       tgobj,ncgutil,
-      ncon,nset,ninl,ncnv,
-      defutil,
+      ncon,nset,ninl,ncnv,ncal,nmat,
+      defutil,defcmp,constexp,
       htypechk;
       htypechk;
 
 
 { Range check must be disabled explicitly as the code serves
 { Range check must be disabled explicitly as the code serves
@@ -1185,6 +1186,331 @@ unit nx86add;
       end;
       end;
 
 
 
 
+    function tx86addnode.simplify(forinline : boolean) : tnode;
+      var
+        t, m, ThisNode, ConstNode: TNode;
+        lt,rt, ThisType: TNodeType;
+        ThisDef: TDef;
+        DoOptimisation: Boolean;
+
+        reciprocal, comparison, divisor: AWord;
+        shift, N: Byte;
+      begin
+        { Rewrites "(x mod c) = 0" and "(x mod c) <> 0", where c is an ordinal
+          constant >= 3 that is not a power of 2, into a multiplication by the
+          reciprocal obtained from calc_mul_inverse followed by a comparison
+          against a precomputed constant, so that no mod node remains.  This is
+          only attempted when cs_opt_level1 is set and overflow checking is off;
+          in every other case the inherited simplify is used.
+
+          Load into local variables to reduce the number of pointer dereferences }
+        rt:=right.nodetype;
+        lt:=left.nodetype;
+
+        DoOptimisation:=False;
+
+{$if defined(cpu64bitalu) or defined(cpu32bitalu) or defined(cpu16bitalu)}
+        if (cs_opt_level1 in current_settings.optimizerswitches) and
+          { The presence of overflow checks tends to cause internal errors with the multiplication nodes }
+          not (cs_check_overflow in current_settings.localswitches) and
+          (nodetype in [equaln,unequaln]) then
+          begin
+            if (lt=modn) and (rt=ordconstn) and (TOrdConstNode(right).value.uvalue=0) then
+              begin
+                t:=left;
+                m:=right;
+              end
+            else if (rt=modn) and (lt=ordconstn) and (TOrdConstNode(left).value.uvalue=0) then
+              begin
+                t:=right;
+                m:=left;
+              end
+            else
+              begin
+                t:=nil;
+                m:=nil;
+              end;
+
+            if Assigned(t) and (TModDivNode(t).right.nodetype=ordconstn) and
+{$ifndef cpu64bitalu}
+              { Converting Int64 and QWord division doesn't work under i386 }
+{$ifndef cpu32bitalu}
+              (TModDivNode(t).resultdef.size < 4) and
+{$else cpu32bitalu}
+              (TModDivNode(t).resultdef.size < 8) and
+{$endif cpu32bitalu}
+{$endif cpu64bitalu}
+              (TOrdConstNode(TModDivNode(t).right).value>=3) then
+              begin
+                divisor:=TOrdConstNode(TModDivNode(t).right).value.uvalue;
+
+                { Exclude powers of 2, as there are more efficient ways to handle those }
+                if PopCnt(divisor)>1 then
+                  begin
+                    if is_signed(TModDivNode(t).left.resultdef) then
+                      begin
+                        { See pages 250-251 of Hacker's Delight, Second Edition
+                          for an explanation and proof of the algorithm, but
+                          essentially, we're doing the following:
+
+                          - Convert the divisor d to the form k.2^b if it isn't
+                            already odd (in which case, k = d and b = 0)
+                          - Calculate r, the multiplicative inverse of k modulo 2^N
+                          - Calculate c = floor(2^(N-1) / k) & -(2^b)
+                          - Let q = ((n * r) + c) ror b (mod 2^N)
+                          - Repurpose c to equal floor(2c / 2^b) = c shr (b - 1)
+                            (some RISC platforms will benefit from doing this over
+                            precalculating the modified constant. For x86,
+                            it's better with the constant precalculated for
+                            32-bit and under, but for 64-bit, use SHR. )
+                          - If q is below or equal to c, then (n mod d) = 0
+                          }
+                        while True do
+                          begin
+                            ThisNode:=TModDivNode(t).left;
+                            case ThisNode.nodetype of
+                              typeconvn:
+                                begin
+                                  ThisDef:=TTypeConvNode(ThisNode).left.resultdef;
+                                  { See if we can simplify things to a smaller ordinal to
+                                    reduce code size and increase speed }
+                                  if is_signed(ThisDef) and
+                                    is_integer(ThisDef) and
+                                    { Byte-sized multiplications can cause problems }
+                                    (ThisDef.size>=2) and
+                                    { Make sure the divisor is in range }
+                                    (divisor>=TOrdDef(ThisDef).low) and
+                                    (divisor<=TOrdDef(ThisDef).high) then
+                                    begin
+                                      TOrdConstNode(TModDivNode(t).right).resultdef:=ThisDef;
+                                      TOrdConstNode(m).resultdef:=ThisDef;
+                                      TModDivNode(t).resultdef:=ThisDef;
+
+                                      { Destroy the typeconv node }
+                                      TModDivNode(t).left:=TTypeConvNode(ThisNode).left;
+                                      TTypeConvNode(ThisNode).left:=nil;
+                                      ThisNode.Free;
+                                      Continue;
+                                    end;
+                                  end;
+                              ordconstn:
+                                begin
+                                  { Just simplify into a constant }
+                                  Result:=inherited simplify(forinline);
+                                  Exit;
+                                end;
+                              else
+                                ;
+                            end;
+
+                            DoOptimisation:=True;
+                            Break;
+                          end;
+
+                        if DoOptimisation then
+                          begin
+                            ThisDef:=TModDivNode(t).left.resultdef;
+
+                            if nodetype = equaln then
+                              ThisType:=lten
+                            else
+                              ThisType:=gtn;
+
+                            N:=ThisDef.size*8;
+                            calc_mul_inverse(N, TOrdConstNode(TModDivNode(t).right).value.uvalue, reciprocal, shift);
+
+                            { Construct the following node tree for odd divisors:
+                                <lten> (for equaln) or <gtn> (for unequaln)
+                                  <addn>
+                                    <muln>
+                                      <typeconv signed-to-unsigned>
+                                        <numerator node (TModDivNode(t).left)>
+                                      <reciprocal constant>
+                                    <comparison constant (effectively a signed shift)>
+                                  <comparison constant * 2>
+
+                              For even divisors, convert them to the form k.2^b, with
+                              odd k, then construct the following:
+                                <lten> (for equaln) or <gtn> (for unequaln)
+                                  <ror>
+                                    (b)
+                                    <addn>
+                                      <muln>
+                                        <typeconv signed-to-unsigned>
+                                          <numerator node (TModDivNode(t).left)>
+                                        <reciprocal constant>
+                                      <comparison constant (effectively a signed shift)>
+                                  <comparison constant shr (b - 1)>
+                            }
+
+                            ThisNode:=ctypeconvnode.create_internal(TModDivNode(t).left, ThisDef);
+                            TTypeConvNode(ThisNode).convtype:=tc_int_2_int;
+                            ThisDef:=get_unsigned_inttype(ThisDef);
+                            ThisNode.resultdef:=ThisDef;
+
+                            TModDivNode(t).left:=nil;
+
+                            ConstNode:=cordconstnode.create(reciprocal, ThisDef, False);
+                            ConstNode.resultdef:=ThisDef;
+
+                            ThisNode:=caddnode.create_internal(muln, ThisNode, ConstNode);
+                            ThisNode.resultdef:=ThisDef;
+
+{$push}
+{$warnings off} { NOTE(review): "1 shl shift" below is not explicitly widened to
+                  aWord - confirm it cannot overflow for shift >= 31 when the
+                  compiler itself is built with a 32-bit native integer }
+                            if shift>0 then
+                              comparison:=((aWord(1) shl ((N-1) and (SizeOf(aWord)*8-1))) div (divisor shr shift)) and -(1 shl shift)
+                            else
+                              comparison:=(aWord(1) shl ((N-1) and (SizeOf(aWord)*8-1))) div divisor;
+{$pop}
+                            ConstNode:=cordconstnode.create(comparison, ThisDef, False);
+                            ConstNode.resultdef:=ThisDef;
+
+                            ThisNode:=caddnode.create_internal(addn, ThisNode, ConstNode);
+                            ThisNode.resultdef:=ThisDef;
+
+                            if shift>0 then
+                              begin
+                                ConstNode:=cordconstnode.create(shift, u8inttype, False);
+                                ConstNode.resultdef:=u8inttype;
+                                ThisNode:=cinlinenode.createintern(in_ror_x_y,false,
+                                  ccallparanode.create(ConstNode,
+                                  ccallparanode.create(ThisNode, nil)));
+
+                                ThisNode.resultdef:=ThisDef;
+
+                                ConstNode:=cordconstnode.create(comparison shr (shift - 1), ThisDef, False);
+                              end
+                            else
+                              ConstNode:=cordconstnode.create(comparison*2, ThisDef, False);
+
+                            ConstNode.resultdef:=ThisDef;
+
+                            Result:=CAddNode.create_internal(ThisType, ThisNode, ConstNode);
+                            Result.resultdef:=resultdef;
+                            Exit;
+                          end;
+                      end
+                    else
+                      begin
+                        { For bit length N, convert "(x mod d) = 0" or "(x mod d) <> 0", where
+                          d is an odd-numbered integer constant, to "(x * r) <= m", where
+                          dr = 1 (mod 2^N) and m = floor(2^N / d).
+
+                          If d is even, convert to the form k.2^b, where k is odd, then
+                          convert to "(x * r) ror b <= m", where kr = 1 (mod 2^N) and
+                          m = floor(2^N / d) = floor(2^(N-b) / k) }
+                        while True do
+                          begin
+                            ThisNode:=TModDivNode(t).left;
+                            case ThisNode.nodetype of
+                              typeconvn:
+                                begin
+                                  ThisDef:=TTypeConvNode(ThisNode).left.resultdef;
+                                  { See if we can simplify things to a smaller ordinal to
+                                    reduce code size and increase speed }
+                                  if not is_signed(ThisDef) and
+                                    is_integer(ThisDef) and
+                                    { Byte-sized multiplications can cause problems }
+                                    (ThisDef.size>=2) and
+                                    { Make sure the divisor is in range }
+                                    (divisor>=TOrdDef(ThisDef).low) and
+                                    (divisor<=TOrdDef(ThisDef).high) then
+                                    begin
+                                      TOrdConstNode(TModDivNode(t).right).resultdef:=ThisDef;
+                                      TOrdConstNode(m).resultdef:=ThisDef;
+                                      TModDivNode(t).resultdef:=ThisDef;
+
+                                      { Destroy the typeconv node }
+                                      TModDivNode(t).left:=TTypeConvNode(ThisNode).left;
+                                      TTypeConvNode(ThisNode).left:=nil;
+                                      ThisNode.Free;
+                                      Continue;
+                                    end;
+                                  end;
+                              ordconstn:
+                                begin
+                                  { Just simplify into a constant }
+                                  Result:=inherited simplify(forinline);
+                                  Exit;
+                                end;
+                              else
+                                ;
+                            end;
+
+                            DoOptimisation:=True;
+                            Break;
+                          end;
+
+                        if DoOptimisation then
+                          begin
+                            ThisDef:=TModDivNode(t).left.resultdef;
+
+                            { Construct the following node tree for odd divisors:
+                                <lten> (for equaln) or <gtn> (for unequaln)
+                                  <muln>
+                                    <numerator node (TModDivNode(t).left)>
+                                    <reciprocal constant>
+                                  (2^N / divisor)
+
+                              For even divisors, convert them to the form k.2^b, with
+                              odd k, then construct the following:
+                                <lten> (for equaln) or <gtn> (for unequaln)
+                                  <ror>
+                                    (b)
+                                    <muln>
+                                      <numerator node (TModDivNode(t).left)>
+                                      <reciprocal constant>
+                                  (2^N / divisor)
+                            }
+
+                            if nodetype=equaln then
+                              ThisType:=lten
+                            else
+                              ThisType:=gtn;
+
+                            N:=ThisDef.size*8;
+                            calc_mul_inverse(N, TOrdConstNode(TModDivNode(t).right).value.uvalue, reciprocal, shift);
+
+                            ConstNode:=cordconstnode.create(reciprocal, ThisDef, False);
+                            ConstNode.resultdef:=ThisDef;
+
+                            ThisNode:=caddnode.create_internal(muln, TModDivNode(t).left, ConstNode);
+                            ThisNode.resultdef:=ThisDef;
+
+                            TModDivNode(t).left:=nil;
+
+                            if shift>0 then
+                              begin
+                                ConstNode:=cordconstnode.create(shift, u8inttype, False);
+                                ConstNode.resultdef:=u8inttype;
+                                ThisNode:=cinlinenode.createintern(in_ror_x_y,false,
+                                  ccallparanode.create(ConstNode,
+                                  ccallparanode.create(ThisNode, nil)));
+
+                                ThisNode.resultdef:=ThisDef;
+
+                                comparison:=(aWord(1) shl ((N-shift) and (SizeOf(aWord)*8-1))) div (divisor shr shift);
+                              end
+                            else
+                              begin
+{$push}
+{$warnings off}
+                                { Because 2^N and divisor are relatively prime,
+                                  floor(2^N / divisor) = floor((2^N - 1) / divisor) }
+                                comparison:=(aWord(not 0) shr (((SizeOf(aWord)*8)-N) and (SizeOf(aWord)*8-1))) div divisor;
+{$pop}
+                              end;
+
+                            ConstNode:=cordconstnode.create(comparison, ThisDef, False);
+                            ConstNode.resultdef:=ThisDef;
+
+                            Result:=CAddNode.create_internal(ThisType, ThisNode, ConstNode);
+                            Result.resultdef:=resultdef;
+                            Exit;
+                          end;
+                      end;
+                  end;
+              end;
+          end;
+{$ifend defined(cpu64bitalu) or defined(cpu32bitalu) or defined(cpu16bitalu)}
+        Result:=inherited simplify(forinline);
+      end;
+
+
     function tx86addnode.use_fma : boolean;
     function tx86addnode.use_fma : boolean;
       begin
       begin
 {$ifndef i8086}
 {$ifndef i8086}

+ 157 - 2
tests/bench/bdiv.pp

@@ -134,6 +134,46 @@ type
       function WriteResults: Boolean; override;
       function WriteResults: Boolean; override;
   end;
   end;
 
 
+  TUInt32ModCmpTest = class(TTestAncestor) { Common base of the Cardinal "(n mod d) = 0" tests }
+    protected
+      FInputArray: array[$00..$FF] of Cardinal; { numerators that WriteResults re-evaluates }
+      FResultArray: array[$00..$FF] of Boolean; { recorded answers that WriteResults verifies }
+      function GetDivisor: Cardinal; virtual; abstract; { divisor d, supplied by each descendant }
+      function DoMod0(Numerator: Cardinal): Boolean; inline; { reference "(Numerator mod GetDivisor) = 0" }
+    public
+      function WriteResults: Boolean; override; { False on the first mismatch, after printing it }
+  end;
+
+  TSInt32ModCmpTest = class(TTestAncestor) { Common base of the Integer "(n mod d) = 0" tests }
+    protected
+      FInputArray: array[$00..$FF] of Integer; { numerators that WriteResults re-evaluates }
+      FResultArray: array[$00..$FF] of Boolean; { recorded answers that WriteResults verifies }
+      function GetDivisor: Integer; virtual; abstract; { divisor d, supplied by each descendant }
+      function DoMod0(Numerator: Integer): Boolean; inline; { reference "(Numerator mod GetDivisor) = 0" }
+    public
+      function WriteResults: Boolean; override; { False on the first mismatch, after printing it }
+  end;
+
+  TUInt64ModCmpTest = class(TTestAncestor) { Common base of the QWord "(n mod d) = 0" tests }
+    protected
+      FInputArray: array[$00..$FF] of QWord; { numerators that WriteResults re-evaluates }
+      FResultArray: array[$00..$FF] of Boolean; { recorded answers that WriteResults verifies }
+      function GetDivisor: QWord; virtual; abstract; { divisor d, supplied by each descendant }
+      function DoMod0(Numerator: QWord): Boolean; inline; { reference "(Numerator mod GetDivisor) = 0" }
+    public
+      function WriteResults: Boolean; override; { False on the first mismatch, after printing it }
+  end;
+
+  TSInt64ModCmpTest = class(TTestAncestor) { Common base of the Int64 "(n mod d) = 0" tests }
+    protected
+      FInputArray: array[$00..$FF] of Int64; { numerators that WriteResults re-evaluates }
+      FResultArray: array[$00..$FF] of Boolean; { recorded answers that WriteResults verifies }
+      function GetDivisor: Int64; virtual; abstract; { divisor d, supplied by each descendant }
+      function DoMod0(Numerator: Int64): Boolean; inline; { reference "(Numerator mod GetDivisor) = 0" }
+    public
+      function WriteResults: Boolean; override; { False on the first mismatch, after printing it }
+  end;
+
 {$I bdiv_u16.inc}
 {$I bdiv_u16.inc}
 {$I bdiv_u32.inc}
 {$I bdiv_u32.inc}
 {$I bdiv_u64.inc}
 {$I bdiv_u64.inc}
@@ -429,9 +469,109 @@ function TSInt64ModTest.WriteResults: Boolean;
       end;
       end;
   end;
   end;
 
 
+{ TSInt32ModCmpTest }
+
+function TSInt32ModCmpTest.DoMod0(Numerator: Integer): Boolean;
+  var
+    Remainder: Integer;
+  begin
+    { Reference computation: the divisor comes from a virtual call rather
+      than a literal, and the remainder is tested against zero }
+    Remainder := Numerator mod GetDivisor;
+    Result := Remainder = 0;
+  end;
+
+function TSInt32ModCmpTest.WriteResults: Boolean;
+  var
+    Slot: Integer;
+    Reference: Boolean;
+  begin
+    { Compare every recorded answer with the reference value from DoMod0;
+      report and stop at the first disagreement }
+    Result := True;
+    Slot := 0;
+    while Slot <= 255 do
+      begin
+        Reference := DoMod0(FInputArray[Slot]);
+        if Reference <> FResultArray[Slot] then
+          begin
+            WriteLn('FAIL - (', FInputArray[Slot], ' mod ', GetDivisor, ') = 0; expected ', Reference, ' got ', FResultArray[Slot]);
+            Result := False;
+            Break;
+          end;
+        Inc(Slot);
+      end;
+  end;
+
+{ TUInt32ModCmpTest }
+
+function TUInt32ModCmpTest.DoMod0(Numerator: Cardinal): Boolean;
+  var
+    Remainder: Cardinal;
+  begin
+    { Reference computation: the divisor comes from a virtual call rather
+      than a literal, and the remainder is tested against zero }
+    Remainder := Numerator mod GetDivisor;
+    Result := Remainder = 0;
+  end;
+
+function TUInt32ModCmpTest.WriteResults: Boolean;
+  var
+    Slot: Integer;
+    Reference: Boolean;
+  begin
+    { Compare every recorded answer with the reference value from DoMod0;
+      report and stop at the first disagreement }
+    Result := True;
+    Slot := 0;
+    while Slot <= 255 do
+      begin
+        Reference := DoMod0(FInputArray[Slot]);
+        if Reference <> FResultArray[Slot] then
+          begin
+            WriteLn('FAIL - (', FInputArray[Slot], ' mod ', GetDivisor, ') = 0; expected ', Reference, ' got ', FResultArray[Slot]);
+            Result := False;
+            Break;
+          end;
+        Inc(Slot);
+      end;
+  end;
+
+{ TSInt64ModCmpTest }
+
+function TSInt64ModCmpTest.DoMod0(Numerator: Int64): Boolean;
+  var
+    Remainder: Int64;
+  begin
+    { Reference computation: the divisor comes from a virtual call rather
+      than a literal, and the remainder is tested against zero }
+    Remainder := Numerator mod GetDivisor;
+    Result := Remainder = 0;
+  end;
+
+function TSInt64ModCmpTest.WriteResults: Boolean;
+  var
+    Slot: Integer;
+    Reference: Boolean;
+  begin
+    { Compare every recorded answer with the reference value from DoMod0;
+      report and stop at the first disagreement }
+    Result := True;
+    Slot := 0;
+    while Slot <= 255 do
+      begin
+        Reference := DoMod0(FInputArray[Slot]);
+        if Reference <> FResultArray[Slot] then
+          begin
+            WriteLn('FAIL - (', FInputArray[Slot], ' mod ', GetDivisor, ') = 0; expected ', Reference, ' got ', FResultArray[Slot]);
+            Result := False;
+            Break;
+          end;
+        Inc(Slot);
+      end;
+  end;
+
+{ TUInt64ModCmpTest }
+
+function TUInt64ModCmpTest.DoMod0(Numerator: QWord): Boolean;
+  var
+    Remainder: QWord;
+  begin
+    { Reference computation: the divisor comes from a virtual call rather
+      than a literal, and the remainder is tested against zero }
+    Remainder := Numerator mod GetDivisor;
+    Result := Remainder = 0;
+  end;
+
+function TUInt64ModCmpTest.WriteResults: Boolean;
+  var
+    Slot: Integer;
+    Reference: Boolean;
+  begin
+    { Compare every recorded answer with the reference value from DoMod0;
+      report and stop at the first disagreement }
+    Result := True;
+    Slot := 0;
+    while Slot <= 255 do
+      begin
+        Reference := DoMod0(FInputArray[Slot]);
+        if Reference <> FResultArray[Slot] then
+          begin
+            WriteLn('FAIL - (', FInputArray[Slot], ' mod ', GetDivisor, ') = 0; expected ', Reference, ' got ', FResultArray[Slot]);
+            Result := False;
+            Break;
+          end;
+        Inc(Slot);
+      end;
+  end;
+
 { Main function }
 { Main function }
 const
 const
-  TestClasses: array[0..69] of TTestClass = (
+  TestClasses: array[0..84] of TTestClass = (
     TUInt16Bit1Test,
     TUInt16Bit1Test,
     TUInt16Bit1ModTest,
     TUInt16Bit1ModTest,
     TUInt16Bit2Test,
     TUInt16Bit2Test,
@@ -501,7 +641,22 @@ const
     TSInt64Bit10000Test,
     TSInt64Bit10000Test,
     TSInt64Bit10000ModTest,
     TSInt64Bit10000ModTest,
     TSInt64Bit86400000Test,
     TSInt64Bit86400000Test,
-    TSInt64Bit86400000ModTest
+    TSInt64Bit86400000ModTest,
+    TUInt32Bit3ModCmpTest,
+    TSInt32Bit3ModCmpTest,
+    TUInt32Bit10ModCmpTest,
+    TSInt32Bit10ModCmpTest,
+    TUInt32Bit100ModCmpTest,
+    TSInt32Bit100ModCmpTest,
+    TUInt32Bit400ModCmpTest,
+    TUInt32Bit1000ModCmpTest,
+    TUInt64Bit3ModCmpTest,
+    TSInt64Bit3ModCmpTest,
+    TUInt64Bit10ModCmpTest,
+    TSInt64Bit10000ModCmpTest,
+    TUInt64Bit100ModCmpTest,
+    TSInt64Bit86400000ModCmpTest,
+    TUInt64Bit1000000000ModCmpTest
   );
   );
 
 
 var
 var

+ 156 - 0
tests/bench/bdiv_s32.inc

@@ -39,6 +39,36 @@ type
       function TestTitle: shortstring; override;
       function TestTitle: shortstring; override;
   end;
   end;
 
 
+  { TSInt32Bit3ModCmpTest }
+
+  TSInt32Bit3ModCmpTest = class(TSInt32ModCmpTest)
+    protected
+      function GetDivisor: Integer; override;
+      procedure DoTestIteration(Iteration: Integer); override;
+    public
+      function TestTitle: shortstring; override;
+  end;
+
+  { TSInt32Bit10ModCmpTest }
+
+  TSInt32Bit10ModCmpTest = class(TSInt32ModCmpTest)
+    protected
+      function GetDivisor: Integer; override;
+      procedure DoTestIteration(Iteration: Integer); override;
+    public
+      function TestTitle: shortstring; override;
+  end;
+
+  { TSInt32Bit100ModCmpTest }
+
+  TSInt32Bit100ModCmpTest = class(TSInt32ModCmpTest)
+    protected
+      function GetDivisor: Integer; override;
+      procedure DoTestIteration(Iteration: Integer); override;
+    public
+      function TestTitle: shortstring; override;
+  end;
+
 { TSInt32Bit1Test }
 { TSInt32Bit1Test }
 
 
 function TSInt32Bit1Test.TestTitle: shortstring;
 function TSInt32Bit1Test.TestTitle: shortstring;
@@ -206,3 +236,129 @@ procedure TSInt32Bit100ModTest.DoTestIteration(Iteration: Integer);
       
       
     FResultArray[Index] := Answer;
     FResultArray[Index] := Answer;
   end;
   end;
+
+{ TSInt32Bit3ModCmpTest }
+
+function TSInt32Bit3ModCmpTest.TestTitle: shortstring;
+  begin
+    Result := 'Signed 32-bit (n mod 3) = 0';
+  end;
+
+function TSInt32Bit3ModCmpTest.GetDivisor: Integer;
+  begin
+    Result := 3;
+  end;
+
+procedure TSInt32Bit3ModCmpTest.DoTestIteration(Iteration: Integer);
+  var
+    Numerator: Integer; Answer: Boolean;
+    Index, X: Integer;
+  begin { Evaluates "(Numerator mod 3) = 0" for one input slot and records the outcome }
+    Index := Iteration and $FF; { map the iteration onto one of the 256 array slots }
+    case Index of
+      0:
+        Numerator := -2147483648; { -2^31 }
+      1:
+        Numerator := -2147483600;
+      2:
+        Numerator := -2147483599;
+      253:
+        Numerator := 2147483599;
+      254:
+        Numerator := 2147483600;
+      255:
+        Numerator := 2147483647; { 2^31 - 1 }
+      else
+        Numerator := Index - 128; { slots 3..252 give -125..124 }
+    end;
+
+    FInputArray[Index] := Numerator;
+    for X := 0 to INTERNAL_LOOPS - 1 do { same expression repeated; Answer keeps the last evaluation }
+      Answer := (Numerator mod 3) = 0;
+
+    FResultArray[Index] := Answer;
+  end;
+
+{ TSInt32Bit10ModCmpTest }
+
+function TSInt32Bit10ModCmpTest.TestTitle: shortstring;
+  begin
+    Result := 'Signed 32-bit (n mod 10) = 0';
+  end;
+
+function TSInt32Bit10ModCmpTest.GetDivisor: Integer;
+  begin
+    Result := 10;
+  end;
+
+procedure TSInt32Bit10ModCmpTest.DoTestIteration(Iteration: Integer);
+  var
+    Numerator: Integer; Answer: Boolean;
+    Index, X: Integer;
+  begin { Evaluates "(Numerator mod 10) = 0" for one input slot and records the outcome }
+    Index := Iteration and $FF; { map the iteration onto one of the 256 array slots }
+    case Index of
+      0:
+        Numerator := -2147483648; { -2^31 }
+      1:
+        Numerator := -2147483600;
+      2:
+        Numerator := -2147483599;
+      253:
+        Numerator := 2147483599;
+      254:
+        Numerator := 2147483600;
+      255:
+        Numerator := 2147483647; { 2^31 - 1 }
+      else
+        Numerator := Index - 128; { slots 3..252 give -125..124 }
+    end;
+
+    FInputArray[Index] := Numerator;
+    for X := 0 to INTERNAL_LOOPS - 1 do { same expression repeated; Answer keeps the last evaluation }
+      Answer := (Numerator mod 10) = 0;
+
+    FResultArray[Index] := Answer;
+  end;
+
+{ TSInt32Bit100ModCmpTest }
+
+function TSInt32Bit100ModCmpTest.TestTitle: shortstring;
+  begin
+    Result := 'Signed 32-bit (n mod 100) = 0';
+  end;
+
+function TSInt32Bit100ModCmpTest.GetDivisor: Integer;
+  begin
+    Result := 100;
+  end;
+
+procedure TSInt32Bit100ModCmpTest.DoTestIteration(Iteration: Integer);
+  var
+    Numerator: Integer; Answer: Boolean;
+    Index, X: Integer;
+  begin { Evaluates "(Numerator mod 100) = 0" for one input slot and records the outcome }
+    Index := Iteration and $FF; { map the iteration onto one of the 256 array slots }
+    case Index of
+      0:
+        Numerator := -2147483648; { -2^31 }
+      1:
+        Numerator := -2147483600;
+      2:
+        Numerator := -2147483599;
+      253:
+        Numerator := 2147483599;
+      254:
+        Numerator := 2147483600;
+      255:
+        Numerator := 2147483647; { 2^31 - 1 }
+      else
+        Numerator := Index - 128; { slots 3..252 give -125..124 }
+    end;
+
+    FInputArray[Index] := Numerator;
+    for X := 0 to INTERNAL_LOOPS - 1 do { same expression repeated; Answer keeps the last evaluation }
+      Answer := (Numerator mod 100) = 0;
+
+    FResultArray[Index] := Answer;
+  end;

+ 124 - 0
tests/bench/bdiv_s64.inc

@@ -207,6 +207,36 @@ type
       function TestTitle: shortstring; override;
       function TestTitle: shortstring; override;
   end;
   end;
 
 
+  { TSInt64Bit3ModCmpTest }
+
+  TSInt64Bit3ModCmpTest = class(TSInt64ModCmpTest)
+    protected
+      function GetDivisor: Int64; override;
+      procedure DoTestIteration(Iteration: Integer); override;
+    public
+      function TestTitle: shortstring; override;
+  end;
+
+  { TSInt64Bit10000ModCmpTest }
+
+  TSInt64Bit10000ModCmpTest = class(TSInt64ModCmpTest)
+    protected
+      function GetDivisor: Int64; override;
+      procedure DoTestIteration(Iteration: Integer); override;
+    public
+      function TestTitle: shortstring; override;
+  end;
+
+  { TSInt64Bit86400000ModCmpTest }
+
+  TSInt64Bit86400000ModCmpTest = class(TSInt64ModCmpTest)
+    protected
+      function GetDivisor: Int64; override;
+      procedure DoTestIteration(Iteration: Integer); override;
+    public
+      function TestTitle: shortstring; override;
+  end;
+
 { TSInt64Bit1Test }
 { TSInt64Bit1Test }
 
 
 function TSInt64Bit1Test.TestTitle: shortstring;
 function TSInt64Bit1Test.TestTitle: shortstring;
@@ -770,3 +800,97 @@ procedure TSInt64Bit86400000ModTest.DoTestIteration(Iteration: Integer);
       
       
     FResultArray[Index] := Answer;
     FResultArray[Index] := Answer;
   end;
   end;
+
+{ TSInt64Bit3ModCmpTest }
+
+function TSInt64Bit3ModCmpTest.TestTitle: shortstring;
+  { Title string identifying this test. }
+  begin
+    TestTitle := 'Signed 64-bit (n mod 3) = 0';
+  end;
+
+function TSInt64Bit3ModCmpTest.GetDivisor: Int64;
+  { Divisor exercised by this test: the constant 3. }
+  begin
+    GetDivisor := 3;
+  end;
+
+procedure TSInt64Bit3ModCmpTest.DoTestIteration(Iteration: Integer); { One benchmark
+  step for "(n mod 3) = 0": chooses a signed 64-bit numerator from the low 8 bits
+  of Iteration, then stores that input and the Boolean outcome at the same index. }
+  var
+    Numerator: Int64; Answer: Boolean;
+    Index, X: Integer;
+  begin
+    Index := Iteration and $FF; { wrap to 0..255 }
+    case Index of { slots 0..2 and 253..255 hold values at or near the 64-bit signed limits }
+      0:
+        Numerator := Int64($8000000000000000); { most negative Int64 bit pattern }
+      1:
+        Numerator := Int64($8000000000000006);
+      2:
+        Numerator := Int64($8000000000000007);
+      253:
+        Numerator := Int64($7FFFFFFFFFFFFFF9);
+      254:
+        Numerator := Int64($7FFFFFFFFFFFFFFA);
+      255:
+        Numerator := Int64($7FFFFFFFFFFFFFFF); { most positive Int64 bit pattern }
+      else
+        Numerator := Index - 128; { slots 3..252 give -125..124 }
+    end;
+
+    FInputArray[Index] := Numerator;
+    for X := 0 to INTERNAL_LOOPS - 1 do { same check repeated INTERNAL_LOOPS times }
+      Answer := (Numerator mod 3) = 0; { the expression named in TestTitle }
+
+    FResultArray[Index] := Answer;
+  end;
+
+{ TSInt64Bit10000ModCmpTest }
+
+function TSInt64Bit10000ModCmpTest.TestTitle: shortstring;
+  { Title string identifying this test. }
+  begin
+    TestTitle := 'Signed 64-bit (n mod 10,000) = 0 (Currency)';
+  end;
+
+function TSInt64Bit10000ModCmpTest.GetDivisor: Int64;
+  { Divisor exercised by this test: the constant 10000. }
+  begin
+    GetDivisor := 10000;
+  end;
+
+procedure TSInt64Bit10000ModCmpTest.DoTestIteration(Iteration: Integer); { One benchmark
+  step for "(n mod 10000) = 0": takes a signed 64-bit numerator from FS64_10000Input,
+  then stores that input and the Boolean outcome at the index derived from Iteration. }
+  var
+    Numerator: Int64; Answer: Boolean;
+    Index, X: Integer;
+  begin
+    Index := Iteration and $FF; { wrap to 0..255 }
+    Numerator := FS64_10000Input[Index and $F]; { table index cycles through 0..15; the table is declared outside this block }
+    FInputArray[Index] := Numerator;
+    for X := 0 to INTERNAL_LOOPS - 1 do { same check repeated INTERNAL_LOOPS times }
+      Answer := (Numerator mod 10000) = 0; { the expression named in TestTitle }
+
+    FResultArray[Index] := Answer;
+  end;
+
+{ TSInt64Bit86400000ModCmpTest }
+
+function TSInt64Bit86400000ModCmpTest.TestTitle: shortstring;
+  { Title string identifying this test. }
+  begin
+    TestTitle := 'Signed 64-bit (n mod 86,400,000) = 0';
+  end;
+
+function TSInt64Bit86400000ModCmpTest.GetDivisor: Int64;
+  { Divisor exercised by this test: the constant 86400000. }
+  begin
+    GetDivisor := 86400000;
+  end;
+
+procedure TSInt64Bit86400000ModCmpTest.DoTestIteration(Iteration: Integer); { One benchmark
+  step for "(n mod 86400000) = 0": takes a signed 64-bit numerator from
+  FS64_86400000Input, then stores that input and the Boolean outcome at the index
+  derived from Iteration. }
+  var
+    Numerator: Int64; Answer: Boolean;
+    Index, X: Integer;
+  begin
+    Index := Iteration and $FF; { wrap to 0..255 }
+    Numerator := FS64_86400000Input[Index and $F]; { table index cycles through 0..15; the table is declared outside this block }
+    FInputArray[Index] := Numerator;
+    for X := 0 to INTERNAL_LOOPS - 1 do { same check repeated INTERNAL_LOOPS times }
+      Answer := (Numerator mod 86400000) = 0; { the expression named in TestTitle }
+
+    FResultArray[Index] := Answer;
+  end;

+ 204 - 0
tests/bench/bdiv_u32.inc

@@ -223,6 +223,56 @@ type
       function TestTitle: shortstring; override;
       function TestTitle: shortstring; override;
   end;
   end;
 
 
+  { TUInt32Bit3ModCmpTest: unsigned 32-bit "(n mod 3) = 0" comparison test }
+
+  TUInt32Bit3ModCmpTest = class(TUInt32ModCmpTest)
+    protected
+      { Runs one iteration of the comparison }
+      procedure DoTestIteration(Iteration: Integer); override;
+      { Returns the divisor constant, 3 }
+      function GetDivisor: Cardinal; override;
+    public
+      { Returns the title string for this test }
+      function TestTitle: shortstring; override;
+  end;
+
+  { TUInt32Bit10ModCmpTest: unsigned 32-bit "(n mod 10) = 0" comparison test }
+
+  TUInt32Bit10ModCmpTest = class(TUInt32ModCmpTest)
+    protected
+      { Runs one iteration of the comparison }
+      procedure DoTestIteration(Iteration: Integer); override;
+      { Returns the divisor constant, 10 }
+      function GetDivisor: Cardinal; override;
+    public
+      { Returns the title string for this test }
+      function TestTitle: shortstring; override;
+  end;
+
+  { TUInt32Bit100ModCmpTest: unsigned 32-bit "(n mod 100) = 0" comparison test }
+
+  TUInt32Bit100ModCmpTest = class(TUInt32ModCmpTest)
+    protected
+      { Runs one iteration of the comparison }
+      procedure DoTestIteration(Iteration: Integer); override;
+      { Returns the divisor constant, 100 }
+      function GetDivisor: Cardinal; override;
+    public
+      { Returns the title string for this test }
+      function TestTitle: shortstring; override;
+  end;
+
+  { TUInt32Bit400ModCmpTest: unsigned 32-bit "(n mod 400) = 0" comparison test }
+
+  TUInt32Bit400ModCmpTest = class(TUInt32ModCmpTest)
+    protected
+      { Runs one iteration of the comparison }
+      procedure DoTestIteration(Iteration: Integer); override;
+      { Returns the divisor constant, 400 }
+      function GetDivisor: Cardinal; override;
+    public
+      { Returns the title string for this test }
+      function TestTitle: shortstring; override;
+  end;
+
+  { TUInt32Bit1000ModCmpTest: unsigned 32-bit "(n mod 1000) = 0" comparison test }
+
+  TUInt32Bit1000ModCmpTest = class(TUInt32ModCmpTest)
+    protected
+      { Runs one iteration of the comparison }
+      procedure DoTestIteration(Iteration: Integer); override;
+      { Returns the divisor constant, 1000 }
+      function GetDivisor: Cardinal; override;
+    public
+      { Returns the title string for this test }
+      function TestTitle: shortstring; override;
+  end;
+
 
 
 { TUInt32Bit1Test }
 { TUInt32Bit1Test }
 
 
@@ -859,3 +909,157 @@ procedure TUInt32Bit3600000ModTest.DoTestIteration(Iteration: Integer);
 
 
     FResultArray[Index] := Answer;
     FResultArray[Index] := Answer;
   end;
   end;
+
+{ TUInt32Bit3ModCmpTest }
+
+function TUInt32Bit3ModCmpTest.TestTitle: shortstring;
+  { Title string identifying this test. }
+  begin
+    TestTitle := 'Unsigned 32-bit (n mod 3) = 0';
+  end;
+
+function TUInt32Bit3ModCmpTest.GetDivisor: Cardinal;
+  { Divisor exercised by this test: the constant 3. }
+  begin
+    GetDivisor := 3;
+  end;
+
+procedure TUInt32Bit3ModCmpTest.DoTestIteration(Iteration: Integer); { One benchmark
+  step for "(n mod 3) = 0": chooses an unsigned 32-bit numerator from the low 8 bits
+  of Iteration, then stores that input and the Boolean outcome at the same index. }
+  var
+    Numerator: Cardinal; Answer: Boolean;
+    Index, X: Integer;
+  begin
+    Index := Iteration and $FF; { wrap to 0..255 }
+    case Index of { the last two slots hold the two largest 32-bit unsigned values }
+      254:
+        Numerator := 4294967294;
+      255:
+        Numerator := 4294967295;
+      else
+        Numerator := Cardinal(Index); { slots 0..253 use the index itself }
+    end;
+
+    FInputArray[Index] := Numerator;
+    for X := 0 to INTERNAL_LOOPS - 1 do { same check repeated INTERNAL_LOOPS times }
+      Answer := (Numerator mod 3) = 0; { the expression named in TestTitle }
+
+    FResultArray[Index] := Answer;
+  end;
+
+{ TUInt32Bit10ModCmpTest }
+
+function TUInt32Bit10ModCmpTest.TestTitle: shortstring;
+  { Title string identifying this test. }
+  begin
+    TestTitle := 'Unsigned 32-bit (n mod 10) = 0';
+  end;
+
+function TUInt32Bit10ModCmpTest.GetDivisor: Cardinal;
+  { Divisor exercised by this test: the constant 10. }
+  begin
+    GetDivisor := 10;
+  end;
+
+procedure TUInt32Bit10ModCmpTest.DoTestIteration(Iteration: Integer); { One benchmark
+  step for "(n mod 10) = 0": chooses an unsigned 32-bit numerator from the low 8 bits
+  of Iteration, then stores that input and the Boolean outcome at the same index. }
+  var
+    Numerator: Cardinal; Answer: Boolean;
+    Index, X: Integer;
+  begin
+    Index := Iteration and $FF; { wrap to 0..255 }
+    case Index of { the last two slots hold the two largest 32-bit unsigned values }
+      254:
+        Numerator := 4294967294;
+      255:
+        Numerator := 4294967295;
+      else
+        Numerator := Cardinal(Index); { slots 0..253 use the index itself }
+    end;
+
+    FInputArray[Index] := Numerator;
+    for X := 0 to INTERNAL_LOOPS - 1 do { same check repeated INTERNAL_LOOPS times }
+      Answer := (Numerator mod 10) = 0; { the expression named in TestTitle }
+
+    FResultArray[Index] := Answer;
+  end;
+
+{ TUInt32Bit100ModCmpTest }
+
+function TUInt32Bit100ModCmpTest.TestTitle: shortstring;
+  { Title string identifying this test. }
+  begin
+    TestTitle := 'Unsigned 32-bit (n mod 100) = 0';
+  end;
+
+function TUInt32Bit100ModCmpTest.GetDivisor: Cardinal;
+  { Divisor exercised by this test: the constant 100. }
+  begin
+    GetDivisor := 100;
+  end;
+
+procedure TUInt32Bit100ModCmpTest.DoTestIteration(Iteration: Integer); { One benchmark
+  step for "(n mod 100) = 0": chooses an unsigned 32-bit numerator from the low 8 bits
+  of Iteration, then stores that input and the Boolean outcome at the same index. }
+  var
+    Numerator: Cardinal; Answer: Boolean;
+    Index, X: Integer;
+  begin
+    Index := Iteration and $FF; { wrap to 0..255 }
+    case Index of { the last two slots hold the two largest 32-bit unsigned values }
+      254:
+        Numerator := 4294967294;
+      255:
+        Numerator := 4294967295;
+      else
+        Numerator := Cardinal(Index); { slots 0..253 use the index itself }
+    end;
+
+    FInputArray[Index] := Numerator;
+    for X := 0 to INTERNAL_LOOPS - 1 do { same check repeated INTERNAL_LOOPS times }
+      Answer := (Numerator mod 100) = 0; { the expression named in TestTitle }
+
+    FResultArray[Index] := Answer;
+  end;
+
+{ TUInt32Bit400ModCmpTest }
+
+function TUInt32Bit400ModCmpTest.TestTitle: shortstring;
+  { Title string identifying this test. }
+  begin
+    TestTitle := 'Unsigned 32-bit (n mod 400) = 0';
+  end;
+
+function TUInt32Bit400ModCmpTest.GetDivisor: Cardinal;
+  { Divisor exercised by this test: the constant 400. }
+  begin
+    GetDivisor := 400;
+  end;
+
+procedure TUInt32Bit400ModCmpTest.DoTestIteration(Iteration: Integer); { One benchmark
+  step for "(n mod 400) = 0": takes an unsigned 32-bit numerator from FU32_1000Input,
+  then stores that input and the Boolean outcome at the index derived from Iteration. }
+  var
+    Numerator: Cardinal; Answer: Boolean;
+    Index, X: Integer;
+  begin
+    Index := Iteration and $FF; { wrap to 0..255 }
+    Numerator := FU32_1000Input[Index and $F]; { table index cycles through 0..15. NOTE(review): this reads the table named for divisor 1000 while testing divisor 400 - confirm the reuse is intended }
+    FInputArray[Index] := Numerator;
+    for X := 0 to INTERNAL_LOOPS - 1 do { same check repeated INTERNAL_LOOPS times }
+      Answer := (Numerator mod 400) = 0; { the expression named in TestTitle }
+
+    FResultArray[Index] := Answer;
+  end;
+
+{ TUInt32Bit1000ModCmpTest }
+
+function TUInt32Bit1000ModCmpTest.TestTitle: shortstring;
+  { Title string identifying this test. }
+  begin
+    TestTitle := 'Unsigned 32-bit (n mod 1,000) = 0';
+  end;
+
+function TUInt32Bit1000ModCmpTest.GetDivisor: Cardinal;
+  { Divisor exercised by this test: the constant 1000. }
+  begin
+    GetDivisor := 1000;
+  end;
+
+procedure TUInt32Bit1000ModCmpTest.DoTestIteration(Iteration: Integer); { One benchmark
+  step for "(n mod 1000) = 0": takes an unsigned 32-bit numerator from FU32_1000Input,
+  then stores that input and the Boolean outcome at the index derived from Iteration. }
+  var
+    Numerator: Cardinal; Answer: Boolean;
+    Index, X: Integer;
+  begin
+    Index := Iteration and $FF; { wrap to 0..255 }
+    Numerator := FU32_1000Input[Index and $F]; { table index cycles through 0..15; the table is declared outside this block }
+    FInputArray[Index] := Numerator;
+    for X := 0 to INTERNAL_LOOPS - 1 do { same check repeated INTERNAL_LOOPS times }
+      Answer := (Numerator mod 1000) = 0; { the expression named in TestTitle }
+
+    FResultArray[Index] := Answer;
+  end;

+ 168 - 0
tests/bench/bdiv_u64.inc

@@ -144,6 +144,46 @@ type
       function TestTitle: shortstring; override;
       function TestTitle: shortstring; override;
   end;
   end;
 
 
+  { TUInt64Bit3ModCmpTest: unsigned 64-bit "(n mod 3) = 0" comparison test }
+
+  TUInt64Bit3ModCmpTest = class(TUInt64ModCmpTest)
+    protected
+      { Runs one iteration of the comparison }
+      procedure DoTestIteration(Iteration: Integer); override;
+      { Returns the divisor constant, 3 }
+      function GetDivisor: QWord; override;
+    public
+      { Returns the title string for this test }
+      function TestTitle: shortstring; override;
+  end;
+
+  { TUInt64Bit10ModCmpTest: unsigned 64-bit "(n mod 10) = 0" comparison test }
+
+  TUInt64Bit10ModCmpTest = class(TUInt64ModCmpTest)
+    protected
+      { Runs one iteration of the comparison }
+      procedure DoTestIteration(Iteration: Integer); override;
+      { Returns the divisor constant, 10 }
+      function GetDivisor: QWord; override;
+    public
+      { Returns the title string for this test }
+      function TestTitle: shortstring; override;
+  end;
+
+  { TUInt64Bit100ModCmpTest: unsigned 64-bit "(n mod 100) = 0" comparison test }
+
+  TUInt64Bit100ModCmpTest = class(TUInt64ModCmpTest)
+    protected
+      { Runs one iteration of the comparison }
+      procedure DoTestIteration(Iteration: Integer); override;
+      { Returns the divisor constant, 100 }
+      function GetDivisor: QWord; override;
+    public
+      { Returns the title string for this test }
+      function TestTitle: shortstring; override;
+  end;
+
+  { TUInt64Bit1000000000ModCmpTest: unsigned 64-bit "(n mod 1000000000) = 0" comparison test }
+
+  TUInt64Bit1000000000ModCmpTest = class(TUInt64ModCmpTest)
+    protected
+      { Runs one iteration of the comparison }
+      procedure DoTestIteration(Iteration: Integer); override;
+      { Returns the divisor constant, 1000000000 }
+      function GetDivisor: QWord; override;
+    public
+      { Returns the title string for this test }
+      function TestTitle: shortstring; override;
+  end;
+
 { TUInt64Bit1Test }
 { TUInt64Bit1Test }
 
 
 function TUInt64Bit1Test.TestTitle: shortstring;
 function TUInt64Bit1Test.TestTitle: shortstring;
@@ -629,3 +669,131 @@ procedure TUInt64Bit1000000000ModTest.DoTestIteration(Iteration: Integer);
       
       
     FResultArray[Index] := Answer;
     FResultArray[Index] := Answer;
   end;
   end;
+
+{ TUInt64Bit3ModCmpTest }
+
+function TUInt64Bit3ModCmpTest.TestTitle: shortstring;
+  { Title string identifying this test. }
+  begin
+    TestTitle := 'Unsigned 64-bit (n mod 3) = 0';
+  end;
+
+function TUInt64Bit3ModCmpTest.GetDivisor: QWord;
+  { Divisor exercised by this test: the constant 3. }
+  begin
+    GetDivisor := 3;
+  end;
+
+procedure TUInt64Bit3ModCmpTest.DoTestIteration(Iteration: Integer); { One benchmark
+  step for "(n mod 3) = 0": chooses an unsigned 64-bit numerator from the low 8 bits
+  of Iteration, then stores that input and the Boolean outcome at the same index. }
+  var
+    Numerator: QWord; Answer: Boolean;
+    Index, X: Integer;
+  begin
+    Index := Iteration and $FF; { wrap to 0..255 }
+    case Index of { the last two slots hold the two largest 64-bit unsigned values }
+      254:
+        Numerator := QWord($FFFFFFFFFFFFFFFE);
+      255:
+        Numerator := QWord($FFFFFFFFFFFFFFFF);
+      else
+        Numerator := QWord(Index); { slots 0..253 use the index itself }
+    end;
+
+    FInputArray[Index] := Numerator;
+    for X := 0 to INTERNAL_LOOPS - 1 do { same check repeated INTERNAL_LOOPS times }
+      Answer := (Numerator mod 3) = 0; { the expression named in TestTitle }
+
+    FResultArray[Index] := Answer;
+  end;
+
+{ TUInt64Bit10ModCmpTest }
+
+function TUInt64Bit10ModCmpTest.TestTitle: shortstring;
+  { Title string identifying this test. }
+  begin
+    TestTitle := 'Unsigned 64-bit (n mod 10) = 0';
+  end;
+
+function TUInt64Bit10ModCmpTest.GetDivisor: QWord;
+  { Divisor exercised by this test: the constant 10. }
+  begin
+    GetDivisor := 10;
+  end;
+
+procedure TUInt64Bit10ModCmpTest.DoTestIteration(Iteration: Integer); { One benchmark
+  step for "(n mod 10) = 0": chooses an unsigned 64-bit numerator from the low 8 bits
+  of Iteration, then stores that input and the Boolean outcome at the same index. }
+  var
+    Numerator: QWord; Answer: Boolean;
+    Index, X: Integer;
+  begin
+    Index := Iteration and $FF; { wrap to 0..255 }
+    case Index of { the last two slots hold the two largest 64-bit unsigned values }
+      254:
+        Numerator := QWord($FFFFFFFFFFFFFFFE);
+      255:
+        Numerator := QWord($FFFFFFFFFFFFFFFF);
+      else
+        Numerator := QWord(Index); { slots 0..253 use the index itself }
+    end;
+
+    FInputArray[Index] := Numerator;
+    for X := 0 to INTERNAL_LOOPS - 1 do { same check repeated INTERNAL_LOOPS times }
+      Answer := (Numerator mod 10) = 0; { the expression named in TestTitle }
+
+    FResultArray[Index] := Answer;
+  end;
+
+{ TUInt64Bit100ModCmpTest }
+
+function TUInt64Bit100ModCmpTest.TestTitle: shortstring;
+  { Title string identifying this test. }
+  begin
+    TestTitle := 'Unsigned 64-bit (n mod 100) = 0';
+  end;
+
+function TUInt64Bit100ModCmpTest.GetDivisor: QWord;
+  { Divisor exercised by this test: the constant 100. }
+  begin
+    GetDivisor := 100;
+  end;
+
+procedure TUInt64Bit100ModCmpTest.DoTestIteration(Iteration: Integer); { One benchmark
+  step for "(n mod 100) = 0": chooses an unsigned 64-bit numerator from the low 8 bits
+  of Iteration, then stores that input and the Boolean outcome at the same index. }
+  var
+    Numerator: QWord; Answer: Boolean;
+    Index, X: Integer;
+  begin
+    Index := Iteration and $FF; { wrap to 0..255 }
+    case Index of { the last two slots hold the two largest 64-bit unsigned values }
+      254:
+        Numerator := QWord($FFFFFFFFFFFFFFFE);
+      255:
+        Numerator := QWord($FFFFFFFFFFFFFFFF);
+      else
+        Numerator := QWord(Index); { slots 0..253 use the index itself }
+    end;
+
+    FInputArray[Index] := Numerator;
+    for X := 0 to INTERNAL_LOOPS - 1 do { same check repeated INTERNAL_LOOPS times }
+      Answer := (Numerator mod 100) = 0; { the expression named in TestTitle }
+
+    FResultArray[Index] := Answer;
+  end;
+
+{ TUInt64Bit1000000000ModCmpTest }
+
+function TUInt64Bit1000000000ModCmpTest.TestTitle: shortstring;
+  { Title string identifying this test. }
+  begin
+    TestTitle := 'Unsigned 64-bit (n mod 1,000,000,000) = 0';
+  end;
+
+function TUInt64Bit1000000000ModCmpTest.GetDivisor: QWord;
+  { Divisor exercised by this test: the constant 1000000000. }
+  begin
+    GetDivisor := 1000000000;
+  end;
+
+procedure TUInt64Bit1000000000ModCmpTest.DoTestIteration(Iteration: Integer); { One benchmark
+  step for "(n mod 1000000000) = 0": takes an unsigned 64-bit numerator from
+  FU64_1000000000Input, then stores that input and the Boolean outcome at the index
+  derived from Iteration. }
+  var
+    Numerator: QWord; Answer: Boolean;
+    Index, X: Integer;
+  begin
+    Index := Iteration and $FF; { wrap to 0..255 }
+    Numerator := FU64_1000000000Input[Index and $F]; { table index cycles through 0..15; the table is declared outside this block }
+    FInputArray[Index] := Numerator;
+    for X := 0 to INTERNAL_LOOPS - 1 do { same check repeated INTERNAL_LOOPS times }
+      Answer := (Numerator mod 1000000000) = 0; { the expression named in TestTitle }
+
+    FResultArray[Index] := Answer;
+  end;