Преглед изворни кода

Expanding division to 64-bit and tests

J. Gareth "Curious Kit" Moreton пре 3 година
родитељ
комит
671c674d65
4 измењених фајлова са 367 додато и 86 уклоњено
  1. 154 59
      compiler/x86/nx86mat.pas
  2. 87 3
      tests/bench/bdiv.pp
  3. 92 0
      tests/bench/bdiv_u32.inc
  4. 34 24
      tests/bench/bdiv_u64.inc

+ 154 - 59
compiler/x86/nx86mat.pas

@@ -414,6 +414,156 @@ interface
         s: byte;
       label
         DefaultDiv;
+
+        { Emits code for an unsigned "div" or "mod" by the constant d using
+          a multiplication by a precomputed reciprocal (m, m_add and s, as
+          filled in by calc_divconst_magic_unsigned) instead of a DIV
+          instruction.  The node type selects the result: for modn the
+          remainder is produced, otherwise the quotient.  The register that
+          holds the result is stored in location.register.
+
+          Operands of up to 4 bytes on x86_64 (up to 2 bytes otherwise) are
+          zero-extended and multiplied at double width; all other operands
+          are multiplied at their own width and then corrected with the
+          add/rotate/shift sequence. }
+        procedure DoUnsignedReciprocalDivision;
+          var
+            { Accumulator/data register pair and operand size used by the
+              double-width multiply }
+            exp_rega,exp_regd:Tregister;
+            exp_opsize:topsize;
+            { True when the remainder rather than the quotient is wanted }
+            DoMod: Boolean;
+          begin
+            DoMod := (nodetype = modn);
+            { Extend 32-bit divides to 64-bit registers and 16-bit
+              divides to 32-bit registers.  Because the domain of
+              the left input is only up to 2^(X/2 - 1) - 1, (i.e.
+              2^31 - 1 for 64-bit and 2^15 - 1 for 32-bit), a much
+              larger error in the reciprocal is permitted. }
+            { The case statement below also extends 8-bit divides to 16-bit
+              registers.
+              NOTE(review): the bounds quoted above look like signed
+              maximums; an unsigned X/2-bit operand can reach 2^(X/2) - 1.
+              Confirm which bound the pre-shifted reciprocal relies on. }
+            if (resultdef.size <= {$ifdef x86_64}4{$else x86_64}2{$endif x86_64}) then
+              begin
+                { Ask for the magic numbers of a divide that is twice as
+                  wide as the operand }
+                calc_divconst_magic_unsigned(resultdef.size * 2 * 8,d,m,m_add,s);
+
+                { Should never have a zero shift and a magic add together }
+                if (s = 0) and m_add then
+                  InternalError(2021090201);
+
+                { Extend the input register (the peephole optimizer should
+                  help clean up unnecessary MOVZX instructions) }
+                hreg3 := hreg1;
+                case resultdef.size of
+{$ifdef x86_64}
+                  4:
+                    begin
+                      setsubreg(hreg3, R_SUBQ);
+                      { Make sure the upper 32 bits are zero; the peephole
+                        optimizer will remove this instruction via MovAnd2Mov
+                        if it's not needed }
+                      emit_const_reg(A_AND, S_L, $FFFFFFFF, hreg1);
+                      exp_rega := NR_RAX;
+                      exp_regd := NR_RDX;
+                      exp_opsize := S_Q;
+
+                      { The shift is folded into the constant here, so no
+                        shift instruction is emitted on this path }
+                      if m_add then
+                        { Append 1 to the tail end of the result }
+                        m := (m shr s) or ($8000000000000000 shr (s - 1))
+                      else
+                        m := m shr s;
+                    end;
+{$endif x86_64}
+                  2:
+                    begin
+                      setsubreg(hreg3, R_SUBD);
+                      emit_reg_reg(A_MOVZX, S_WL, hreg1, hreg3);
+                      exp_rega := NR_EAX;
+                      exp_regd := NR_EDX;
+                      exp_opsize := S_L;
+
+                      if m_add then
+                        { Append 1 to the tail end of the result }
+                        m := (m shr s) or ($80000000 shr (s - 1))
+                      else
+                        m := m shr s;
+                    end;
+                  1:
+                    begin
+                      setsubreg(hreg3, R_SUBW);
+                      emit_reg_reg(A_MOVZX, S_BW, hreg1, hreg3);
+                      exp_rega := NR_AX;
+                      exp_regd := NR_DX;
+                      regd := NR_DL; { We need to change this from AH }
+                      exp_opsize := S_W;
+
+                      if m_add then
+                        { Append 1 to the tail end of the result }
+                        m := (m shr s) or ($8000 shr (s - 1))
+                      else
+                        m := m shr s;
+                    end;
+                  else
+                    InternalError(2021090210);
+                end;
+
+                { Presumably rounds the truncated, pre-shifted reciprocal
+                  up by one - TODO confirm against the derivation }
+                Inc(m);
+
+                { Load the reciprocal into the accumulator and multiply by
+                  the widened input; no shift or add follows on this path,
+                  so the code after this branch reads the quotient straight
+                  from regd }
+                cg.getcpuregister(current_asmdata.CurrAsmList,exp_rega);
+                emit_const_reg(A_MOV,exp_opsize,aint(m),exp_rega);
+                cg.getcpuregister(current_asmdata.CurrAsmList,exp_regd);
+                emit_reg(A_MUL,exp_opsize,hreg3);
+                cg.ungetcpuregister(current_asmdata.CurrAsmList,exp_rega);
+                if DoMod then
+                  begin
+                    { Keep a copy of the numerator for the subtraction that
+                      forms the remainder }
+                    hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
+                    emit_reg_reg(A_MOV,opsize,hreg1,hreg2);
+                  end;
+              end
+            else
+              begin
+                { Operand is too wide to extend: multiply at the operand's
+                  own width and correct the high half afterwards }
+                calc_divconst_magic_unsigned(resultdef.size*8,d,m,m_add,s);
+
+                { Should never have a zero shift and a magic add together }
+                if (s = 0) and m_add then
+                  InternalError(2021090202);
+
+                cg.getcpuregister(current_asmdata.CurrAsmList,rega);
+                emit_const_reg(A_MOV,opsize,aint(m),rega);
+                cg.getcpuregister(current_asmdata.CurrAsmList,regd);
+                emit_reg(A_MUL,opsize,hreg1);
+                cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
+                if DoMod then
+                  begin
+                    { Keep a copy of the numerator for the subtraction that
+                      forms the remainder }
+                    hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
+                    emit_reg_reg(A_MOV,opsize,hreg1,hreg2);
+                  end;
+
+                if m_add then
+                  begin
+                    { addition can overflow, shift first bit considering carry,
+                      then shift remaining bits in regular way. }
+                    cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+                    emit_reg_reg(A_ADD,opsize,hreg1,regd);
+                    emit_const_reg(A_RCR,opsize,1,regd);
+                    cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+                    { One bit of the shift has been done by the RCR }
+                    dec(s);
+                  end;
+                if s<>0 then
+                  emit_const_reg(A_SHR,opsize,aint(s),regd);
+              end;
+
+            { On both paths regd is treated as the quotient from here on }
+            if DoMod then
+              begin
+                { Now multiply the quotient by the original denominator and
+                  subtract the product from the original numerator to get
+                  the remainder. }
+                if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in IMUL }
+                  begin
+                    hreg3:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
+                    emit_const_reg(A_MOV,opsize,aint(d),hreg3);
+                    emit_reg_reg(A_IMUL,opsize,hreg3,regd);
+                  end
+                else
+                  emit_const_reg(A_IMUL,opsize,aint(d),regd);
+
+                { hreg2 holds the numerator copy and becomes the remainder }
+                emit_reg_reg(A_SUB,opsize,regd,hreg2);
+              end;
+
+            cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
+            if not DoMod then
+              begin
+                { Division: copy the quotient out of regd into a newly
+                  obtained integer register }
+                hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
+                cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,hreg2);
+              end;
+
+            { hreg2 is the remainder for modn and the quotient otherwise }
+            location.register:=hreg2;
+          end;
+
       begin
         secondpass(left);
         if codegenerror then
@@ -540,27 +690,8 @@ interface
                         cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                       end
                     else
-                      begin
-                        calc_divconst_magic_unsigned(resultdef.size*8,d,m,m_add,s);
-                        cg.getcpuregister(current_asmdata.CurrAsmList,rega);
-                        emit_const_reg(A_MOV,opsize,aint(m),rega);
-                        cg.getcpuregister(current_asmdata.CurrAsmList,regd);
-                        emit_reg(A_MUL,opsize,hreg1);
-                        cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
-                        if m_add then
-                          begin
-                            { addition can overflow, shift first bit considering carry,
-                              then shift remaining bits in regular way. }
-                            emit_reg_reg(A_ADD,opsize,hreg1,regd);
-                            emit_const_reg(A_RCR,opsize,1,regd);
-                            dec(s);
-                          end;
-                        if s<>0 then
-                          emit_const_reg(A_SHR,opsize,aint(s),regd);
-                        cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
-                        location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
-                        cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register)
-                      end;
+                      DoUnsignedReciprocalDivision;
+
                   end;
               end;
           end
@@ -614,45 +745,9 @@ interface
                     emit_reg_reg(A_ADD,opsize,hreg1,location.register);
                   end
                 else
-                  begin
-                    { Convert the division to a multiplication }
-                    calc_divconst_magic_unsigned(resultdef.size*8,d,m,m_add,s);
-                    cg.getcpuregister(current_asmdata.CurrAsmList,rega);
-                    emit_const_reg(A_MOV,opsize,aint(m),rega);
-                    cg.getcpuregister(current_asmdata.CurrAsmList,regd);
-                    emit_reg(A_MUL,opsize,hreg1);
-                    cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
-                    hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
-                    emit_reg_reg(A_MOV,opsize,hreg1,hreg2);
-                    if m_add then
-                      begin
-                        { addition can overflow, shift first bit considering carry,
-                          then shift remaining bits in regular way. }
-                        cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
-                        emit_reg_reg(A_ADD,opsize,hreg1,regd);
-                        emit_const_reg(A_RCR,opsize,1,regd);
-                        cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
-                        dec(s);
-                      end;
-                    if s<>0 then
-                      emit_const_reg(A_SHR,opsize,aint(s),regd); { R/EDX now contains the quotient }
-
-                    { Now multiply the quotient by the original denominator and
-                      subtract the product from the original numerator to get
-                      the remainder. }
-                    if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in IMUL }
-                      begin
-                        hreg3:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
-                        emit_const_reg(A_MOV,opsize,aint(d),hreg3);
-                        emit_reg_reg(A_IMUL,opsize,hreg3,regd);
-                      end
-                    else
-                      emit_const_reg(A_IMUL,opsize,aint(d),regd);
+                  { Convert the division to a multiplication }
+                  DoUnsignedReciprocalDivision;
 
-                    emit_reg_reg(A_SUB,opsize,regd,hreg2);
-                    cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
-                    location.register:=hreg2;
-                  end;
               end;
           end
         else if (nodetype=modn) and (right.nodetype=ordconstn) and (is_signed(left.resultdef)) and isabspowerof2(tordconstnode(right).value,power) then

+ 87 - 3
tests/bench/bdiv.pp

@@ -49,6 +49,23 @@ type
 
   TTestClass = class of TTestAncestor;
 
+  { Base class of the unsigned 16-bit division tests: holds 256 input
+    values together with the result stored for each of them, and checks
+    those results in WriteResults. }
+  TUInt16DivTest = class(TTestAncestor)
+    protected
+      { Numerator stored per slot, and the result stored for that slot }
+      FInputArray: array[$00..$FF] of Word;
+      FResultArray: array[$00..$FF] of Word;
+      { Divisor of the concrete test; abstract, so descendants supply it }
+      function GetDivisor: Word; virtual; abstract;
+      { Reference quotient, with the divisor fetched through GetDivisor }
+      function DoVariableDiv(Numerator: Word): Word; inline;
+    public
+      { Compares each stored result with DoVariableDiv of its input }
+      function WriteResults: Boolean; override;
+  end;
+
+  { Base class of the unsigned 16-bit modulus tests; reuses the arrays
+    and GetDivisor of TUInt16DivTest but checks remainders. }
+  TUInt16ModTest = class(TUInt16DivTest)
+    protected
+      { Reference remainder, with the divisor fetched through GetDivisor }
+      function DoVariableMod(Numerator: Word): Word; inline;
+    public
+      { Compares each stored result with DoVariableMod of its input }
+      function WriteResults: Boolean; override;
+  end;
+
   TUInt32DivTest = class(TTestAncestor)
     protected
       FInputArray: array[$00..$FF] of Cardinal;
@@ -117,6 +134,7 @@ type
       function WriteResults: Boolean; override;
   end;
 
+{$I bdiv_u16.inc}
 {$I bdiv_u32.inc}
 {$I bdiv_u64.inc}
 {$I bdiv_s32.inc}
@@ -161,6 +179,56 @@ procedure TTestAncestor.Run;
     FAvgTime := FEndTime - FStartTime;
   end;
 
+{ TUInt16DivTest }
+
+{ Reference quotient used by the result check: the divisor comes from the
+  virtual GetDivisor call rather than from a literal in the expression. }
+function TUInt16DivTest.DoVariableDiv(Numerator: Word): Word;
+  begin
+    DoVariableDiv := Numerator div GetDivisor;
+  end;
+
+{ Checks every stored quotient against DoVariableDiv of the stored input.
+  Prints a FAIL line for the first mismatch and returns False; returns
+  True when all 256 slots agree. }
+function TUInt16DivTest.WriteResults: Boolean;
+  var
+    Slot: Integer;
+    Reference, Recorded: Word;
+  begin
+    for Slot := Low(FResultArray) to High(FResultArray) do
+      begin
+        Reference := DoVariableDiv(FInputArray[Slot]);
+        Recorded := FResultArray[Slot];
+        if Recorded = Reference then
+          Continue;
+
+        { First mismatch ends the check }
+        WriteLn('FAIL - ', FInputArray[Slot], ' div ', GetDivisor, '; expected ', Reference, ' got ', Recorded);
+        Exit(False);
+      end;
+    Result := True;
+  end;
+
+{ TUInt16ModTest }
+
+{ Reference remainder used by the result check: the divisor comes from the
+  virtual GetDivisor call rather than from a literal in the expression. }
+function TUInt16ModTest.DoVariableMod(Numerator: Word): Word;
+  begin
+    DoVariableMod := Numerator mod GetDivisor;
+  end;
+
+{ Checks every stored remainder against DoVariableMod of the stored input.
+  Prints a FAIL line for the first mismatch and returns False; returns
+  True when all 256 slots agree. }
+function TUInt16ModTest.WriteResults: Boolean;
+  var
+    Slot: Integer;
+    Reference, Recorded: Word;
+  begin
+    for Slot := Low(FResultArray) to High(FResultArray) do
+      begin
+        Reference := DoVariableMod(FInputArray[Slot]);
+        Recorded := FResultArray[Slot];
+        if Recorded = Reference then
+          Continue;
+
+        { First mismatch ends the check }
+        WriteLn('FAIL - ', FInputArray[Slot], ' mod ', GetDivisor, '; expected ', Reference, ' got ', Recorded);
+        Exit(False);
+      end;
+    Result := True;
+  end;
+
 { TUInt32DivTest }
 
 function TUInt32DivTest.DoVariableDiv(Numerator: Cardinal): Cardinal;
@@ -363,13 +431,29 @@ function TSInt64ModTest.WriteResults: Boolean;
 
 { Main function }
 const
-  TestClasses: array[0..53] of TTestClass = (
+  TestClasses: array[0..69] of TTestClass = (
+    TUInt16Bit1Test,
+    TUInt16Bit1ModTest,
+    TUInt16Bit2Test,
+    TUInt16Bit2ModTest,
+    TUInt16Bit3Test,
+    TUInt16Bit3ModTest,
+    TUInt16Bit7Test,
+    TUInt16Bit7ModTest,
+    TUInt16Bit10Test,
+    TUInt16Bit10ModTest,
+    TUInt16Bit100Test,
+    TUInt16Bit100ModTest,
+    TUInt16Bit1000Test,
+    TUInt16Bit1000ModTest,
     TUInt32Bit1Test,
     TUInt32Bit1ModTest,
     TUInt32Bit2Test,
     TUInt32Bit2ModTest,
     TUInt32Bit3Test,
     TUInt32Bit3ModTest,
+    TUInt32Bit7Test,
+    TUInt32Bit7ModTest,
     TUInt32Bit10Test,
     TUInt32Bit10ModTest,
     TUInt32Bit100Test,
@@ -388,8 +472,8 @@ const
     TUInt64Bit2ModTest,
     TUInt64Bit3Test,
     TUInt64Bit3ModTest,
-    TUInt64Bit5Test,
-    TUInt64Bit5ModTest,
+    TUInt64Bit7Test,
+    TUInt64Bit7ModTest,
     TUInt64Bit10Test,
     TUInt64Bit10ModTest,
     TUInt64Bit100Test,

+ 92 - 0
tests/bench/bdiv_u32.inc

@@ -59,6 +59,26 @@ type
       function TestTitle: shortstring; override;
   end;
 
+  { TUInt32Bit7Test }
+
+  { Unsigned 32-bit division by the constant 7 }
+  TUInt32Bit7Test = class(TUInt32DivTest)
+    protected
+      { Returns 7, the divisor of this test }
+      function GetDivisor: Cardinal; override;
+      { Runs one iteration and stores its input and result }
+      procedure DoTestIteration(Iteration: Integer); override;
+    public
+      { Title string of this test }
+      function TestTitle: shortstring; override;
+  end;
+
+  { TUInt32Bit7ModTest }
+
+  { Unsigned 32-bit modulus by the constant 7 }
+  TUInt32Bit7ModTest = class(TUInt32ModTest)
+    protected
+      { Returns 7, the divisor of this test }
+      function GetDivisor: Cardinal; override;
+      { Runs one iteration and stores its input and result }
+      procedure DoTestIteration(Iteration: Integer); override;
+    public
+      { Title string of this test }
+      function TestTitle: shortstring; override;
+  end;
+
   { TUInt32Bit10Test }
 
   TUInt32Bit10Test = class(TUInt32DivTest)
@@ -416,6 +436,78 @@ procedure TUInt32Bit3ModTest.DoTestIteration(Iteration: Integer);
     FResultArray[Index] := Answer;
   end;
 
+{ TUInt32Bit7Test }
+
+{ Title string of the division-by-7 test. }
+function TUInt32Bit7Test.TestTitle: shortstring;
+  begin
+    TestTitle := 'Unsigned 32-bit division by 7';
+  end;
+
+{ Divisor of this test; matches the literal used in DoTestIteration. }
+function TUInt32Bit7Test.GetDivisor: Cardinal;
+  begin
+    GetDivisor := 7;
+  end;
+
+{ Runs one iteration: picks the numerator for slot (Iteration and $FF),
+  divides it by the literal 7 INTERNAL_LOOPS times and stores the input and
+  the quotient in that slot.  Slots 254 and 255 use the two largest
+  Cardinal values; every other slot uses its own index as numerator. }
+procedure TUInt32Bit7Test.DoTestIteration(Iteration: Integer);
+  var
+    Dividend, Quotient: Cardinal;
+    Slot, Pass: Integer;
+  begin
+    Slot := Iteration and $FF;
+    if Slot = 254 then
+      Dividend := 4294967294
+    else if Slot = 255 then
+      Dividend := 4294967295
+    else
+      Dividend := Cardinal(Slot);
+
+    FInputArray[Slot] := Dividend;
+    { The divisor stays a literal here; presumably this is what lets the
+      compiler use its constant-division code - confirm before changing }
+    for Pass := 0 to INTERNAL_LOOPS - 1 do
+      Quotient := Dividend div 7;
+
+    FResultArray[Slot] := Quotient;
+  end;
+
+{ TUInt32Bit7ModTest }
+
+{ Title string of the modulus-by-7 test. }
+function TUInt32Bit7ModTest.TestTitle: shortstring;
+  begin
+    TestTitle := 'Unsigned 32-bit modulus by 7';
+  end;
+
+{ Divisor of this test; matches the literal used in DoTestIteration. }
+function TUInt32Bit7ModTest.GetDivisor: Cardinal;
+  begin
+    GetDivisor := 7;
+  end;
+
+{ Runs one iteration: picks the numerator for slot (Iteration and $FF),
+  takes it modulo the literal 7 INTERNAL_LOOPS times and stores the input
+  and the remainder in that slot.  Slots 252 to 255 use values at the top
+  of the Cardinal range; every other slot uses its own index as numerator. }
+procedure TUInt32Bit7ModTest.DoTestIteration(Iteration: Integer);
+  var
+    Dividend, Remainder: Cardinal;
+    Slot, Pass: Integer;
+  begin
+    Slot := Iteration and $FF;
+    if Slot = 252 then
+      Dividend := 4294967291
+    else if Slot = 253 then
+      Dividend := 4294967292
+    else if Slot = 254 then
+      Dividend := 4294967293
+    else if Slot = 255 then
+      Dividend := 4294967295
+    else
+      Dividend := Cardinal(Slot);
+
+    FInputArray[Slot] := Dividend;
+    { The divisor stays a literal here; presumably this is what lets the
+      compiler use its constant-division code - confirm before changing }
+    for Pass := 0 to INTERNAL_LOOPS - 1 do
+      Remainder := Dividend mod 7;
+
+    FResultArray[Slot] := Remainder;
+  end;
+
 { TUInt32Bit10Test }
 
 function TUInt32Bit10Test.TestTitle: shortstring;

+ 34 - 24
tests/bench/bdiv_u64.inc

@@ -59,9 +59,9 @@ type
       function TestTitle: shortstring; override;
   end;
 
-  { TUInt64Bit5Test }
+  { TUInt64Bit7Test }
 
-  TUInt64Bit5Test = class(TUInt64DivTest)
+  TUInt64Bit7Test = class(TUInt64DivTest)
     protected
       function GetDivisor: QWord; override;
       procedure DoTestIteration(Iteration: Integer); override;
@@ -69,9 +69,9 @@ type
       function TestTitle: shortstring; override;
   end;
 
-  { TUInt64Bit5ModTest }
+  { TUInt64Bit7ModTest }
 
-  TUInt64Bit5ModTest = class(TUInt64ModTest)
+  TUInt64Bit7ModTest = class(TUInt64ModTest)
     protected
       function GetDivisor: QWord; override;
       procedure DoTestIteration(Iteration: Integer); override;
@@ -356,25 +356,27 @@ procedure TUInt64Bit3ModTest.DoTestIteration(Iteration: Integer);
     FResultArray[Index] := Answer;
   end;
 
-{ TUInt64Bit5Test }
+{ TUInt64Bit7Test }
 
-function TUInt64Bit5Test.TestTitle: shortstring;
+function TUInt64Bit7Test.TestTitle: shortstring;
   begin
-    Result := 'Unsigned 64-bit division by 5';
+    Result := 'Unsigned 64-bit division by 7';
   end;
 
-function TUInt64Bit5Test.GetDivisor: QWord;
+function TUInt64Bit7Test.GetDivisor: QWord;
   begin
-    Result := 5;
+    Result := 7;
   end;
 
-procedure TUInt64Bit5Test.DoTestIteration(Iteration: Integer);
+procedure TUInt64Bit7Test.DoTestIteration(Iteration: Integer);
   var
     Numerator, Answer: QWord;
     Index, X: Integer;
   begin
     Index := Iteration and $FF;
     case Index of
+      253:
+        Numerator := QWord($FFFFFFFFFFFFFFFD);
       254:
         Numerator := QWord($FFFFFFFFFFFFFFFE);
       255:
@@ -385,30 +387,32 @@ procedure TUInt64Bit5Test.DoTestIteration(Iteration: Integer);
 
     FInputArray[Index] := Numerator;
     for X := 0 to INTERNAL_LOOPS - 1 do
-      Answer := Numerator div 5;
+      Answer := Numerator div 7;
       
     FResultArray[Index] := Answer;
   end;
 
-{ TUInt64Bit5ModTest }
+{ TUInt64Bit7ModTest }
 
-function TUInt64Bit5ModTest.TestTitle: shortstring;
+function TUInt64Bit7ModTest.TestTitle: shortstring;
   begin
-    Result := 'Unsigned 64-bit modulus by 5';
+    Result := 'Unsigned 64-bit modulus by 7';
   end;
 
-function TUInt64Bit5ModTest.GetDivisor: QWord;
+function TUInt64Bit7ModTest.GetDivisor: QWord;
   begin
-    Result := 5;
+    Result := 7;
   end;
 
-procedure TUInt64Bit5ModTest.DoTestIteration(Iteration: Integer);
+procedure TUInt64Bit7ModTest.DoTestIteration(Iteration: Integer);
   var
     Numerator, Answer: QWord;
     Index, X: Integer;
   begin
     Index := Iteration and $FF;
     case Index of
+      253:
+        Numerator := QWord($FFFFFFFFFFFFFFFD);
       254:
         Numerator := QWord($FFFFFFFFFFFFFFFE);
       255:
@@ -419,7 +423,7 @@ procedure TUInt64Bit5ModTest.DoTestIteration(Iteration: Integer);
 
     FInputArray[Index] := Numerator;
     for X := 0 to INTERNAL_LOOPS - 1 do
-      Answer := Numerator mod 5;
+      Answer := Numerator mod 7;
       
     FResultArray[Index] := Answer;
   end;
@@ -479,10 +483,12 @@ procedure TUInt64Bit10ModTest.DoTestIteration(Iteration: Integer);
   begin
     Index := Iteration and $FF;
     case Index of
+      252:
+        Numerator := QWord($FFFFFFFFFFFFFFEF);
       253:
-        Numerator := QWord($FFFFFFFFFFFFFFF9);
+        Numerator := QWord($FFFFFFFFFFFFFFF0);
       254:
-        Numerator := QWord($FFFFFFFFFFFFFFFA);
+        Numerator := QWord($FFFFFFFFFFFFFFF1);
       255:
         Numerator := QWord($FFFFFFFFFFFFFFFF);
       else
@@ -515,10 +521,12 @@ procedure TUInt64Bit100Test.DoTestIteration(Iteration: Integer);
   begin
     Index := Iteration and $FF;
     case Index of
-      253:
+      252:
         Numerator := QWord($FFFFFFFFFFFFFFEF);
-      254:
+      253:
         Numerator := QWord($FFFFFFFFFFFFFFF0);
+      254:
+        Numerator := QWord($FFFFFFFFFFFFFFF1);
       255:
         Numerator := QWord($FFFFFFFFFFFFFFFF);
       else
@@ -551,10 +559,12 @@ procedure TUInt64Bit100ModTest.DoTestIteration(Iteration: Integer);
   begin
     Index := Iteration and $FF;
     case Index of
-      253:
+      252:
         Numerator := QWord($FFFFFFFFFFFFFFEF);
-      254:
+      253:
         Numerator := QWord($FFFFFFFFFFFFFFF0);
+      254:
+        Numerator := QWord($FFFFFFFFFFFFFFF1);
       255:
         Numerator := QWord($FFFFFFFFFFFFFFFF);
       else