Browse Source

* patch by J. Gareth Moreton: reorganises the produced machine code for large unsigned divisions, resolves #32984

git-svn-id: trunk@37950 -
florian 7 years ago
parent
commit
4a98fcb9d3
3 changed files with 199 additions and 6 deletions
  1. 1 0
      .gitattributes
  2. 51 6
      compiler/x86/nx86mat.pas
  3. 147 0
      tests/test/cg/tmoddiv5.pp

+ 1 - 0
.gitattributes

@@ -11954,6 +11954,7 @@ tests/test/cg/tmoddiv1.pp svneol=native#text/plain
 tests/test/cg/tmoddiv2.pp svneol=native#text/plain
 tests/test/cg/tmoddiv3.pp svneol=native#text/pascal
 tests/test/cg/tmoddiv4.pp svneol=native#text/pascal
+tests/test/cg/tmoddiv5.pp svneol=native#text/pascal
 tests/test/cg/tmul3264.pp svneol=native#text/plain
 tests/test/cg/tneg.pp svneol=native#text/plain
 tests/test/cg/tnegnotassign1.pp svneol=native#text/plain

+ 51 - 6
compiler/x86/nx86mat.pas

@@ -378,7 +378,7 @@ interface
 
     procedure tx86moddivnode.pass_generate_code;
       var
-        hreg1,hreg2,hreg3,rega,regd:Tregister;
+        hreg1,hreg2,hreg3,rega,regd,tempreg:Tregister;
         power:longint;
         instr:TAiCpu;
         op:Tasmop;
@@ -415,7 +415,7 @@ interface
             if isabspowerof2(tordconstnode(right).value,power) then
               begin
                 { for signed numbers, the numerator must be adjusted before the
-                  shift instruction, but not wih unsigned numbers! Otherwise,
+                  shift instruction, but not with unsigned numbers! Otherwise,
                   "Cardinal($ffffffff) div 16" overflows! (JM) }
                 if is_signed(left.resultdef) Then
                   begin
@@ -485,8 +485,24 @@ interface
                     d:=tordconstnode(right).value.svalue;
                     if d>=aword(1) shl (left.resultdef.size*8-1) then
                       begin
+                        location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
+                        { Ensure that the whole register is 0, since SETcc only sets the lowest byte }
+
+                        if opsize = S_Q then
+                          begin
+                            { Emit an XOR instruction that only operates on the lower 32 bits,
+                              since we want to initialise this register to zero, the upper 32
+                              bits will be set to zero regardless, and the resultant machine code
+                              will usually be smaller due to the lack of a REX prefix. [Kit] }
+                            tempreg := location.register;
+                            setsubreg(tempreg, R_SUBD);
+                            emit_reg_reg(A_XOR, S_L, tempreg, tempreg);
+                          end
+                        else
+                          emit_reg_reg(A_XOR,opsize,location.register,location.register);
+
                         cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
-                        if (cgsize in [OS_64,OS_S64]) then
+                        if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in CMP }
                           begin
                             hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                             emit_const_reg(A_MOV,opsize,aint(d),hreg2);
@@ -494,9 +510,38 @@ interface
                           end
                         else
                           emit_const_reg(A_CMP,opsize,aint(d),hreg1);
-                        location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
-                        emit_const_reg(A_MOV,opsize,0,location.register);
-                        emit_const_reg(A_SBB,opsize,-1,location.register);
+                        { NOTE: SBB and SETAE are both 3 bytes long without the REX prefix,
+                          both use an ALU for their execution and take a single cycle to
+                          run. The only difference is that SETAE does not modify the flags,
+                          allowing for some possible reuse. [Kit] }
+{$ifdef x86_64}
+                        { Emit a SETcc instruction that depends on the carry bit being zero,
+                          that is, the numerator is greater than or equal to the denominator. }
+                        tempreg := location.register;
+                        setsubreg(tempreg, R_SUBL);
+                         { On x86-64, all registers can have their lower 8 bits represented }
+                        instr:=TAiCpu.op_reg(A_SETcc,S_B,tempreg);
+                        instr.condition := C_AE;
+                        current_asmdata.CurrAsmList.concat(instr);
+{$else}
+                        case getsupreg(location.register) of
+                          { On x86, only these four registers can have their lower 8 bits represented }
+                          RS_EAX, RS_ECX, RS_EDX, RS_EBX:
+                            begin
+                              { Emit a SETcc instruction that depends on the carry bit being zero,
+                                that is, the numerator is greater than or equal to the denominator. }
+                              tempreg := location.register;
+                              setsubreg(tempreg, R_SUBL);
+                              instr:=TAiCpu.op_reg(A_SETcc,S_B,tempreg);
+                              instr.condition := C_AE;
+                              current_asmdata.CurrAsmList.concat(instr);
+                            end;
+                          else
+                            { It will likely emit SBB anyway because location.register is
+                              usually imaginary. [Kit] }
+                            emit_const_reg(A_SBB,opsize,-1,location.register);
+                        end;
+{$endif}
                         cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                       end
                     else

+ 147 - 0
tests/test/cg/tmoddiv5.pp

@@ -0,0 +1,147 @@
+program tmoddiv5;
+
+{$MACRO ON}
+
+const
+  DivModConst_NumeratorCount = 6;
+
+type
+  TExpectedSet = record
+    Divisor: QWord;
+    ExpectedQ: array[0..DivModConst_NumeratorCount - 1] of QWord;
+    ExpectedR: array[0..DivModConst_NumeratorCount - 1] of QWord;
+  end;
+
+{ NOTES:
+  -  $DE0B6B3A7640000 =  1,000,000,000,000,000,000
+  - $4563918244F40000 =  5,000,000,000,000,000,000
+  - $8AC7230489E80000 = 10,000,000,000,000,000,000
+  - $D02AB486CEDC0000 = 15,000,000,000,000,000,000
+  - 18446744073709551615 = $FFFFFFFFFFFFFFFF - this tests how well the compiler can manage large decimal immediates
+}
+
+const
+  Inputs: array[0..DivModConst_NumeratorCount - 1] of QWord = (0, 500, $4563918244F40000, QWord($8AC7230489E80000), QWord($D02AB486CEDC0000), 18446744073709551615);
+
+  ExpectedSets: array[0..12] of TExpectedSet = (
+    (Divisor:                       1; ExpectedQ: (0, 500, $4563918244F40000, QWord($8AC7230489E80000),QWord($D02AB486CEDC0000),   QWord($FFFFFFFFFFFFFFFF));ExpectedR: (0,   0,                 0,                 0,                 0,                  0)),
+    (Divisor:                       3; ExpectedQ: (0, 166, $17213080C1A6AAAA,        $2E426101834D5555,       $4563918244F40000,       6148914691236517205); ExpectedR: (0,   2,                 2,                 1,                 0,                  0)),
+    (Divisor:                     $10; ExpectedQ: (0, 31,   $4563918244F4000,         $8AC7230489E8000,        $D02AB486CEDC000,          $FFFFFFFFFFFFFFF); ExpectedR: (0,   4,                 0,                 0,                 0,                 $F)),
+    (Divisor:                    $100; ExpectedQ: (0, 1,     $4563918244F400,   $8AC7230489E800,   $D02AB486CEDC00,     $FFFFFFFFFFFFFF); ExpectedR: (0, 244,                 0,                 0,                 0,                $FF)),
+    (Divisor:                  $10000; ExpectedQ: (0, 0,       $4563918244F4,     $8AC7230489E8,     $D02AB486CEDC,       $FFFFFFFFFFFF); ExpectedR: (0, 500,                 0,                 0,                 0,              $FFFF)),
+    (Divisor:                 1000000; ExpectedQ: (0, 0,       5000000000000,    10000000000000,    15000000000000,      18446744073709); ExpectedR: (0, 500,                 0,                 0,                 0,             551615)),
+    (Divisor:              $100000000; ExpectedQ: (0, 0,           $45639182,         $8AC72304,         $D02AB486,           $FFFFFFFF); ExpectedR: (0, 500,         $44F40000,         $89E80000,         $CEDC0000,          $FFFFFFFF)),
+    (Divisor:        $DE0B6B3A7640000; ExpectedQ: (0, 0,                   5,                10,                15,                  18); ExpectedR: (0, 500,                 0,                 0,                 0, 446744073709551615)),
+    (Divisor:       $1000000000000000; ExpectedQ: (0, 0,                  $4,                $8,                $D,                  $F); ExpectedR: (0, 500,  $563918244F40000,  $AC7230489E80000,   $2AB486CEDC0000,   $FFFFFFFFFFFFFFF)),
+    (Divisor:       $7FFFFFFFFFFFFFFF; ExpectedQ: (0, 0,                   0,                 1,                 1,                   2); ExpectedR: (0, 500, $4563918244F40000,  $AC7230489E80001, $502AB486CEDC0001,                  1)),
+    (Divisor: QWord($8000000000000000); ExpectedQ: (0, 0,                   0,                 1,                 1,                   1); ExpectedR: (0, 500, $4563918244F40000,  $AC7230489E80000, $502AB486CEDC0000,  $7FFFFFFFFFFFFFFF)),
+    (Divisor: QWord($8AC7230489E80000); ExpectedQ: (0, 0,                   0,                 1,                 1,                   1); ExpectedR: (0, 500, $4563918244F40000,                 0, $4563918244F40000,  $7538DCFB7617FFFF)),
+    (Divisor: QWord($FFFFFFFFFFFFFFFF); ExpectedQ: (0, 0,                   0,                 0,                 0,                   1); ExpectedR: (0, 500, $4563918244F40000, QWord($8AC7230489E80000),QWord($D02AB486CEDC0000),                  0))
+  );
+
+var
+  TestCount, PassCount, SkipCount, FailCount: Cardinal;
+
+{ It must be inline for reasons of code expansion, so div and mod contain constant denominators }
+procedure DivModConstTest(D: QWord); inline;
+var
+  X, Y, Z: QWord; A, C: Integer; FoundSet: Boolean;
+begin
+  WriteLn('Denominator: ', D);
+  WriteLn('---------------------------------');
+  FoundSet := False;
+
+  for A := Low(ExpectedSets) to High(ExpectedSets) do
+    if ExpectedSets[A].Divisor = D then
+    begin
+      FoundSet := True;
+      Break;
+    end;
+
+  if not FoundSet then
+    WriteLn('WARNING: Expected values missing');
+
+  for C := Low(Inputs) to High(Inputs) do
+  begin
+    Inc(TestCount, 2);
+    X := Inputs[C];
+    Y := X div D;
+    Z := X mod D;
+
+    if not FoundSet then
+    begin
+      WriteLn('  ', X, ' div ', D, ' = ', Y, #10'  ', X, ' mod ', D, ' = ', Z);
+      Inc(SkipCount, 2);
+    end
+    else
+    begin
+
+      { Compare quotient values }
+      if Y = ExpectedSets[A].ExpectedQ[C] then
+      begin
+        Write('Pass');
+        Inc(PassCount);
+      end
+      else
+      begin
+        Write('FAIL');
+        Inc(FailCount);
+      end;
+
+      WriteLn(' - ', X, ' div ', D, ' = ', Y, '; Expected: ', ExpectedSets[A].ExpectedQ[C]);
+
+      { Compare remainder values }
+      if Z = ExpectedSets[A].ExpectedR[C] then
+      begin
+        Write('Pass');
+        Inc(PassCount);
+      end
+      else
+      begin
+        Write('FAIL');
+        Inc(FailCount);
+      end;
+
+      WriteLn(' - ', X, ' mod ', D, ' = ', Z, '; Expected: ', ExpectedSets[A].ExpectedR[C]);
+
+    end;
+
+  end;
+  WriteLn();
+end;
+
+begin
+  { Initialisation }
+  TestCount := 0;
+  PassCount := 0;
+  FailCount := 0;
+  SkipCount := 0;
+
+  { Insert tests here }
+  DivModConstTest(1);
+  DivModConstTest(3);
+  DivModConstTest($10);
+  DivModConstTest($100);
+  DivModConstTest($10000);
+  DivModConstTest(1000000);
+  DivModConstTest($100000000);
+  DivModConstTest(1000000000000000000);
+  DivModConstTest($1000000000000000);
+  DivModConstTest($7FFFFFFFFFFFFFFF);
+  DivModConstTest(QWord($8000000000000000));
+
+  { Comment out these two tests to remove "Internal error 200706094" }
+  DivModConstTest(QWord($8AC7230489E80000));
+  DivModConstTest(QWord($FFFFFFFFFFFFFFFF));
+
+  { Final tally }
+  WriteLn('Total tests: ', TestCount);
+  WriteLn('----------------');
+  WriteLn('     PASSED: ', PassCount);
+  WriteLn('     FAILED: ', FailCount);
+  WriteLn('    SKIPPED: ', SkipCount);
+  if FailCount<>0 then
+    halt(1);
+  writeln('ok');
+end.
+