Browse Source

Make int64 division helpers “nostackframe”.

Rika Ichinose 1 year ago
parent
commit
ea271c1088
1 changed file with 175 additions and 197 deletions
  1. 175 197
      rtl/i386/int64p.inc

+ 175 - 197
rtl/i386/int64p.inc

@@ -15,28 +15,24 @@
 {$Q- no overflow checking }
 {$Q- no overflow checking }
 {$R- no range checking }
 {$R- no range checking }
 
 
+    function div_qword_throwdivbyzero(n,z : qword) : qword; { Division-by-zero exit shared by the 64-bit div/mod helpers; the parameter list matches theirs because they jump (not call) here. }
+      begin
+        HandleErrorFrame(200,get_frame); { 200 = division by zero, the same code the replaced inline checks passed to HandleErrorFrame. }
+      end;
+
 {$define FPC_SYSTEM_HAS_DIV_INT64}
 {$define FPC_SYSTEM_HAS_DIV_INT64}
-    function fpc_div_int64(n,z : int64) : int64;assembler;[public,alias: 'FPC_DIV_INT64']; compilerproc;
-      var
-         saveebx,saveedi,saveesi : longint;
+    function fpc_div_int64(n,z : int64) : int64;assembler;nostackframe;[public,alias: 'FPC_DIV_INT64']; compilerproc;
+      { n = [esp + 12], z = [esp + 4]. }
       asm
       asm
-            movl %ebx,saveebx
-            movl %esi,saveesi
-            movl %edi,saveedi
+            push %ebx
+            push %esi
+            push %edi
             { the following piece of code is taken from the     }
             { the following piece of code is taken from the     }
             { AMD Athlon Processor x86 Code Optimization manual }
             { AMD Athlon Processor x86 Code Optimization manual }
-            movl n+4,%ecx
-            movl n,%ebx
-            movl %ecx,%eax
-            orl %ebx,%eax
-            jnz .Lnodivzero
-            movl  %ebp,%edx
-            movl  $200,%eax
-            call HandleErrorFrame
-            jmp .Lexit
-.Lnodivzero:
-            movl z+4,%edx
-            movl z,%eax
+            movl 12+16(%esp),%ecx { ecx = hi(n) }
+            movl 12+12(%esp),%ebx { ebx = lo(n) }
+            movl 12+8(%esp),%edx { edx = hi(z) }
+            movl 12+4(%esp),%eax { eax = lo(z) }
             movl %ecx,%esi
             movl %ecx,%esi
             xorl %edx,%esi
             xorl %edx,%esi
             sarl $31,%esi
             sarl $31,%esi
@@ -56,13 +52,21 @@
             cmpl %ebx,%edx
             cmpl %ebx,%edx
             jae .Ltwo_divs
             jae .Ltwo_divs
             divl %ebx
             divl %ebx
-            movl %ecx,%edx
+.Lmake_sign_zero_hi:
+            xorl %edx,%edx
+.Lmake_sign:
             xorl %esi,%eax
             xorl %esi,%eax
             xorl %esi,%edx
             xorl %esi,%edx
             subl %esi,%eax
             subl %esi,%eax
             sbbl %esi,%edx
             sbbl %esi,%edx
-            jmp .Lexit
+            pop %edi
+            pop %esi
+            pop %ebx
+            ret $16
+
 .Ltwo_divs:
 .Ltwo_divs:
+            test %ebx,%ebx { Zero division ends up here with ebx = 0. }
+            jz .Ldivzero
             movl %eax,%ecx
             movl %eax,%ecx
             movl %edx,%eax
             movl %edx,%eax
             xorl %edx,%edx
             xorl %edx,%edx
@@ -71,11 +75,11 @@
             divl %ebx
             divl %ebx
             movl %ecx,%edx
             movl %ecx,%edx
             jmp .Lmake_sign
             jmp .Lmake_sign
+
 .Lbigdivisor:
 .Lbigdivisor:
-            subl $12,%esp
-            movl %eax,(%esp)
-            movl %ebx,4(%esp)
-            movl %edx,8(%esp)
+            movl %eax,12+4(%esp) { Reuse n~z stack space. }
+            movl %ebx,12+8(%esp)
+            movl %edx,12+12(%esp)
             movl %ecx,%edi
             movl %ecx,%edi
             shrl $1,%edx
             shrl $1,%edx
             rcrl $1,%eax
             rcrl $1,%eax
@@ -87,51 +91,38 @@
             shrl %cl,%edx
             shrl %cl,%edx
             roll $1,%edi
             roll $1,%edi
             divl %ebx
             divl %ebx
-            movl (%esp),%ebx
+            movl 12+4(%esp),%ebx
             movl %eax,%ecx
             movl %eax,%ecx
             imull %eax,%edi
             imull %eax,%edi
-            mull 4(%esp)
+            mull 12+8(%esp)
             addl %edi,%edx
             addl %edi,%edx
             subl %eax,%ebx
             subl %eax,%ebx
             movl %ecx,%eax
             movl %ecx,%eax
-            movl 8(%esp),%ecx
+            movl 12+12(%esp),%ecx
             sbbl %edx,%ecx
             sbbl %edx,%ecx
             sbbl $0,%eax
             sbbl $0,%eax
-            xorl %edx,%edx
-            addl $12,%esp
-.Lmake_sign:
-            xorl %esi,%eax
-            xorl %esi,%edx
-            subl %esi,%eax
-            sbbl %esi,%edx
-.Lexit:
-            movl saveebx,%ebx
-            movl saveesi,%esi
-            movl saveedi,%edi
+            jmp .Lmake_sign_zero_hi
+
+.Ldivzero:
+            pop %edi
+            pop %esi
+            pop %ebx
+            jmp div_qword_throwdivbyzero
       end;
       end;
 
 
 {$define FPC_SYSTEM_HAS_MOD_INT64}
 {$define FPC_SYSTEM_HAS_MOD_INT64}
-    function fpc_mod_int64(n,z : int64) : int64;assembler;[public,alias: 'FPC_MOD_INT64']; compilerproc;
-      var
-         saveebx,saveedi,saveesi : longint;
+    function fpc_mod_int64(n,z : int64) : int64;assembler;nostackframe;[public,alias: 'FPC_MOD_INT64']; compilerproc;
+      { n = [esp + 12], z = [esp + 4]. }
       asm
       asm
-            movl %ebx,saveebx
-            movl %esi,saveesi
-            movl %edi,saveedi
+            push %ebx
+            push %esi
+            push %edi
             { the following piece of code is taken from the     }
             { the following piece of code is taken from the     }
             { AMD Athlon Processor x86 Code Optimization manual }
             { AMD Athlon Processor x86 Code Optimization manual }
-            movl n+4,%ecx
-            movl n,%ebx
-            movl %ecx,%eax
-            orl %ebx,%eax
-            jnz .Lnodivzero
-            movl  %ebp,%edx
-            movl  $200,%eax
-            call HandleErrorFrame
-            jmp .Lexit
-.Lnodivzero:
-            movl z+4,%edx
-            movl z,%eax
+            movl 12+16(%esp),%ecx
+            movl 12+12(%esp),%ebx
+            movl 12+8(%esp),%edx
+            movl 12+4(%esp),%eax
             movl %edx,%esi
             movl %edx,%esi
             sarl $31,%esi
             sarl $31,%esi
             movl %edx,%edi
             movl %edx,%edi
@@ -152,12 +143,19 @@
             divl %ebx
             divl %ebx
             movl %edx,%eax
             movl %edx,%eax
             movl %ecx,%edx
             movl %ecx,%edx
+.Lmake_sign:
             xorl %esi,%eax
             xorl %esi,%eax
             xorl %esi,%edx
             xorl %esi,%edx
             subl %esi,%eax
             subl %esi,%eax
             sbbl %esi,%edx
             sbbl %esi,%edx
-            jmp .Lexit
+            pop %edi
+            pop %esi
+            pop %ebx
+            ret $16
+
 .Ltwo_divs:
 .Ltwo_divs:
+            test %ebx,%ebx { Zero division ends up here with ebx = 0. }
+            jz .Ldivzero
             movl %eax,%ecx
             movl %eax,%ecx
             movl %edx,%eax
             movl %edx,%eax
             xorl %edx,%edx
             xorl %edx,%edx
@@ -167,12 +165,12 @@
             movl %edx,%eax
             movl %edx,%eax
             xorl %edx,%edx
             xorl %edx,%edx
             jmp .Lmake_sign
             jmp .Lmake_sign
+
 .Lbig_divisor:
 .Lbig_divisor:
-            subl $16,%esp
-            movl %eax,(%esp)
-            movl %ebx,4(%esp)
-            movl %edx,8(%esp)
-            movl %ecx,12(%esp)
+            movl %eax,12+4(%esp)  { Reuse n~z stack space. }
+            movl %ebx,12+8(%esp)
+            movl %edx,12+12(%esp)
+            movl %ecx,12+16(%esp)
             movl %ecx,%edi
             movl %ecx,%edi
             shrl $1,%edx
             shrl $1,%edx
             rcrl $1,%eax
             rcrl $1,%eax
@@ -184,75 +182,68 @@
             shrl %cl,%edx
             shrl %cl,%edx
             roll $1,%edi
             roll $1,%edi
             divl %ebx
             divl %ebx
-            movl (%esp),%ebx
+            movl 12+4(%esp),%ebx
             movl %eax,%ecx
             movl %eax,%ecx
             imull %eax,%edi
             imull %eax,%edi
-            mull 4(%esp)
+            mull 12+8(%esp)
             addl %edi,%edx
             addl %edi,%edx
             subl %eax,%ebx
             subl %eax,%ebx
-            movl 8(%esp),%ecx
+            movl 12+12(%esp),%ecx
             sbbl %edx,%ecx
             sbbl %edx,%ecx
             sbbl %eax,%eax
             sbbl %eax,%eax
-            movl 12(%esp),%edx
+            movl 12+16(%esp),%edx
             andl %eax,%edx
             andl %eax,%edx
-            andl 4(%esp),%eax
+            andl 12+8(%esp),%eax
             addl %ebx,%eax
             addl %ebx,%eax
             adcl %ecx,%edx
             adcl %ecx,%edx
-            addl $16,%esp
-
-.Lmake_sign:
-            xorl %esi,%eax
-            xorl %esi,%edx
-            subl %esi,%eax
-            sbbl %esi,%edx
+            jmp .Lmake_sign
 
 
-.Lexit:
-            movl saveebx,%ebx
-            movl saveesi,%esi
-            movl saveedi,%edi
+.Ldivzero:
+            pop %edi
+            pop %esi
+            pop %ebx
+            jmp div_qword_throwdivbyzero
       end;
       end;
 
 
 {$define FPC_SYSTEM_HAS_DIV_QWORD}
 {$define FPC_SYSTEM_HAS_DIV_QWORD}
-    function fpc_div_qword(n,z : qword) : qword;assembler;[public,alias: 'FPC_DIV_QWORD']; compilerproc;
-      var
-         saveebx,saveedi,saveesi : longint;
+    function fpc_div_qword(n,z : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_DIV_QWORD']; compilerproc;
+      { n = [esp + 12], z = [esp + 4]. }
       asm
       asm
-            movl %ebx,saveebx
-            movl %esi,saveesi
-            movl %edi,saveedi
             { the following piece of code is taken from the     }
             { the following piece of code is taken from the     }
             { AMD Athlon Processor x86 Code Optimization manual }
             { AMD Athlon Processor x86 Code Optimization manual }
-            movl n+4,%ecx
-            movl n,%ebx
-            movl %ecx,%eax
-            orl %ebx,%eax
-            jnz .Lnodivzero
-            movl  %ebp,%edx
-            movl  $200,%eax
-            call HandleErrorFrame
-            jmp .Lexit
-.Lnodivzero:
-            movl z+4,%edx
-            movl z,%eax
-            testl %ecx,%ecx
+            movl 16(%esp),%ecx { ecx = hi(n) }
+            test %ecx,%ecx
             jnz .Lqworddivbigdivisor
             jnz .Lqworddivbigdivisor
-            cmpl %ebx,%edx
+
+            movl 12(%esp),%ecx { ecx = lo(n) }
+            movl 8(%esp),%edx { edx = hi(z) }
+            cmpl %ecx,%edx
             jae .Lqworddivtwo_divs
             jae .Lqworddivtwo_divs
-            divl %ebx
-            movl %ecx,%edx
-            jmp .Lexit
+
+            movl 4(%esp),%eax { eax = lo(z) }
+            divl %ecx
+            xorl %edx,%edx
+            ret $16
 
 
          .Lqworddivtwo_divs:
          .Lqworddivtwo_divs:
-            movl %eax,%ecx
+            test %ecx,%ecx { Zero division ends up here with ecx = 0. }
+            jz div_qword_throwdivbyzero
             movl %edx,%eax
             movl %edx,%eax
             xorl %edx,%edx
             xorl %edx,%edx
-            divl %ebx
-            xchgl %ecx,%eax
-            divl %ebx
-            movl %ecx,%edx
-            jmp .Lexit
+            divl %ecx
+            push %eax { eax = future hi(result); remember }
+            movl 4+4(%esp),%eax { eax = lo(z) }
+            divl %ecx
+            pop %edx
+            ret $16
 
 
          .Lqworddivbigdivisor:
          .Lqworddivbigdivisor:
+            push %ebx
+            push %esi
+            push %edi
+            movl 12+12(%esp),%ebx { ebx = lo(n) }
+            movl 12+8(%esp),%edx { edx = hi(z) }
+            movl 12+4(%esp),%eax { eax = lo(z) }
             movl %ecx,%edi
             movl %ecx,%edi
             shrl $1,%edx
             shrl $1,%edx
             rcrl $1,%eax
             rcrl $1,%eax
@@ -264,14 +255,14 @@
             shrl %cl,%edx
             shrl %cl,%edx
             roll $1,%edi
             roll $1,%edi
             divl %ebx
             divl %ebx
-            movl z,%ebx
+            movl 12+4(%esp),%ebx
             movl %eax,%esi             // save quotient to esi
             movl %eax,%esi             // save quotient to esi
             imull %eax,%edi
             imull %eax,%edi
-            mull n
+            mull 12+12(%esp)
             addl %edi,%edx
             addl %edi,%edx
             setcb %cl                  // cl:edx:eax = 65 bits quotient*divisor
             setcb %cl                  // cl:edx:eax = 65 bits quotient*divisor
 
 
-            movl z+4,%edi              // edi:ebx = dividend
+            movl 12+8(%esp),%edi       // edi:ebx = dividend
             subl %eax,%ebx
             subl %eax,%ebx
             movb $0,%al
             movb $0,%al
             sbbl %edx,%edi
             sbbl %edx,%edi
@@ -279,55 +270,50 @@
             sbbl $0,%esi
             sbbl $0,%esi
             xorl %edx,%edx
             xorl %edx,%edx
             movl %esi,%eax
             movl %esi,%eax
-.Lexit:
-            movl saveebx,%ebx
-            movl saveesi,%esi
-            movl saveedi,%edi
+            pop %edi
+            pop %esi
+            pop %ebx
       end;
       end;
 
 
 
 
 {$define FPC_SYSTEM_HAS_MOD_QWORD}
 {$define FPC_SYSTEM_HAS_MOD_QWORD}
-    function fpc_mod_qword(n,z : qword) : qword;assembler;[public,alias: 'FPC_MOD_QWORD']; compilerproc;
-      var
-         saveebx,saveedi : longint;
+    function fpc_mod_qword(n,z : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_MOD_QWORD']; compilerproc;
+      { n = [esp + 12], z = [esp + 4]. }
       asm
       asm
-            movl %ebx,saveebx
-            movl %edi,saveedi
             { the following piece of code is taken from the     }
             { the following piece of code is taken from the     }
             { AMD Athlon Processor x86 Code Optimization manual }
             { AMD Athlon Processor x86 Code Optimization manual }
-            movl n+4,%ecx
-            movl n,%ebx
-            movl %ecx,%eax
-            orl %ebx,%eax
-            jnz .Lnodivzero
-            movl  %ebp,%edx
-            movl  $200,%eax
-            call HandleErrorFrame
-            jmp .Lexit
-.Lnodivzero:
-            movl z+4,%edx
-            movl z,%eax
-            testl %ecx,%ecx
+            movl 16(%esp),%ecx { ecx = hi(n) }
+            movl 8(%esp),%edx { edx = hi(z) }
+            test %ecx,%ecx
             jnz .Lqwordmodr_big_divisior
             jnz .Lqwordmodr_big_divisior
-            cmpl %ebx,%edx
+
+            movl 12(%esp),%ecx { ecx = lo(n) }
+            movl 4(%esp),%eax { eax = lo(z) }
+            cmpl %ecx,%edx
             jae .Lqwordmodr_two_divs
             jae .Lqwordmodr_two_divs
-            divl %ebx
+
+            divl %ecx
             movl %edx,%eax
             movl %edx,%eax
-            movl %ecx,%edx
-            jmp .Lexit
+            xorl %edx,%edx
+            ret $16
 
 
          .Lqwordmodr_two_divs:
          .Lqwordmodr_two_divs:
-            movl %eax,%ecx
+            test %ecx,%ecx { Zero division ends up here with ecx = 0. }
+            jz div_qword_throwdivbyzero
             movl %edx,%eax
             movl %edx,%eax
             xorl %edx,%edx
             xorl %edx,%edx
-            divl %ebx
-            movl %ecx,%eax
-            divl %ebx
+            divl %ecx
+            movl 4(%esp),%eax { eax = lo(z) }
+            divl %ecx
             movl %edx,%eax
             movl %edx,%eax
             xorl %edx,%edx
             xorl %edx,%edx
-            jmp .Lexit
+            ret $16
 
 
          .Lqwordmodr_big_divisior:
          .Lqwordmodr_big_divisior:
+            push %ebx
+            push %edi
+            movl 8+12(%esp),%ebx { ebx = lo(n) }
+            movl 8+4(%esp),%eax { eax = lo(z) }
             movl %ecx,%edi
             movl %ecx,%edi
             shrl $1,%edx
             shrl $1,%edx
             rcrl $1,%eax
             rcrl $1,%eax
@@ -339,26 +325,25 @@
             shrl %cl,%edx
             shrl %cl,%edx
             roll $1,%edi
             roll $1,%edi
             divl %ebx
             divl %ebx
-            movl z,%ebx
+            movl 8+4(%esp),%ebx { lo(z) }
             imull %eax,%edi
             imull %eax,%edi
-            mull n
+            mull 8+12(%esp) { lo(n) }
             addl %edi,%edx
             addl %edi,%edx
             setcb %cl                  // cl:edx:eax = 65 bits quotient*divisor
             setcb %cl                  // cl:edx:eax = 65 bits quotient*divisor
-            movl z+4,%edi
+            movl 8+8(%esp),%edi { hi(z) }
             subl %eax,%ebx             // subtract (quotient*divisor) from dividend
             subl %eax,%ebx             // subtract (quotient*divisor) from dividend
             movb $0,%al
             movb $0,%al
             sbbl %edx,%edi
             sbbl %edx,%edi
             sbbb %cl,%al               // if carry is set now, the quotient was off by 1,
             sbbb %cl,%al               // if carry is set now, the quotient was off by 1,
                                        // and we need to add divisor to result
                                        // and we need to add divisor to result
-            movl n,%eax
+            movl 8+12(%esp),%eax { lo(n) }
             sbbl %edx,%edx
             sbbl %edx,%edx
             andl %edx,%eax
             andl %edx,%eax
-            andl n+4,%edx
+            andl 8+16(%esp),%edx { hi(n) }
             addl %ebx,%eax
             addl %ebx,%eax
             adcl %edi,%edx
             adcl %edi,%edx
-.Lexit:
-            movl saveebx,%ebx
-            movl saveedi,%edi
+            pop %edi
+            pop %ebx
       end;
       end;
 
 
 {$ifndef VER3_0}
 {$ifndef VER3_0}
@@ -390,61 +375,54 @@
         end [ 'eax','edx','ecx'];
         end [ 'eax','edx','ecx'];
       end;
       end;
 
 
-	
-    function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
-      var
-        overflowed : boolean;
+
+    function mul_qword_throwoverflow(f1,f2 : qword) : qword;
       begin
       begin
-        overflowed:=false;
+        HandleErrorFrame(215,get_frame);
+      end;
+
+
+    function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
+      { f1 = [esp + 12], f2 = [esp + 4]. }
+      asm
         { the following piece of code is taken from the
         { the following piece of code is taken from the
           AMD Athlon Processor x86 Code Optimization manual }
           AMD Athlon Processor x86 Code Optimization manual }
-        asm
-           movl f1+4,%edx
-           movl f2+4,%ecx
-           orl %ecx,%edx
-           movl f2,%edx
-           movl f1,%eax
-           jnz .Loverflowchecked
-           { if both upper dwords are =0 then it cannot overflow }
-           mull %edx
-           movl %eax,__RESULT
-           movl %edx,__RESULT+4
-           jmp .Lend
-
-        .Loverflowchecked:
-           { if both upper dwords are <>0 then it overflows always }
-           or %ecx,%ecx
-           jz .Loverok1
-           cmpl $0,f1+4
-           jnz .Loverflowed
-        .Loverok1:
-           { overflow checked code }
-           movl f1+4,%eax
-           mull f2
-           movl %eax,%ecx
-           jc  .Loverflowed
+        movl 16(%esp),%edx { edx = hi(f1) }
+        movl 8(%esp),%ecx { ecx = hi(f2) }
+        orl %ecx,%edx
+        movl 4(%esp),%edx { edx = lo(f2) }
+        movl 12(%esp),%eax { eax = lo(f1) }
+        jnz .Loverflowchecked
+        { if both upper dwords are =0 then it cannot overflow }
+        mull %edx
+        ret $16
 
 
-           movl f1,%eax
-           mull f2+4
-           jc  .Loverflowed
+.Loverflowed:
+        jmp mul_qword_throwoverflow
 
 
-           addl %eax,%ecx
-           jc  .Loverflowed
+.Loverflowchecked:
+        { if both upper dwords are <>0 then it overflows always }
+        test %ecx,%ecx
+        jz .Loverok1
+        cmpl $0,16(%esp)
+        jnz .Loverflowed
+.Loverok1:
+        { overflow checked code }
+        movl 16(%esp),%eax { eax = hi(f1) }
+        mull 4(%esp)
+        movl %eax,%ecx
+        jc  .Loverflowed
 
 
-           movl f2,%eax
-           mull f1
-           addl %ecx,%edx
-           movl %eax,__RESULT
-           movl %edx,__RESULT+4
-           jnc  .Lend
-
-        .Loverflowed:
-           movb $1,overflowed
+        movl 12(%esp),%eax { eax = lo(f1) }
+        mull 8(%esp)
+        jc  .Loverflowed
 
 
-        .Lend:
-        end [ 'eax','edx','ecx'];
+        addl %eax,%ecx
+        jc  .Loverflowed
 
 
-        if overflowed then
-          HandleErrorFrame(215,get_frame);
+        movl 4(%esp),%eax
+        mull 12(%esp)
+        addl %ecx,%edx
+        jc  .Loverflowed
       end;
       end;
 {$endif VER3_0}
 {$endif VER3_0}