Browse Source

* Reworked i386 division by constant optimization to reuse code from powerpc64. The algorithm is slightly different, signed version is one instruction shorter, unsigned one is one instruction longer (typically). The new algorithm is easily scalable for x86_64 target, unlike the old one.

git-svn-id: trunk@27884 -
sergei 11 years ago
parent
commit
92cf25b9a5
1 changed files with 28 additions and 143 deletions
  1. 28 143
      compiler/i386/n386mat.pas

+ 28 - 143
compiler/i386/n386mat.pas

@@ -61,16 +61,6 @@ implementation
                              TI386MODDIVNODE
 *****************************************************************************}
 
-    function log2(i : dword) : dword;
-      begin
-        result:=0;
-        i:=i shr 1;
-        while i<>0 do
-          begin
-            i:=i shr 1;
-            inc(result);
-          end;
-      end;
 
 
    procedure ti386moddivnode.pass_generate_code;
@@ -80,8 +70,10 @@ implementation
         hl:Tasmlabel;
         op:Tasmop;
         e : longint;
-        d,l,r,s,m,a,n,t : dword;
-        m_low,m_high,j,k : qword;
+        d,m: dword;
+        s: byte;
+        sm: aint;
+        m_add: boolean;
       begin
         secondpass(left);
         if codegenerror then
@@ -147,74 +139,28 @@ implementation
                 if is_signed(left.resultdef) then
                   begin
                     e:=tordconstnode(right).value.svalue;
-                    d:=abs(e);
-                    { Determine algorithm (a), multiplier (m), and shift factor (s) for 32-bit
-                      signed integer division. Based on: Granlund, T.; Montgomery, P.L.:
-                      "Division by Invariant Integers using Multiplication". SIGPLAN Notices,
-                      Vol. 29, June 1994, page 61.
-                    }
-
-                    l:=log2(d);
-                    j:=qword($80000000) mod qword(d);
-                    k:=(qword(1) shl (32+l)) div (qword($80000000-j));
-                    m_low:=((qword(1)) shl (32+l)) div d;
-                    m_high:=(((qword(1)) shl (32+l)) + k) div d;
-                    while ((m_low shr 1) < (m_high shr 1)) and (l > 0) do
-                      begin
-                        m_low:=m_low shr 1;
-                        m_high:=m_high shr 1;
-                        dec(l);
-                      end;
-                    m:=dword(m_high);
-                    s:=l;
-                    if (m_high shr 31)<>0 then
-                      a:=1
-                    else
-                      a:=0;
+                    calc_divconst_magic_signed(32,e,sm,s);
                     cg.getcpuregister(current_asmdata.CurrAsmList,NR_EAX);
-                    emit_const_reg(A_MOV,S_L,aint(m),NR_EAX);
+                    emit_const_reg(A_MOV,S_L,sm,NR_EAX);
                     cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);
                     emit_reg(A_IMUL,S_L,hreg1);
-                    emit_reg_reg(A_MOV,S_L,hreg1,NR_EAX);
-                    if a<>0 then
-                      begin
-                        emit_reg_reg(A_ADD,S_L,NR_EAX,NR_EDX);
-                        {
-                          printf ("; dividend: memory location or register other than EAX or EDX\n");
-                          printf ("\n");
-                          printf ("MOV EAX, 0%08LXh\n", m);
-                          printf ("IMUL dividend\n");
-                          printf ("MOV EAX, dividend\n");
-                          printf ("ADD EDX, EAX\n");
-                          if (s) printf ("SAR EDX, %d\n", s);
-                          printf ("SHR EAX, 31\n");
-                          printf ("ADD EDX, EAX\n");
-                          if (e < 0) printf ("NEG EDX\n");
-                          printf ("\n");
-                          printf ("; quotient now in EDX\n");
-                        }
-                      end;
-                      {
-                        printf ("; dividend: memory location of register other than EAX or EDX\n");
-                        printf ("\n");
-                        printf ("MOV EAX, 0%08LXh\n", m);
-                        printf ("IMUL dividend\n");
-                        printf ("MOV EAX, dividend\n");
-                        if (s) printf ("SAR EDX, %d\n", s);
-                        printf ("SHR EAX, 31\n");
-                        printf ("ADD EDX, EAX\n");
-                        if (e < 0) printf ("NEG EDX\n");
-                        printf ("\n");
-                        printf ("; quotient now in EDX\n");
-                      }
-                    if s<>0 then
+                    { only the high half of result is used }
+                    cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
+                    { add or subtract dividend }
+                    if (e>0) and (sm<0) then
+                      emit_reg_reg(A_ADD,S_L,hreg1,NR_EDX)
+                    else if (e<0) and (sm>0) then
+                      emit_reg_reg(A_SUB,S_L,hreg1,NR_EDX);
+                    { shift if necessary }
+                    if (s<>0) then
                       emit_const_reg(A_SAR,S_L,s,NR_EDX);
-                    emit_const_reg(A_SHR,S_L,31,NR_EAX);
-                    emit_reg_reg(A_ADD,S_L,NR_EAX,NR_EDX);
-                    if e<0 then
-                      emit_reg(A_NEG,S_L,NR_EDX);
+                    { extract and add the sign bit }
+                    if (e<0) then
+                      emit_reg_reg(A_MOV,S_L,NR_EDX,hreg1);
+                    { if e>=0, hreg1 still contains dividend }
+                    emit_const_reg(A_SHR,S_L,31,hreg1);
+                    emit_reg_reg(A_ADD,S_L,hreg1,NR_EDX);
                     cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);
-                    cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
                     location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
                     cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EDX,location.register)
                   end
@@ -230,83 +176,22 @@ implementation
                       end
                     else
                       begin
-                        { Reduce divisor until it becomes odd }
-                        n:=0;
-                        t:=d;
-                        while (t and 1)=0 do
-                          begin
-                            t:=t shr 1;
-                            inc(n);
-                          end;
-                        { Generate m, s for algorithm 0. Based on: Granlund, T.; Montgomery,
-                        P.L.: "Division by Invariant Integers using Multiplication".
-                        SIGPLAN Notices, Vol. 29, June 1994, page 61.
-                        }
-                        l:=log2(t)+1;
-                        j:=qword($ffffffff) mod qword(t);
-                        k:=(qword(1) shl (32+l)) div (qword($ffffffff-j));
-                        m_low:=((qword(1)) shl (32+l)) div t;
-                        m_high:=(((qword(1)) shl (32+l)) + k) div t;
-                        while ((m_low shr 1) < (m_high shr 1)) and (l>0) do
-                          begin
-                            m_low:=m_low shr 1;
-                            m_high:=m_high shr 1;
-                            l:=l-1;
-                          end;
-                        if (m_high shr 32)=0 then
-                          begin
-                            m:=dword(m_high);
-                            s:=l;
-                            a:=0;
-                          end
-
-                        { Generate m, s for algorithm 1. Based on: Magenheimer, D.J.; et al:
-                        "Integer Multiplication and Division on the HP Precision Architecture".
-                        IEEE Transactions on Computers, Vol 37, No. 8, August 1988, page 980.
-                        }
-                        else
-                          begin
-                            s:=log2(t);
-                            m_low:=(qword(1) shl (32+s)) div qword(t);
-                            r:=dword(((qword(1)) shl (32+s)) mod qword(t));
-                            if (r < ((t>>1)+1)) then
-                              m:=dword(m_low)
-                            else
-                              m:=dword(m_low)+1;
-                            a:=1;
-                          end;
-                        { Reduce multiplier for either algorithm to smallest possible }
-                        while (m and 1)=0 do
-                          begin
-                            m:=m shr 1;
-                            dec(s);
-                          end;
-                        { Adjust multiplier for reduction of even divisors }
-                        inc(s,n);
+                        calc_divconst_magic_unsigned(32,d,m,m_add,s);
                         cg.getcpuregister(current_asmdata.CurrAsmList,NR_EAX);
                         emit_const_reg(A_MOV,S_L,aint(m),NR_EAX);
                         cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);
                         emit_reg(A_MUL,S_L,hreg1);
-                        if a<>0 then
+                        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
+                        if m_add then
                           begin
-                            {
-                            printf ("; dividend: register other than EAX or memory location\n");
-                            printf ("\n");
-                            printf ("MOV EAX, 0%08lXh\n", m);
-                            printf ("MUL dividend\n");
-                            printf ("ADD EAX, 0%08lXh\n", m);
-                            printf ("ADC EDX, 0\n");
-                            if (s) printf ("SHR EDX, %d\n", s);
-                            printf ("\n");
-                            printf ("; quotient now in EDX\n");
-                            }
-                            emit_const_reg(A_ADD,S_L,aint(m),NR_EAX);
-                            emit_const_reg(A_ADC,S_L,0,NR_EDX);
+                            emit_reg_reg(A_SUB,S_L,NR_EDX,hreg1);
+                            emit_const_reg(A_SHR,S_L,1,hreg1);
+                            emit_reg_reg(A_ADD,S_L,hreg1,NR_EDX);
+                            dec(s);
                           end;
                         if s<>0 then
                           emit_const_reg(A_SHR,S_L,aint(s),NR_EDX);
                         cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);
-                        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
                         location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
                         cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EDX,location.register)
                       end;