Browse Source

* fixed overflow detection, + some small optimizations

Jonas Maebe 20 years ago
parent
commit
4b6030beb9
1 changed files with 28 additions and 22 deletions
  1. 28 22
      rtl/powerpc/int64p.inc

+ 28 - 22
rtl/powerpc/int64p.inc

@@ -151,44 +151,47 @@
         // (r3:r4) = (r3:r4) * (r5:r6),  checkoverflow is in r7
         // (r3:r4) = (r3:r4) * (r5:r6),  checkoverflow is in r7
         //   res        f1        f2
         //   res        f1        f2
 
 
-        or.     r8,r3,r5    // are both msw's 0?
+        or.     r10,r3,r5    // are both msw's 0?
         mulhwu  r8,r4,r6    // msw of product of lsw's
         mulhwu  r8,r4,r6    // msw of product of lsw's
+        subi    r0,r7,1     // if no overflowcheck, r0 := $ffffffff, else r0 := 0;
+        beq     .LDone      // if both msw's are zero, skip cross products
+        mullw   r9,r4,r5    // lsw of first cross-product
         cntlzw  r11,r3      // count leading zeroes of msw1
         cntlzw  r11,r3      // count leading zeroes of msw1
         cntlzw  r12,r5      // count leading zeroes of msw2
         cntlzw  r12,r5      // count leading zeroes of msw2
-        subi    r0,r7,1     // if no overflowcheck, r0 := $ffffffff, else r0 := 0;
+        mullw   r7,r3,r6    // lsw of second cross-product
+        add     r12,r11,r12  // sum of leading zeroes
         mr      r10,r8
         mr      r10,r8
-        add     r9,r11,r12  // sum of leading zeroes
-        or      r0,r9,r0    // maximise sum if no overflow checking, otherwise it remains
+        add     r8,r8,r9    // add
+        or      r0,r12,r0    // maximise sum if no overflow checking, otherwise it remains
         cmplwi  cr1,r0,64   // >= 64 leading zero bits in total? If so, no overflow
         cmplwi  cr1,r0,64   // >= 64 leading zero bits in total? If so, no overflow
-        beq     .Lmsw_zero  // if both msw's are zero, skip cross products
-        mullw   r7,r3,r6    // lsw of first cross-product
         add     r8,r8,r7    // add
         add     r8,r8,r7    // add
-        mullw   r5,r4,r5    // lsw of second cross-product
-        add     r8,r8,r5    // add
-      .Lmsw_zero:
         bge+    cr1,.LDone  // if the sum of leading zeroes >= 64 (or checkoverflow was 0)
         bge+    cr1,.LDone  // if the sum of leading zeroes >= 64 (or checkoverflow was 0)
                             // there's no overflow, otherwise more thorough check
                             // there's no overflow, otherwise more thorough check
+        add     r7,r7,r9
+        mulhwu  r3,r6,r3
+        addc    r7,r7,r10   // add the msw of the product of the lsw's, record carry
+        cntlzw  r9,r5
+        cntlzw  r10,r4      // get leading zeroes count of lsw f1
+        mulhwu  r5,r4,r5
+        addze   r3,r3
         subfic  r0,r11,31   // if msw f1 = 0, then r0 := -1, else r0 >= 0
         subfic  r0,r11,31   // if msw f1 = 0, then r0 := -1, else r0 >= 0
-        cntlzw  r3,r4       // get leading zeroes count of lsw f1
+        cntlzw  r7,r6
+        subfic  r11,r9,31   // same for f2
         srawi   r0,r0,31    // if msw f1 = 0, then r0 := -1 (all-ones mask), else r0 := 0
         srawi   r0,r0,31    // if msw f1 = 0, then r0 := -1 (all-ones mask), else r0 := 0
-        subfic  r11,r12,31  // same for f2
-        cntlzw  r12,r6
         srawi   r11,r11,31
         srawi   r11,r11,31
-        and     r3,r3,r0    // if msw f1 <> 0, the leading zero count lsw f1 := 0
-        and     r12,r12,r11 // same for f2
-        add     r9,r9,r3    // add leading zero counts of lsw's to sum if appropriate
+        and     r10,r10,r0    // if msw f1 <> 0, the leading zero count lsw f1 := 0
+        and     r9,r7,r11     // same for f2
+        or.     r5,r5,r3
+        add     r9,r9,r10    // add leading zero counts of lsw's to sum if appropriate
         add     r9,r9,r12
         add     r9,r9,r12
-        cmplwi  r9,64       // is the sum now >= 64?
+        cmplwi  cr7,r9,64   // is the sum now >= 64?
         cmplwi  cr1,r9,62   // or <= 62?
         cmplwi  cr1,r9,62   // or <= 62?
-        bge+    .LDone      // >= 64 leading zeroes -> no overflow
+        bge+    cr7,.LDone      // >= 64 leading zeroes -> no overflow
         ble+    cr1,.LOverflow  // <= 62 leading zeroes -> overflow
         ble+    cr1,.LOverflow  // <= 62 leading zeroes -> overflow
                             // for 63 zeroes, we need additional checks
                             // for 63 zeroes, we need additional checks
-        add     r9,r7,r5    // sum of lsw's cross products can't produce a carry,
+                            // sum of lsw's cross products can't have produced a carry,
                             // because the sum of leading zeroes is 63 -> at least
                             // because the sum of leading zeroes is 63 -> at least
                             // one of these cross products is 0
                             // one of these cross products is 0
-        li      r0, 0
-        addc    r9,r9,r10   // add the msw of the product of the lsw's
-        addze.  r0,r0
         beq+    .LDone
         beq+    .LDone
       .LOverflow:
       .LOverflow:
         b       FPC_OVERFLOW
         b       FPC_OVERFLOW
@@ -200,7 +203,10 @@
 
 
 {
 {
   $Log$
   $Log$
-  Revision 1.7  2005-02-14 17:13:31  peter
+  Revision 1.8  2005-02-19 14:16:02  jonas
+    * fixed overflow detection, + some small optimizations
+
+  Revision 1.7  2005/02/14 17:13:31  peter
     * truncate log
     * truncate log
 
 
 }
 }