Răsfoiți Sursa

Add System.UMul64x64_128.

Rika Ichinose 4 luni în urmă
părinte
comite
ff2492edf5
7 a modificat fișierele cu 109 adăugiri și 96 ștergeri
  1. 2 24
      packages/fcl-hash/src/fpecc.pp
  2. 49 0
      rtl/i386/i386.inc
  3. 15 0
      rtl/inc/generic.inc
  4. 18 46
      rtl/inc/softfpu.pp
  5. 8 26
      rtl/inc/system.inc
  6. 2 0
      rtl/inc/systemh.inc
  7. 15 0
      rtl/x86_64/x86_64.inc

+ 2 - 24
packages/fcl-hash/src/fpecc.pp

@@ -284,28 +284,6 @@ begin
   Result := Borrow;
 end;
 
-procedure mul_64_64(const Left, Right: UInt64; out Result: UInt128);
-var
-  a0, a1, b0, b1, m0, m1, m2, m3: UInt64;
-begin
-  a0 := Left and $FFFFFFFF;
-  a1 := Left shr 32;
-  b0 := Right and $FFFFFFFF;
-  b1 := Right shr 32;
-  m0 := a0 * b0;
-  m1 := a0 * b1;
-  m2 := a1 * b0;
-  m3 := a1 * b1;
-    
-  m2 := m2 + (m0 shr 32);
-  m2 := m2+m1;
-  if m2 < m1 then
-    m3 := m3+UInt64($100000000); // overflow
-
-  Result.m_low := (m0 and $FFFFFFFF) or (m2 shl 32);
-  Result.m_high := m3 + (m2 shr 32);
-end;
-
 function add_128_128(var a, b: UInt128): UInt128;
 begin
   Result.m_low := a.m_low + b.m_low;
@@ -334,7 +312,7 @@ begin
     begin
       if i >= NUM_ECC_DIGITS then
         Break;
-      mul_64_64(Left[I], Right[K-I], Product);
+      Product.m_low := UMul64x64_128(Left[I], Right[K-I], Product.m_high);
       r01 := add_128_128(r01, Product);
       if r01.m_high < Product.m_high then
         Inc(r2);
@@ -366,7 +344,7 @@ begin
     begin
       if I > K-I then
         Break;
-      mul_64_64(Left[I], Left[K-I], Product);
+      Product.m_low := UMul64x64_128(Left[I], Left[K-I], Product.m_high);
       if I < K-I then
       begin
         Inc(r2, Product.m_high shr 63);

+ 49 - 0
rtl/i386/i386.inc

@@ -3054,3 +3054,52 @@ asm
         sarl   %cl,%eax // uses 5 lower bits of cl.
 end;
 {$endif FPC_SYSTEM_HAS_SAR_QWORD}
+
+{$ifndef FPC_SYSTEM_HAS_UMUL64X64_128}
+{$define FPC_SYSTEM_HAS_UMUL64X64_128}
+function UMul64x64_128(a,b: uint64; out rHi: uint64): uint64; assembler; nostackframe;
+{ [esp + 12] = a, [esp + 4] = b, eax = rHi }
+asm
+  {                                                   Hi(a)             Lo(a)
+                                                    x Hi(b)             Lo(b)
+    -------------------------------------------------------------------------
+                                          Hi(Lo(a) * Lo(b)) Lo(Lo(a) * Lo(b))
+    +                   Hi(Hi(a) * Lo(b)) Lo(Hi(a) * Lo(b))
+    +                   Hi(Lo(a) * Hi(b)) Lo(Lo(a) * Hi(b))
+    + Hi(Hi(a) * Hi(b)) Lo(Hi(a) * Hi(b))
+             edi               esi          ebx, then edx          eax        }
+  push %ebx
+  push %esi
+  push %edi
+  mov %eax, %ecx { ecx = rHi. }
+
+  mov 12+16(%esp), %eax
+  mull 12+8(%esp) { edx:eax = Hi(a) * Hi(b). }
+  mov %eax, %esi
+  mov %edx, %edi { edi:esi = Hi(a) * Hi(b). }
+
+  mov 12+16(%esp), %eax
+  mull 12+4(%esp) { edx:eax = Hi(a) * Lo(b). }
+  mov %eax, %ebx
+  add %edx, %esi { edi:esi += Hi(Hi(a) * Lo(b)). }
+  adc $0, %edi
+
+  mov 12+12(%esp), %eax
+  mull 12+8(%esp) { edx:eax = Lo(a) * Hi(b). }
+  add %eax, %ebx // edi:esi:ebx += Lo(a) * Hi(b).
+  adc %edx, %esi
+  adc $0, %edi
+
+  mov 12+12(%esp), %eax
+  mull 12+4(%esp) { edx:eax = Lo(a) * Lo(b). }
+  add %ebx, %edx { edi:esi:edx += Hi(Lo(a) * Lo(b)). }
+  adc $0, %esi
+  adc $0, %edi
+
+  mov %esi, (%ecx)
+  mov %edi, 4(%ecx)
+  pop %edi
+  pop %esi
+  pop %ebx
+end;
+{$endif FPC_SYSTEM_HAS_UMUL64X64_128}

+ 15 - 0
rtl/inc/generic.inc

@@ -2064,6 +2064,21 @@ function fpc_mod_shortint(n,z : shortint) : shortint; [public,alias: 'FPC_MOD_SH
 
 {$endif FPC_INCLUDE_SOFTWARE_MOD_DIV}
 
+{$ifndef FPC_SYSTEM_HAS_UMUL64X64_128}
+{$push} {$q-,r-}
+function UMul64x64_128(a,b: uint64; out rHi: uint64): uint64;
+var
+  albl, albh, ahbl: uint64;
+begin
+  albl := uint64(uint32(a)) * uint32(b);
+  albh := uint64(uint32(a)) * (b shr 32);
+  ahbl := a shr 32 * uint32(b);
+  result := albl + albh shl 32 + ahbl shl 32;
+  rHi    := a shr 32 * (b shr 32) + albh shr 32 + ahbl shr 32 + (albl shr 32 + uint32(albh) + uint32(ahbl)) shr 32;
+end;
+{$pop}
+{$endif FPC_SYSTEM_HAS_UMUL64X64_128}
+
 {****************************************************************************}
 
 {$if defined(CPUINT8)}

+ 18 - 46
rtl/inc/softfpu.pp

@@ -1434,34 +1434,6 @@ Begin
     z0Ptr := z0;
 End;
 
-{*----------------------------------------------------------------------------
-| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
-| into two 64-bit pieces which are stored at the locations pointed to by
-| `z0Ptr' and `z1Ptr'.
-*----------------------------------------------------------------------------*}
-
-procedure mul64To128( a, b : bits64; var z0Ptr, z1Ptr : bits64);
-var
-    aHigh, aLow, bHigh, bLow : bits32;
-    z0, zMiddleA, zMiddleB, z1 : bits64;
-begin
-    aLow := a;
-    aHigh := a shr 32;
-    bLow := b;
-    bHigh := b shr 32;
-    z1 := ( bits64(aLow) ) * bLow;
-    zMiddleA := ( bits64( aLow )) * bHigh;
-    zMiddleB := ( bits64( aHigh )) * bLow;
-    z0 := ( bits64(aHigh) ) * bHigh;
-    inc(zMiddleA, zMiddleB);
-    inc(z0 ,( ( bits64( zMiddleA < zMiddleB ) ) shl 32 ) + ( zMiddleA shr 32 ));
-    zMiddleA := zMiddleA shl 32;
-    inc(z1, zMiddleA);
-    inc(z0, ord( z1 < zMiddleA ));
-    z1Ptr := z1;
-    z0Ptr := z0;
-end;
-
 {*----------------------------------------------------------------------------
 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
@@ -1473,12 +1445,12 @@ procedure mul128To256(a0,a1,b0,b1 : bits64;var z0Ptr,z1Ptr,z2Ptr,z3Ptr : bits64)
 var
     z0,z1,z2,z3,more1,more2 : bits64;
 begin
-    mul64To128( a1, b1, z2, z3 );
-    mul64To128( a1, b0, z1, more2 );
+    z3 := UMul64x64_128( a1, b1, z2 );
+    more2 := UMul64x64_128( a1, b0, z1 );
     add128( z1, more2, 0, z2, z1, z2 );
-    mul64To128( a0, b0, z0, more1 );
+    more1 := UMul64x64_128( a0, b0, z0 );
     add128( z0, more1, 0, z1, z0, z1 );
-    mul64To128( a0, b1, more1, more2 );
+    more2 := UMul64x64_128( a0, b1, more1 );
     add128( more1, more2, 0, z2, more1, z2 );
     add128( z0, z1, 0, more1, z0, z1 );
     z3Ptr := z3;
@@ -1498,8 +1470,8 @@ procedure mul128By64To192(a0,a1,b : bits64;var z0Ptr,z1Ptr,z2Ptr : bits64);
 var
     z0, z1, z2, more1 : bits64;
 begin
-    mul64To128( a1, b, z1, z2 );
-    mul64To128( a0, b, z0, more1 );
+    z2 := UMul64x64_128( a1, b, z1 );
+    more1 := UMul64x64_128( a0, b, z0 );
     add128( z0, more1, 0, z1, z0, z1 );
     z2Ptr := z2;
     z1Ptr := z1;
@@ -1529,7 +1501,7 @@ begin
       z:=qword( $FFFFFFFF00000000 )
     else
       z:=( a0 div b0 ) shl 32;
-    mul64To128( b, z, term0, term1 );
+    term1 := UMul64x64_128( b, z, term0 );
     sub128( a0, a1, term0, term1, rem0, rem1 );
     while ( ( sbits64(rem0) ) < 0 ) do begin
         dec(z,qword( $100000000 ));
@@ -7249,7 +7221,7 @@ begin
         normalizeFloatx80Subnormal( bSig, bExp, bSig );
     end;
     zExp := aExp + bExp - $3FFE;
-    mul64To128( aSig, bSig, zSig0, zSig1 );
+    zSig1 := UMul64x64_128( aSig, bSig, zSig0 );
     if 0 < sbits64( zSig0 ) then begin
         shortShift128Left( zSig0, zSig1, 1, zSig0, zSig1 );
         dec(zExp);
@@ -7336,7 +7308,7 @@ begin
         inc(zExp);
     end;
     zSig0 := estimateDiv128To64( aSig, rem1, bSig );
-    mul64To128( bSig, zSig0, term0, term1 );
+    term1 := UMul64x64_128( bSig, zSig0, term0 );
     sub128( aSig, rem1, term0, term1, rem0, rem1 );
     while ( sbits64( rem0 ) < 0 ) do begin
         dec(zSig0);
@@ -7344,7 +7316,7 @@ begin
     end;
     zSig1 := estimateDiv128To64( rem1, 0, bSig );
     if ( bits64( zSig1 shl 1 ) <= 8 ) then begin
-        mul64To128( bSig, zSig1, term1, term2 );
+        term2 := UMul64x64_128( bSig, zSig1, term1 );
         sub128( rem1, 0, term1, term2, rem1, rem2 );
         while ( sbits64( rem1 ) < 0 ) do begin
             dec(zSig1);
@@ -7431,7 +7403,7 @@ begin
     while ( 0 < expDiff ) do begin
         q := estimateDiv128To64( aSig0, aSig1, bSig );
         if ( 2 < q ) then q := q - 2 else q := 0;
-        mul64To128( bSig, q, term0, term1 );
+        term1 := UMul64x64_128( bSig, q, term0 );
         sub128( aSig0, aSig1, term0, term1, aSig0, aSig1 );
         shortShift128Left( aSig0, aSig1, 62, aSig0, aSig1 );
         dec( expDiff, 62 );
@@ -7441,7 +7413,7 @@ begin
         q := estimateDiv128To64( aSig0, aSig1, bSig );
         if ( 2 < q ) then q:= q - 2 else q := 0;
         q := q shr ( 64 - expDiff );
-        mul64To128( bSig, q shl ( 64 - expDiff ), term0, term1 );
+        term1 := UMul64x64_128( bSig, q shl ( 64 - expDiff ), term0 );
         sub128( aSig0, aSig1, term0, term1, aSig0, aSig1 );
         shortShift128Left( 0, bSig, 64 - expDiff, term0, term1 );
         while ( le128( term0, term1, aSig0, aSig1 ) <> 0 ) do begin
@@ -7522,7 +7494,7 @@ begin
     shift128Right( aSig0, 0, 2 + ( aExp and 1 ), aSig0, aSig1 );
     zSig0 := estimateDiv128To64( aSig0, aSig1, zSig0 shl 32 ) + ( zSig0 shl 30 );
     doubleZSig0 := zSig0 shl 1;
-    mul64To128( zSig0, zSig0, term0, term1 );
+    term1 := UMul64x64_128( zSig0, zSig0, term0 );
     sub128( aSig0, aSig1, term0, term1, rem0, rem1 );
     while ( sbits64( rem0 ) < 0 ) do begin
         dec(zSig0);
@@ -7532,9 +7504,9 @@ begin
     zSig1 := estimateDiv128To64( rem1, 0, doubleZSig0 );
     if ( ( zSig1 and $3FFFFFFFFFFFFFFF ) <= 5 ) then begin
         if ( zSig1 = 0 ) then zSig1 := 1;
-        mul64To128( doubleZSig0, zSig1, term1, term2 );
+        term2 := UMul64x64_128( doubleZSig0, zSig1, term1 );
         sub128( rem1, 0, term1, term2, rem1, rem2 );
-        mul64To128( zSig1, zSig1, term2, term3 );
+        term3 := UMul64x64_128( zSig1, zSig1, term2 );
         sub192( rem1, rem2, 0, 0, term2, term3, rem1, rem2, rem3 );
         while ( sbits64( rem1 ) < 0 ) do begin
             dec(zSig1);
@@ -9104,7 +9076,7 @@ begin
     shortShift128Left( aSig0, aSig1, 13 - ( aExp and 1 ), aSig0, aSig1 );
     zSig0 := estimateDiv128To64( aSig0, aSig1, zSig0 shl 32 ) + ( zSig0 shl 30 );
     doubleZSig0 := zSig0 shl 1;
-    mul64To128( zSig0, zSig0, term0, term1 );
+    term1 := UMul64x64_128( zSig0, zSig0, term0);
     sub128( aSig0, aSig1, term0, term1, rem0, rem1 );
     while ( sbits64(rem0) < 0 ) do begin
         dec(zSig0);
@@ -9114,9 +9086,9 @@ begin
     zSig1 := estimateDiv128To64( rem1, 0, doubleZSig0 );
     if ( ( zSig1 and $1FFF ) <= 5 ) then begin
         if ( zSig1 = 0 ) then zSig1 := 1;
-        mul64To128( doubleZSig0, zSig1, term1, term2 );
+        term2 := UMul64x64_128( doubleZSig0, zSig1, term1 );
         sub128( rem1, 0, term1, term2, rem1, rem2 );
-        mul64To128( zSig1, zSig1, term2, term3 );
+        term3 := UMul64x64_128( zSig1, zSig1, term2 );
         sub192( rem1, rem2, 0, 0, term2, term3, rem1, rem2, rem3 );
         while ( sbits64(rem1) < 0 ) do begin
             dec(zSig1);

+ 8 - 26
rtl/inc/system.inc

@@ -746,9 +746,8 @@ end;
 
 function random(l:int64): int64;
 var
- a, b, c, d: cardinal;
- q, bd, ad, bc, ac: qword;
- carry: qword;
+  a: uint32;
+  neg: boolean;
 begin
   if (l>=Low(int32)) and (l<=High(int32)) then
     begin
@@ -758,32 +757,15 @@ begin
       exit(longint(int64(xsr128_32_u32rand)*l shr 32));
     end;
 
-  if (l < 0) then
-    begin
-      inc(l);
-      q:=qword(-l)
-    end
-  else
-    q:=qword(l);
+  neg:=l<0;
+  if neg then
+    l:=-l-1;
 
   a:=xsr128_32_u32rand;
-  b:=xsr128_32_u32rand;
-
-  c:=q shr 32;
-  d:=cardinal(q);
-
-  bd:=qword(b)*d;
-  ad:=qword(a)*d;
-  bc:=qword(b)*c;
-  ac:=qword(a)*c;
-
-  // We only need the carry bit
-  carry:=((bd shr 32)+cardinal(ad)+cardinal(bc)) shr 32;
+  UMul64x64_128(uint64(a) shl 32 or xsr128_32_u32rand, uint64(l), uint64(result));
 
-  // Calculate the final result
-  result:=int64(carry+(ad shr 32)+(bc shr 32)+ac);
-  if l<0 then
-    result:=-result;
+  if neg then
+    result:=-result-1;
 end;
 
 {$ifndef FPUNONE}

+ 2 - 0
rtl/inc/systemh.inc

@@ -1060,6 +1060,8 @@ function NtoLE(const AValue: DWord): DWord;{$ifdef SYSTEMINLINE}inline;{$endif}
 function NtoLE(const AValue: Int64): Int64;{$ifdef SYSTEMINLINE}inline;{$endif}
 function NtoLE(const AValue: QWord): QWord;{$ifdef SYSTEMINLINE}inline;{$endif}
 
+function UMul64x64_128(a,b: uint64; out rHi: uint64): uint64;
+
 {$ifdef FPC_HAS_INTERNAL_ROX}
 
 {$if defined(cpux86_64) or defined(cpui386) or defined(cpui8086)}

+ 15 - 0
rtl/x86_64/x86_64.inc

@@ -1810,3 +1810,18 @@ dodiv:
   movl $1,%eax
 end;
 {$endif win64}
+
+{$ifndef FPC_SYSTEM_HAS_UMUL64X64_128}
+{$define FPC_SYSTEM_HAS_UMUL64X64_128}
+function UMul64x64_128(a,b: uint64; out rHi: uint64): uint64; assembler; nostackframe;
+{ Win64: rcx = a, rdx = b, r8 = rHi.
+  SysV:  rdi = a, rsi = b, rdx = rHi. }
+asm
+{$ifndef win64}
+  mov %rdx, %rcx { rcx = rHi, as rdx is used for mul. }
+{$endif}
+  mov a, %rax
+  mul b
+  mov %rdx, {$ifdef win64} (%r8) {$else} (%rcx) {$endif}
+end;
+{$endif FPC_SYSTEM_HAS_UMUL64X64_128}