20 年之前 · d088695868
--- a/compiler/powerpc64/cgcpu.pas
+++ b/compiler/powerpc64/cgcpu.pas
@@ -172,6 +172,136 @@ uses
 
				   symconst, symsym, fmodule,
			
 
				   rgobj, tgobj, cpupi, procinfo, paramgr;
			
 
				 
			
 
				+{ helper function which calculates the "magic" values needed to replace an
			
 
				+ unsigned division by the constant d with a multiplication. See "The PowerPC
			
 
				+ Compiler Writer's Guide" for more information.
			
 
				+
			
 
				+   N           : width in bits of the division that is replaced
			
 
				+   d           : the constant divisor, must be > 0 (asserted)
			
 
				+   magic_m     : receives the magic multiplier (q2 + 1)
			
 
				+   magic_add   : receives true if q2 reached the 2^(N-1) limit before one of
			
 
				+                 the doubling steps; do_constant_div then emits the longer
			
 
				+                 sub/shr/add/shr sequence instead of a single shift
			
 
				+   magic_shift : receives the shift count (p - N)
			
 
				+
			
 
				+ NOTE(review): nc is computed with aWord wrap-around arithmetic, which looks
			
 
				+ correct only if N equals the bit size of aWord (the only call visible in this
			
 
				+ change passes sizeof(aWord)*8) - confirm before using other values of N }
			
 
				+procedure getmagic_unsignedN(const N : byte; const d : aWord; 
			
 
				+  out magic_m : aWord; out magic_add : boolean; out magic_shift : byte);
			
 
				+var
			
 
				+    p : aInt;
			
 
				+    nc, delta, q1, r1, q2, r2, two_N_minus_1 : aWord;
			
 
				+begin
			
 
				+  assert(d > 0);
			
 
				+
			
 
				+  two_N_minus_1 := aWord(1) shl (N-1); { 2^(N-1) }
			
 
				+    
			
 
				+  magic_add := false;
			
 
				+  nc := - 1 - (-d) mod d; { relies on unsigned wrap-around of aWord }
			
 
				+  p := N-1; { initialize p }
			
 
				+  q1 := two_N_minus_1 div nc; { initialize q1 = 2^p div nc }
			
 
				+  r1 := two_N_minus_1 - q1*nc; { initialize r1 = rem(2^p, nc) }
			
 
				+  q2 := (two_N_minus_1-1) div d; { initialize q2 = (2^p-1) div d }
			
 
				+  r2 := (two_N_minus_1-1) - q2*d; { initialize r2 = rem(2^p-1, d) }
			
 
				+  repeat
			
 
				+    inc(p);
			
 
				+    if (r1 >= (nc - r1)) then begin
			
 
				+      q1 := 2 * q1 + 1; { update q1 }
			
 
				+      r1 := 2*r1 - nc; { update r1 }
			
 
				+    end else begin
			
 
				+      q1 := 2*q1; { update q1 }
			
 
				+      r1 := 2*r1; { update r1 }
			
 
				+    end;
			
 
				+    if ((r2 + 1) >= (d - r2)) then begin
			
 
				+      if (q2 >= (two_N_minus_1-1)) then
			
 
				+        magic_add := true;
			
 
				+      q2 := 2*q2 + 1; { update q2 }
			
 
				+      r2 := 2*r2 + 1 - d; { update r2 }
			
 
				+    end else begin
			
 
				+      if (q2 >= two_N_minus_1) then 
			
 
				+        magic_add := true;
			
 
				+      q2 := 2*q2; { update q2 }
			
 
				+      r2 := 2*r2 + 1; { update r2 }
			
 
				+    end;
			
 
				+    delta := d - 1 - r2;
			
 
				+  until not ((p < (2*N)) and ((q1 < delta) or ((q1 = delta) and (r1 = 0)))); { p < 2*N bounds the number of iterations }
			
 
				+  magic_m := q2 + 1; { resulting magic number }
			
 
				+  magic_shift := p - N; { resulting shift }
			
 
				+end;
			
 
				+
			
 
				+{ helper function which calculates the "magic" values needed to replace a
			
 
				+ signed division by the constant d with a multiplication. See "The PowerPC
			
 
				+ Compiler Writer's Guide" for more information.
			
 
				+
			
 
				+   N       : width in bits of the division that is replaced
			
 
				+   d       : the constant divisor, must be < -1 or > 1 (asserted)
			
 
				+   magic_m : receives the magic multiplier (q2 + 1, negated if d < 0)
			
 
				+   magic_s : receives the shift count (p - N)
			
 
				+
			
 
				+ NOTE(review): abs(d) is taken in aInt, so d = low(aInt) presumably overflows
			
 
				+ here; do_constant_div handles that divisor through ispowerof2 before it can
			
 
				+ reach this routine - confirm for any new caller.
			
 
				+ NOTE(review): aWord(d) shr (N-1) only isolates the sign bit of d if N equals
			
 
				+ the bit size of aWord - confirm before using other values of N }
			
 
				+procedure getmagic_signedN(const N : byte; const d : aInt; 
			
 
				+  out magic_m : aInt; out magic_s : aInt);
			
 
				+var
			
 
				+  p : aInt;
			
 
				+  ad, anc, delta, q1, r1, q2, r2, t : aWord;
			
 
				+  two_N_minus_1 : aWord;
			
 
				+    
			
 
				+begin
			
 
				+  assert((d < -1) or (d > 1));
			
 
				+
			
 
				+  two_N_minus_1 := aWord(1) shl (N-1); { 2^(N-1) }
			
 
				+
			
 
				+  ad := abs(d);
			
 
				+  t := two_N_minus_1 + (aWord(d) shr (N-1)); { 2^(N-1), plus one if d is negative }
			
 
				+  anc := t - 1 - t mod ad; { absolute value of nc }
			
 
				+  p := (N-1); { initialize p }
			
 
				+  q1 := two_N_minus_1 div anc; { initialize q1 = 2^p div abs(nc) }
			
 
				+  r1 := two_N_minus_1 - q1*anc; { initialize r1 = rem(2^p, abs(nc)) }
			
 
				+  q2 := two_N_minus_1 div ad; { initialize q2 = 2^p div abs(d) }
			
 
				+  r2 := two_N_minus_1 - q2*ad; { initialize r2 = rem(2^p, abs(d)) }
			
 
				+  repeat 
			
 
				+    inc(p);
			
 
				+    q1 := 2*q1; { update q1 = 2^p div abs(nc) }
			
 
				+    r1 := 2*r1; { update r1 = rem(2^p, abs(nc)) }
			
 
				+    if (r1 >= anc) then begin { must be unsigned comparison }
			
 
				+      inc(q1);
			
 
				+      dec(r1, anc);
			
 
				+    end;
			
 
				+    q2 := 2*q2; { update q2 = 2^p div abs(d) }
			
 
				+    r2 := 2*r2; { update r2 = rem(2^p, abs(d)) }
			
 
				+    if (r2 >= ad) then begin { must be unsigned comparison }
			
 
				+      inc(q2);
			
 
				+      dec(r2, ad);
			
 
				+    end;
			
 
				+    delta := ad - r2;
			
 
				+  until not ((q1 < delta) or ((q1 = delta) and (r1 = 0)));
			
 
				+  magic_m := q2 + 1;
			
 
				+  if (d < 0) then begin
			
 
				+    magic_m := -magic_m; { resulting magic number }
			
 
				+  end;
			
 
				+  magic_s := p - N; { resulting shift }
			
 
				+end;
			
 
				+
			
 
				+{ checks whether value is a positive or negative power of two, i.e. whether
				+ value = 2^power or value = -(2^power).
				+
				+   value  : the number to test
				+   power  : receives the exponent if the function returns true, 0 otherwise
				+   neg    : receives true if value is negative (also if the result is false)
				+   result : true if value is +/- a power of two; zero is not a power of two
				+            and yields false }
				+function ispowerof2(value : aInt; out power : byte; out neg : boolean) : boolean;
				+var
				+  i : longint;
				+  hl : aInt;
				+begin
				+  { give every output a defined value first, so that none of the "not a
				+   power of two" exits below leaves result or power unassigned }
				+  result := false;
				+  power := 0;
				+  neg := (value < 0);
				+  { also try to find negative powers of two by negating the value if it is
				+   negative. low(aInt) is special because it can not be negated, so simply
				+   return the appropriate values for it }
				+  if (neg) then begin
				+    if (value = low(aInt)) then begin
				+      power := sizeof(aInt)*8-1;
				+      result := true;
				+      exit;
				+    end;
				+    value := -value;
				+  end;
				+
				+  { zero passes the "value and (value-1)" test although it has no bit set at
				+   all, so it has to be rejected explicitly }
				+  if (value = 0) or ((value and (value-1)) <> 0) then
				+    exit;
				+
				+  { exactly one bit is set: find its position }
				+  hl := 1;
				+  for i := 0 to (sizeof(aInt)*8-1) do begin
				+    if (hl = value) then begin
				+      result := true;
				+      power := i;
				+      exit;
				+    end;
				+    hl := hl shl 1;
				+  end;
				+end;
			
 
				+
			
 
				+
			
 
				 procedure tcgppc.init_register_allocators;
			
 
				 begin
			
 
				   inherited init_register_allocators;
			
@@ -438,7 +568,9 @@ procedure tcgppc.a_load_const_reg(list: taasmoutput; size: TCGSize; a: aint;
 
				   end;
			
 
				 
			
 
				   { R0-safe version of the above (ADDIS doesn't work the same way with R0 as base), without
			
 
				-    the return value }
			
 
				+   the return value. Unused until further testing shows that it is not really necessary;
			
 
				+   loading the upper 32 bits of a value is now done using R12, which does not require
			
 
				+   special treatment }
			
 
				   procedure load32bitconstantR0(list : taasmoutput; size : TCGSize; a : longint;
			
 
				     reg : TRegister);
			
 
				   begin
			
@@ -707,9 +839,86 @@ var
 
				     else
			
 
				       list.concat(taicpu.op_reg_reg_const(A_ANDI_, dst, src, word(a)));
			
 
				   end;
			
 
				+
			
 
				+  { Emits into list the code for dst := src div a, where a is a constant
				+   divisor known at compile time. No division instruction is generated:
				+     a = 1            : plain register move
				+     a = -1 (signed)  : negation (the overflow checking variant if
				+                        cs_check_overflow is active)
				+     a = +/- 2^power  : shift, plus addze (and neg) in the signed case
				+     everything else  : multiplication by a "magic" number followed by
				+                        shift/correction steps
				+   a = 0 raises internal error 2005061701.
				+
				+     list   : instruction list that receives the generated code
				+     size   : currently not evaluated, all operations are done with OS_INT
				+     a      : the constant divisor; for unsigned divisions the bit pattern
				+              of the unsigned divisor
				+     src    : register holding the dividend
				+     dst    : register receiving the quotient
				+     signed : true for a signed division (OP_IDIV), false for unsigned }
				+  procedure do_constant_div(list : taasmoutput; size : TCgSize; a : aint; src, dst : TRegister;
				+    signed : boolean);
				+  const
				+    negops : array[boolean] of tasmop = (A_NEG, A_NEGO);
				+  var
				+    magic, shift : int64;
				+    u_magic : qword;
				+    u_shift : byte;
				+    u_add : boolean;
				+    power : byte;
				+    isNegPower : boolean;
				+
				+    divreg : tregister;
				+  begin
				+    if (a = 0) then begin
				+      internalerror(2005061701);
				+    end else if (a = 1) then begin
				+      cg.a_load_reg_reg(list, OS_INT, OS_INT, src, dst);
				+    end else if (signed and (a = -1)) then begin
				+      { only valid in the signed case, may overflow. For an unsigned division
				+       a = -1 is the bit pattern of the largest unsigned divisor and must not
				+       be turned into a negation }
				+      list.concat(taicpu.op_reg_reg(negops[cs_check_overflow in aktlocalswitches], dst, src));
				+    end else if (ispowerof2(a, power, isNegPower) and
				+      (signed or (not isNegPower) or (a = low(aInt)))) then begin
				+      { in the unsigned case a negative a is a large unsigned number; of
				+       those only low(aInt), i.e. 2^(bits-1), really is a power of two }
				+      if (signed) then begin
				+        { From "The PowerPC Compiler Writer's Guide", pg. 52ff          }
				+        cg.a_op_const_reg_reg(list, OP_SAR, OS_INT, power,
				+          src, dst);
				+        list.concat(taicpu.op_reg_reg(A_ADDZE, dst, dst));
				+        if (isNegPower) then
				+          list.concat(taicpu.op_reg_reg(A_NEG, dst, dst));
				+      end else begin
				+        cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, power, src, dst)
				+      end;
				+    end else begin
				+      { replace division by multiplication, both implementations }
				+      { from "The PowerPC Compiler Writer's Guide" pg. 53ff      }
				+      divreg := cg.getintregister(list, OS_INT);
				+      if (signed) then begin
				+        getmagic_signedN(sizeof(aInt)*8, a, magic, shift);
				+        { load magic value }
				+        cg.a_load_const_reg(list, OS_INT, magic, divreg);
				+        { multiply }
				+        list.concat(taicpu.op_reg_reg_reg(A_MULHD, dst, src, divreg));
				+        { add/subtract numerator }
				+        if (a > 0) and (magic < 0) then begin
				+          cg.a_op_reg_reg_reg(list, OP_ADD, OS_INT, src, dst, dst);
				+        end else if (a < 0) and (magic > 0) then begin
				+          cg.a_op_reg_reg_reg(list, OP_SUB, OS_INT, src, dst, dst);
				+        end;
				+        { shift shift places to the right (arithmetic) }
				+        cg.a_op_const_reg_reg(list, OP_SAR, OS_INT, shift, dst, dst);
				+        { extract and add sign bit }
				+        if (a >= 0) then begin
				+          cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, 63, src, divreg);
				+        end else begin
				+          cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, 63, dst, divreg);
				+        end;
				+        cg.a_op_reg_reg_reg(list, OP_ADD, OS_INT, dst, divreg, dst);
				+      end else begin
				+        { pass the divisor as the unsigned number it stands for }
				+        getmagic_unsignedN(sizeof(aWord)*8, aWord(a), u_magic, u_add, u_shift);
				+        { load magic in divreg }
				+        cg.a_load_const_reg(list, OS_INT, u_magic, divreg);
				+        list.concat(taicpu.op_reg_reg_reg(A_MULHDU, dst, src, divreg));
				+        if (u_add) then begin
				+          { the magic number did not fit: compute                     }
				+          { ((src - hi) shr 1 + hi) shr (u_shift-1), hi = mulhdu result }
				+          cg.a_op_reg_reg_reg(list, OP_SUB, OS_INT, dst, src, divreg);
				+          cg.a_op_const_reg_reg(list, OP_SHR, OS_INT,  1, divreg, divreg);
				+          cg.a_op_reg_reg_reg(list, OP_ADD, OS_INT, divreg, dst, divreg);
				+          cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, u_shift-1, divreg, dst);
				+        end else begin
				+          cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, u_shift, dst, dst);
				+        end;
				+      end;
				+    end;
				+  end;
			
 
				+
			
 
				 var
			
 
				   scratchreg: tregister;
			
 
				-  shift, shiftmask : longint;
			
 
				+  shift : byte;
			
 
				+  shiftmask : longint;
			
 
				+  isneg : boolean;
			
 
				 
			
 
				 begin
			
 
				   { subtraction is the same as addition with negative constant }
			
@@ -725,13 +934,8 @@ begin
 
				   useReg := false;
			
 
				   case (op) of
			
 
				     OP_DIV, OP_IDIV:
			
 
				-      { actually, this method should be never called directly with OP_DIV or
			
 
				-       OP_IDIV, so just provide basic support.
			
 
				-       TODO: move division by constant stuff from nppcmat.pas here }    
			
 
				-      if (a = 0) then
			
 
				-        internalerror(200208103)
			
 
				-      else if (a = 1) then
			
 
				-        a_load_reg_reg(list, size, size, src, dst)
			
 
				+      if (cs_slowoptimize in aktglobalswitches) then
			
 
				+        do_constant_div(list, size, a, src, dst, op = OP_IDIV)
			
 
				       else
			
 
				         usereg := true; 
			
 
				     OP_IMUL, OP_MUL:
			
@@ -743,9 +947,11 @@ begin
 
				         list.concat(taicpu.op_reg_reg(A_NEG, dst, dst))
			
 
				       else if (a = 1) then
			
 
				         a_load_reg_reg(list, OS_INT, OS_INT, src, dst)
			
 
				-      else if ispowerof2(a, shift) then
			
 
				-        list.concat(taicpu.op_reg_reg_const(A_SLDI, dst, src, shift))
			
 
				-      else if (a >= low(smallint)) and (a <= high(smallint)) then
			
 
				+      else if ispowerof2(a, shift, isneg) then begin
			
 
				+        list.concat(taicpu.op_reg_reg_const(A_SLDI, dst, src, shift));
			
 
				+        if (isneg) then
			
 
				+          exprasmlist.concat(taicpu.op_reg_reg(A_NEG, dst, dst));
			
 
				+      end else if (a >= low(smallint)) and (a <= high(smallint)) then
			
 
				         list.concat(taicpu.op_reg_reg_const(A_MULLI, dst, src,
			
 
				           smallint(a)))
			
 
				       else
			
@@ -808,7 +1014,6 @@ end;
 
				 
			
 
				 procedure tcgppc.a_op_reg_reg_reg(list: taasmoutput; op: TOpCg;
			
 
				   size: tcgsize; src1, src2, dst: tregister);
			
 
				-
			
 
				 const
			
 
				   op_reg_reg_opcg2asmop32: array[TOpCG] of tasmop =
			
 
				   (A_NONE, A_ADD, A_AND, A_DIVWU, A_DIVW, A_MULLW, A_MULLW, A_NEG, A_NOT, A_OR,
			
@@ -816,7 +1021,6 @@ const
 
				   op_reg_reg_opcg2asmop64: array[TOpCG] of tasmop =
			
 
				   (A_NONE, A_ADD, A_AND, A_DIVDU, A_DIVD, A_MULLD, A_MULLD, A_NEG, A_NOT, A_OR,
			
 
				    A_SRAD, A_SLD, A_SRD, A_SUB, A_XOR);
			
 
				-
			
 
				 begin
			
 
				   case op of
			
 
				     OP_NEG, OP_NOT:
			
@@ -1559,7 +1763,7 @@ begin
 
				    least four. If not, add the bytes which are "off" to the base register and
			
 
				    adjust the offset accordingly }
			
 
				   case op of
			
 
				-    A_LD, A_LDU, A_STD, A_STDU, A_LWA, A_LWAU :
			
 
				+    A_LD, A_LDU, A_STD, A_STDU, A_LWA :
			
 
				      if ((ref.offset mod 4) <> 0) then begin
			
 
				        tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
			
 
				 
			
@@ -1621,7 +1825,8 @@ begin
 
				       tmpref.base := ref.base;
			
 
				       tmpref.index := tmpreg2;
			
 
				       case op of
			
 
				-        { the code generator doesn't generate update instructions anyway }
			
 
				+        { the code generator doesn't generate update instructions anyway, so 
			
 
				+        error out on those instructions }
			
 
				         A_LBZ : op := A_LBZX;
			
 
				         A_LHZ : op := A_LHZX;
			
 
				         A_LWZ : op := A_LWZX;
			
--- a/compiler/powerpc64/cpubase.pas
+++ b/compiler/powerpc64/cpubase.pas
@@ -96,7 +96,7 @@ type
 
				     A_RLDICL,
			
 
				     A_DIVDU, A_DIVDU_, A_DIVD, A_DIVD_, A_MULLD, A_MULLD_, A_MULHD, A_MULHD_, A_SRAD, A_SLD, A_SRD,
			
 
				     A_DIVDUO_, A_DIVDO_,
			
 
				-    A_LWA, A_LWAU, A_LWAX, A_LWAUX,
			
 
				+    A_LWA, A_LWAX, A_LWAUX,
			
 
				     A_FCFID,
			
 
				     A_LDARX, A_STDCX_, A_CNTLZD,
			
 
				     A_LVX, A_STVX,
			
--- a/compiler/powerpc64/itcpugas.pas
+++ b/compiler/powerpc64/itcpugas.pas
@@ -86,7 +86,7 @@ const
 
				     'rldicl',
			
 
				     'divdu', 'divdu.', 'divd', 'divd.', 'mulld', 'mulld.', 'mulhd', 'mulhd.', 'srad', 'sld', 'srd',
			
 
				     'divduo.', 'divdo.',
			
 
				-    'lwa', '<illegal lwau>', 'lwax', 'lwaux',
			
 
				+    'lwa', 'lwax', 'lwaux',
			
 
				     'fcfid',
			
 
				     'ldarx', 'stdcx.', 'cntlzd',
			
 
				     'lvx', 'stvx',
			
--- a/compiler/powerpc64/nppcinl.pas
+++ b/compiler/powerpc64/nppcinl.pas
@@ -36,7 +36,8 @@ type
 
				     }
			
 
				     function first_abs_real: tnode; override;
			
 
				     function first_sqr_real: tnode; override;
			
 
				-    { todo: inline trunc/round/frac?/int }
			
 
				+
			
 
				+    { trunc/round/frac?/int can't be inlined? }
			
 
				 
			
 
				     procedure second_abs_real; override;
			
 
				     procedure second_sqr_real; override;
			
--- a/compiler/powerpc64/nppcmat.pas
+++ b/compiler/powerpc64/nppcmat.pas
@@ -59,176 +59,6 @@ uses
 
				   cpubase, cpuinfo,
			
 
				   ncgutil, cgcpu, rgobj;
			
 
				 
			
 
				-{ helper functions }
			
 
				-procedure getmagic_unsigned32(d : dword; out magic_m : dword; out magic_add : boolean; out magic_shift : dword);
			
 
				-var
			
 
				-    p : longint;
			
 
				-    nc, delta, q1, r1, q2, r2 : dword;
			
 
				-    
			
 
				-begin
			
 
				-    assert(d > 0);
			
 
				-    
			
 
				-    magic_add := false;
			
 
				-    nc := - 1 - (-d) mod d;
			
 
				-    p := 31; { initialize p }
			
 
				-    q1 := $80000000 div nc; { initialize q1 = 2p/nc }
			
 
				-    r1 := $80000000 - q1*nc; { initialize r1 = rem(2p,nc) }
			
 
				-    q2 := $7FFFFFFF div d; { initialize q2 = (2p-1)/d }
			
 
				-    r2 := $7FFFFFFF - q2*d; { initialize r2 = rem((2p-1),d) }
			
 
				-    repeat
			
 
				-        inc(p);
			
 
				-        if (r1 >= (nc - r1)) then begin
			
 
				-            q1 := 2 * q1 + 1; { update q1 }
			
 
				-            r1 := 2*r1 - nc; { update r1 }
			
 
				-        end else begin
			
 
				-            q1 := 2*q1; { update q1 }
			
 
				-            r1 := 2*r1; { update r1 }
			
 
				-        end;
			
 
				-        if ((r2 + 1) >= (d - r2)) then begin
			
 
				-            if (q2 >= $7FFFFFFF) then
			
 
				-                magic_add := true;
			
 
				-            q2 := 2*q2 + 1; { update q2 }
			
 
				-            r2 := 2*r2 + 1 - d; { update r2 }
			
 
				-        end else begin
			
 
				-            if (q2 >= $80000000) then 
			
 
				-                magic_add := true;
			
 
				-            q2 := 2*q2; { update q2 }
			
 
				-            r2 := 2*r2 + 1; { update r2 }
			
 
				-        end;
			
 
				-        delta := d - 1 - r2;
			
 
				-    until not ((p < 64) and ((q1 < delta) or ((q1 = delta) and (r1 = 0))));
			
 
				-    magic_m := q2 + 1; { resulting magic number }
			
 
				-    magic_shift := p - 32; { resulting shift }
			
 
				-end;
			
 
				-
			
 
				-procedure getmagic_signed32(d : longint; out magic_m : longint; out magic_s : longint);
			
 
				-const
			
 
				-    two_31 : DWord = high(longint)+1;
			
 
				-var
			
 
				-    p : Longint;
			
 
				-    ad, anc, delta, q1, r1, q2, r2, t : DWord;
			
 
				-    
			
 
				-begin
			
 
				-    assert((d < -1) or (d > 1));
			
 
				-
			
 
				-    ad := abs(d);
			
 
				-    t := two_31 + (DWord(d) shr 31);
			
 
				-    anc := t - 1 - t mod ad; { absolute value of nc }
			
 
				-    p := 31; { initialize p }
			
 
				-    q1 := two_31 div anc; { initialize q1 = 2p/abs(nc) }
			
 
				-    r1 := two_31 - q1*anc; { initialize r1 = rem(2p,abs(nc)) }
			
 
				-    q2 := two_31 div ad; { initialize q2 = 2p/abs(d) }
			
 
				-    r2 := two_31 - q2*ad; { initialize r2 = rem(2p,abs(d)) }
			
 
				-    repeat 
			
 
				-        inc(p);
			
 
				-        q1 := 2*q1; { update q1 = 2p/abs(nc) }
			
 
				-        r1 := 2*r1; { update r1 = rem(2p/abs(nc)) }
			
 
				-        if (r1 >= anc) then begin { must be unsigned comparison }
			
 
				-            inc(q1);
			
 
				-            dec(r1, anc);
			
 
				-        end;
			
 
				-        q2 := 2*q2; { update q2 = 2p/abs(d) }
			
 
				-        r2 := 2*r2; { update r2 = rem(2p/abs(d)) }
			
 
				-        if (r2 >= ad) then begin { must be unsigned comparison }
			
 
				-            inc(q2);
			
 
				-            dec(r2, ad);
			
 
				-        end;
			
 
				-        delta := ad - r2;
			
 
				-    until not ((q1 < delta) or ((q1 = delta) and (r1 = 0)));
			
 
				-    magic_m := q2 + 1;
			
 
				-    if (d < 0) then begin
			
 
				-        magic_m := -magic_m; { resulting magic number }
			
 
				-    end;
			
 
				-    magic_s := p - 32; { resulting shift }
			
 
				-end;
			
 
				-
			
 
				-{ helper functions }
			
 
				-procedure getmagic_unsigned64(d : qword; out magic_m : qword; out magic_add : boolean; out magic_shift : qword);
			
 
				-const
			
 
				-  two_63 : QWord = $8000000000000000;  
			
 
				-var
			
 
				-    p : int64;
			
 
				-    nc, delta, q1, r1, q2, r2 : qword;
			
 
				-    
			
 
				-begin
			
 
				-  assert(d > 0);
			
 
				-    
			
 
				-  magic_add := false;
			
 
				-  nc := - 1 - (-d) mod d;
			
 
				-  p := 63; { initialize p }
			
 
				-  q1 := two_63 div nc; { initialize q1 = 2p/nc }
			
 
				-  r1 := two_63 - q1*nc; { initialize r1 = rem(2p,nc) }
			
 
				-  q2 := (two_63-1) div d; { initialize q2 = (2p-1)/d }
			
 
				-  r2 := (two_63-1) - q2*d; { initialize r2 = rem((2p-1),d) }
			
 
				-  repeat
			
 
				-    inc(p);
			
 
				-    if (r1 >= (nc - r1)) then begin
			
 
				-      q1 := 2 * q1 + 1; { update q1 }
			
 
				-      r1 := 2*r1 - nc; { update r1 }
			
 
				-    end else begin
			
 
				-      q1 := 2*q1; { update q1 }
			
 
				-      r1 := 2*r1; { update r1 }
			
 
				-    end;
			
 
				-    if ((r2 + 1) >= (d - r2)) then begin
			
 
				-      if (q2 >= (two_63-1)) then
			
 
				-        magic_add := true;
			
 
				-      q2 := 2*q2 + 1; { update q2 }
			
 
				-      r2 := 2*r2 + 1 - d; { update r2 }
			
 
				-    end else begin
			
 
				-      if (q2 >= two_63) then 
			
 
				-        magic_add := true;
			
 
				-      q2 := 2*q2; { update q2 }
			
 
				-      r2 := 2*r2 + 1; { update r2 }
			
 
				-    end;
			
 
				-    delta := d - 1 - r2;
			
 
				-  until not ((p < 128) and ((q1 < delta) or ((q1 = delta) and (r1 = 0))));
			
 
				-  magic_m := q2 + 1; { resulting magic number }
			
 
				-  magic_shift := p - 64; { resulting shift }
			
 
				-end;
			
 
				-
			
 
				-procedure getmagic_signed64(d : int64; out magic_m : int64; out magic_s : int64);
			
 
				-const
			
 
				-  two_63 : QWord = $8000000000000000;  
			
 
				-var
			
 
				-  p : int64;
			
 
				-  ad, anc, delta, q1, r1, q2, r2, t : QWord;
			
 
				-    
			
 
				-begin
			
 
				-  assert((d < -1) or (d > 1));
			
 
				-
			
 
				-  ad := abs(d);
			
 
				-  t := two_63 + (QWord(d) shr 63);
			
 
				-  anc := t - 1 - t mod ad; { absolute value of nc }
			
 
				-  p := 63; { initialize p }
			
 
				-  q1 := two_63 div anc; { initialize q1 = 2p/abs(nc) }
			
 
				-  r1 := two_63 - q1*anc; { initialize r1 = rem(2p,abs(nc)) }
			
 
				-  q2 := two_63 div ad; { initialize q2 = 2p/abs(d) }
			
 
				-  r2 := two_63 - q2*ad; { initialize r2 = rem(2p,abs(d)) }
			
 
				-  repeat 
			
 
				-    inc(p);
			
 
				-    q1 := 2*q1; { update q1 = 2p/abs(nc) }
			
 
				-    r1 := 2*r1; { update r1 = rem(2p/abs(nc)) }
			
 
				-    if (r1 >= anc) then begin { must be unsigned comparison }
			
 
				-      inc(q1);
			
 
				-      dec(r1, anc);
			
 
				-    end;
			
 
				-    q2 := 2*q2; { update q2 = 2p/abs(d) }
			
 
				-    r2 := 2*r2; { update r2 = rem(2p/abs(d)) }
			
 
				-    if (r2 >= ad) then begin { must be unsigned comparison }
			
 
				-      inc(q2);
			
 
				-      dec(r2, ad);
			
 
				-    end;
			
 
				-    delta := ad - r2;
			
 
				-  until not ((q1 < delta) or ((q1 = delta) and (r1 = 0)));
			
 
				-  magic_m := q2 + 1;
			
 
				-  if (d < 0) then begin
			
 
				-    magic_m := -magic_m; { resulting magic number }
			
 
				-  end;
			
 
				-  magic_s := p - 64; { resulting shift }
			
 
				-end;
			
 
				-
			
 
				-
			
 
				-
			
 
				 {*****************************************************************************
			
 
				                              TPPCMODDIVNODE
			
 
				 *****************************************************************************}
			
@@ -243,8 +73,13 @@ end;
 
				 procedure tppcmoddivnode.pass_2;
			
 
				 const         { signed   overflow }
			
 
				   divops: array[boolean, boolean] of tasmop =
			
 
				-    ((A_DIVDU,A_DIVDU_),(A_DIVD,A_DIVDO_));
			
 
				+    ((A_DIVDU, A_DIVDU_),(A_DIVD, A_DIVDO_));
			
 
				+  divcgops : array[boolean] of TOpCG = (OP_DIV, OP_IDIV);
			
 
				   zerocond: tasmcond = (dirhint: DH_Plus; simple: true; cond:C_NE; cr: RS_CR7);
			
 
				+  tcgsize2native : array[OS_8..OS_S128] of tcgsize = (
			
 
				+    OS_64, OS_64, OS_64, OS_64, OS_NO, 
			
 
				+    OS_S64, OS_S64, OS_S64, OS_S64, OS_NO
			
 
				+    );
			
 
				 var
			
 
				   power  : longint;
			
 
				   op  : tasmop;
			
@@ -254,78 +89,10 @@ var
 
				   hl : tasmlabel;
			
 
				   done: boolean;
			
 
				          
			
 
				-  procedure genOrdConstNodeDiv;
			
 
				-  const
			
 
				-    negops : array[boolean] of tasmop = (A_NEG, A_NEGO);
			
 
				-  var
			
 
				-    magic, shift : int64;
			
 
				-    u_magic, u_shift : qword;
			
 
				-    u_add : boolean;
			
 
				-             
			
 
				-    divreg : tregister;
			
 
				-  begin
			
 
				-    if (tordconstnode(right).value = 0) then begin
			
 
				-      internalerror(2005061701);
			
 
				-    end else if (tordconstnode(right).value = 1) then begin
			
 
				-      cg.a_load_reg_reg(exprasmlist, OS_INT, OS_INT, numerator, resultreg);
			
 
				-    end else if (tordconstnode(right).value = -1) then begin
			
 
				-      { note: only in the signed case possible..., may overflow }
			
 
				-      exprasmlist.concat(taicpu.op_reg_reg(negops[cs_check_overflow in aktlocalswitches], resultreg, numerator));
			
 
				-    end else if (ispowerof2(tordconstnode(right).value, power)) then begin
			
 
				-      if (is_signed(right.resulttype.def)) then begin
			
 
				-        { From "The PowerPC Compiler Writer's Guide", pg. 52ff          }
			
 
				-        cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_INT, power,
			
 
				-        numerator, resultreg);
			
 
				-        exprasmlist.concat(taicpu.op_reg_reg(A_ADDZE, resultreg, resultreg));
			
 
				-      end else begin
			
 
				-        cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, power, numerator, resultreg)
			
 
				-      end;
			
 
				-    end else begin
			
 
				-      { replace division by multiplication, both implementations }
			
 
				-      { from "The PowerPC Compiler Writer's Guide" pg. 53ff      }
			
 
				-      divreg := cg.getintregister(exprasmlist, OS_INT);
			
 
				-      if (is_signed(right.resulttype.def)) then begin
			
 
				-        getmagic_signed64(tordconstnode(right).value, magic, shift);
			
 
				-        { load magic value }
			
 
				-        cg.a_load_const_reg(exprasmlist, OS_INT, magic, divreg);
			
 
				-        { multiply }
			
 
				-        exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULHD, resultreg, numerator, divreg));
			
 
				-        { add/subtract numerator }
			
 
				-        if (tordconstnode(right).value > 0) and (magic < 0) then begin
			
 
				-          cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, numerator, resultreg, resultreg);
			
 
				-        end else if (tordconstnode(right).value < 0) and (magic > 0) then begin
			
 
				-          cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, numerator, resultreg, resultreg);
			
 
				-        end;
			
 
				-        { shift shift places to the right (arithmetic) }
			
 
				-        cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_INT, shift, resultreg, resultreg);                     
			
 
				-        { extract and add sign bit }
			
 
				-        if (tordconstnode(right).value >= 0) then begin
			
 
				-          cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 63, numerator, divreg);
			
 
				-        end else begin
			
 
				-          cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 63, resultreg, divreg);
			
 
				-        end;                     
			
 
				-        cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, resultreg, divreg, resultreg);
			
 
				-      end else begin
			
 
				-        getmagic_unsigned64(tordconstnode(right).value, u_magic, u_add, u_shift);
			
 
				-        { load magic in divreg }
			
 
				-        cg.a_load_const_reg(exprasmlist, OS_INT, u_magic, divreg);
			
 
				-        exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULHDU, resultreg, numerator, divreg));
			
 
				-        if (u_add) then begin
			
 
				-          cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, resultreg, numerator, divreg);
			
 
				-          cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT,  1, divreg, divreg);
			
 
				-          cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, divreg, resultreg, divreg);
			
 
				-          cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, u_shift-1, divreg, resultreg);
			
 
				-        end else begin
			
 
				-          cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, u_shift, resultreg, resultreg);
			
 
				-        end;
			
 
				-      end;
			
 
				-    end;
			
 
				-    done := true;
			
 
				-  end;
			
 
				-
			
 
				   procedure genOrdConstNodeMod;
			
 
				   var
			
 
				     modreg, maskreg, tempreg : tregister;
			
 
				+    isNegPower : boolean;
			
 
				   begin
			
 
				     if (tordconstnode(right).value = 0) then begin
			
 
				       internalerror(2005061702);
			
@@ -348,11 +115,14 @@ var
 
				         cg.a_op_reg_reg_reg(exprasmlist, OP_AND, OS_INT, modreg, maskreg, maskreg);
			
 
				         cg.a_op_reg_reg_reg(exprasmlist, OP_OR, OS_INT, maskreg, tempreg, resultreg);
			
 
				       end else begin
			
 
				-        cg.a_op_const_reg_reg(exprasmlist, OP_AND, OS_INT, tordconstnode(right).value-1, numerator, resultreg);
			
 
				+        cg.a_op_const_reg_reg(exprasmlist, OP_AND, OS_INT, tordconstnode(right).value-1, numerator, 
			
 
				+          resultreg);
			
 
				       end;
			
 
				     end else begin
			
 
				-      genOrdConstNodeDiv();
			
 
				-      cg.a_op_const_reg_reg(exprasmlist, OP_MUL, OS_INT, tordconstnode(right).value, resultreg, resultreg);
			
 
				+      cg.a_op_const_reg_reg(exprasmlist, divCgOps[is_signed(right.resulttype.def)], OS_INT, 
			
 
				+        tordconstnode(right).value, numerator, resultreg);
			
 
				+      cg.a_op_const_reg_reg(exprasmlist, OP_MUL, OS_INT, tordconstnode(right).value, resultreg, 
			
 
				+        resultreg);
			
 
				       cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, resultreg, numerator, resultreg);
			
 
				     end;
			
 
				   end;
			
@@ -380,20 +150,19 @@ begin
 
				     resultreg := cg.getintregister(exprasmlist,size);
			
 
				   end;
			
 
				   done := false;
			
 
				-(*
			
 
				-  if (right.nodetype = ordconstn) then begin
			
 
				+
			
 
				+  if (cs_slowoptimize in aktglobalswitches) and (right.nodetype = ordconstn) then begin
			
 
				     if (nodetype = divn) then
			
 
				-      genOrdConstNodeDiv
			
 
				-    else
			
 
				+      cg.a_op_const_reg_reg(exprasmlist, divCgOps[is_signed(right.resulttype.def)], 
			
 
				+        size, tordconstnode(right).value, numerator, resultreg)
			
 
				+    else 
			
 
				       genOrdConstNodeMod;
			
 
				     done := true;
			
 
				   end;
			
 
				-*)
			
 
				 
			
 
				   if (not done) then begin
			
 
				     { load divider in a register if necessary }
			
 
				-    location_force_reg(exprasmlist,right.location,
			
 
				-      def_cgsize(right.resulttype.def),true);
			
 
				+    location_force_reg(exprasmlist,right.location,def_cgsize(right.resulttype.def),true);
			
 
				     if (right.nodetype <> ordconstn) then
			
 
				       exprasmlist.concat(taicpu.op_reg_reg_const(A_CMPDI, NR_CR7,
			
 
				         right.location.register, 0))
			
@@ -403,13 +172,14 @@ begin
 
				     end;
			
 
				     divider := right.location.register;
			
 
				 
			
 
				-    { needs overflow checking, (-maxlongint-1) div (-1) overflows! }
			
 
				-    op := divops[is_signed(right.resulttype.def),
			
 
				-      cs_check_overflow in aktlocalswitches];
			
 
				+    { select the correct opcode according to the sign of the result, whether we need
			
 
				+     overflow checking }
			
 
				+    op := divops[is_signed(right.resulttype.def), cs_check_overflow in aktlocalswitches];
			
 
				     exprasmlist.concat(taicpu.op_reg_reg_reg(op, resultreg, numerator,
			
 
				       divider));
			
 
				 
			
 
				     if (nodetype = modn) then begin
			
 
				+      { multiply with the divisor again, taking care of the correct size }
			
 
				       exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULLD,resultreg,
			
 
				           divider,resultreg));
			
 
				       exprasmlist.concat(taicpu.op_reg_reg_reg(A_SUB,location.register,
			
@@ -433,111 +203,10 @@ begin
 
				     cg.g_overflowcheck(exprasmlist,location,resulttype.def);
			
 
				 end;
			
 
				 
			
 
				-(*
			
 
				-procedure tppcmoddivnode.pass_2;
			
 
				-const
			
 
				-  // ts: todo, use 32 bit operations if possible (much faster!)
			
 
				-  { signed   overflow }
			
 
				-  divops: array[boolean, boolean] of tasmop =
			
 
				-  ((A_DIVDU, A_DIVDUO_), (A_DIVD, A_DIVDO_));
			
 
				-  zerocond: tasmcond = (dirhint: DH_Plus; simple: true; cond: C_NE; cr: RS_CR1);
			
 
				-var
			
 
				-  power: longint;
			
 
				-  op: tasmop;
			
 
				-  numerator,
			
 
				-    divider,
			
 
				-    resultreg: tregister;
			
 
				-  size: Tcgsize;
			
 
				-  hl: tasmlabel;
			
 
				-
			
 
				-begin
			
 
				-  secondpass(left);
			
 
				-  secondpass(right);
			
 
				-  location_copy(location, left.location);
			
 
				-
			
 
				-  { put numerator in register }
			
 
				-  size := def_cgsize(left.resulttype.def);
			
 
				-  location_force_reg(exprasmlist, left.location,
			
 
				-    size, true);
			
 
				-  location_copy(location, left.location);
			
 
				-  numerator := location.register;
			
 
				-  resultreg := location.register;
			
 
				-  if (location.loc = LOC_CREGISTER) then
			
 
				-  begin
			
 
				-    location.loc := LOC_REGISTER;
			
 
				-    location.register := cg.getintregister(exprasmlist, size);
			
 
				-    resultreg := location.register;
			
 
				-  end;
			
 
				-  if (nodetype = modn) then
			
 
				-  begin
			
 
				-    resultreg := cg.getintregister(exprasmlist, size);
			
 
				-  end;
			
 
				-
			
 
				-  if (nodetype = divn) and
			
 
				-    (right.nodetype = ordconstn) and
			
 
				-    ispowerof2(tordconstnode(right).value, power) then
			
 
				-  begin
			
 
				-  	if (is_signed(right.resulttype.def)) then begin
			
 
				-      { From "The PowerPC Compiler Writer's Guide":                   }
			
 
				-      { This code uses the fact that, in the PowerPC architecture,    }
			
 
				-      { the shift right algebraic instructions set the Carry bit if   }
			
 
				-      { the source register contains a negative number and one or     }
			
 
				-      { more 1-bits are shifted out. Otherwise, the carry bit is      }
			
 
				-      { cleared. The addze instruction corrects the quotient, if      }
			
 
				-      { necessary, when the dividend is negative. For example, if     }
			
 
				-      { n = -13, (0xFFFF_FFF3), and k = 2, after executing the srawi  }
			
 
				-      { instruction, q = -4 (0xFFFF_FFFC) and CA = 1. After executing }
			
 
				-      { the addze instruction, q = -3, the correct quotient.          }
			
 
				-      cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_64, power,
			
 
				-        numerator, resultreg);
			
 
				-      exprasmlist.concat(taicpu.op_reg_reg(A_ADDZE, resultreg, resultreg));
			
 
				-    end else begin
			
 
				-      cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, power, numerator, resultreg);
			
 
				-    end;
			
 
				-  end else begin
			
 
				-    { load divider in a register if necessary }
			
 
				-    location_force_reg(exprasmlist, right.location,
			
 
				-      def_cgsize(right.resulttype.def), true);
			
 
				-    if (right.nodetype <> ordconstn) then
			
 
				-{$NOTE ts: testme}
			
 
				-      exprasmlist.concat(taicpu.op_reg_reg_const(A_CMPDI, NR_CR1,
			
 
				-        right.location.register, 0));
			
 
				-    divider := right.location.register;
			
 
				-
			
 
				-    { needs overflow checking, (-maxlongint-1) div (-1) overflows! }
			
 
				-    { And on PPC, the only way to catch a div-by-0 is by checking  }
			
 
				-    { the overflow flag (JM)                                       }
			
 
				-    op := divops[is_signed(right.resulttype.def),
			
 
				-      cs_check_overflow in aktlocalswitches];
			
 
				-    exprasmlist.concat(taicpu.op_reg_reg_reg(op, resultreg, numerator,
			
 
				-      divider));
			
 
				-
			
 
				-    if (nodetype = modn) then begin
			
 
				-{$NOTE ts:testme}
			
 
				-      exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULLD, resultreg,
			
 
				-        divider, resultreg));
			
 
				-      exprasmlist.concat(taicpu.op_reg_reg_reg(A_SUB, location.register,
			
 
				-        numerator, resultreg));
			
 
				-      resultreg := location.register;
			
 
				-    end;
			
 
				-  end;
			
 
				-  { set result location }
			
 
				-  location.loc := LOC_REGISTER;
			
 
				-  location.register := resultreg;
			
 
				-  if (right.nodetype <> ordconstn) then begin
			
 
				-    objectlibrary.getjumplabel(hl);
			
 
				-    exprasmlist.concat(taicpu.op_cond_sym(A_BC, zerocond, hl));
			
 
				-    cg.a_call_name(exprasmlist, 'FPC_DIVBYZERO');
			
 
				-    cg.a_label(exprasmlist, hl);
			
 
				-  end;
			
 
				-  cg.g_overflowcheck(exprasmlist, location, resulttype.def);
			
 
				-end;
			
 
				-*)
			
 
				 {*****************************************************************************
			
 
				                              TPPCSHLRSHRNODE
			
 
				 *****************************************************************************}
			
 
				 
			
 
				-
			
 
				 procedure tppcshlshrnode.pass_2;
			
 
				 
			
 
				 var