Browse Source

* division by a constant is now enabled with -O2 (previously only with -O3)
* large integer constants (>2 instructions to load) are put into the TOC as well

git-svn-id: trunk@2233 -

tom_at_work 19 years ago
parent
commit
6e290373c8
2 changed files with 111 additions and 48 deletions
  1. 109 46
      compiler/powerpc64/cgcpu.pas
  2. 2 2
      compiler/powerpc64/nppcmat.pas

+ 109 - 46
compiler/powerpc64/cgcpu.pas

@@ -118,7 +118,6 @@ type
 
 
     procedure g_intf_wrapper(list: TAAsmoutput; procdef: tprocdef; const
     procedure g_intf_wrapper(list: TAAsmoutput; procdef: tprocdef; const
       labelname: string; ioffset: longint); override;
       labelname: string; ioffset: longint); override;
-
   private
   private
 
 
     { Make sure ref is a valid reference for the PowerPC and sets the }
     { Make sure ref is a valid reference for the PowerPC and sets the }
@@ -156,6 +155,10 @@ type
     function hasLargeOffset(const ref : TReference) : Boolean; inline;
     function hasLargeOffset(const ref : TReference) : Boolean; inline;
 
 
     procedure a_call_name_direct(list: taasmoutput; s: string; prependDot : boolean; addNOP : boolean);
     procedure a_call_name_direct(list: taasmoutput; s: string; prependDot : boolean; addNOP : boolean);
+
+    { emits code to store the given value a into the TOC (if not already in there), and load it from there
+     as well }
+    procedure loadConstantPIC(list : taasmoutput; size : TCGSize; a : aint; reg : TRegister);
   end;
   end;
 
 
 const
 const
@@ -311,6 +314,49 @@ begin
   end;
   end;
 end;
 end;
 
 
+{ returns the number of instructions required to load the given integer into a register.
+ This is basically a stripped down version of a_load_const_reg, increasing a counter
+ instead of emitting instructions. The three cases counted in the body correspond to the
+ three cases of loadConstantNormal; a_load_const_reg compares the result against 2 to
+ decide whether the constant is loaded from the TOC instead. }
+function getInstructionLength(a : aint) : longint;
+
+  function get32bitlength(a : longint; var length : longint) : boolean; inline; { adds the
+   number of instructions needed for the 32 bit value a to length (one or two). The
+   boolean result is what the caller stores in extendssign; presumably it mirrors the
+   result of load32bitconstant, i.e. whether the load sign-extends into the upper
+   32 bits - TODO confirm against load32bitconstant }
+  var 
+    is_half_signed : byte; { 1 if the lower 16 bits, read as a signed value, are negative; 0 otherwise }
+  begin
+    { if the lower 16 bits are zero, do a single LIS }
+    if (smallint(a) = 0) and ((a shr 16) <> 0) then begin
+      inc(length); { the single LIS }
+      get32bitlength := longint(a) < 0;
+    end else begin
+      is_half_signed := ord(smallint(lo(a)) < 0);
+      inc(length); { the first instruction is always needed }
+      if smallint(hi(a) + is_half_signed) <> 0 then
+        inc(length); { a second instruction is only counted if the upper halfword, corrected by is_half_signed, is non-zero }
+      get32bitlength := (smallint(a) < 0) or (a < 0);
+    end;
+  end;
+
+var
+  extendssign : boolean; { result of counting the lower 32 bits, see get32bitlength }
+
+begin
+  result := 0;
+  if (lo(a) = 0) and (hi(a) <> 0) then begin { lower 32 bits are zero, upper 32 bits are not }
+    get32bitlength(hi(a), result); { load of the upper 32 bits; the boolean result is not needed here }
+    inc(result); { plus the shift into the upper half (SLDI in loadConstantNormal) }
+  end else begin
+    extendssign := get32bitlength(lo(a), result); { load of the lower 32 bits }
+    if (extendssign) and (hi(a) = 0) then
+      inc(result) { one instruction to clear the unwanted sign extension (a_load_reg_reg in loadConstantNormal) }
+    else if (not 
+      ((extendssign and (longint(hi(a)) = -1)) or 
+       ((not extendssign) and (hi(a)=0)))
+      ) then begin
+      get32bitlength(hi(a), result); { load of the upper 32 bits into a second register }
+      inc(result); { plus the instruction combining both halves (RLDIMI in loadConstantNormal) }
+    end;
+  end;
+end;
 
 
 procedure tcgppc.init_register_allocators;
 procedure tcgppc.init_register_allocators;
 begin
 begin
@@ -591,30 +637,42 @@ procedure tcgppc.a_load_const_reg(list: taasmoutput; size: TCGSize; a: aint;
     end;
     end;
   end;
   end;
 
 
-  { R0-safe version of the above (ADDIS doesn't work the same way with R0 as base), without
-   the return value. Unused until further testing shows that it is not really necessary;
-   loading the upper 32 bits of a value is now done using R12, which does not require
-   special treatment }
-  procedure load32bitconstantR0(list : taasmoutput; size : TCGSize; a : longint;
-    reg : TRegister);
+  { emits the code to load a constant by emitting various instructions into the output
+   code}
+  procedure loadConstantNormal(list: taasmoutput; size : TCgSize; a: aint; reg: TRegister);
+  var
+    extendssign : boolean;
   begin
   begin
-    { only 16 bit constant? (-2^15 <= a <= +2^15-1) }
-    if (a >= low(smallint)) and (a <= high(smallint)) then begin
-      list.concat(taicpu.op_reg_const(A_LI, reg, smallint(a)));
+    if (lo(a) = 0) and (hi(a) <> 0) then begin
+      { load only upper 32 bits, and shift }
+      load32bitconstant(list, size, hi(a), reg);
+      list.concat(taicpu.op_reg_reg_const(A_SLDI, reg, reg, 32));    
     end else begin
     end else begin
-      { check if we have to start with LI or LIS, load as 32 bit constant }
-      if ((a and $FFFF) <> 0) then begin
-        list.concat(taicpu.op_reg_const(A_LIS, reg, smallint(a shr 16)));
-        list.concat(taicpu.op_reg_reg_const(A_ORI, reg, reg, word(a)));
-      end else begin
-        list.concat(taicpu.op_reg_const(A_LIS, reg, smallint(a shr 16)));
+      { load lower 32 bits }
+      extendssign := load32bitconstant(list, size, lo(a), reg);
+      if (extendssign) and (hi(a) = 0) then
+        { if upper 32 bits are zero, but loading the lower 32 bit resulted in automatic 
+          sign extension, clear those bits }
+        a_load_reg_reg(list, OS_32, OS_64, reg, reg)
+      else if (not 
+        ((extendssign and (longint(hi(a)) = -1)) or 
+         ((not extendssign) and (hi(a)=0)))
+        ) then begin
+        { only load the upper 32 bits, if the automatic sign extension is not okay,
+          that is, _not_ if 
+          - loading the lower 32 bits resulted in -1 in the upper 32 bits, and the upper 
+           32 bits should contain -1
+          - loading the lower 32 bits resulted in 0 in the upper 32 bits, and the upper
+           32 bits should contain 0 }
+        load32bitconstant(list, size, hi(a), NR_R12);
+        { combine both registers }
+        list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, reg, NR_R12, 32, 0));
       end;
       end;
     end;
     end;
   end;
   end;
 
 
-var
-  extendssign : boolean;
   {$IFDEF EXTDEBUG}
   {$IFDEF EXTDEBUG}
+var
   astring : string;
   astring : string;
   {$ENDIF EXTDEBUG}
   {$ENDIF EXTDEBUG}
 
 
@@ -623,35 +681,15 @@ begin
   astring := 'a_load_const reg ' + inttostr(hi(a)) + ' ' + inttostr(lo(a)) + ' ' + inttostr(ord(size)) + ' ' + inttostr(tcgsize2size[size]);
   astring := 'a_load_const reg ' + inttostr(hi(a)) + ' ' + inttostr(lo(a)) + ' ' + inttostr(ord(size)) + ' ' + inttostr(tcgsize2size[size]);
   list.concat(tai_comment.create(strpnew(astring)));
   list.concat(tai_comment.create(strpnew(astring)));
   {$ENDIF EXTDEBUG}
   {$ENDIF EXTDEBUG}
-
   if not (size in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
   if not (size in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
     internalerror(2002090902);
     internalerror(2002090902);
-  if (lo(a) = 0) and (hi(a) <> 0) then begin
-    { load only upper 32 bits, and shift }
-    load32bitconstant(list, size, hi(a), reg);
-    list.concat(taicpu.op_reg_reg_const(A_SLDI, reg, reg, 32));    
-  end else begin
-    { load lower 32 bits }
-    extendssign := load32bitconstant(list, size, lo(a), reg);
-    if (extendssign) and (hi(a) = 0) then
-      { if upper 32 bits are zero, but loading the lower 32 bit resulted in automatic 
-        sign extension, clear those bits }
-      a_load_reg_reg(list, OS_32, OS_64, reg, reg)
-    else if (not 
-      ((extendssign and (longint(hi(a)) = -1)) or 
-       ((not extendssign) and (hi(a)=0)))
-      ) then begin
-      { only load the upper 32 bits, if the automatic sign extension is not okay,
-        that is, _not_ if 
-        - loading the lower 32 bits resulted in -1 in the upper 32 bits, and the upper 
-         32 bits should contain -1
-        - loading the lower 32 bits resulted in 0 in the upper 32 bits, and the upper
-         32 bits should contain 0 }
-      load32bitconstant(list, size, hi(a), NR_R12);
-      { combine both registers }
-      list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, reg, NR_R12, 32, 0));
-    end;
-  end;
+  { if PIC or basic optimizations are enabled, and the number of instructions which would be
+   required to load the value is greater than 2, store the value in the TOC and load it from there }
+  if (((cs_fastoptimize in aktglobalswitches) or (cs_create_pic in aktmoduleswitches)) and 
+    (getInstructionLength(a) > 2)) then
+    loadConstantPIC(list, size, a, reg)
+  else
+    loadConstantNormal(list, size, a, reg);
 end;
 end;
 
 
 procedure tcgppc.a_load_reg_ref(list: taasmoutput; fromsize, tosize: TCGSize;
 procedure tcgppc.a_load_reg_ref(list: taasmoutput; fromsize, tosize: TCGSize;
@@ -962,7 +1000,7 @@ begin
   useReg := false;
   useReg := false;
   case (op) of
   case (op) of
     OP_DIV, OP_IDIV:
     OP_DIV, OP_IDIV:
-      if (cs_slowoptimize in aktglobalswitches) then
+      if (cs_optimize in aktglobalswitches) then
         do_constant_div(list, size, a, src, dst, op = OP_IDIV)
         do_constant_div(list, size, a, src, dst, op = OP_IDIV)
       else
       else
         usereg := true; 
         usereg := true; 
@@ -2086,6 +2124,31 @@ begin
   result := aword(ref.offset-low(smallint)) > high(smallint)-low(smallint);
   result := aword(ref.offset-low(smallint)) > high(smallint)-low(smallint);
 end;
 end;
 
 
+procedure tcgppc.loadConstantPIC(list : taasmoutput; size : TCGSize; a : aint; reg : TRegister); { Loads
+  the constant a into reg through the TOC: if no symbol for the constant exists yet, a TOC
+  entry holding a is appended to asmlist[al_picdata]; afterwards a load of that entry,
+  addressed relative to R2, is emitted into list.
+  NOTE(review): the size parameter is not used in the body, the load below always uses
+  OS_INT for both sizes - confirm that this is intended. }
+var
+  l: tasmsymbol; { label symbol of the TOC entry }
+  ref: treference; { reference to the TOC entry used for the load }
+  symbol : string; { name of the TOC entry label }
+begin
+  symbol := 'toc$' + hexstr(a, sizeof(a)*2); { name is derived from the full-width hex representation of a, so equal constants map to the same name }
+  l:=objectlibrary.getasmsymbol(symbol);
+  if not(assigned(l)) then begin { no symbol of that name known yet: create the symbol and its TOC entry }
+    l:=objectlibrary.newasmsymbol(symbol,AB_LOCAL, AT_LABEL);
+    asmlist[al_picdata].concat(tai_section.create(sec_toc, '.toc', 8)); { presumably 8 is the section alignment - TODO confirm against tai_section.create }
+    asmlist[al_picdata].concat(tai_symbol.create(l,0));
+    asmlist[al_picdata].concat(tai_directive.create(asd_toc_entry, symbol + '[TC], ' + inttostr(a))); { directive text is the label name, the [TC] marker and the decimal value of a }
+  end;
+  reference_reset_symbol(ref,l,0);
+  ref.base := NR_R2; { R2 is presumably the TOC base register (PowerPC64 ELF ABI) - TODO confirm }
+  ref.refaddr := addr_pic;
+
+  {$IFDEF EXTDEBUG}
+  list.concat(tai_comment.create(strpnew('loading value from TOC reference for ' + symbol)));
+  {$ENDIF EXTDEBUG}
+  cg.a_load_ref_reg(list, OS_INT, OS_INT, ref, reg); { goes through the global cg instance rather than calling the method on self }
+end;
+
+
 begin
 begin
   cg := tcgppc.create;
   cg := tcgppc.create;
 end.
 end.

+ 2 - 2
compiler/powerpc64/nppcmat.pas

@@ -49,6 +49,7 @@ type
 implementation
 implementation
 
 
 uses
 uses
+  sysutils,
   globtype, systems,
   globtype, systems,
   cutils, verbose, globals,
   cutils, verbose, globals,
   symconst, symdef,
   symconst, symdef,
@@ -150,8 +151,7 @@ begin
     resultreg := cg.getintregister(exprasmlist,size);
     resultreg := cg.getintregister(exprasmlist,size);
   end;
   end;
   done := false;
   done := false;
-
-  if (cs_slowoptimize in aktglobalswitches) and (right.nodetype = ordconstn) then begin
+  if (cs_optimize in aktglobalswitches) and (right.nodetype = ordconstn) then begin
     if (nodetype = divn) then
     if (nodetype = divn) then
       cg.a_op_const_reg_reg(exprasmlist, divCgOps[is_signed(right.resulttype.def)], 
       cg.a_op_const_reg_reg(exprasmlist, divCgOps[is_signed(right.resulttype.def)], 
         size, tordconstnode(right).value, numerator, resultreg)
         size, tordconstnode(right).value, numerator, resultreg)