Pārlūkot izejas kodu

* jumptable support
* fixed cg for loading from addresses with symbol and large offset, but no base register

git-svn-id: trunk@1547 -

tom_at_work 20 gadi atpakaļ
vecāks
revīzija
9531293b89
2 mainītis faili ar 119 papildinājumiem un 48 dzēšanām
  1. 37 26
      compiler/powerpc64/cgcpu.pas
  2. 82 22
      compiler/powerpc64/nppcset.pas

+ 37 - 26
compiler/powerpc64/cgcpu.pas

@@ -146,6 +146,10 @@ type
     { returns the lowest numbered GP register in use, and the number of used GP registers
       for the current procedure }
     procedure calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint);
+
+    { returns true if the offset of the given reference can not be represented by a 16 bit
+    immediate as required by some PowerPC instructions }
+    function hasLargeOffset(const ref : TReference) : Boolean; inline;
   end;
 
 const
@@ -438,7 +442,7 @@ begin
       { combine both registers }
       list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, reg, NR_R0, 32, 0));
     end;
-  end;  
+  end;
 end;
 
 procedure tcgppc.a_load_reg_ref(list: taasmoutput; fromsize, tosize: TCGSize;
@@ -1072,7 +1076,6 @@ begin
       list.concat(taicpu.op_reg_reg_reg(A_STDUX, NR_R1, NR_R1, NR_R0));
     end;
   end;
-
   // CR register not used by FPC atm
 
   // keep R1 allocated???
@@ -1084,8 +1087,6 @@ procedure tcgppc.g_proc_exit(list: taasmoutput; parasize: longint; nostackframe:
 { This procedure may be called before, as well as after g_stackframe_entry }
 { is called. NOTE registers are not to be allocated through the register   }
 { allocator here, because the register colouring has already occurred !!   }
-
-
 var
   regcount, firstregfpu, firstreggpr: TSuperRegister;
   href: treference;
@@ -1177,7 +1178,7 @@ begin
   ref2 := ref;
   fixref(list, ref2, OS_64);
   { load a symbol }
-  if assigned(ref2.symbol) or (ref2.offset < low(smallint)) or (ref2.offset > high(smallint)) then begin
+  if assigned(ref2.symbol) or (hasLargeOffset(ref2)) then begin
       { add the symbol's value to the base of the reference, and if the }
       { reference doesn't have a base, create one                       }
       reference_reset(tmpref);
@@ -1416,14 +1417,14 @@ procedure tcgppc.g_intf_wrapper(list: TAAsmoutput; procdef: tprocdef; const
     { call/jmp  vmtoffs(%eax) ; method offs }
     reference_reset_base(href, NR_R11,
       procdef._class.vmtmethodoffset(procdef.extnumber));
-    if not ((aint(href.offset) >= low(smallint)) and
-      (aint(href.offset) <= high(smallint))) then begin
-      {$warning ts:adapt me for offsets > 16 bit }
+    if not (hasLargeOffset(href)) then begin
       list.concat(taicpu.op_reg_reg_const(A_ADDIS, NR_R11, NR_R11,
         smallint((href.offset shr 16) + ord(smallint(href.offset and $FFFF) <
         0))));
       href.offset := smallint(href.offset and $FFFF);
-    end;
+    end else
+      { add support for offsets > 16 bit }
+      internalerror(200510201);
     list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href));
     // the loaded reference is a function descriptor reference, so deref again
     // (at ofs 0 there's the real pointer)
@@ -1494,7 +1495,6 @@ begin
 end;
 
 function tcgppc.fixref(list: taasmoutput; var ref: treference; const size : TCgsize): boolean;
-
 var
   tmpreg: tregister;
   needsAlign : boolean;
@@ -1523,14 +1523,14 @@ var
   tmpref: treference;
   largeOffset: Boolean;
 begin
-  // at this point there must not be a combination of values in the ref treference
-  // which is not possible to directly map to instructions of the PowerPC architecture
+  { at this point there must not be a combination of values in the ref treference
+    which is not possible to directly map to instructions of the PowerPC architecture }
   if (ref.index <> NR_NO) and ((ref.offset <> 0) or (assigned(ref.symbol))) then
     internalerror(200310131);
-
-  // for some instructions we need to check that the offset is divisible by at
-  // least four. If not, add the bytes which are "off" to the base register and
-  // adjust the offset accordingly 
+ 
+  { for some instructions we need to check that the offset is divisible by at
+   least four. If not, add the bytes which are "off" to the base register and
+   adjust the offset accordingly }
   case op of
     A_LD, A_LDU, A_STD, A_STDU, A_LWA, A_LWAU :
      if ((ref.offset mod 4) <> 0) then begin
@@ -1547,26 +1547,31 @@ begin
      end;
   end;
 
-  // if we have to load/store from a symbol or large addresses, use a temporary register
-  // containing the address
-  if assigned(ref.symbol) or (ref.offset < low(smallint)) or (ref.offset > high(smallint)) then begin
+  { if we have to load/store from a symbol or large addresses, use a temporary register
+   containing the address }
+  if assigned(ref.symbol) or (hasLargeOffset(ref)) then begin
     tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
 
+    if (hasLargeOffset(ref) and (ref.base = NR_NO)) then begin
+      ref.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
+      a_load_const_reg(list, OS_ADDR, ref.offset, ref.base);
+      ref.offset := 0;
+    end;
+
     reference_reset(tmpref);
     tmpref.symbol := ref.symbol;
     tmpref.relsymbol := ref.relsymbol;
     tmpref.offset := ref.offset;
     if (ref.base <> NR_NO) then begin
-      {
-      As long as the TOC isn't working we try to achieve highest speed (in this
-      case by allowing instructions execute in parallel) as possible, at the cost
+      { As long as the TOC isn't working we try to achieve highest speed (in this
+      case by allowing instructions execute in parallel) as possible at the cost
       of using another temporary register. So the code template when there is
       a base register and an offset is the following:
 
       lis rT1, SYM+offs@highest
       ori rT1, rT1, SYM+offs@higher
-      lis rT2, SYM+offs@high
-      ori rT2, SYM+offs@low
+      lis rT2, SYM+offs@hi
+      ori rT2, SYM+offs@lo
       rldimi rT2, rT1, 32
 
       <op>X reg, base, rT2
@@ -1589,7 +1594,7 @@ begin
       tmpref.base := ref.base;
       tmpref.index := tmpreg2;
       case op of
-        // the code generator doesn't generate update instructions anyway
+        { the code generator doesn't generate update instructions anyway }
         A_LBZ : op := A_LBZX;
         A_LHZ : op := A_LHZX;
         A_LWZ : op := A_LWZX;
@@ -1607,7 +1612,7 @@ begin
         A_STFS : op := A_STFSX;
         A_STFD : op := A_STFDX;
         else
-          // unknown load/store opcode
+          { unknown load/store opcode }
           internalerror(2005101302);
       end;
       list.concat(taicpu.op_reg_ref(op, reg, tmpref));
@@ -1652,6 +1657,12 @@ begin
   list.concat(p)
 end;
 
+function tcgppc.hasLargeOffset(const ref : TReference) : Boolean;
+const
+  { distance between the smallest and the largest signed 16 bit value }
+  smallint_span = high(smallint)-low(smallint);
+begin
+  { Bias the offset so that low(smallint) maps to zero and reinterpret the
+    result as an unsigned word: offsets inside the smallint range end up in
+    0..smallint_span, everything else compares greater. A single unsigned
+    comparison therefore replaces the pair of signed bound checks.
+    NOTE(review): the original comment states this form is required because
+    TReference offsets are unsigned - confirm against the TReference declaration. }
+  hasLargeOffset := aword(ref.offset-low(smallint)) > smallint_span;
+end;
+
 begin
   cg := tcgppc.create;
 end.

+ 82 - 22
compiler/powerpc64/nppcset.pas

@@ -26,33 +26,103 @@ unit nppcset;
 interface
 
 uses
-  node, nset, ncgset, cpubase, cgbase, cgobj, aasmbase, aasmtai;
+  node, nset, ncgset, cpubase, cgbase, cgobj, aasmbase, aasmtai, globtype;
 
 type
 
   tppccasenode = class(tcgcasenode)
+  { case node for this target; overrides code generation hooks inherited from tcgcasenode }
   protected
+    { sets max_linear_list to 10 in this unit; max_dist is not modified }
+    procedure optimizevalues(var max_linear_list : aint; var max_dist : aword); override;
+
+    { always returns true in this unit }
+    function has_jumptable : boolean; override;
+    { emits range checks, an indexed load from a label table and a branch through CTR,
+      then appends the table to the local data of the current procedure }
+    procedure genjumptable(hp: pcaselabel; min_, max_ : aint); override;
+    { NOTE(review): implementation only partially visible in this change - presumably emits
+      a linear chain of compares and branches; confirm against the full unit }
     procedure genlinearlist(hp: pcaselabel); override;
   end;
 
 implementation
 
 uses
-  globtype, systems,
+  systems,
   verbose, globals,
   symconst, symdef, defutil,
   paramgr,
   cpuinfo,
   pass_2, cgcpu,
   ncon,
-  tgobj, ncgutil, regvars, rgobj, aasmcpu;
+  tgobj, ncgutil, regvars, rgobj, aasmcpu,
+  procinfo, cgutils;
 
 {*****************************************************************************
                             TCGCASENODE
 *****************************************************************************}
 
-procedure tppccasenode.genlinearlist(hp: pcaselabel);
+procedure tppccasenode.optimizevalues(var max_linear_list : aint; var max_dist : aword);
+const
+  { value reported back through max_linear_list for this target }
+  linear_list_limit = 10;
+begin
+  { only max_linear_list is overridden; max_dist keeps whatever the caller passed in }
+  max_linear_list := linear_list_limit;
+end;
+
+function tppccasenode.has_jumptable : boolean;
+begin
+  { this target implements genjumptable, so always answer yes }
+  Result := true;
+end;
 
+procedure tppccasenode.genjumptable(hp : pcaselabel; min_, max_ : aint);
+{ Emits a table driven dispatch for the case statement:
+    1. unless jumptable_no_range is set, compare the case expression against
+       min_ and max_ and branch to elselabel when it is out of range,
+    2. scale the expression by the pointer size and load the matching entry
+       from a label table,
+    3. move the loaded value to CTR and branch through it,
+    4. append the label table to the local data of the current procedure.
+  hp   : root of the case label tree (walked via its less/greater links)
+  min_ : lower bound used for the range check and as origin of the table
+  max_ : upper bound used for the range check }
+var
+  table : tasmlabel;
+  last : TConstExprInt;
+  indexreg : tregister;
+  href : treference;
+
+  { Appends the table entries for the subtree rooted at t to list, visiting
+    less, then t itself, then greater. last tracks the upper bound of the
+    previously emitted label and is initialised to min_ by the caller. }
+  procedure genitem(list:taasmoutput;t : pcaselabel);
+  var
+    i : aint;
+  begin
+    if assigned(t^.less) then
+      genitem(list,t^.less);
+    { fill a possible hole between the previous label and this one with
+      entries pointing to elselabel }
+    for i:=last+1 to t^._low-1 do
+      list.concat(Tai_const.Create_sym(elselabel));
+    { one entry per value of the range _low.._high, all pointing to the block of this label }
+    for i:=t^._low to t^._high do
+      list.concat(Tai_const.Create_sym(blocklabel(t^.blockid)));
+    last:=t^._high;
+    if assigned(t^.greater) then
+      genitem(list,t^.greater);
+  end;
+
+begin
+  { this is exactly the same code as for 32 bit PowerPC processors. It might be useful to change this
+   later (with e.g. TOC support) into a method which uses relative values in the jumptable to save space
+   and memory bandwidth. At the moment this is not a good idea, since these methods involve loading of
+   one or more 64 bit integer addresses which is slow }
+  if not(jumptable_no_range) then begin
+    { case expr less than min_ => goto elselabel }
+    cg.a_cmp_const_reg_label(exprasmlist,opsize,jmp_lt,aint(min_),hregister,elselabel);
+    { case expr greater than max_ => goto elselabel }
+    cg.a_cmp_const_reg_label(exprasmlist,opsize,jmp_gt,aint(max_),hregister,elselabel);
+  end;
+  { label that marks the start of the jump table }
+  objectlibrary.getjumplabel(table);
+  { obtain an OS_INT sized index register for hregister and load the case expression into it }
+  indexreg:= cg.makeregsize(exprasmlist, hregister, OS_INT);
+  { indexreg := hregister; }
+  cg.a_load_reg_reg(exprasmlist, opsize, OS_INT, hregister, indexreg);
+  { create reference, indexreg := indexreg * sizeof(OS_ADDR) }
+  cg.a_op_const_reg(exprasmlist, OP_MUL, OS_INT, tcgsize2size[OS_ADDR], indexreg);
+  { the offset is biased by -min_ entries so that the value min_ selects the first table entry }
+  reference_reset_symbol(href, table, (-aint(min_)) * tcgsize2size[OS_ADDR]);
+  href.index := indexreg;
+
+  { load the table entry, reusing indexreg as the destination }
+  cg.a_load_ref_reg(exprasmlist, OS_INT, OS_INT, href, indexreg);
+
+  { indirect branch: move the loaded entry to CTR and branch through it }
+  exprasmlist.concat(taicpu.op_reg(A_MTCTR, indexreg));
+  exprasmlist.concat(taicpu.op_none(A_BCTR));
+
+  { generate jump table in a data section of the current procedure }
+  new_section(current_procinfo.aktlocaldata,sec_data,current_procinfo.procdef.mangledname,sizeof(aint));
+  current_procinfo.aktlocaldata.concat(Tai_label.Create(table));
+  last:=min_;
+  genitem(current_procinfo.aktlocaldata,hp);
+end;
+
+procedure tppccasenode.genlinearlist(hp: pcaselabel);
 var
   first, lastrange: boolean;
   last: TConstExprInt;
@@ -81,13 +151,11 @@ var
     if assigned(t^.less) then
       genitem(t^.less);
     { need we to test the first value }
-    if first and (t^._low > get_min_value(left.resulttype.def)) then
-    begin
+    if first and (t^._low > get_min_value(left.resulttype.def)) then begin
       cg.a_cmp_const_reg_label(exprasmlist, OS_INT, jmp_lt, aword(t^._low),
         hregister, elselabel);
     end;
-    if t^._low = t^._high then
-    begin
+    if t^._low = t^._high then begin
       if t^._low - last = 0 then
         cg.a_cmp_const_reg_label(exprasmlist, opsize, OC_EQ, 0, hregister,
           blocklabel(t^.blockid))
@@ -96,26 +164,20 @@ var
       tcgppc(cg).a_jmp_cond(exprasmlist, OC_EQ, blocklabel(t^.blockid));
       last := t^._low;
       lastrange := false;
-    end
-    else
-    begin
+    end else begin
       { it begins with the smallest label, if the value }
       { is even smaller then jump immediately to the    }
       { ELSE-label                                }
-      if first then
-      begin
+      if first then begin
         { do we have to adjust the first value? }
         if (t^._low > get_min_value(left.resulttype.def)) then
           gensub(aint(t^._low));
-      end
-      else
-      begin
+      end else begin
         { if there is no unused label between the last and the }
         { present label then the lower limit can be checked    }
         { immediately. else check the range in between:       }
         gensub(aint(t^._low - last));
-        if ((t^._low - last) <> 1) or
-          (not lastrange) then
+        if ((t^._low - last) <> 1) or (not lastrange) then
           tcgppc(cg).a_jmp_cond(exprasmlist, jmp_lt, elselabel);
       end;
       gensub(aint(t^._high - t^._low));
@@ -130,11 +192,9 @@ var
 
 begin
   { do we need to generate cmps? }
-  if (with_sign and (min_label < 0)) or
-    (opsize = OS_32) then
+  if (with_sign and (min_label < 0)) or (opsize = OS_32) then
     genlinearcmplist(hp)
-  else
-  begin
+  else begin
     last := 0;
     lastrange := false;
     first := true;