Browse Source

* jumptable support
* fixed cg for loading from addresses with symbol and large offset, but no base register

git-svn-id: trunk@1547 -

tom_at_work 20 years ago
parent
commit
9531293b89
2 changed files with 119 additions and 48 deletions
  1. 37 26
      compiler/powerpc64/cgcpu.pas
  2. 82 22
      compiler/powerpc64/nppcset.pas

+ 37 - 26
compiler/powerpc64/cgcpu.pas

@@ -146,6 +146,10 @@ type
     { returns the lowest numbered GP register in use, and the number of used GP registers
     { returns the lowest numbered GP register in use, and the number of used GP registers
       for the current procedure }
       for the current procedure }
     procedure calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint);
     procedure calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint);
+
+    { returns true if the offset of the given reference can not be represented by a 16 bit
+    immediate as required by some PowerPC instructions }
+    function hasLargeOffset(const ref : TReference) : Boolean; inline;
   end;
   end;
 
 
 const
 const
@@ -438,7 +442,7 @@ begin
       { combine both registers }
       { combine both registers }
       list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, reg, NR_R0, 32, 0));
       list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, reg, NR_R0, 32, 0));
     end;
     end;
-  end;  
+  end;
 end;
 end;
 
 
 procedure tcgppc.a_load_reg_ref(list: taasmoutput; fromsize, tosize: TCGSize;
 procedure tcgppc.a_load_reg_ref(list: taasmoutput; fromsize, tosize: TCGSize;
@@ -1072,7 +1076,6 @@ begin
       list.concat(taicpu.op_reg_reg_reg(A_STDUX, NR_R1, NR_R1, NR_R0));
       list.concat(taicpu.op_reg_reg_reg(A_STDUX, NR_R1, NR_R1, NR_R0));
     end;
     end;
   end;
   end;
-
   // CR register not used by FPC atm
   // CR register not used by FPC atm
 
 
   // keep R1 allocated???
   // keep R1 allocated???
@@ -1084,8 +1087,6 @@ procedure tcgppc.g_proc_exit(list: taasmoutput; parasize: longint; nostackframe:
 { This procedure may be called before, as well as after g_stackframe_entry }
 { This procedure may be called before, as well as after g_stackframe_entry }
 { is called. NOTE registers are not to be allocated through the register   }
 { is called. NOTE registers are not to be allocated through the register   }
 { allocator here, because the register colouring has already occurred !!   }
 { allocator here, because the register colouring has already occurred !!   }
-
-
 var
 var
   regcount, firstregfpu, firstreggpr: TSuperRegister;
   regcount, firstregfpu, firstreggpr: TSuperRegister;
   href: treference;
   href: treference;
@@ -1177,7 +1178,7 @@ begin
   ref2 := ref;
   ref2 := ref;
   fixref(list, ref2, OS_64);
   fixref(list, ref2, OS_64);
   { load a symbol }
   { load a symbol }
-  if assigned(ref2.symbol) or (ref2.offset < low(smallint)) or (ref2.offset > high(smallint)) then begin
+  if assigned(ref2.symbol) or (hasLargeOffset(ref2)) then begin
       { add the symbol's value to the base of the reference, and if the }
       { add the symbol's value to the base of the reference, and if the }
       { reference doesn't have a base, create one                       }
       { reference doesn't have a base, create one                       }
       reference_reset(tmpref);
       reference_reset(tmpref);
@@ -1416,14 +1417,14 @@ procedure tcgppc.g_intf_wrapper(list: TAAsmoutput; procdef: tprocdef; const
     { call/jmp  vmtoffs(%eax) ; method offs }
     { call/jmp  vmtoffs(%eax) ; method offs }
     reference_reset_base(href, NR_R11,
     reference_reset_base(href, NR_R11,
       procdef._class.vmtmethodoffset(procdef.extnumber));
       procdef._class.vmtmethodoffset(procdef.extnumber));
-    if not ((aint(href.offset) >= low(smallint)) and
-      (aint(href.offset) <= high(smallint))) then begin
-      {$warning ts:adapt me for offsets > 16 bit }
+    if not (hasLargeOffset(href)) then begin
       list.concat(taicpu.op_reg_reg_const(A_ADDIS, NR_R11, NR_R11,
       list.concat(taicpu.op_reg_reg_const(A_ADDIS, NR_R11, NR_R11,
         smallint((href.offset shr 16) + ord(smallint(href.offset and $FFFF) <
         smallint((href.offset shr 16) + ord(smallint(href.offset and $FFFF) <
         0))));
         0))));
       href.offset := smallint(href.offset and $FFFF);
       href.offset := smallint(href.offset and $FFFF);
-    end;
+    end else
+      { add support for offsets > 16 bit }
+      internalerror(200510201);
     list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href));
     list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href));
     // the loaded reference is a function descriptor reference, so deref again
     // the loaded reference is a function descriptor reference, so deref again
     // (at ofs 0 there's the real pointer)
     // (at ofs 0 there's the real pointer)
@@ -1494,7 +1495,6 @@ begin
 end;
 end;
 
 
 function tcgppc.fixref(list: taasmoutput; var ref: treference; const size : TCgsize): boolean;
 function tcgppc.fixref(list: taasmoutput; var ref: treference; const size : TCgsize): boolean;
-
 var
 var
   tmpreg: tregister;
   tmpreg: tregister;
   needsAlign : boolean;
   needsAlign : boolean;
@@ -1523,14 +1523,14 @@ var
   tmpref: treference;
   tmpref: treference;
   largeOffset: Boolean;
   largeOffset: Boolean;
 begin
 begin
-  // at this point there must not be a combination of values in the ref treference
-  // which is not possible to directly map to instructions of the PowerPC architecture
+  { at this point there must not be a combination of values in the ref treference
+    which is not possible to directly map to instructions of the PowerPC architecture }
   if (ref.index <> NR_NO) and ((ref.offset <> 0) or (assigned(ref.symbol))) then
   if (ref.index <> NR_NO) and ((ref.offset <> 0) or (assigned(ref.symbol))) then
     internalerror(200310131);
     internalerror(200310131);
-
-  // for some instructions we need to check that the offset is divisible by at
-  // least four. If not, add the bytes which are "off" to the base register and
-  // adjust the offset accordingly 
+ 
+  { for some instructions we need to check that the offset is divisible by at
+   least four. If not, add the bytes which are "off" to the base register and
+   adjust the offset accordingly }
   case op of
   case op of
     A_LD, A_LDU, A_STD, A_STDU, A_LWA, A_LWAU :
     A_LD, A_LDU, A_STD, A_STDU, A_LWA, A_LWAU :
      if ((ref.offset mod 4) <> 0) then begin
      if ((ref.offset mod 4) <> 0) then begin
@@ -1547,26 +1547,31 @@ begin
      end;
      end;
   end;
   end;
 
 
-  // if we have to load/store from a symbol or large addresses, use a temporary register
-  // containing the address
-  if assigned(ref.symbol) or (ref.offset < low(smallint)) or (ref.offset > high(smallint)) then begin
+  { if we have to load/store from a symbol or large addresses, use a temporary register
+   containing the address }
+  if assigned(ref.symbol) or (hasLargeOffset(ref)) then begin
     tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
     tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
 
 
+    if (hasLargeOffset(ref) and (ref.base = NR_NO)) then begin
+      ref.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
+      a_load_const_reg(list, OS_ADDR, ref.offset, ref.base);
+      ref.offset := 0;
+    end;
+
     reference_reset(tmpref);
     reference_reset(tmpref);
     tmpref.symbol := ref.symbol;
     tmpref.symbol := ref.symbol;
     tmpref.relsymbol := ref.relsymbol;
     tmpref.relsymbol := ref.relsymbol;
     tmpref.offset := ref.offset;
     tmpref.offset := ref.offset;
     if (ref.base <> NR_NO) then begin
     if (ref.base <> NR_NO) then begin
-      {
-      As long as the TOC isn't working we try to achieve highest speed (in this
-      case by allowing instructions execute in parallel) as possible, at the cost
+      { As long as the TOC isn't working we try to achieve highest speed (in this
+      case by allowing instructions execute in parallel) as possible at the cost
       of using another temporary register. So the code template when there is
       of using another temporary register. So the code template when there is
       a base register and an offset is the following:
       a base register and an offset is the following:
 
 
       lis rT1, SYM+offs@highest
       lis rT1, SYM+offs@highest
       ori rT1, rT1, SYM+offs@higher
       ori rT1, rT1, SYM+offs@higher
-      lis rT2, SYM+offs@high
-      ori rT2, SYM+offs@low
+      lis rT2, SYM+offs@hi
+      ori rT2, SYM+offs@lo
       rldimi rT2, rT1, 32
       rldimi rT2, rT1, 32
 
 
       <op>X reg, base, rT2
       <op>X reg, base, rT2
@@ -1589,7 +1594,7 @@ begin
       tmpref.base := ref.base;
       tmpref.base := ref.base;
       tmpref.index := tmpreg2;
       tmpref.index := tmpreg2;
       case op of
       case op of
-        // the code generator doesn't generate update instructions anyway
+        { the code generator doesn't generate update instructions anyway }
         A_LBZ : op := A_LBZX;
         A_LBZ : op := A_LBZX;
         A_LHZ : op := A_LHZX;
         A_LHZ : op := A_LHZX;
         A_LWZ : op := A_LWZX;
         A_LWZ : op := A_LWZX;
@@ -1607,7 +1612,7 @@ begin
         A_STFS : op := A_STFSX;
         A_STFS : op := A_STFSX;
         A_STFD : op := A_STFDX;
         A_STFD : op := A_STFDX;
         else
         else
-          // unknown load/store opcode
+          { unknown load/store opcode }
           internalerror(2005101302);
           internalerror(2005101302);
       end;
       end;
       list.concat(taicpu.op_reg_ref(op, reg, tmpref));
       list.concat(taicpu.op_reg_ref(op, reg, tmpref));
@@ -1652,6 +1657,12 @@ begin
   list.concat(p)
   list.concat(p)
 end;
 end;
 
 
+function tcgppc.hasLargeOffset(const ref : TReference) : Boolean;
+const
+  { distance between the largest and the smallest value of a signed 16 bit immediate }
+  simm16_span = high(smallint) - low(smallint);
+var
+  biased : aword;
+begin
+  { bias the offset so that low(smallint) maps to zero: after the cast to an
+    unsigned word a single comparison rejects offsets below as well as above
+    the smallint range }
+  biased := aword(ref.offset - low(smallint));
+  result := biased > simm16_span;
+end;
+
 begin
 begin
   cg := tcgppc.create;
   cg := tcgppc.create;
 end.
 end.

+ 82 - 22
compiler/powerpc64/nppcset.pas

@@ -26,33 +26,103 @@ unit nppcset;
 interface
 interface
 
 
 uses
 uses
-  node, nset, ncgset, cpubase, cgbase, cgobj, aasmbase, aasmtai;
+  node, nset, ncgset, cpubase, cgbase, cgobj, aasmbase, aasmtai, globtype;
 
 
 type
 type
 
 
   tppccasenode = class(tcgcasenode)
   tppccasenode = class(tcgcasenode)
   protected
   protected
+    { sets max_linear_list to 10 and leaves max_dist unchanged }
+    procedure optimizevalues(var max_linear_list : aint; var max_dist : aword); override;
+
+    { always returns true }
+    function has_jumptable : boolean; override;
+    { emits a table based dispatch: optional range checks against min_/max_,
+      an indirect branch through the table, and the table data itself }
+    procedure genjumptable(hp: pcaselabel; min_, max_ : aint); override;
     procedure genlinearlist(hp: pcaselabel); override;
     procedure genlinearlist(hp: pcaselabel); override;
   end;
   end;
 
 
 implementation
 implementation
 
 
 uses
 uses
-  globtype, systems,
+  systems,
   verbose, globals,
   verbose, globals,
   symconst, symdef, defutil,
   symconst, symdef, defutil,
   paramgr,
   paramgr,
   cpuinfo,
   cpuinfo,
   pass_2, cgcpu,
   pass_2, cgcpu,
   ncon,
   ncon,
-  tgobj, ncgutil, regvars, rgobj, aasmcpu;
+  tgobj, ncgutil, regvars, rgobj, aasmcpu,
+  procinfo, cgutils;
 
 
 {*****************************************************************************
 {*****************************************************************************
                             TCGCASENODE
                             TCGCASENODE
 *****************************************************************************}
 *****************************************************************************}
 
 
-procedure tppccasenode.genlinearlist(hp: pcaselabel);
+procedure tppccasenode.optimizevalues(var max_linear_list : aint; var max_dist : aword);
+const
+  { value handed back through max_linear_list }
+  linear_list_limit = 10;
+begin
+  { only max_linear_list is overridden; max_dist keeps whatever the caller passed in }
+  max_linear_list := linear_list_limit;
+end;
+
+function tppccasenode.has_jumptable : boolean;
+const
+  jumptable_available = true;
+begin
+  { this node class overrides genjumptable, so the answer is unconditionally true }
+  has_jumptable := jumptable_available;
+end;
 
 
+{ Generates a jump table dispatch for a case statement.
+
+  Code emitted into exprasmlist: unless jumptable_no_range is set, two compares
+  that branch to elselabel when the case expression is below min_ or above max_;
+  then the expression is scaled by the size of an address, the matching table
+  entry is loaded and an indirect branch through the count register is done.
+  The table itself is appended to current_procinfo.aktlocaldata.
+
+  hp   : root of the case label tree (walked through its less/greater links)
+  min_ : value compared against for the lower range check; also the value that
+         maps to the first table entry
+  max_ : value compared against for the upper range check }
+procedure tppccasenode.genjumptable(hp : pcaselabel; min_, max_ : aint);
+var
+  table : tasmlabel;
+  last : TConstExprInt;
+  indexreg : tregister;
+  href : treference;
+
+  { walks the label tree in order (less, node, greater) and appends one address
+    entry per case value to list; "last" tracks the highest value emitted so far }
+  procedure genitem(list:taasmoutput;t : pcaselabel);
+  var
+    i : aint;
+  begin
+    if assigned(t^.less) then
+      genitem(list,t^.less);
+    { fill possible hole: values between the previous label and this one jump to elselabel }
+    for i:=last+1 to t^._low-1 do
+      list.concat(Tai_const.Create_sym(elselabel));
+    { one entry for every value covered by this label, all pointing at its block }
+    for i:=t^._low to t^._high do
+      list.concat(Tai_const.Create_sym(blocklabel(t^.blockid)));
+    last:=t^._high;
+    if assigned(t^.greater) then
+      genitem(list,t^.greater);
+  end;
+
+begin
+  { this is exactly the same code as for 32 bit PowerPC processors. It might be useful to change this
+   later (with e.g. TOC support) into a method which uses relative values in the jumptable to save space
+   and memory bandwidth. At the moment this is not a good idea, since these methods involve loading of
+   one or more 64 bit integer addresses which is slow }
+  if not(jumptable_no_range) then begin
+    { case expr less than min_ => goto elselabel }
+    cg.a_cmp_const_reg_label(exprasmlist,opsize,jmp_lt,aint(min_),hregister,elselabel);
+    { case expr greater than max_ => goto elselabel }
+    cg.a_cmp_const_reg_label(exprasmlist,opsize,jmp_gt,aint(max_),hregister,elselabel);
+  end;
+  objectlibrary.getjumplabel(table);
+  { allocate the index register }
+  indexreg:= cg.makeregsize(exprasmlist, hregister, OS_INT);
+  { indexreg := hregister; }
+  cg.a_load_reg_reg(exprasmlist, opsize, OS_INT, hregister, indexreg);
+  { create reference, indexreg := indexreg * sizeof(OS_ADDR) }
+  cg.a_op_const_reg(exprasmlist, OP_MUL, OS_INT, tcgsize2size[OS_ADDR], indexreg);
+  { the negative offset compensates for min_, so the value min_ selects the first table entry }
+  reference_reset_symbol(href, table, (-aint(min_)) * tcgsize2size[OS_ADDR]);
+  href.index := indexreg;
+
+  { fetch the table entry; the result overwrites the scaled index in indexreg }
+  cg.a_load_ref_reg(exprasmlist, OS_INT, OS_INT, href, indexreg);
+
+  { branch to the loaded address via the count register }
+  exprasmlist.concat(taicpu.op_reg(A_MTCTR, indexreg));
+  exprasmlist.concat(taicpu.op_none(A_BCTR));
+
+  { generate jump table }
+  { NOTE(review): the last argument of new_section is presumably the alignment - confirm }
+  new_section(current_procinfo.aktlocaldata,sec_data,current_procinfo.procdef.mangledname,sizeof(aint));
+  current_procinfo.aktlocaldata.concat(Tai_label.Create(table));
+  { NOTE(review): hole filling in genitem starts at last+1, so this assumes the
+    smallest label starts at min_ - confirm against the caller }
+  last:=min_;
+  genitem(current_procinfo.aktlocaldata,hp);
+end;
+
+procedure tppccasenode.genlinearlist(hp: pcaselabel);
 var
 var
   first, lastrange: boolean;
   first, lastrange: boolean;
   last: TConstExprInt;
   last: TConstExprInt;
@@ -81,13 +151,11 @@ var
     if assigned(t^.less) then
     if assigned(t^.less) then
       genitem(t^.less);
       genitem(t^.less);
     { need we to test the first value }
     { need we to test the first value }
-    if first and (t^._low > get_min_value(left.resulttype.def)) then
-    begin
+    if first and (t^._low > get_min_value(left.resulttype.def)) then begin
       cg.a_cmp_const_reg_label(exprasmlist, OS_INT, jmp_lt, aword(t^._low),
       cg.a_cmp_const_reg_label(exprasmlist, OS_INT, jmp_lt, aword(t^._low),
         hregister, elselabel);
         hregister, elselabel);
     end;
     end;
-    if t^._low = t^._high then
-    begin
+    if t^._low = t^._high then begin
       if t^._low - last = 0 then
       if t^._low - last = 0 then
         cg.a_cmp_const_reg_label(exprasmlist, opsize, OC_EQ, 0, hregister,
         cg.a_cmp_const_reg_label(exprasmlist, opsize, OC_EQ, 0, hregister,
           blocklabel(t^.blockid))
           blocklabel(t^.blockid))
@@ -96,26 +164,20 @@ var
       tcgppc(cg).a_jmp_cond(exprasmlist, OC_EQ, blocklabel(t^.blockid));
       tcgppc(cg).a_jmp_cond(exprasmlist, OC_EQ, blocklabel(t^.blockid));
       last := t^._low;
       last := t^._low;
       lastrange := false;
       lastrange := false;
-    end
-    else
-    begin
+    end else begin
       { it begins with the smallest label, if the value }
       { it begins with the smallest label, if the value }
       { is even smaller then jump immediately to the    }
       { is even smaller then jump immediately to the    }
       { ELSE-label                                }
       { ELSE-label                                }
-      if first then
-      begin
+      if first then begin
         { have we to ajust the first value ? }
         { do we have to adjust the first value? }
         { do we have to adjust the first value? }
         if (t^._low > get_min_value(left.resulttype.def)) then
           gensub(aint(t^._low));
           gensub(aint(t^._low));
-      end
-      else
-      begin
+      end else begin
         { if there is no unused label between the last and the }
         { if there is no unused label between the last and the }
         { present label then the lower limit can be checked    }
         { present label then the lower limit can be checked    }
         { immediately. else check the range in between:       }
         { immediately. else check the range in between:       }
         gensub(aint(t^._low - last));
         gensub(aint(t^._low - last));
-        if ((t^._low - last) <> 1) or
-          (not lastrange) then
+        if ((t^._low - last) <> 1) or (not lastrange) then
           tcgppc(cg).a_jmp_cond(exprasmlist, jmp_lt, elselabel);
           tcgppc(cg).a_jmp_cond(exprasmlist, jmp_lt, elselabel);
       end;
       end;
       gensub(aint(t^._high - t^._low));
       gensub(aint(t^._high - t^._low));
@@ -130,11 +192,9 @@ var
 
 
 begin
 begin
   { do we need to generate cmps? }
   { do we need to generate cmps? }
-  if (with_sign and (min_label < 0)) or
-    (opsize = OS_32) then
+  if (with_sign and (min_label < 0)) or (opsize = OS_32) then
     genlinearcmplist(hp)
     genlinearcmplist(hp)
-  else
-  begin
+  else begin
     last := 0;
     last := 0;
     lastrange := false;
     lastrange := false;
     first := true;
     first := true;