Browse Source

* call by register now also takes care of the TOC and environment pointer
* first "optimize for size"-optimization

git-svn-id: trunk@1593 -

tom_at_work 20 years ago
parent
commit
5520442fc4
2 changed files with 137 additions and 116 deletions
  1. 120 115
      compiler/powerpc64/cgcpu.pas
  2. 17 1
      rtl/linux/powerpc64/prt0.as

+ 120 - 115
compiler/powerpc64/cgcpu.pas

@@ -50,8 +50,6 @@ type
       paraloc: tcgpara); override;
       paraloc: tcgpara); override;
 
 
     procedure a_call_name(list: taasmoutput; const s: string); override;
     procedure a_call_name(list: taasmoutput; const s: string); override;
-        procedure a_call_name_direct(list: taasmoutput; s: string; prependDot : boolean);
-
     procedure a_call_reg(list: taasmoutput; reg: tregister); override;
     procedure a_call_reg(list: taasmoutput; reg: tregister); override;
 
 
     procedure a_op_const_reg(list: taasmoutput; Op: TOpCG; size: TCGSize; a:
     procedure a_op_const_reg(list: taasmoutput; Op: TOpCG; size: TCGSize; a:
@@ -67,8 +65,11 @@ type
     { move instructions }
     { move instructions }
     procedure a_load_const_reg(list: taasmoutput; size: tcgsize; a: aint; reg:
     procedure a_load_const_reg(list: taasmoutput; size: tcgsize; a: aint; reg:
       tregister); override;
       tregister); override;
+    { stores the contents of register reg to the memory location described by
+    ref }
     procedure a_load_reg_ref(list: taasmoutput; fromsize, tosize: tcgsize; reg:
     procedure a_load_reg_ref(list: taasmoutput; fromsize, tosize: tcgsize; reg:
       tregister; const ref: treference); override;
       tregister; const ref: treference); override;
+    { loads the memory pointed to by ref into register reg }
     procedure a_load_ref_reg(list: taasmoutput; fromsize, tosize: tcgsize; const
     procedure a_load_ref_reg(list: taasmoutput; fromsize, tosize: tcgsize; const
       Ref: treference; reg: tregister); override;
       Ref: treference; reg: tregister); override;
     procedure a_load_reg_reg(list: taasmoutput; fromsize, tosize: tcgsize; reg1,
     procedure a_load_reg_reg(list: taasmoutput; fromsize, tosize: tcgsize; reg1,
@@ -77,6 +78,7 @@ type
     { fpu move instructions }
     { fpu move instructions }
     procedure a_loadfpu_reg_reg(list: taasmoutput; size: tcgsize; reg1, reg2:
     procedure a_loadfpu_reg_reg(list: taasmoutput; size: tcgsize; reg1, reg2:
       tregister); override;
       tregister); override;
+
     procedure a_loadfpu_ref_reg(list: taasmoutput; size: tcgsize; const ref:
     procedure a_loadfpu_ref_reg(list: taasmoutput; size: tcgsize; const ref:
       treference; reg: tregister); override;
       treference; reg: tregister); override;
     procedure a_loadfpu_reg_ref(list: taasmoutput; size: tcgsize; reg:
     procedure a_loadfpu_reg_ref(list: taasmoutput; size: tcgsize; reg:
@@ -150,6 +152,8 @@ type
     { returns true if the offset of the given reference can not be represented by a 16 bit
     { returns true if the offset of the given reference can not be represented by a 16 bit
     immediate as required by some PowerPC instructions }
     immediate as required by some PowerPC instructions }
     function hasLargeOffset(const ref : TReference) : Boolean; inline;
     function hasLargeOffset(const ref : TReference) : Boolean; inline;
+
+    procedure a_call_name_direct(list: taasmoutput; s: string; prependDot : boolean; addNOP : boolean);
   end;
   end;
 
 
 const
 const
@@ -258,10 +262,9 @@ begin
         else
         else
           internalerror(2002072801);
           internalerror(2002072801);
         end;
         end;
-      LOC_VOID:
-        begin
-          // nothing to do
-        end;
+      LOC_VOID: 
+        { nothing to do }
+        ;
     else
     else
       internalerror(2002081103);
       internalerror(2002081103);
     end;
     end;
@@ -300,17 +303,17 @@ end;
 
 
 procedure tcgppc.a_call_name(list: taasmoutput; const s: string);
 procedure tcgppc.a_call_name(list: taasmoutput; const s: string);
 begin
 begin
-        a_call_name_direct(list, s, true);
+        a_call_name_direct(list, s, true, true);
 end;
 end;
 
 
-procedure tcgppc.a_call_name_direct(list: taasmoutput; s: string; prependDot : boolean);
+procedure tcgppc.a_call_name_direct(list: taasmoutput; s: string; prependDot : boolean; addNOP : boolean);
 begin
 begin
-  if (prependDot) then begin
-        s := '.' + s;
-  end;
+  if (prependDot) then
+    s := '.' + s;
   list.concat(taicpu.op_sym(A_BL, objectlibrary.newasmsymbol(s, AB_EXTERNAL,
   list.concat(taicpu.op_sym(A_BL, objectlibrary.newasmsymbol(s, AB_EXTERNAL,
     AT_FUNCTION)));
     AT_FUNCTION)));
-  list.concat(taicpu.op_none(A_NOP));
+  if (addNOP) then
+    list.concat(taicpu.op_none(A_NOP));
   {
   {
          the compiler does not properly set this flag anymore in pass 1, and
          the compiler does not properly set this flag anymore in pass 1, and
          for now we only need it after pass 2 (I hope) (JM)
          for now we only need it after pass 2 (I hope) (JM)
@@ -324,35 +327,43 @@ end;
 { calling a procedure by address }
 { calling a procedure by address }
 
 
 procedure tcgppc.a_call_reg(list: taasmoutput; reg: tregister);
 procedure tcgppc.a_call_reg(list: taasmoutput; reg: tregister);
-
 var
 var
-  tmpreg: tregister;
   tmpref: treference;
   tmpref: treference;
-
-  gotref : treference;
-
 begin
 begin
-  tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
+  if (not (cs_littlesize in aktglobalswitches)) then begin
+    { load actual function entry (reg contains the reference to the function descriptor)
+    into R0 }
+    reference_reset_base(tmpref, reg, 0);
+    a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_R0);
 
 
-  reference_reset(tmpref);
-  tmpref.offset := 0;
-  tmpref.base := reg;
-  list.concat(taicpu.op_reg_ref(A_LD, tmpreg, tmpref));
+    { save TOC pointer in stackframe }
+    reference_reset_base(tmpref, NR_STACK_POINTER_REG, LA_RTOC_ELF);
+    a_load_reg_ref(list, OS_ADDR, OS_ADDR, NR_RTOC, tmpref);
 
 
-//  TODO: GOT change
+    { move actual function pointer to CTR register }
+    list.concat(taicpu.op_reg(A_MTCTR, NR_R0));
 
 
-//  reference_reset(gotref);
-//  tmpref.offset := 40;
-//  tmpref.base := rg[R_INTREGISTER].getregister(list, NR_STACK_POINTER_REG);
+    { load new TOC pointer from function descriptor into RTOC register }
+    reference_reset_base(tmpref, reg, tcgsize2size[OS_ADDR]);
+    a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_RTOC);
 
 
-//  taicpu.op_load_reg_ref(list, OS_INT, OS_INT,
-  list.concat(taicpu.op_reg(A_MTCTR, tmpreg));
+    { load new environment pointer from function descriptor into R11 register }
+    reference_reset_base(tmpref, reg, 2*tcgsize2size[OS_ADDR]);
+    a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_R11);
 
 
+    { call function }
+    list.concat(taicpu.op_none(A_BCTRL));
+  end else begin
+    { call ptrgl helper routine which expects the pointer to the function descriptor
+    in R11 }
+    a_load_reg_reg(list, OS_ADDR, OS_ADDR, reg, NR_R11);
+    a_call_name_direct(list, 'ptrgl', true, false);
+  end;
+
+  { we need to load the old RTOC from stackframe because we changed it}
+  reference_reset_base(tmpref, NR_STACK_POINTER_REG, LA_RTOC_ELF);
+  a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_RTOC);
 
 
-  list.concat(taicpu.op_none(A_BCTRL));
-  //if target_info.system=system_powerpc_macos then
-  //  //NOP is not needed here.
-  //  list.concat(taicpu.op_none(A_NOP));
   include(current_procinfo.flags, pi_do_call);
   include(current_procinfo.flags, pi_do_call);
 end;
 end;
 
 
@@ -389,7 +400,7 @@ procedure tcgppc.a_load_const_reg(list: taasmoutput; size: TCGSize; a: aint;
   procedure load32bitconstantR0(list : taasmoutput; size : TCGSize; a : longint;
   procedure load32bitconstantR0(list : taasmoutput; size : TCGSize; a : longint;
     reg : TRegister);
     reg : TRegister);
   begin
   begin
-    // only 16 bit constant? (-2^15 <= a <= +2^15-1)
+    { only 16 bit constant? (-2^15 <= a <= +2^15-1) }
     if (a >= low(smallint)) and (a <= high(smallint)) then begin
     if (a >= low(smallint)) and (a <= high(smallint)) then begin
       list.concat(taicpu.op_reg_const(A_LI, reg, smallint(a)));
       list.concat(taicpu.op_reg_const(A_LI, reg, smallint(a)));
     end else begin
     end else begin
@@ -474,7 +485,7 @@ procedure tcgppc.a_load_ref_reg(list: taasmoutput; fromsize, tosize: tcgsize;
 
 
 const
 const
   LoadInstr: array[OS_8..OS_S64, boolean, boolean] of TAsmOp =
   LoadInstr: array[OS_8..OS_S64, boolean, boolean] of TAsmOp =
-  { indexed? updating?}
+  { indexed? updating? }
   (((A_LBZ, A_LBZU), (A_LBZX, A_LBZUX)),
   (((A_LBZ, A_LBZU), (A_LBZX, A_LBZUX)),
     ((A_LHZ, A_LHZU), (A_LHZX, A_LHZUX)),
     ((A_LHZ, A_LHZU), (A_LHZX, A_LHZUX)),
     ((A_LWZ, A_LWZU), (A_LWZX, A_LWZUX)),
     ((A_LWZ, A_LWZU), (A_LWZX, A_LWZUX)),
@@ -493,26 +504,24 @@ var
   ref2: treference;
   ref2: treference;
 
 
 begin
 begin
-  { TODO: optimize/take into consideration fromsize/tosize. Will }
-  { probably only matter for OS_S8 loads though                  }
   if not (fromsize in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
   if not (fromsize in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
     internalerror(2002090902);
     internalerror(2002090902);
   ref2 := ref;
   ref2 := ref;
   fixref(list, ref2, tosize);
   fixref(list, ref2, tosize);
-  { the caller is expected to have adjusted the reference already }
-  { in this case                                                  }
+  { the caller is expected to have adjusted the reference already
+   in this case                                                  }
   if (TCGSize2Size[fromsize] >= TCGSize2Size[tosize]) then
   if (TCGSize2Size[fromsize] >= TCGSize2Size[tosize]) then
     fromsize := tosize;
     fromsize := tosize;
   op := loadinstr[fromsize, ref2.index <> NR_NO, false];
   op := loadinstr[fromsize, ref2.index <> NR_NO, false];
-  // there is no LWAU instruction, simulate using ADDI and LWA
+  { there is no LWAU instruction, simulate using ADDI and LWA }
   if (op = A_LWAU) then begin
   if (op = A_LWAU) then begin
     list.concat(taicpu.op_reg_reg_const(A_ADDI, reg, reg, ref2.offset));
     list.concat(taicpu.op_reg_reg_const(A_ADDI, reg, reg, ref2.offset));
     ref2.offset := 0;
     ref2.offset := 0;
     op := A_LWA;
     op := A_LWA;
   end;
   end;
   a_load_store(list, op, reg, ref2);
   a_load_store(list, op, reg, ref2);
-  // sign extend shortint if necessary, since there is no
-  // load instruction that does that automatically (JM)
+  { sign extend shortint if necessary, since there is no
+   load instruction that does that automatically (JM) }
   if fromsize = OS_S8 then
   if fromsize = OS_S8 then
     list.concat(taicpu.op_reg_reg(A_EXTSB, reg, reg));
     list.concat(taicpu.op_reg_reg(A_EXTSB, reg, reg));
 end;
 end;
@@ -573,8 +582,8 @@ var
   ref2: treference;
   ref2: treference;
 
 
 begin
 begin
-  { several functions call this procedure with OS_32 or OS_64 }
-  { so this makes life easier (FK)                            }
+  { several functions call this procedure with OS_32 or OS_64
+   so this makes life easier (FK) }
   case size of
   case size of
     OS_32, OS_F32:
     OS_32, OS_F32:
       size := OS_F32;
       size := OS_F32;
@@ -594,7 +603,7 @@ procedure tcgppc.a_loadfpu_reg_ref(list: taasmoutput; size: tcgsize; reg:
 
 
 const
 const
   FpuStoreInstr: array[OS_F32..OS_F64, boolean, boolean] of TAsmOp =
   FpuStoreInstr: array[OS_F32..OS_F64, boolean, boolean] of TAsmOp =
-  { indexed? updating?}
+  { indexed? updating? }
   (((A_STFS, A_STFSU), (A_STFSX, A_STFSUX)),
   (((A_STFS, A_STFSU), (A_STFSX, A_STFSUX)),
    ((A_STFD, A_STFDU), (A_STFDX, A_STFDUX)));
    ((A_STFD, A_STFDU), (A_STFDX, A_STFDUX)));
 var
 var
@@ -635,7 +644,7 @@ var
   begin
   begin
     usereg := false;
     usereg := false;
     if (size in [OS_64, OS_S64]) then begin
     if (size in [OS_64, OS_S64]) then begin
-      // ts: use register method for 64 bit consts. Sloooooow
+      { ts: use register method for 64 bit consts. Sloooooow }
       usereg := true;
       usereg := true;
     end else if (size in [OS_32, OS_S32]) then begin
     end else if (size in [OS_32, OS_S32]) then begin
       list.concat(taicpu.op_reg_reg_const(oplo, dst, src, word(a)));
       list.concat(taicpu.op_reg_reg_const(oplo, dst, src, word(a)));
@@ -652,8 +661,8 @@ begin
   end;
   end;
   ophi := TOpCG2AsmOpConstHi[op];
   ophi := TOpCG2AsmOpConstHi[op];
   oplo := TOpCG2AsmOpConstLo[op];
   oplo := TOpCG2AsmOpConstLo[op];
-  // peephole optimizations for AND, OR, XOR - can't this be done at
-  // some higher level, independent of architecture?
+  { peephole optimizations for AND, OR, XOR - can't this be done at
+   some higher level, independent of architecture? }
   if (op in [OP_AND, OP_OR, OP_XOR]) then begin
   if (op in [OP_AND, OP_OR, OP_XOR]) then begin
     if (a = 0) then begin
     if (a = 0) then begin
       if op = OP_AND then
       if op = OP_AND then
@@ -682,8 +691,7 @@ begin
       exit;
       exit;
     end;
     end;
 
 
-  { otherwise, the instructions we can generate depend on the }
-  { operation                                                 }
+  { otherwise, the instructions we can generate depend on the operation }
   useReg := false;
   useReg := false;
   case op of
   case op of
     OP_DIV, OP_IDIV:
     OP_DIV, OP_IDIV:
@@ -897,8 +905,7 @@ var
   bitvalue: boolean;
   bitvalue: boolean;
 
 
 begin
 begin
-  { get the bit to extract from the conditional register + its }
-  { requested value (0 or 1)                                   }
+  { get the bit to extract from the conditional register + its requested value (0 or 1) }
   testbit := ((f.cr - RS_CR0) * 4);
   testbit := ((f.cr - RS_CR0) * 4);
   case f.flag of
   case f.flag of
     F_EQ, F_NE:
     F_EQ, F_NE:
@@ -920,8 +927,7 @@ begin
   end;
   end;
   { load the conditional register in the destination reg }
   { load the conditional register in the destination reg }
   list.concat(taicpu.op_reg(A_MFCR, reg));
   list.concat(taicpu.op_reg(A_MFCR, reg));
-  { we will move the bit that has to be tested to bit 0 by rotating }
-  { left                                                            }
+  { we will move the bit that has to be tested to bit 0 by rotating left }
   testbit := (testbit + 1) and 31;
   testbit := (testbit + 1) and 31;
   { extract bit }
   { extract bit }
   list.concat(taicpu.op_reg_reg_const_const_const(
   list.concat(taicpu.op_reg_reg_const_const_const(
@@ -980,13 +986,13 @@ end;
 
 
 procedure tcgppc.g_proc_entry(list: taasmoutput; localsize: longint;
 procedure tcgppc.g_proc_entry(list: taasmoutput; localsize: longint;
   nostackframe: boolean);
   nostackframe: boolean);
-{ generated the entry code of a procedure/function. Note: localsize is the }
-{ sum of the size necessary for local variables and the maximum possible   }
-{ combined size of ALL the parameters of a procedure called by the current }
-{ one.                                                                     }
-{ This procedure may be called before, as well as after g_return_from_proc }
-{ is called. NOTE registers are not to be allocated through the register   }
-{ allocator here, because the register colouring has already occured !!    }
+{ generates the entry code of a procedure/function. Note: localsize is the
+ sum of the size necessary for local variables and the maximum possible
+ combined size of ALL the parameters of a procedure called by the current
+ one.
+ This procedure may be called before, as well as after g_return_from_proc
+ is called. NOTE registers are not to be allocated through the register
+ allocator here, because the register colouring has already occurred !! }
 var
 var
   firstregfpu, firstreggpr: TSuperRegister;
   firstregfpu, firstreggpr: TSuperRegister;
   href: treference;
   href: treference;
@@ -996,34 +1002,34 @@ var
   fprcount, gprcount : aint;
   fprcount, gprcount : aint;
 
 
 begin
 begin
-  { CR and LR only have to be saved in case they are modified by the current }
-  { procedure, but currently this isn't checked, so save them always         }
-  { following is the entry code as described in "Altivec Programming }
-  { Interface Manual", bar the saving of AltiVec registers           }
+  { CR and LR only have to be saved in case they are modified by the current
+   procedure, but currently this isn't checked, so save them always        
+   following is the entry code as described in "Altivec Programming
+   Interface Manual", bar the saving of AltiVec registers }
   a_reg_alloc(list, NR_STACK_POINTER_REG);
   a_reg_alloc(list, NR_STACK_POINTER_REG);
   a_reg_alloc(list, NR_R0);
   a_reg_alloc(list, NR_R0);
 
 
   calcFirstUsedFPR(firstregfpu, fprcount);
   calcFirstUsedFPR(firstregfpu, fprcount);
   calcFirstUsedGPR(firstreggpr, gprcount);
   calcFirstUsedGPR(firstreggpr, gprcount);
 
 
-  // calculate real stack frame size
+  { calculate real stack frame size }
   localsize := tppcprocinfo(current_procinfo).calc_stackframe_size(
   localsize := tppcprocinfo(current_procinfo).calc_stackframe_size(
     gprcount, fprcount);
     gprcount, fprcount);
 
 
-  // determine whether we need to save the link register
+  { determine whether we need to save the link register }
   needslinkreg := ((not (po_assembler in current_procinfo.procdef.procoptions)) and
   needslinkreg := ((not (po_assembler in current_procinfo.procdef.procoptions)) and
     (pi_do_call in current_procinfo.flags));
     (pi_do_call in current_procinfo.flags));
 
 
-  // move link register to r0
+  { move link register to r0 }
   if (needslinkreg) then begin
   if (needslinkreg) then begin
     list.concat(taicpu.op_reg(A_MFLR, NR_R0));
     list.concat(taicpu.op_reg(A_MFLR, NR_R0));
   end;
   end;
-  // save old stack frame pointer
+  { save old stack frame pointer }
   if (localsize > 0) then begin
   if (localsize > 0) then begin
     a_reg_alloc(list, NR_R12);
     a_reg_alloc(list, NR_R12);
     list.concat(taicpu.op_reg_reg(A_MR, NR_R12, NR_STACK_POINTER_REG));
     list.concat(taicpu.op_reg_reg(A_MR, NR_R12, NR_STACK_POINTER_REG));
   end;
   end;
-  // save registers, FPU first, then GPR
+  { save registers, FPU first, then GPR }
   reference_reset_base(href, NR_STACK_POINTER_REG, -8);
   reference_reset_base(href, NR_STACK_POINTER_REG, -8);
   if (fprcount > 0) then begin
   if (fprcount > 0) then begin
     for regcount := RS_F31 downto firstregfpu do begin
     for regcount := RS_F31 downto firstregfpu do begin
@@ -1040,15 +1046,15 @@ begin
     end;
     end;
   end;
   end;
 
 
-  // VMX registers not supported by FPC atm
+  { VMX registers not supported by FPC atm }
 
 
-  // we may need to store R0 (=LR) ourselves
+  { we may need to store R0 (=LR) ourselves }
   if (needslinkreg) then begin
   if (needslinkreg) then begin
     reference_reset_base(href, NR_STACK_POINTER_REG, LA_LR_ELF);
     reference_reset_base(href, NR_STACK_POINTER_REG, LA_LR_ELF);
     list.concat(taicpu.op_reg_ref(A_STD, NR_R0, href));
     list.concat(taicpu.op_reg_ref(A_STD, NR_R0, href));
   end;
   end;
 
 
-  // create stack frame
+  { create stack frame }
   if (not nostackframe) and (localsize > 0) then begin
   if (not nostackframe) and (localsize > 0) then begin
     if (localsize <= high(smallint)) then begin
     if (localsize <= high(smallint)) then begin
       reference_reset_base(href, NR_STACK_POINTER_REG, -localsize);
       reference_reset_base(href, NR_STACK_POINTER_REG, -localsize);
@@ -1056,10 +1062,10 @@ begin
     end else begin
     end else begin
       reference_reset_base(href, NR_NO, -localsize);
       reference_reset_base(href, NR_NO, -localsize);
 
 
-      // use R0 for loading the constant (which is definitely > 32k when entering
-      // this branch)
-      // inlined because it must not use temp registers because register allocations
-      // have already been done :(
+      { use R0 for loading the constant (which is definitely > 32k when entering
+       this branch)
+       Inlined at this position because it must not use temp registers because 
+       register allocations have already been done :( }
       { Code template:
       { Code template:
       lis   r0,ofs@highest
       lis   r0,ofs@highest
       ori   r0,r0,ofs@higher
       ori   r0,r0,ofs@higher
@@ -1076,9 +1082,9 @@ begin
       list.concat(taicpu.op_reg_reg_reg(A_STDUX, NR_R1, NR_R1, NR_R0));
       list.concat(taicpu.op_reg_reg_reg(A_STDUX, NR_R1, NR_R1, NR_R0));
     end;
     end;
   end;
   end;
-  // CR register not used by FPC atm
+  { CR register not used by FPC atm }
 
 
-  // keep R1 allocated???
+  { keep R1 allocated??? }
   a_reg_dealloc(list, NR_R0);
   a_reg_dealloc(list, NR_R0);
 end;
 end;
 
 
@@ -1097,26 +1103,26 @@ begin
   calcFirstUsedFPR(firstregfpu, fprcount);
   calcFirstUsedFPR(firstregfpu, fprcount);
   calcFirstUsedGPR(firstreggpr, gprcount);
   calcFirstUsedGPR(firstreggpr, gprcount);
 
 
-  // determine whether we need to restore the link register
+  { determine whether we need to restore the link register }
   needslinkreg := ((not (po_assembler in current_procinfo.procdef.procoptions)) and
   needslinkreg := ((not (po_assembler in current_procinfo.procdef.procoptions)) and
     (pi_do_call in current_procinfo.flags));
     (pi_do_call in current_procinfo.flags));
-  // calculate stack frame
+  { calculate stack frame }
   localsize := tppcprocinfo(current_procinfo).calc_stackframe_size(
   localsize := tppcprocinfo(current_procinfo).calc_stackframe_size(
     gprcount, fprcount);
     gprcount, fprcount);
 
 
-  // CR register not supported
+  { CR register not supported }
 
 
-  // restore stack pointer
+  { restore stack pointer }
   if (not nostackframe) and (localsize > 0) then begin
   if (not nostackframe) and (localsize > 0) then begin
     if (localsize <= high(smallint)) then begin
     if (localsize <= high(smallint)) then begin
       list.concat(taicpu.op_reg_reg_const(A_ADDI, NR_STACK_POINTER_REG, NR_STACK_POINTER_REG, localsize));
       list.concat(taicpu.op_reg_reg_const(A_ADDI, NR_STACK_POINTER_REG, NR_STACK_POINTER_REG, localsize));
     end else begin
     end else begin
       reference_reset_base(href, NR_NO, localsize);
       reference_reset_base(href, NR_NO, localsize);
 
 
-      // use R0 for loading the constant (which is definitely > 32k when entering
-      // this branch)
-      // inlined because it must not use temp registers because register allocations
-      // have already been done :(
+      { use R0 for loading the constant (which is definitely > 32k when entering
+       this branch)
+       Inlined because it must not use temp registers because register allocations
+       have already been done :( }
       { Code template:
       { Code template:
       lis   r0,ofs@highest
       lis   r0,ofs@highest
       ori   r0,ofs@higher
       ori   r0,ofs@higher
@@ -1134,7 +1140,7 @@ begin
     end;
     end;
   end;
   end;
 
 
-  // load registers, FPR first, then GPR
+  { load registers, FPR first, then GPR }
   {$note ts:todo change order of loading}
   {$note ts:todo change order of loading}
   reference_reset_base(href, NR_STACK_POINTER_REG, -tcgsize2size[OS_FLOAT]);
   reference_reset_base(href, NR_STACK_POINTER_REG, -tcgsize2size[OS_FLOAT]);
   if (fprcount > 0) then begin
   if (fprcount > 0) then begin
@@ -1152,16 +1158,16 @@ begin
     end;
     end;
   end;
   end;
 
 
-  // VMX not supported...
+  { VMX not supported... }
 
 
-  // restore LR (if needed)
+  { restore LR (if needed) }
   if (needslinkreg) then begin
   if (needslinkreg) then begin
     reference_reset_base(href, NR_STACK_POINTER_REG, LA_LR_ELF);
     reference_reset_base(href, NR_STACK_POINTER_REG, LA_LR_ELF);
     list.concat(taicpu.op_reg_ref(A_LD, NR_R0, href));
     list.concat(taicpu.op_reg_ref(A_LD, NR_R0, href));
     list.concat(taicpu.op_reg(A_MTLR, NR_R0));
     list.concat(taicpu.op_reg(A_MTLR, NR_R0));
   end;
   end;
 
 
-  // generate return instruction
+  { generate return instruction }
   list.concat(taicpu.op_none(A_BLR));
   list.concat(taicpu.op_none(A_BLR));
 end;
 end;
 
 
@@ -1171,7 +1177,7 @@ procedure tcgppc.a_loadaddr_ref_reg(list: taasmoutput; const ref: treference; r:
 
 
 var
 var
   ref2, tmpref: treference;
   ref2, tmpref: treference;
-  // register used to construct address
+  { register used to construct address }
   tempreg : TRegister;
   tempreg : TRegister;
 
 
 begin
 begin
@@ -1185,17 +1191,17 @@ begin
       tmpref.offset := ref2.offset;
       tmpref.offset := ref2.offset;
       tmpref.symbol := ref2.symbol;
       tmpref.symbol := ref2.symbol;
       tmpref.relsymbol := ref2.relsymbol;
       tmpref.relsymbol := ref2.relsymbol;
-      // load 64 bit reference into r. If the reference already has a base register,
-      // first load the 64 bit value into a temp register, then add it to the result
-      // register rD
+      { load 64 bit reference into r. If the reference already has a base register,
+       first load the 64 bit value into a temp register, then add it to the result
+       register rD }
       if (ref2.base <> NR_NO) then begin
       if (ref2.base <> NR_NO) then begin
-        // already have a base register, so allocate a new one
+        { already have a base register, so allocate a new one }
         tempreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
         tempreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
       end else begin
       end else begin
         tempreg := r;
         tempreg := r;
       end;
       end;
 
 
-      // code for loading a reference from a symbol into a register rD.
+      { code for loading a reference from a symbol into a register rD }
       (*
       (*
       lis   rX,SYM@highest
       lis   rX,SYM@highest
       ori   rX,SYM@higher
       ori   rX,SYM@higher
@@ -1213,8 +1219,8 @@ begin
       tmpref.refaddr := addr_low;
       tmpref.refaddr := addr_low;
       list.concat(taicpu.op_reg_reg_ref(A_ORI, tempreg, tempreg, tmpref));
       list.concat(taicpu.op_reg_reg_ref(A_ORI, tempreg, tempreg, tmpref));
 
 
-      // if there's already a base register, add the temp register contents to
-      // the base register
+      { if there's already a base register, add the temp register contents to
+       the base register }
       if (ref2.base <> NR_NO) then begin
       if (ref2.base <> NR_NO) then begin
         list.concat(taicpu.op_reg_reg_reg(A_ADD, r, tempreg, ref2.base));
         list.concat(taicpu.op_reg_reg_reg(A_ADD, r, tempreg, ref2.base));
       end;
       end;
@@ -1222,8 +1228,8 @@ begin
     { no symbol, but offset <> 0 }
     { no symbol, but offset <> 0 }
     if ref2.base <> NR_NO then begin
     if ref2.base <> NR_NO then begin
       a_op_const_reg_reg(list, OP_ADD, OS_64, ref2.offset, ref2.base, r)
       a_op_const_reg_reg(list, OP_ADD, OS_64, ref2.offset, ref2.base, r)
-      { FixRef makes sure that "(ref.index <> R_NO) and (ref.offset <> 0)" never}
-      { occurs, so now only ref.offset has to be loaded                         }
+      { FixRef makes sure that "(ref.index <> R_NO) and (ref.offset <> 0)" never
+       occurs, so now only ref.offset has to be loaded }
     end else begin
     end else begin
       a_load_const_reg(list, OS_64, ref2.offset, r)
       a_load_const_reg(list, OS_64, ref2.offset, r)
     end;
     end;
@@ -1305,18 +1311,18 @@ begin
 
 
   { generate a loop }
   { generate a loop }
   if count > 4 then begin
   if count > 4 then begin
-    { the offsets are zero after the a_loadaddress_ref_reg and just }
-    { have to be set to 8. I put an Inc there so debugging may be   }
-    { easier (should offset be different from zero here, it will be }
-    { easy to notice in the generated assembler                     }
+    { the offsets are zero after the a_loadaddress_ref_reg and just
+     have to be set to 8. I put an Inc there so debugging may be
+     easier (should offset be different from zero here, it will be
+     easy to notice in the generated assembler) }
     inc(dst.offset, 8);
     inc(dst.offset, 8);
     inc(src.offset, 8);
     inc(src.offset, 8);
     list.concat(taicpu.op_reg_reg_const(A_SUBI, src.base, src.base, 8));
     list.concat(taicpu.op_reg_reg_const(A_SUBI, src.base, src.base, 8));
     list.concat(taicpu.op_reg_reg_const(A_SUBI, dst.base, dst.base, 8));
     list.concat(taicpu.op_reg_reg_const(A_SUBI, dst.base, dst.base, 8));
     countreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
     countreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
     a_load_const_reg(list, OS_32, count, countreg);
     a_load_const_reg(list, OS_32, count, countreg);
-    { explicitely allocate R_0 since it can be used safely here }
-    { (for holding date that's being copied)                    }
+    { explicitly allocate F0 since it can be used safely here
+     (for holding data that's being copied) }
     a_reg_alloc(list, NR_F0);
     a_reg_alloc(list, NR_F0);
     objectlibrary.getjumplabel(lab);
     objectlibrary.getjumplabel(lab);
     a_label(list, lab);
     a_label(list, lab);
@@ -1382,10 +1388,10 @@ begin
     (torddef(def).typ in [u64bit, u16bit, u32bit, u8bit, uchar,
     (torddef(def).typ in [u64bit, u16bit, u32bit, u8bit, uchar,
     bool8bit, bool16bit, bool32bit]))) then
     bool8bit, bool16bit, bool32bit]))) then
   begin
   begin
-    // ... instruction setting overflow flag ...
-    // mfxerf R0
-    // mtcrf 128, R0
-    // ble cr0, label
+    { ... instructions setting overflow flag ...
+     mfxer R0
+     mtcrf 128, R0
+     ble cr0, label }
     list.concat(taicpu.op_reg(A_MFXER, NR_R0));
     list.concat(taicpu.op_reg(A_MFXER, NR_R0));
     list.concat(taicpu.op_const_reg(A_MTCRF, 128, NR_R0));
     list.concat(taicpu.op_const_reg(A_MTCRF, 128, NR_R0));
     flags.cr := RS_CR0;
     flags.cr := RS_CR0;
@@ -1426,15 +1432,15 @@ procedure tcgppc.g_intf_wrapper(list: TAAsmoutput; procdef: tprocdef; const
       { add support for offsets > 16 bit }
       { add support for offsets > 16 bit }
       internalerror(200510201);
       internalerror(200510201);
     list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href));
     list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href));
-    // the loaded reference is a function descriptor reference, so deref again
-    // (at ofs 0 there's the real pointer)
+    { the loaded reference is a function descriptor reference, so deref again
+     (at ofs 0 there's the real pointer) }
     {$warning ts:TODO: update GOT reference}
     {$warning ts:TODO: update GOT reference}
     reference_reset_base(href, NR_R11, 0);
     reference_reset_base(href, NR_R11, 0);
     list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href));
     list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href));
 
 
     list.concat(taicpu.op_reg(A_MTCTR, NR_R11));
     list.concat(taicpu.op_reg(A_MTCTR, NR_R11));
     list.concat(taicpu.op_none(A_BCTR));
     list.concat(taicpu.op_none(A_BCTR));
-    // NOP needed for the linker...?
+    { NOP needed for the linker...? }
     list.concat(taicpu.op_none(A_NOP));
     list.concat(taicpu.op_none(A_NOP));
   end;
   end;
 
 
@@ -1464,11 +1470,10 @@ begin
   { set param1 interface to self  }
   { set param1 interface to self  }
   g_adjust_self_value(list, procdef, ioffset);
   g_adjust_self_value(list, procdef, ioffset);
 
 
-  { case 4 }
   if po_virtualmethod in procdef.procoptions then begin
   if po_virtualmethod in procdef.procoptions then begin
     loadvmttor11;
     loadvmttor11;
     op_onr11methodaddr;
     op_onr11methodaddr;
-  end { case 0 } else
+  end else
     {$note ts:todo add GOT change?? - think not needed :) }
     {$note ts:todo add GOT change?? - think not needed :) }
     list.concat(taicpu.op_sym(A_B,
     list.concat(taicpu.op_sym(A_B,
       objectlibrary.newasmsymbol('.' + procdef.mangledname, AB_EXTERNAL,
       objectlibrary.newasmsymbol('.' + procdef.mangledname, AB_EXTERNAL,

+ 17 - 1
rtl/linux/powerpc64/prt0.as

@@ -60,6 +60,22 @@
 	li	3,1
 	li	3,1
 	sc
 	sc
 .endm
 .endm
+
+.section ".text"
+.align 3
+.globl .ptrgl
+.ptrgl:                         /* indirect-call helper; in: r11 = address of function descriptor */
+    ld	0, 0(11)             /* r0 = code entry address (descriptor doubleword 0) */
+    std	2, 40(1)             /* save the caller's TOC pointer in its stack frame; the caller reloads it after the call */
+    mtctr	0                    /* CTR = branch target */
+    ld	2, 8(11)             /* r2 = callee's TOC pointer (descriptor doubleword 1) */
+    ld	11, 16(11)           /* r11 = environment pointer (descriptor doubleword 2); must be last, it overwrites the descriptor address */
+    bctr                         /* jump without touching LR, so the callee returns directly to our caller */
+    .long 0
+    .byte	0, 12, 0, 0, 0, 0, 0, 0
+    .type .ptrgl, @function
+    .size .ptrgl, . - .ptrgl
+
 	/*
 	/*
         Main Pascal entry point label (function)
         Main Pascal entry point label (function)
 	*/
 	*/
@@ -69,7 +85,7 @@ FUNCTION_PROLOG _start
 	/* Set up an initial stack frame, and clear the LR */
 	/* Set up an initial stack frame, and clear the LR */
 	clrrdi  1, 1, 5       /* align r1 */
 	clrrdi  1, 1, 5       /* align r1 */
 	li      0, 0          
 	li      0, 0          
-	stdu    1,-48(1)      
+	stdu    1,-128(1)      
 	mtlr    0             
 	mtlr    0             
 	std     0, 0(1)       /* r1 = pointer to NULL value */
 	std     0, 0(1)       /* r1 = pointer to NULL value */