ソースを参照

* added workarounds for some load/store instructions requiring 4 byte aligned addresses
* some cleanup and optimization of 64 bit address loading in the code generator
* small changes to the IDE to make it compile
* stat type update for packages/cdrom compatibility

git-svn-id: trunk@1375 -

tom_at_work 20 年 前
コミット
0ba960e1bd
4 ファイル変更161 行追加78 行削除
  1. 114 58
      compiler/powerpc64/cgcpu.pas
  2. 4 0
      ide/fpviews.pas
  3. 41 18
      rtl/linux/powerpc64/stat.inc
  4. 2 2
      rtl/powerpc64/setjump.inc

+ 114 - 58
compiler/powerpc64/cgcpu.pas

@@ -125,7 +125,7 @@ type
     { offset or symbol, in which case the base will have been changed }
     { to a tempreg (which has to be freed by the caller) containing   }
     { the sum of part of the original reference                       }
-    function fixref(list: taasmoutput; var ref: treference): boolean;
+    function fixref(list: taasmoutput; var ref: treference; const size : TCgsize): boolean;
 
     { returns whether a reference can be used immediately in a powerpc }
     { instruction                                                      }
@@ -446,7 +446,7 @@ var
   ref2: TReference;
 begin
   ref2 := ref;
-  fixref(list, ref2);
+  fixref(list, ref2, tosize);
   if tosize in [OS_S8..OS_S64] then
     { storing is the same for signed and unsigned values }
     tosize := tcgsize(ord(tosize) - (ord(OS_S8) - ord(OS_8)));
@@ -483,7 +483,7 @@ begin
   if not (fromsize in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
     internalerror(2002090902);
   ref2 := ref;
-  fixref(list, ref2);
+  fixref(list, ref2, tosize);
   { the caller is expected to have adjusted the reference already }
   { in this case                                                  }
   if (TCGSize2Size[fromsize] >= TCGSize2Size[tosize]) then
@@ -491,9 +491,9 @@ begin
   op := loadinstr[fromsize, ref2.index <> NR_NO, false];
   // there is no LWAU instruction, simulate using ADDI and LWA
   if (op = A_LWAU) then begin
-        list.concat(taicpu.op_reg_reg_const(A_ADDI, reg, reg, ref2.offset));
-        ref2.offset := 0;
-        op := A_LWA;
+    list.concat(taicpu.op_reg_reg_const(A_ADDI, reg, reg, ref2.offset));
+    ref2.offset := 0;
+    op := A_LWA;
   end;
   a_load_store(list, op, reg, ref2);
   // sign extend shortint if necessary, since there is no
@@ -569,7 +569,7 @@ begin
     internalerror(200201121);
   end;
   ref2 := ref;
-  fixref(list, ref2);
+  fixref(list, ref2, size);
   op := fpuloadinstr[size, ref2.index <> NR_NO, false];
   a_load_store(list, op, reg, ref2);
 end;
@@ -590,7 +590,7 @@ begin
   if not (size in [OS_F32, OS_F64]) then
     internalerror(200201122);
   ref2 := ref;
-  fixref(list, ref2);
+  fixref(list, ref2, size);
   op := fpustoreinstr[size, ref2.index <> NR_NO, false];
   a_load_store(list, op, reg, ref2);
 end;
@@ -1200,7 +1200,7 @@ var
 
 begin
   ref2 := ref;
-  fixref(list, ref2);
+  fixref(list, ref2, OS_64);
   { load a symbol }
   if assigned(ref2.symbol) or (ref2.offset < low(smallint)) or (ref2.offset > high(smallint)) then begin
       { add the symbol's value to the base of the reference, and if the }
@@ -1518,75 +1518,134 @@ begin
     (ref.offset = 0)));
 end;
 
-function tcgppc.fixref(list: taasmoutput; var ref: treference): boolean;
+function tcgppc.fixref(list: taasmoutput; var ref: treference; const size : TCgsize): boolean;
 
 var
   tmpreg: tregister;
+  needsAlign : boolean;
 begin
   result := false;
-  if (ref.base = NR_NO) then
-  begin
+  needsAlign := size in [OS_S32, OS_64, OS_S64];
+
+  if (ref.base = NR_NO) then  begin
     ref.base := ref.index;
-    ref.base := NR_NO;
+    ref.index := NR_NO;
   end;
-  if (ref.base <> NR_NO) then
-  begin
-    if (ref.index <> NR_NO) and
-      ((ref.offset <> 0) or assigned(ref.symbol)) then
-    begin
+  if (ref.base <> NR_NO) and (ref.index <> NR_NO) and
+    ((ref.offset <> 0) or assigned(ref.symbol)) then begin
       result := true;
       tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
-      list.concat(taicpu.op_reg_reg_reg(
-        A_ADD, tmpreg, ref.base, ref.index));
+      a_op_reg_reg_reg(list, OP_ADD, size, ref.base, ref.index, tmpreg);
       ref.index := NR_NO;
       ref.base := tmpreg;
-    end
-  end
-  else if ref.index <> NR_NO then
-    internalerror(200208102);
+  end;
 end;
 
 procedure tcgppc.a_load_store(list: taasmoutput; op: tasmop; reg: tregister;
   ref: treference);
-
 var
-  tmpreg: tregister;
+  tmpreg, tmpreg2: tregister;
   tmpref: treference;
   largeOffset: Boolean;
-
 begin
-  tmpreg := NR_NO;
+  // at this point the reference must not contain a combination of fields that
+  // cannot be mapped directly onto an instruction of the PowerPC architecture
+  if (ref.index <> NR_NO) and ((ref.offset <> 0) or (assigned(ref.symbol))) then
+    internalerror(200310131);
+
+  // for some instructions we need to check that the offset is divisible by at
+  // least four. If not, add the bytes which are "off" to the base register and
+  // adjust the offset accordingly 
+  case op of
+    A_LD, A_LDU, A_STD, A_STDU, A_LWA, A_LWAU :
+     if ((ref.offset mod 4) <> 0) then begin
+       tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
+
+       if (ref.base <> NR_NO) then begin
+         a_op_const_reg_reg(list, OP_ADD, OS_ADDR, ref.offset mod 4, ref.base, tmpreg);
+         ref.base := tmpreg;
+       end else begin
+         list.concat(taicpu.op_reg_const(A_LI, tmpreg, ref.offset mod 4));
+         ref.base := tmpreg;
+       end;
+       ref.offset := (ref.offset div 4) * 4;
+     end;
+  end;
 
   // if we have to load/store from a symbol or large addresses, use a temporary register
   // containing the address
-    if assigned(ref.symbol) or (ref.offset < low(smallint)) or (ref.offset > high(smallint)) then begin
-      tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
-      reference_reset(tmpref);
-      tmpref.symbol := ref.symbol;
-      tmpref.relsymbol := ref.relsymbol;
-      tmpref.offset := ref.offset;
-
-      (*
-      code template when there's no base register
-
-      lis rT,SYM+offs@highesta
-      addi rT,SYM+offs@highera
-      sldi rT,rT,32
-      addis rT,rT,SYM+offs@ha
-      ld rD,SYM+offs@l(rT)
+  if assigned(ref.symbol) or (ref.offset < low(smallint)) or (ref.offset > high(smallint)) then begin
+    tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
+
+    reference_reset(tmpref);
+    tmpref.symbol := ref.symbol;
+    tmpref.relsymbol := ref.relsymbol;
+    tmpref.offset := ref.offset;
+    if (ref.base <> NR_NO) then begin
+      {
+      As long as the TOC isn't working, we aim for the highest possible speed
+      (in this case by allowing instructions to execute in parallel), at the cost
+      of using another temporary register. So the code template when there is
+      a base register and a symbol or large offset is the following:
+
+      lis rT1, SYM+offs@highest
+      ori rT1, rT1, SYM+offs@higher
+      lis rT2, SYM+offs@high
+      ori rT2, rT2, SYM+offs@low
+      rldimi rT2, rT1, 32, 0
+
+      <op>X reg, base, rT2
+      }
 
-      code template when there's a base register
+      tmpreg2 := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
+      tmpref.refaddr := addr_highest;
+      list.concat(taicpu.op_reg_ref(A_LIS, tmpreg, tmpref));
+      tmpref.refaddr := addr_higher;
+      list.concat(taicpu.op_reg_reg_ref(A_ORI, tmpreg, tmpreg, tmpref));
 
-      lis rT,SYM+offs@highesta
-      addis rT,SYM+offs@highera
-      sldi rT,rT,32
-      addis rT,rT,SYM+offs@ha
-      add  rT,rBase,rT
-      ld rD,SYM+offs@l(rT)
+      tmpref.refaddr := addr_high;
+      list.concat(taicpu.op_reg_ref(A_LIS, tmpreg2, tmpref));
+      tmpref.refaddr := addr_low;
+      list.concat(taicpu.op_reg_reg_ref(A_ORI, tmpreg2, tmpreg2, tmpref));
 
-      *)
-      //list.concat(tai_comment.create(strpnew('symbol: ' + tmpref.symbol.name + ' offset: ' + inttostr(tmpref.offset))));
+      list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, tmpreg2, tmpreg, 32, 0));
 
+      reference_reset(tmpref);
+      tmpref.base := ref.base;
+      tmpref.index := tmpreg2;
+      case op of
+        // the code generator doesn't generate update instructions anyway
+        A_LBZ : op := A_LBZX;
+        A_LHZ : op := A_LHZX;
+        A_LWZ : op := A_LWZX;
+        A_LD : op := A_LDX;
+        A_LHA : op := A_LHAX;
+        A_LWA : op := A_LWAX;
+        A_LFS : op := A_LFSX;
+        A_LFD : op := A_LFDX;
+
+        A_STB : op := A_STBX;
+        A_STH : op := A_STHX;
+        A_STW : op := A_STWX;
+        A_STD : op := A_STDX;
+
+        A_STFS : op := A_STFSX;
+        A_STFD : op := A_STFDX;
+        else
+          // unknown load/store opcode
+          internalerror(2005101302);
+      end;
+      list.concat(taicpu.op_reg_ref(op, reg, tmpref));
+    end else begin
+      { When accessing a value through a reference without a base register,
+        use the following code template:
+
+        lis rT,SYM+offs@highesta
+        ori rT,rT,SYM+offs@highera
+        sldi rT,rT,32
+        oris rT,rT,SYM+offs@ha
+        ld rD,SYM+offs@l(rT)
+      }
       tmpref.refaddr := addr_highesta;
       list.concat(taicpu.op_reg_ref(A_LIS, tmpreg, tmpref));
       tmpref.refaddr := addr_highera;
@@ -1595,16 +1654,13 @@ begin
       tmpref.refaddr := addr_higha;
       list.concat(taicpu.op_reg_reg_ref(A_ORIS, tmpreg, tmpreg, tmpref));
 
-      if (ref.base <> NR_NO) then begin
-        list.concat(taicpu.op_reg_reg_reg(A_ADD, tmpreg, tmpreg, ref.base));
-      end;
-
       tmpref.base := tmpreg;
       tmpref.refaddr := addr_low;
       list.concat(taicpu.op_reg_ref(op, reg, tmpref));
-    end else begin
-      list.concat(taicpu.op_reg_ref(op, reg, ref));
     end;
+  end else begin
+    list.concat(taicpu.op_reg_ref(op, reg, ref));
+  end;
 end;
 
 procedure tcgppc.a_jmp(list: taasmoutput; op: tasmop; c: tasmcondflag;

+ 4 - 0
ide/fpviews.pas

@@ -867,6 +867,7 @@ begin
   GetAsmReservedWordCount:=ord(lastop) - ord(firstop)
 {$ifndef x86_64}
 {$ifndef powerpc}
+{$ifndef powerpc64}
 {$ifndef arm}
     + CondAsmOps*(ord(high(TasmCond))-ord(low(TasmCond)));
 {$else arm}
@@ -874,6 +875,9 @@ begin
      we've to solve this different }
    ;
 {$endif arm}
+{$else powerpc64}
+   + CondAsmOps*(ord(high(TAsmCondFlag))-ord(low(TAsmCondFlag)));
+{$endif powerpc64}
 {$else powerpc}
    + CondAsmOps*(ord(high(TAsmCondFlag))-ord(low(TAsmCondFlag)));
 {$endif powerpc}

+ 41 - 18
rtl/linux/powerpc64/stat.inc

@@ -13,24 +13,47 @@
  **********************************************************************}
 
   Stat = packed record  // No unix typing because of differences
-    st_dev : qword;
-    st_ino : qword;
-    st_nlink : qword;
+    case integer of
+    0 : (
+      st_dev : qword;
+      st_ino : qword;
+      st_nlink : qword;
 
-    st_mode : dword;
-    st_uid : dword;
-    st_gid : dword;
-    __pad0 : dword;
-    st_rdev : qword;
-    st_size : int64;
-    st_blksize : int64;
-    st_blocks : int64;      { Number 512-byte blocks allocated. }
+      st_mode : dword;
+      st_uid : dword;
+      st_gid : dword;
+      __pad0 : dword;
+      st_rdev : qword;
+      st_size : int64;
+      st_blksize : int64;
+      st_blocks : int64;      { Number 512-byte blocks allocated. }
 
-    st_atime : qword;
-    __reserved0 : qword;    { reserved for atime.nanoseconds }
-    st_mtime : qword;
-    __reserved1 : qword;    { reserved for atime.nanoseconds }
-    st_ctime : qword;
-    __reserved2 : qword;    { reserved for atime.nanoseconds }
-    __unused : array[0..2] of int64;
+      st_atime : qword;
+      __reserved0 : qword;    { reserved for atime.nanoseconds }
+      st_mtime : qword;
+      __reserved1 : qword;    { reserved for mtime.nanoseconds }
+      st_ctime : qword;
+      __reserved2 : qword;    { reserved for ctime.nanoseconds }
+      __unused : array[0..2] of int64
+      );
+    1 : (
+      dev    : qword;
+      ino,
+      mode   : qword;
+      nlink_dummy     : dword;
+      uid_dummy,
+      gid_dummy,
+      rdev      : dword;
+      size   : qword;
+      blksize,
+      blocks,
+      atime,
+      __unused1_dummy,
+      mtime,
+      __unused2_dummy,
+      ctime,
+      __unused3_dummy,
+      __unused4_dummy,
+      __unused5_dummy  : qword;
+      );
   end;

+ 2 - 2
rtl/powerpc64/setjump.inc

@@ -42,7 +42,7 @@ function setjmp(var S : jmp_buf) : longint;assembler;[Public, alias : 'FPC_SETJM
      mfcr    r0
      std     r16,40(r3)  // store r16
      stfd    f16,192(r3) // store f16
-     std     r0,168(r3)  // store cr
+     stw     r0,168(r3)  // store cr
      std     r17,48(r3)  // store r17
      stfd    f17,200(r3) // store f17
      std     r18,56(r3)  // ...
@@ -100,7 +100,7 @@ procedure longjmp(var S : jmp_buf;value : longint);assembler;[Public, alias : 'F
      lfd     f21,232(r3)
      ld      r22,88(r3)
      lfd     f22,240(r3)
-     ld      r0,168(r3)
+     lwz     r0,168(r3)
      ld      r23,96(r3)
      lfd     f23,248(r3)
      ld      r24,104(r3)