
+ unaligned support in a_load_reg_ref and a_load_ref_reg for ppc64
(only needed when a 64 bit value is loaded from an address
with alignment < 4 bytes)

git-svn-id: trunk@9345 -

Jonas Maebe 17 years ago
parent commit 31764a0e3b
2 changed files with 61 additions and 6 deletions
  1. compiler/powerpc64/cgcpu.pas (+30 -3)
  2. compiler/ppcgen/cgppc.pas (+31 -3)
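
The case the commit message refers to typically arises from packed data structures. A minimal, hypothetical example (not part of the commit) that reaches a_load_ref_reg/a_load_reg_ref with an OS_64 size and ref.alignment < 4:

program unalignedexample;

{ q starts at offset 1 inside the packed record, so its address is only
  byte-aligned; before this change the compiler emitted a single 64 bit
  load/store for it, which traps on ppc64 }
type
  tpacked = packed record
    b: byte;
    q: int64;
  end;

var
  r: tpacked;
  v: int64;
begin
  r.b := 0;
  r.q := 1234;   { unaligned 64 bit store -> a_load_reg_ref }
  v := r.q;      { unaligned 64 bit load  -> a_load_ref_reg }
  writeln(v);
end.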

compiler/powerpc64/cgcpu.pas (+30 -3)

@@ -737,7 +737,7 @@ const
 var
   op: tasmop;
   ref2: treference;
-
+  tmpreg: tregister;
 begin
   {$IFDEF EXTDEBUG}
   list.concat(tai_comment.create(strpnew('a_load_ref_reg ' + ref2string(ref))));
@@ -745,12 +745,39 @@ begin
 
   if not (fromsize in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
     internalerror(2002090904);
-  ref2 := ref;
-  fixref(list, ref2);
+
   { the caller is expected to have adjusted the reference already
    in this case }
   if (TCGSize2Size[fromsize] >= TCGSize2Size[tosize]) then
     fromsize := tosize;
+
+  ref2 := ref;
+  fixref(list, ref2);
+
+  { unaligned 64 bit accesses are much slower than unaligned }
+  { 32 bit accesses because they cause a hardware exception  }
+  { (which isn't handled by linux, so there you even get a   }
+  {  crash)                                                  }
+  if (ref.alignment<>0) and
+     (fromsize in [OS_64,OS_S64]) and
+     (ref.alignment<4) then
+    begin
+      if (ref2.base<>NR_NO) and
+         (ref2.index<>NR_NO) then
+        begin
+          tmpreg:=getintregister(list,OS_64);
+          a_op_reg_reg_reg(list,OP_ADD,OS_64,ref2.base,ref2.index,tmpreg);
+          ref2.base:=tmpreg;
+          ref2.index:=NR_NO;
+        end;
+      tmpreg:=getintregister(list,OS_32);
+      a_load_ref_reg(list,OS_32,OS_32,ref2,tmpreg);
+      inc(ref2.offset,4);
+      a_load_ref_reg(list,OS_32,OS_32,ref2,reg); 
+      list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, reg, tmpreg, 32, 0));
+      exit;
+    end;
+
   op := loadinstr[fromsize, ref2.index <> NR_NO, false];
   { there is no LWAU instruction, simulate using ADDI and LWA }
   if (op = A_NOP) then begin
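
In a_load_ref_reg the added block builds the 64 bit result out of two 32 bit loads (which the hardware handles even at unaligned addresses) and merges them with RLDIMI. A rough Pascal sketch of what the emitted sequence computes on the big-endian ppc64 target, using a hypothetical helper name and plain pointer accesses in place of generated instructions:

program loadsketch;

function loadunaligned64(p: pbyte): qword;
var
  hi, lo: longword;
begin
  hi := plongword(p)^;   { first word, loaded into tmpreg }
  inc(p, 4);
  lo := plongword(p)^;   { second word, loaded into reg }
  { the RLDIMI reg,tmpreg,32,0 step: insert the first word into the
    upper 32 bits of the register holding the second word }
  result := (qword(hi) shl 32) or lo;
end;

var
  buf: array[0..8] of byte;
begin
  fillchar(buf, sizeof(buf), $ab);
  writeln(loadunaligned64(@buf[1]));   { @buf[1] is only byte-aligned }
end.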

compiler/ppcgen/cgppc.pas (+31 -3)

@@ -394,19 +394,47 @@ unit cgppc;
 {$endif cpu64bit}
         );
     var
-      op: TAsmOp;
       ref2: TReference;
+      tmpreg: tregister;
+      op: TAsmOp;
     begin
       if not (fromsize in [OS_8..OS_INT,OS_S8..OS_SINT]) then
         internalerror(2002090903);
       if not (tosize in [OS_8..OS_INT,OS_S8..OS_SINT]) then
         internalerror(2002090905);
 
-      ref2 := ref;
-      fixref(list, ref2);
       if tosize in [OS_S8..OS_SINT] then
         { storing is the same for signed and unsigned values }
         tosize := tcgsize(ord(tosize) - (ord(OS_S8) - ord(OS_8)));
+
+      ref2 := ref;
+      fixref(list, ref2);
+
+      { unaligned 64 bit accesses are much slower than unaligned }
+      { 32 bit accesses because they cause a hardware exception  }
+      { (which isn't handled by linux, so there you even get a   }
+      {  crash)                                                  }
+      if (ref.alignment<>0) and
+         (tosize in [OS_64,OS_S64]) and
+         (ref.alignment<4) then
+        begin
+          if (ref2.base<>NR_NO) and
+             (ref2.index<>NR_NO) then
+            begin
+              tmpreg:=getintregister(list,OS_64);
+              a_op_reg_reg_reg(list,OP_ADD,OS_64,ref2.base,ref2.index,tmpreg);
+              ref2.base:=tmpreg;
+              ref2.index:=NR_NO;
+            end;
+          tmpreg:=getintregister(list,OS_64);
+          a_op_const_reg_reg(list,OP_SHR,OS_64,32,reg,tmpreg);
+          inc(ref2.offset,4);
+          a_load_reg_ref(list,OS_32,OS_32,reg,ref2);
+          dec(ref2.offset,4);
+          a_load_reg_ref(list,OS_32,OS_32,tmpreg,ref2);
+          exit;
+        end;
+
       op := storeinstr[tcgsize2unsigned[tosize], ref2.index <> NR_NO, false];
       a_load_store(list, op, reg, ref2);
     end;
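
The cgppc.pas change is the store-side counterpart: the upper 32 bits are extracted with a right shift by 32 and the value is written back as two 32 bit stores. A rough sketch of the effect, again with a hypothetical helper name and the big-endian layout assumed (the two stores appear in the opposite order from the generated code, which does not change the result):

program storesketch;

procedure storeunaligned64(p: pbyte; v: qword);
begin
  plongword(p)^ := longword(v shr 32);   { upper word: the OP_SHR by 32 / tmpreg store }
  inc(p, 4);
  plongword(p)^ := longword(v);          { lower word: the OS_32 store of reg }
end;

var
  buf: array[0..8] of byte;
begin
  storeunaligned64(@buf[1], $0123456789abcdef);
  writeln(buf[1]);   { 1: the most significant byte ends up first }
end.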