Browse Source

* compile with -dppc603 to not use unaligned float loads in move() and
g_concatcopy, because the 603 and 604 take an exception for those
(and netbsd doesn't even handle those in the kernel). There are
still some of those left that could cause problems though (e.g.
in the set helpers)

Jonas Maebe 22 years ago
parent
commit
c70f75f1b7
2 changed files with 104 additions and 4 deletions
  1. compiler/powerpc/cgcpu.pas (+68 additions, −3 deletions)
  2. rtl/powerpc/powerpc.inc (+36 additions, −1 deletion)

+ 68 - 3
compiler/powerpc/cgcpu.pas

@@ -1782,6 +1782,14 @@ const
 
 { ************* concatcopy ************ }
 
+{$ifndef ppc603}
+  const
+    maxmoveunit = 8;
+{$else ppc603}
+  const
+    maxmoveunit = 4;
+{$endif ppc603}
+
     procedure tcgppc.g_concatcopy(list : taasmoutput;const source,dest : treference;len : aword; delsource,loadref : boolean);
 
       var
@@ -1800,7 +1808,7 @@ const
 
         { make sure short loads are handled as optimally as possible }
         if not loadref then
-          if (len <= 8) and
+          if (len <= maxmoveunit) and
              (byte(len) in [1,2,4,8]) then
             begin
               if len < 8 then
@@ -1828,7 +1836,8 @@ const
               exit;
             end;
 
-        count := len div 8;
+        count := len div maxmoveunit;
+
         reference_reset(src);
         reference_reset(dst);
         { load the address of source into src.base }
@@ -1870,6 +1879,7 @@ const
             orgdst := true;
           end;
 
+{$ifndef ppc603}
         if count > 4 then
           { generate a loop }
           begin
@@ -1927,6 +1937,54 @@ const
             inc(dst.offset,4);
             a_reg_dealloc(list,r);
           end;
+{$else not ppc603}
+        if count > 4 then
+          { generate a loop }
+          begin
+            { the offsets are zero after the a_loadaddress_ref_reg and just }
+            { have to be set to 4. I put an Inc there so debugging may be   }
+            { easier (should offset be different from zero here, it will be }
+            { easy to notice in the generated assembler                     }
+            inc(dst.offset,4);
+            inc(src.offset,4);
+            list.concat(taicpu.op_reg_reg_const(A_SUBI,src.base,src.base,4));
+            list.concat(taicpu.op_reg_reg_const(A_SUBI,dst.base,dst.base,4));
+            countreg := get_scratch_reg_int(list,OS_INT);
+            a_load_const_reg(list,OS_32,count,countreg);
+            { explicitely allocate R_0 since it can be used safely here }
+            { (for holding date that's being copied)                    }
+            r.enum:=R_INTREGISTER;
+            r.number:=NR_R0;
+            a_reg_alloc(list,r);
+            objectlibrary.getlabel(lab);
+            a_label(list, lab);
+            list.concat(taicpu.op_reg_reg_const(A_SUBIC_,countreg,countreg,1));
+            list.concat(taicpu.op_reg_ref(A_LWZU,r,src));
+            list.concat(taicpu.op_reg_ref(A_STWU,r,dst));
+            a_jmp(list,A_BC,C_NE,0,lab);
+            free_scratch_reg(list,countreg);
+            a_reg_dealloc(list,r);
+            len := len mod 4;
+          end;
+
+        count := len div 4;
+        if count > 0 then
+          { unrolled loop }
+          begin
+            r.enum:=R_INTREGISTER;
+            r.number:=NR_R0;
+            a_reg_alloc(list,r);
+            for count2 := 1 to count do
+              begin
+                a_load_ref_reg(list,OS_32,src,r);
+                a_load_reg_ref(list,OS_32,r,dst);
+                inc(src.offset,4);
+                inc(dst.offset,4);
+              end;
+            a_reg_dealloc(list,r);
+            len := len mod 4;
+          end;
+{$endif not ppc603}
        { copy the leftovers }
        if (len and 2) <> 0 then
          begin
@@ -2484,7 +2542,14 @@ begin
 end.
 {
   $Log$
-  Revision 1.99  2003-05-29 10:06:09  jonas
+  Revision 1.100  2003-05-29 21:17:27  jonas
+    * compile with -dppc603 to not use unaligned float loads in move() and
+      g_concatcopy, because the 603 and 604 take an exception for those
+      (and netbsd doesn't even handle those in the kernel). There are
+      still some of those left that could cause problems though (e.g.
+      in the set helpers)
+
+  Revision 1.99  2003/05/29 10:06:09  jonas
     * also free temps in g_concatcopy if delsource is true
 
   Revision 1.98  2003/05/28 23:58:18  jonas

+ 36 - 1
rtl/powerpc/powerpc.inc

@@ -178,8 +178,10 @@ LMove4ByteAlignLoop:
           {  while not aligned, continue  }
           bne     cr0,LMove4ByteAlignLoop
 
+{$ifndef ppc603}
           { check for 32 byte alignment }
           andi.   r7,r4,31
+{$endif not ppc603}
           { we are going to copy one byte again (the one at the newly }
           { aligned address), so increase count byte 1                }
           addi    r5,r5,1
@@ -188,6 +190,7 @@ LMove4ByteAlignLoop:
           {  if 11 <= count < 63, copy using dwords }
           blt     cr7,LMoveDWords
 
+{$ifndef ppc603}
           { # of dwords to copy to reach 32 byte alignment (*4) }
           { (depends on forward/backward copy)                  }
 
@@ -202,6 +205,8 @@ LMove4ByteAlignLoop:
           not     r8, r6
           add     r7, r7, r8
           xor     r7, r7, r8
+{$endif not ppc603}
+
           { multiply the update count with 4 }
           slwi    r10,r10,2
           slwi    r6,r6,2
@@ -209,6 +214,7 @@ LMove4ByteAlignLoop:
           add     r3,r3,r6
           add     r4,r4,r6
 
+{$ifndef ppc603}
           beq     cr0,LMove32BytesAligned
 L32BytesAlignMoveLoop:
           {  count >= 39 -> align to 8 byte boundary and then use the FPU  }
@@ -226,12 +232,17 @@ LMove32BytesAligned:
           andi.   r5,r5,31
           { to decide if we will do some dword stores (instead of only }
           { byte stores) afterwards or not                             }
+{$else not ppc603}
+          srwi    r0,r5,4
+          andi.   r5,r5,15
+{$endif not ppc603}
           cmpwi   cr1,r5,11
           mtctr   r0
 
           {  r0 := count div 4, will be moved to ctr when copying dwords  }
           srwi    r0,r5,2
 
+{$ifndef ppc603}
           {  adjust the update count: it will now be 8 or -8 depending on overlap  }
           slwi    r10,r10,1
 
@@ -271,6 +282,18 @@ LMove32ByteDcbz:
           stfdux  f3,r4,r10
           bdnz    LMove32ByteDcbz
 LMove32ByteLoopDone:
+{$else not ppc603}
+LMove16ByteLoop:
+          lwzux   r11,r3,r10
+          lwzux   r7,r3,r10
+          lwzux   r8,r3,r10
+          lwzux   r9,r3,r10
+          stwux   r11,r4,r10
+          stwux   r7,r4,r10
+          stwux   r8,r4,r10
+          stwux   r9,r4,r10
+          bdnz    LMove16ByteLoop
+{$endif not ppc603}
 
           { cr0*4+eq is true if "count and 31" = 0 }
           beq     cr0,LMoveDone
@@ -278,8 +301,13 @@ LMove32ByteLoopDone:
           {  make r10 again -1 or 1, but first adjust source/dest pointers }
           sub     r3,r3,r6
           sub     r4,r4,r6
+{$ifndef ppc603}
           srawi   r10,r10,3
           srawi   r6,r6,3
+{$else not ppc603}
+          srawi   r10,r10,2
+          srawi   r6,r6,2
+{$endif not ppc603}
 
           { cr1 contains whether count <= 11 }
           ble     cr1,LMoveBytes
@@ -932,7 +960,14 @@ end ['R3','R10'];
 
 {
   $Log$
-  Revision 1.48  2003-05-29 14:32:54  jonas
+  Revision 1.49  2003-05-29 21:17:27  jonas
+    * compile with -dppc603 to not use unaligned float loads in move() and
+      g_concatcopy, because the 603 and 604 take an exception for those
+      (and netbsd doesn't even handle those in the kernel). There are
+      still some of those left that could cause problems though (e.g.
+      in the set helpers)
+
+  Revision 1.48  2003/05/29 14:32:54  jonas
     * changed dcbst to dcbtst (former means "flush cache block to memory,
       the latter means "I will soon store something to that cache block")