Browse Source

* ppc64/linux: enhanced varargs support (passes tprintf test now)
* ppc64/linux: some small tweaks to the copy code for small memory locations

git-svn-id: trunk@1778 -

tom_at_work 19 năm trước
mục cha
commit
93bb997d8f
3 tập tin đã thay đổi với 85 bổ sung và 27 xóa
  1. 20 0
      compiler/ncgcal.pas
  2. 45 17
      compiler/powerpc64/cgcpu.pas
  3. 20 10
      compiler/powerpc64/cpupara.pas

+ 20 - 0
compiler/ncgcal.pas

@@ -231,6 +231,17 @@ implementation
                        cg64.a_param64_ref(exprasmlist,left.location.reference,tempcgpara);
                    end;
 {$endif powerpc}
+{$ifdef powerpc64}
+                 LOC_REGISTER,
+                 LOC_CREGISTER :
+                   begin
+                     { ppc64 abi passes floats of varargs in integer registers, so force a store }
+                     location_force_mem(exprasmlist,left.location);
+                     { force integer size }
+                     left.location.size:=int_cgsize(tcgsize2size[tempcgpara.location^.size]);
+                     cg.a_param_ref(exprasmlist,left.location.size,left.location.reference,tempcgpara)
+                   end;
+{$endif powerpc64}
 {$if defined(sparc) or defined(arm)}
                  { sparc and arm pass floats in normal registers }
                  LOC_REGISTER,
@@ -273,6 +284,15 @@ implementation
                        cg64.a_param64_ref(exprasmlist,left.location.reference,tempcgpara);
                    end;
 {$endif powerpc}
+{$ifdef powerpc64}
+                 LOC_REGISTER,
+                 LOC_CREGISTER :
+                   begin
+                     { force integer size }
+                     left.location.size:=int_cgsize(tcgsize2size[tempcgpara.location^.size]);
+                     cg.a_param_ref(exprasmlist,left.location.size,left.location.reference,tempcgpara)
+                   end;
+{$endif powerpc64}
 {$if defined(sparc) or defined(arm) }
                  { sparc and arm pass floats in normal registers }
                  LOC_REGISTER,

+ 45 - 17
compiler/powerpc64/cgcpu.pas

@@ -357,11 +357,13 @@ var
   tmpref, ref: treference;
   location: pcgparalocation;
   sizeleft: aint;
+  adjusttail : boolean;
 
 begin
   location := paraloc.location;
   tmpref := r;
   sizeleft := paraloc.intsize;
+  adjusttail := false;
   while assigned(location) do begin
     case location^.loc of
       LOC_REGISTER, LOC_CREGISTER:
@@ -370,6 +372,10 @@ begin
             a_load_ref_reg(list, size, location^.size, tmpref,
               location^.register)
           else
+          {$IFDEF extdebug}
+            list.concat(tai_comment.create(strpnew('a_param_ref with OS_NO')));
+          {$ENDIF extdebug}
+
             { load non-integral sized memory location into register. This 
              memory location be 1-sizeleft byte sized.
              Always assume that this memory area is properly aligned, eg. start
@@ -421,7 +427,18 @@ begin
                 { still > 8 bytes to load, so load data single register now }
                 a_load_ref_reg(list, location^.size, location^.size, tmpref,
                   location^.register);
+                { the block is > 8 bytes, so we have to store any bytes not
+                 a multiple of the register size beginning with the MSB }
+                adjusttail := true;
             end; 
+(*          
+            { Comment this in (for gcc compat) and be prepared for a whole bunch of errors :/ }
+            
+            if (adjusttail) and (sizeleft < tcgsize2size[OS_INT]) then
+              a_op_const_reg(list, OP_SHL, OS_INT, 
+                (tcgsize2size[OS_INT] - sizeleft) * tcgsize2size[OS_INT], 
+                location^.register);
+*)
         end;
       LOC_REFERENCE:
         begin
@@ -1489,23 +1506,33 @@ begin
 {$IFDEF extdebug}
   if len > high(aint) then
     internalerror(2002072704);
+  list.concat(tai_comment.create(strpnew('g_concatcopy')));
 {$ENDIF extdebug}
-  { make sure short loads are handled as optimally as possible }
-
-  if (len <= maxmoveunit) and
-    (byte(len) in [1, 2, 4, 8]) then
-  begin
-    if len < 8 then
-    begin
-      size := int_cgsize(len);
-      a_load_ref_ref(list, size, size, source, dest);
-    end
-    else
-    begin
-      a_reg_alloc(list, NR_F0);
-      a_loadfpu_ref_reg(list, OS_F64, source, NR_F0);
-      a_loadfpu_reg_ref(list, OS_F64, NR_F0, dest);
-      a_reg_dealloc(list, NR_F0);
+  { make sure short loads are handled as optimally as possible;
+   note that the data here never overlaps, so we can do a forward
+   copy at all times.
+   NOTE: maybe use some scratch registers to pair load/store instructions
+  }
+
+  if (len <= maxmoveunit) then begin
+    src := source; dst := dest;
+    while (len <> 0) do begin
+      if (len = 8) then begin
+        a_load_ref_ref(list, OS_64, OS_64, src, dst);    
+        dec(len, 8);
+      end else if (len >= 4) then begin
+        a_load_ref_ref(list, OS_32, OS_32, src, dst);    
+        inc(src.offset, 4); inc(dst.offset, 4);
+        dec(len, 4);
+      end else if (len >= 2) then begin
+        a_load_ref_ref(list, OS_16, OS_16, src, dst);    
+        inc(src.offset, 2); inc(dst.offset, 2);
+        dec(len, 2);
+      end else begin
+        a_load_ref_ref(list, OS_8, OS_8, src, dst);    
+        inc(src.offset, 1); inc(dst.offset, 1);
+        dec(len, 1);
+      end;
     end;
     exit;
   end;
@@ -1546,7 +1573,7 @@ begin
     list.concat(taicpu.op_reg_reg_const(A_SUBI, src.base, src.base, 8));
     list.concat(taicpu.op_reg_reg_const(A_SUBI, dst.base, dst.base, 8));
     countreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
-    a_load_const_reg(list, OS_32, count, countreg);
+    a_load_const_reg(list, OS_64, count, countreg);
     { explicitly allocate F0 since it can be used safely here
      (for holding data that's being copied) }
     a_reg_alloc(list, NR_F0);
@@ -1793,6 +1820,7 @@ begin
     tmpref.symbol := ref.symbol;
     tmpref.relsymbol := ref.relsymbol;
     tmpref.offset := ref.offset;
+
     if (ref.base <> NR_NO) then begin
       { As long as the TOC isn't working we try to achieve highest speed (in this
       case by allowing instructions execute in parallel) as possible at the cost

+ 20 - 10
compiler/powerpc64/cpupara.pas

@@ -42,8 +42,7 @@ type
 
     procedure getintparaloc(calloption: tproccalloption; nr: longint; var
       cgpara: TCGPara); override;
-    function create_paraloc_info(p: tabstractprocdef; side: tcallercallee):
-      longint; override;
+    function create_paraloc_info(p: tabstractprocdef; side: tcallercallee): longint; override;
     function create_varargs_paraloc_info(p: tabstractprocdef; varargspara:
       tvarargsparalist): longint; override;
     procedure create_funcretloc_info(p: tabstractprocdef; side: tcallercallee);
@@ -54,7 +53,7 @@ type
     function create_paraloc_info_intern(p: tabstractprocdef; side:
       tcallercallee; paras: tparalist;
       var curintreg, curfloatreg, curmmreg: tsuperregister; var
-        cur_stack_offset: aword): longint;
+        cur_stack_offset: aword; isVararg : boolean): longint;
     function parseparaloc(p: tparavarsym; const s: string): boolean; override;
   end;
 
@@ -169,7 +168,7 @@ begin
   end;
   case def.deftype of
     variantdef,
-      formaldef:
+    formaldef:
       result := true;
     recorddef:
       result :=
@@ -256,7 +255,7 @@ begin
   init_values(curintreg, curfloatreg, curmmreg, cur_stack_offset);
 
   result := create_paraloc_info_intern(p, side, p.paras, curintreg, curfloatreg,
-    curmmreg, cur_stack_offset);
+    curmmreg, cur_stack_offset, false);
 
   create_funcretloc_info(p, side);
 end;
@@ -264,7 +263,7 @@ end;
 function tppcparamanager.create_paraloc_info_intern(p: tabstractprocdef; side:
   tcallercallee; paras: tparalist;
   var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset:
-  aword): longint;
+  aword; isVararg : boolean): longint;
 var
   stack_offset: longint;
   paralen: aint;
@@ -348,6 +347,18 @@ begin
         end;
       end
     end;
+
+    { patch FPU values into integer registers if we currently have
+     to pass them as vararg parameters     
+    }
+    if (isVararg) and (paradef.deftype = floatdef) then begin
+      loc := LOC_REGISTER;
+      if paracgsize = OS_F64 then
+        paracgsize := OS_64
+      else
+        paracgsize := OS_32;
+    end;
+
     hp.paraloc[side].alignment := std_param_align;
     hp.paraloc[side].size := paracgsize;
     hp.paraloc[side].intsize := paralen;
@@ -360,8 +371,7 @@ begin
     { can become < 0 for e.g. 3-byte records }
     while (paralen > 0) do begin
       paraloc := hp.paraloc[side].add_location;
-      if (loc = LOC_REGISTER) and
-        (nextintreg <= RS_R10) then begin
+      if (loc = LOC_REGISTER) and (nextintreg <= RS_R10) then begin
         paraloc^.loc := loc;
         { make sure we don't lose whether or not the type is signed }
         if (paradef.deftype <> orddef) then
@@ -430,11 +440,11 @@ begin
   firstfloatreg := curfloatreg;
 
   result := create_paraloc_info_intern(p, callerside, p.paras, curintreg,
-    curfloatreg, curmmreg, cur_stack_offset);
+    curfloatreg, curmmreg, cur_stack_offset, false);
   if (p.proccalloption in [pocall_cdecl, pocall_cppdecl]) then begin
     { just continue loading the parameters in the registers }
     result := create_paraloc_info_intern(p, callerside, varargspara, curintreg,
-      curfloatreg, curmmreg, cur_stack_offset);
+      curfloatreg, curmmreg, cur_stack_offset, true);
     { varargs routines have to reserve at least 64 bytes for the PPC64 ABI }
     if (result < 64) then
       result := 64;