Przeglądaj źródła

* ppc64/linux: enhanced varargs support (passes tprintf test now)
* ppc64/linux: some small tweaks to the copy code for small memory locations

git-svn-id: trunk@1778 -

tom_at_work 19 lat temu
rodzic
commit
93bb997d8f
3 zmienionych plików z 85 dodań i 27 usunięć
  1. 20 0
      compiler/ncgcal.pas
  2. 45 17
      compiler/powerpc64/cgcpu.pas
  3. 20 10
      compiler/powerpc64/cpupara.pas

+ 20 - 0
compiler/ncgcal.pas

@@ -231,6 +231,17 @@ implementation
                        cg64.a_param64_ref(exprasmlist,left.location.reference,tempcgpara);
                        cg64.a_param64_ref(exprasmlist,left.location.reference,tempcgpara);
                    end;
                    end;
 {$endif powerpc}
 {$endif powerpc}
+{$ifdef powerpc64}
+                 LOC_REGISTER,
+                 LOC_CREGISTER :
+                   begin
+                     { ppc64 abi passes floats of varargs in integer registers, so force a store }
+                     location_force_mem(exprasmlist,left.location);
+                     { force integer size }
+                     left.location.size:=int_cgsize(tcgsize2size[tempcgpara.location^.size]);
+                     cg.a_param_ref(exprasmlist,left.location.size,left.location.reference,tempcgpara)
+                   end;
+{$endif powerpc64}
 {$if defined(sparc) or defined(arm)}
 {$if defined(sparc) or defined(arm)}
                  { sparc and arm pass floats in normal registers }
                  { sparc and arm pass floats in normal registers }
                  LOC_REGISTER,
                  LOC_REGISTER,
@@ -273,6 +284,15 @@ implementation
                        cg64.a_param64_ref(exprasmlist,left.location.reference,tempcgpara);
                        cg64.a_param64_ref(exprasmlist,left.location.reference,tempcgpara);
                    end;
                    end;
 {$endif powerpc}
 {$endif powerpc}
+{$ifdef powerpc64}
+                 LOC_REGISTER,
+                 LOC_CREGISTER :
+                   begin
+                     { force integer size }
+                     left.location.size:=int_cgsize(tcgsize2size[tempcgpara.location^.size]);
+                     cg.a_param_ref(exprasmlist,left.location.size,left.location.reference,tempcgpara)
+                   end;
+{$endif powerpc64}
 {$if defined(sparc) or defined(arm) }
 {$if defined(sparc) or defined(arm) }
                  { sparc and arm pass floats in normal registers }
                  { sparc and arm pass floats in normal registers }
                  LOC_REGISTER,
                  LOC_REGISTER,

+ 45 - 17
compiler/powerpc64/cgcpu.pas

@@ -357,11 +357,13 @@ var
   tmpref, ref: treference;
   tmpref, ref: treference;
   location: pcgparalocation;
   location: pcgparalocation;
   sizeleft: aint;
   sizeleft: aint;
+  adjusttail : boolean;
 
 
 begin
 begin
   location := paraloc.location;
   location := paraloc.location;
   tmpref := r;
   tmpref := r;
   sizeleft := paraloc.intsize;
   sizeleft := paraloc.intsize;
+  adjusttail := false;
   while assigned(location) do begin
   while assigned(location) do begin
     case location^.loc of
     case location^.loc of
       LOC_REGISTER, LOC_CREGISTER:
       LOC_REGISTER, LOC_CREGISTER:
@@ -370,6 +372,10 @@ begin
             a_load_ref_reg(list, size, location^.size, tmpref,
             a_load_ref_reg(list, size, location^.size, tmpref,
               location^.register)
               location^.register)
           else
           else
+          {$IFDEF extdebug}
+            list.concat(tai_comment.create(strpnew('a_param_ref with OS_NO')));
+          {$ENDIF extdebug}
+
             { load non-integral sized memory location into register. This 
             { load non-integral sized memory location into register. This 
              memory location may be 1 to sizeleft bytes in size.
              memory location may be 1 to sizeleft bytes in size.
              Always assume that this memory area is properly aligned, eg. start
              Always assume that this memory area is properly aligned, eg. start
@@ -421,7 +427,18 @@ begin
                 { still > 8 bytes to load, so load data single register now }
                 { still > 8 bytes to load, so load data single register now }
                 a_load_ref_reg(list, location^.size, location^.size, tmpref,
                 a_load_ref_reg(list, location^.size, location^.size, tmpref,
                   location^.register);
                   location^.register);
+                { the block is > 8 bytes, so we have to store any bytes not
+                 a multiple of the register size beginning with the MSB }
+                adjusttail := true;
             end; 
             end; 
+(*          
+            { Uncomment this (for gcc compatibility) and be prepared for a whole bunch of errors :/ }
+            
+            if (adjusttail) and (sizeleft < tcgsize2size[OS_INT]) then
+              a_op_const_reg(list, OP_SHL, OS_INT, 
+                (tcgsize2size[OS_INT] - sizeleft) * tcgsize2size[OS_INT], 
+                location^.register);
+*)
         end;
         end;
       LOC_REFERENCE:
       LOC_REFERENCE:
         begin
         begin
@@ -1489,23 +1506,33 @@ begin
 {$IFDEF extdebug}
 {$IFDEF extdebug}
   if len > high(aint) then
   if len > high(aint) then
     internalerror(2002072704);
     internalerror(2002072704);
+  list.concat(tai_comment.create(strpnew('g_concatcopy')));
 {$ENDIF extdebug}
 {$ENDIF extdebug}
-  { make sure short loads are handled as optimally as possible }
-
-  if (len <= maxmoveunit) and
-    (byte(len) in [1, 2, 4, 8]) then
-  begin
-    if len < 8 then
-    begin
-      size := int_cgsize(len);
-      a_load_ref_ref(list, size, size, source, dest);
-    end
-    else
-    begin
-      a_reg_alloc(list, NR_F0);
-      a_loadfpu_ref_reg(list, OS_F64, source, NR_F0);
-      a_loadfpu_reg_ref(list, OS_F64, NR_F0, dest);
-      a_reg_dealloc(list, NR_F0);
+  { make sure short loads are handled as optimally as possible;
+   note that the data here never overlaps, so we can do a forward
+   copy at all times.
+   NOTE: maybe use some scratch registers to pair load/store instructions
+  }
+
+  if (len <= maxmoveunit) then begin
+    src := source; dst := dest;
+    while (len <> 0) do begin
+      if (len = 8) then begin
+        a_load_ref_ref(list, OS_64, OS_64, src, dst);    
+        dec(len, 8);
+      end else if (len >= 4) then begin
+        a_load_ref_ref(list, OS_32, OS_32, src, dst);    
+        inc(src.offset, 4); inc(dst.offset, 4);
+        dec(len, 4);
+      end else if (len >= 2) then begin
+        a_load_ref_ref(list, OS_16, OS_16, src, dst);    
+        inc(src.offset, 2); inc(dst.offset, 2);
+        dec(len, 2);
+      end else begin
+        a_load_ref_ref(list, OS_8, OS_8, src, dst);    
+        inc(src.offset, 1); inc(dst.offset, 1);
+        dec(len, 1);
+      end;
     end;
     end;
     exit;
     exit;
   end;
   end;
@@ -1546,7 +1573,7 @@ begin
     list.concat(taicpu.op_reg_reg_const(A_SUBI, src.base, src.base, 8));
     list.concat(taicpu.op_reg_reg_const(A_SUBI, src.base, src.base, 8));
     list.concat(taicpu.op_reg_reg_const(A_SUBI, dst.base, dst.base, 8));
     list.concat(taicpu.op_reg_reg_const(A_SUBI, dst.base, dst.base, 8));
     countreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
     countreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
-    a_load_const_reg(list, OS_32, count, countreg);
+    a_load_const_reg(list, OS_64, count, countreg);
    { explicitly allocate F0 since it can be used safely here
    { explicitly allocate F0 since it can be used safely here
     (for holding data that's being copied) }
     (for holding data that's being copied) }
     a_reg_alloc(list, NR_F0);
     a_reg_alloc(list, NR_F0);
@@ -1793,6 +1820,7 @@ begin
     tmpref.symbol := ref.symbol;
     tmpref.symbol := ref.symbol;
     tmpref.relsymbol := ref.relsymbol;
     tmpref.relsymbol := ref.relsymbol;
     tmpref.offset := ref.offset;
     tmpref.offset := ref.offset;
+
     if (ref.base <> NR_NO) then begin
     if (ref.base <> NR_NO) then begin
      { As long as the TOC isn't working we try to achieve the highest possible speed (in this
      { As long as the TOC isn't working we try to achieve the highest possible speed (in this
      case by allowing instructions to execute in parallel) at the cost
      case by allowing instructions to execute in parallel) at the cost

+ 20 - 10
compiler/powerpc64/cpupara.pas

@@ -42,8 +42,7 @@ type
 
 
     procedure getintparaloc(calloption: tproccalloption; nr: longint; var
     procedure getintparaloc(calloption: tproccalloption; nr: longint; var
       cgpara: TCGPara); override;
       cgpara: TCGPara); override;
-    function create_paraloc_info(p: tabstractprocdef; side: tcallercallee):
-      longint; override;
+    function create_paraloc_info(p: tabstractprocdef; side: tcallercallee): longint; override;
     function create_varargs_paraloc_info(p: tabstractprocdef; varargspara:
     function create_varargs_paraloc_info(p: tabstractprocdef; varargspara:
       tvarargsparalist): longint; override;
       tvarargsparalist): longint; override;
     procedure create_funcretloc_info(p: tabstractprocdef; side: tcallercallee);
     procedure create_funcretloc_info(p: tabstractprocdef; side: tcallercallee);
@@ -54,7 +53,7 @@ type
     function create_paraloc_info_intern(p: tabstractprocdef; side:
     function create_paraloc_info_intern(p: tabstractprocdef; side:
       tcallercallee; paras: tparalist;
       tcallercallee; paras: tparalist;
       var curintreg, curfloatreg, curmmreg: tsuperregister; var
       var curintreg, curfloatreg, curmmreg: tsuperregister; var
-        cur_stack_offset: aword): longint;
+        cur_stack_offset: aword; isVararg : boolean): longint;
     function parseparaloc(p: tparavarsym; const s: string): boolean; override;
     function parseparaloc(p: tparavarsym; const s: string): boolean; override;
   end;
   end;
 
 
@@ -169,7 +168,7 @@ begin
   end;
   end;
   case def.deftype of
   case def.deftype of
     variantdef,
     variantdef,
-      formaldef:
+    formaldef:
       result := true;
       result := true;
     recorddef:
     recorddef:
       result :=
       result :=
@@ -256,7 +255,7 @@ begin
   init_values(curintreg, curfloatreg, curmmreg, cur_stack_offset);
   init_values(curintreg, curfloatreg, curmmreg, cur_stack_offset);
 
 
   result := create_paraloc_info_intern(p, side, p.paras, curintreg, curfloatreg,
   result := create_paraloc_info_intern(p, side, p.paras, curintreg, curfloatreg,
-    curmmreg, cur_stack_offset);
+    curmmreg, cur_stack_offset, false);
 
 
   create_funcretloc_info(p, side);
   create_funcretloc_info(p, side);
 end;
 end;
@@ -264,7 +263,7 @@ end;
 function tppcparamanager.create_paraloc_info_intern(p: tabstractprocdef; side:
 function tppcparamanager.create_paraloc_info_intern(p: tabstractprocdef; side:
   tcallercallee; paras: tparalist;
   tcallercallee; paras: tparalist;
   var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset:
   var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset:
-  aword): longint;
+  aword; isVararg : boolean): longint;
 var
 var
   stack_offset: longint;
   stack_offset: longint;
   paralen: aint;
   paralen: aint;
@@ -348,6 +347,18 @@ begin
         end;
         end;
       end
       end
     end;
     end;
+
+    { patch FPU values into integer registers if we currently have
+     to pass them as vararg parameters     
+    }
+    if (isVararg) and (paradef.deftype = floatdef) then begin
+      loc := LOC_REGISTER;
+      if paracgsize = OS_F64 then
+        paracgsize := OS_64
+      else
+        paracgsize := OS_32;
+    end;
+
     hp.paraloc[side].alignment := std_param_align;
     hp.paraloc[side].alignment := std_param_align;
     hp.paraloc[side].size := paracgsize;
     hp.paraloc[side].size := paracgsize;
     hp.paraloc[side].intsize := paralen;
     hp.paraloc[side].intsize := paralen;
@@ -360,8 +371,7 @@ begin
     { can become < 0 for e.g. 3-byte records }
     { can become < 0 for e.g. 3-byte records }
     while (paralen > 0) do begin
     while (paralen > 0) do begin
       paraloc := hp.paraloc[side].add_location;
       paraloc := hp.paraloc[side].add_location;
-      if (loc = LOC_REGISTER) and
-        (nextintreg <= RS_R10) then begin
+      if (loc = LOC_REGISTER) and (nextintreg <= RS_R10) then begin
         paraloc^.loc := loc;
         paraloc^.loc := loc;
         { make sure we don't lose whether or not the type is signed }
         { make sure we don't lose whether or not the type is signed }
         if (paradef.deftype <> orddef) then
         if (paradef.deftype <> orddef) then
@@ -430,11 +440,11 @@ begin
   firstfloatreg := curfloatreg;
   firstfloatreg := curfloatreg;
 
 
   result := create_paraloc_info_intern(p, callerside, p.paras, curintreg,
   result := create_paraloc_info_intern(p, callerside, p.paras, curintreg,
-    curfloatreg, curmmreg, cur_stack_offset);
+    curfloatreg, curmmreg, cur_stack_offset, false);
   if (p.proccalloption in [pocall_cdecl, pocall_cppdecl]) then begin
   if (p.proccalloption in [pocall_cdecl, pocall_cppdecl]) then begin
     { just continue loading the parameters in the registers }
     { just continue loading the parameters in the registers }
     result := create_paraloc_info_intern(p, callerside, varargspara, curintreg,
     result := create_paraloc_info_intern(p, callerside, varargspara, curintreg,
-      curfloatreg, curmmreg, cur_stack_offset);
+      curfloatreg, curmmreg, cur_stack_offset, true);
     { varargs routines have to reserve at least 64 bytes for the PPC64 ABI }
     { varargs routines have to reserve at least 64 bytes for the PPC64 ABI }
     if (result < 64) then
     if (result < 64) then
       result := 64;
       result := 64;