Browse Source

* changed tprocdef.funcretloc[] from a tlocation into a tcgpara so it can
represent complex locations (required for full x86-64 ABI support,
which is not yet implemented) -> lots of special result handling
code has been removed and replaced by the parameter handling
routines
+ added support for composite parameters (and hence function
results) to tcg.a_load_ref_cgpara() (so it can be used for
handling, e.g., 64 bit parameters on 32 bit platforms)
* the above fixed writing past the end of allocated memory when
handling records returned in registers on x86-64 whose size is
not a multiple of 8 bytes (mantis #16357)
- removed the x86-64 and PPC specific versions of a_load_ref_cgpara(),
as they are now handled correctly by the generic version
* moved the responsibility of allocating tcgpara cpu registers
(using paramanager.allocparaloc()) from the callers of
cg.a_load*_cgpara() to the cg.a_load*_cgpara() methods
themselves (so the register allocation can be done efficiently
when dealing with function results)
* for the above, renamed paramanager.alloc/freeparaloc() to
paramanager.alloc/freecgpara(), and use paramanager.allocparaloc()
to allocate individual pcgparalocations instead
* fixed the register size of SSE2 function result registers for
x86-64 (when used for floating point), which results in removing
a few superfluous "movs? %xmm0,%xmm0" instructions
* fixed compilation of paramanagers of avr, m68k and mips after r13695
and also updated them for these new changes

git-svn-id: trunk@15350 -

Jonas Maebe 15 years ago
parent
commit
283018a3bf

+ 14 - 8
compiler/arm/cgcpu.pas

@@ -418,6 +418,7 @@ unit cgcpu;
         ref: treference;
       begin
         paraloc.check_simple_location;
+        paramanager.allocparaloc(list,paraloc.location);
         case paraloc.location^.loc of
           LOC_REGISTER,LOC_CREGISTER:
             a_load_const_reg(list,size,a,paraloc.location^.register);
@@ -445,6 +446,7 @@ unit cgcpu;
         sizeleft := paraloc.intsize;
         while assigned(location) do
           begin
+            paramanager.allocparaloc(list,location);
             case location^.loc of
               LOC_REGISTER,LOC_CREGISTER:
                 a_load_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
@@ -488,6 +490,7 @@ unit cgcpu;
         tmpreg: tregister;
       begin
         paraloc.check_simple_location;
+        paramanager.allocparaloc(list,paraloc.location);
         case paraloc.location^.loc of
           LOC_REGISTER,LOC_CREGISTER:
             a_loadaddr_ref_reg(list,r,paraloc.location^.register);
@@ -1238,12 +1241,18 @@ unit cgcpu;
           begin
             case hloc^.loc of
               LOC_FPUREGISTER,LOC_CFPUREGISTER:
-                a_loadfpu_ref_reg(list,size,size,ref,hloc^.register);
+                begin
+                  paramanager.allocparaloc(list,paraloc.location);
+                  a_loadfpu_ref_reg(list,size,size,ref,hloc^.register);
+                end;
               LOC_REGISTER :
                 case hloc^.size of
                   OS_32,
                   OS_F32:
-                    a_load_ref_reg(list,OS_32,OS_32,href,hloc^.register);
+                    begin
+                      paramanager.allocparaloc(list,paraloc.location);
+                      a_load_ref_reg(list,OS_32,OS_32,href,hloc^.register);
+                    end;
                   OS_64,
                   OS_F64:
                     cg64.a_load64_ref_cgpara(list,href,paraloc);
@@ -1813,15 +1822,12 @@ unit cgcpu;
         paramanager.getintparaloc(pocall_default,1,paraloc1);
         paramanager.getintparaloc(pocall_default,2,paraloc2);
         paramanager.getintparaloc(pocall_default,3,paraloc3);
-        paramanager.allocparaloc(list,paraloc3);
         a_load_const_cgpara(list,OS_INT,len,paraloc3);
-        paramanager.allocparaloc(list,paraloc2);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
-        paramanager.allocparaloc(list,paraloc2);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
-        paramanager.freeparaloc(list,paraloc3);
-        paramanager.freeparaloc(list,paraloc2);
-        paramanager.freeparaloc(list,paraloc1);
+        paramanager.freecgpara(list,paraloc3);
+        paramanager.freecgpara(list,paraloc2);
+        paramanager.freecgpara(list,paraloc1);
         alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
         alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
         a_call_name(list,'FPC_MOVE',false);

+ 50 - 30
compiler/arm/cpupara.pas

@@ -42,7 +42,7 @@ unit cpupara;
           procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara:TCGPara);override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
-          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;override;
+          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;override;
          private
           procedure init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword);
           function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
@@ -451,32 +451,45 @@ unit cpupara;
       end;
 
 
-    function  tarmparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;
+    function  tarmparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;
       var
+        paraloc : pcgparalocation;
         retcgsize  : tcgsize;
       begin
-        { Constructors return self instead of a boolean }
-        if (p.proctypeoption=potype_constructor) then
-          retcgsize:=OS_ADDR
-        else
-          retcgsize:=def_cgsize(def);
-
-        location_reset(result,LOC_INVALID,OS_NO);
-        result.size:=retcgsize;
-
+        result.init;
+        result.alignment:=get_para_align(p.proccalloption);
         { void has no location }
         if is_void(def) then
           begin
-            location_reset(result,LOC_VOID,OS_NO);
+            paraloc:=result.add_location;
+            result.size:=OS_NO;
+            result.intsize:=0;
+            paraloc^.size:=OS_NO;
+            paraloc^.loc:=LOC_VOID;
             exit;
           end;
+        { Constructors return self instead of a boolean }
+        if (p.proctypeoption=potype_constructor) then
+          begin
+            retcgsize:=OS_ADDR;
+            result.intsize:=sizeof(pint);
+          end
+        else
+          begin
+            retcgsize:=def_cgsize(def);
+            result.intsize:=def.size;
+          end;
+        result.size:=retcgsize;
         { Return is passed as var parameter }
         if ret_in_param(def,p.proccalloption) then
           begin
-            result.loc:=LOC_REFERENCE;
-            result.size:=retcgsize;
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_REFERENCE;
+            paraloc^.size:=retcgsize;
             exit;
           end;
+
+        paraloc:=result.add_location;
         { Return in FPU register? }
         if def.typ=floatdef then
           begin
@@ -488,18 +501,20 @@ unit cpupara;
                   OS_64,
                   OS_F64:
                     begin
-                      { low }
-                      result.loc:=LOC_REGISTER;
-                      result.register64.reglo:=NR_FUNCTION_RESULT64_LOW_REG;
-                      result.register64.reghi:=NR_FUNCTION_RESULT64_HIGH_REG;
-                      result.size:=OS_64;
+                      paraloc^.loc:=LOC_REGISTER;
+                      paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
+                      paraloc^.size:=OS_32;
+                      paraloc:=result.add_location;
+                      paraloc^.loc:=LOC_REGISTER;
+                      paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
+                      paraloc^.size:=OS_32;
                     end;
                   OS_32,
                   OS_F32:
                     begin
-                      result.loc:=LOC_REGISTER;
-                      result.register:=NR_FUNCTION_RETURN_REG;
-                      result.size:=OS_32;
+                      paraloc^.loc:=LOC_REGISTER;
+                      paraloc^.register:=NR_FUNCTION_RETURN_REG;
+                      paraloc^.size:=OS_32;
                     end;
                   else
                     internalerror(2005082603);
@@ -507,8 +522,9 @@ unit cpupara;
               end
             else
               begin
-                result.loc:=LOC_FPUREGISTER;
-                result.register:=NR_FPU_RESULT_REG;
+                paraloc^.loc:=LOC_FPUREGISTER;
+                paraloc^.register:=NR_FPU_RESULT_REG;
+                paraloc^.size:=retcgsize;
               end;
           end
           { Return in register }
@@ -516,15 +532,19 @@ unit cpupara;
           begin
             if retcgsize in [OS_64,OS_S64] then
               begin
-                { low }
-                result.loc:=LOC_REGISTER;
-                result.register64.reglo:=NR_FUNCTION_RESULT64_LOW_REG;
-                result.register64.reghi:=NR_FUNCTION_RESULT64_HIGH_REG;
+                paraloc^.loc:=LOC_REGISTER;
+                paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
+                paraloc^.size:=OS_32;
+                paraloc:=result.add_location;
+                paraloc^.loc:=LOC_REGISTER;
+                paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
+                paraloc^.size:=OS_32;
               end
             else
               begin
-                result.loc:=LOC_REGISTER;
-                result.register:=NR_FUNCTION_RETURN_REG;
+                paraloc^.loc:=LOC_REGISTER;
+                paraloc^.register:=NR_FUNCTION_RETURN_REG;
+                paraloc^.size:=retcgsize;
               end;
           end;
       end;

+ 29 - 5
compiler/arm/narmcal.pas

@@ -30,20 +30,44 @@ interface
 
     type
        tarmcallnode = class(tcgcallnode)
-          // procedure push_framepointer;override;
+         procedure set_result_location(realresdef: tstoreddef);override;
        end;
 
 implementation
 
   uses
+    verbose,globtype,globals,aasmdata,
+    symconst,
+    cgbase,
+    cpubase,cpuinfo,
+    ncgutil,
     paramgr;
 
-(*
-  procedure tarmcallnode.push_framepointer;
+  procedure tarmcallnode.set_result_location(realresdef: tstoreddef);
     begin
-      framepointer_paraloc:=paramanager.getintparaloc(procdefinition.proccalloption,1);
+      if (realresdef.typ=floatdef) and
+         ((cs_fp_emulation in current_settings.moduleswitches) or
+          (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3])) then
+        begin
+          { keep the fpu values in integer registers for now, the code
+            generator will move them to memory or an mmregister when necessary
+            (avoids double moves in case a function result is assigned to
+             another function result, or passed as a parameter) }
+          case retloc.size of
+            OS_32,
+            OS_F32:
+              location_allocate_register(current_asmdata.CurrAsmList,location,s32inttype,false);
+            OS_64,
+            OS_F64:
+              location_allocate_register(current_asmdata.CurrAsmList,location,s64inttype,false);
+            else
+              internalerror(2010053008);
+          end
+        end
+      else
+        inherited;
     end;
-*)
+
 
 begin
    ccallnode:=tarmcallnode;

+ 6 - 6
compiler/avr/cgcpu.pas

@@ -151,6 +151,7 @@ unit cgcpu;
         ref: treference;
       begin
         paraloc.check_simple_location;
+        paramanager.allocparaloc(list,paraloc.location);
         case paraloc.location^.loc of
           LOC_REGISTER,LOC_CREGISTER:
             a_load_const_reg(list,size,a,paraloc.location^.register);
@@ -178,6 +179,7 @@ unit cgcpu;
         sizeleft := paraloc.intsize;
         while assigned(location) do
           begin
+            paramanager.allocparaloc(list,location);
             case location^.loc of
               LOC_REGISTER,LOC_CREGISTER:
                 a_load_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
@@ -214,6 +216,7 @@ unit cgcpu;
         tmpreg: tregister;
       begin
         paraloc.check_simple_location;
+        paramanager.allocparaloc(list,paraloc.location);
         case paraloc.location^.loc of
           LOC_REGISTER,LOC_CREGISTER:
             a_loadaddr_ref_reg(list,r,paraloc.location^.register);
@@ -714,15 +717,12 @@ unit cgcpu;
         paramanager.getintparaloc(pocall_default,1,paraloc1);
         paramanager.getintparaloc(pocall_default,2,paraloc2);
         paramanager.getintparaloc(pocall_default,3,paraloc3);
-        paramanager.allocparaloc(list,paraloc3);
         a_load_const_cgpara(list,OS_INT,len,paraloc3);
-        paramanager.allocparaloc(list,paraloc2);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
-        paramanager.allocparaloc(list,paraloc2);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
-        paramanager.freeparaloc(list,paraloc3);
-        paramanager.freeparaloc(list,paraloc2);
-        paramanager.freeparaloc(list,paraloc1);
+        paramanager.freecgpara(list,paraloc3);
+        paramanager.freecgpara(list,paraloc2);
+        paramanager.freecgpara(list,paraloc1);
         alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
         a_call_name_static(list,'FPC_MOVE');
         dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));

+ 52 - 31
compiler/avr/cpupara.pas

@@ -41,10 +41,12 @@ unit cpupara;
           procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara:TCGPara);override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
+          function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;override;
          private
           procedure init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword);
           function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
             var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword):longint;
+          procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
        end;
 
   implementation
@@ -405,32 +407,45 @@ unit cpupara;
       end;
 
 
-    function  tavrparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;
+    function  tavrparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;
       var
-        retcgsize  : tcgsize;
+        retcgsize : tcgsize;
+        paraloc : pcgparalocation;
       begin
-        { Constructors return self instead of a boolean }
-        if (p.proctypeoption=potype_constructor) then
-          retcgsize:=OS_ADDR
-        else
-          retcgsize:=def_cgsize(def);
-
-        location_reset(result,LOC_INVALID,OS_NO);
-        result.size:=retcgsize;
-
+        result.init;
+        result.alignment:=get_para_align(p.proccalloption);
         { void has no location }
         if is_void(def) then
           begin
-            location_reset(result,LOC_VOID,OS_NO);
+            paraloc:=result.add_location;
+            result.size:=OS_NO;
+            result.intsize:=0;
+            paraloc^.size:=OS_NO;
+            paraloc^.loc:=LOC_VOID;
             exit;
           end;
+        { Constructors return self instead of a boolean }
+        if (p.proctypeoption=potype_constructor) then
+          begin
+            retcgsize:=OS_ADDR;
+            result.intsize:=sizeof(pint);
+          end
+        else
+          begin
+            retcgsize:=def_cgsize(def);
+            result.intsize:=def.size;
+          end;
+        result.size:=retcgsize;
         { Return is passed as var parameter }
         if ret_in_param(def,p.proccalloption) then
           begin
-            result.loc:=LOC_REFERENCE;
-            result.size:=retcgsize;
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_REFERENCE;
+            paraloc^.size:=retcgsize;
             exit;
           end;
+
+        paraloc:=result.add_location;
         { Return in FPU register? }
         if def.typ=floatdef then
           begin
@@ -440,18 +455,20 @@ unit cpupara;
                   OS_64,
                   OS_F64:
                     begin
-                      { low }
-                      result.loc:=LOC_REGISTER;
-                      result.register64.reglo:=NR_FUNCTION_RESULT64_LOW_REG;
-                      result.register64.reghi:=NR_FUNCTION_RESULT64_HIGH_REG;
-                      result.size:=OS_64;
+                      paraloc^.loc:=LOC_REGISTER;
+                      paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
+                      paraloc^.size:=OS_32;
+                      paraloc:=result.add_location;
+                      paraloc^.loc:=LOC_REGISTER;
+                      paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
+                      paraloc^.size:=OS_32;
                     end;
                   OS_32,
                   OS_F32:
                     begin
-                      result.loc:=LOC_REGISTER;
-                      result.register:=NR_FUNCTION_RETURN_REG;
-                      result.size:=OS_32;
+                      paraloc^.loc:=LOC_REGISTER;
+                      paraloc^.register:=NR_FUNCTION_RETURN_REG;
+                      paraloc^.size:=OS_32;
                     end;
                   else
                     internalerror(2005082603);
@@ -459,8 +476,9 @@ unit cpupara;
               end
             else
               begin
-                result.loc:=LOC_FPUREGISTER;
-                result.register:=NR_FPU_RESULT_REG;
+                paraloc^.loc:=LOC_FPUREGISTER;
+                paraloc^.register:=NR_FPU_RESULT_REG;
+                paraloc^.size:=retcgsize;
               end;
           end
           { Return in register }
@@ -468,17 +486,20 @@ unit cpupara;
           begin
             if retcgsize in [OS_64,OS_S64] then
               begin
-                { low }
-                result.loc:=LOC_REGISTER;
-                result.register64.reglo:=NR_FUNCTION_RESULT64_LOW_REG;
-                result.register64.reghi:=NR_FUNCTION_RESULT64_HIGH_REG;
+                paraloc^.loc:=LOC_REGISTER;
+                paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
+                paraloc^.size:=OS_32;
+                paraloc:=result.add_location;
+                paraloc^.loc:=LOC_REGISTER;
+                paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
+                paraloc^.size:=OS_32;
               end
             else
               begin
-                result.loc:=LOC_REGISTER;
-                result.register:=NR_FUNCTION_RETURN_REG;
+                paraloc^.loc:=LOC_REGISTER;
+                paraloc^.register:=NR_FUNCTION_RETURN_REG;
+                paraloc^.size:=OS_32;
               end;
-
           end;
       end;
 

+ 174 - 80
compiler/cgobj.pas

@@ -120,7 +120,8 @@ unit cgobj;
 
              This routine should push/send the parameter to the routine, as
              required by the specific processor ABI and routine modifiers.
-             This must be overriden for each CPU target.
+             It must generate register allocation information for the cgpara in
+             case it consists of cpuregisters.
 
              @param(size size of the operand in the register)
              @param(r register source of the operand)
@@ -132,6 +133,8 @@ unit cgobj;
              A generic version is provided. This routine should
              be overriden for optimization purposes if the cpu
              permits directly sending this type of parameter.
+             It must generate register allocation information for the cgpara in
+             case it consists of cpuregisters.
 
              @param(size size of the operand in constant)
              @param(a value of constant to send)
@@ -143,6 +146,8 @@ unit cgobj;
              A generic version is provided. This routine should
              be overriden for optimization purposes if the cpu
              permits directly sending this type of parameter.
+             It must generate register allocation information for the cgpara in
+             case it consists of cpuregisters.
 
              @param(size size of the operand in constant)
              @param(r Memory reference of value to send)
@@ -162,6 +167,8 @@ unit cgobj;
           {# Pass the address of a reference to a routine. This routine
              will calculate the address of the reference, and pass this
              calculated address as a parameter.
+             It must generate register allocation information for the cgpara in
+             case it consists of cpuregisters.
 
              A generic version is provided. This routine should
              be overriden for optimization purposes if the cpu
@@ -173,6 +180,8 @@ unit cgobj;
           procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const cgpara : TCGPara);virtual;
 
           {# Load a cgparaloc into a memory reference.
+             It must generate register allocation information for the cgpara in
+             case it consists of cpuregisters.
 
            @param(paraloc the source parameter sublocation)
            @param(ref the destination reference)
@@ -880,6 +889,7 @@ implementation
          ref : treference;
       begin
          cgpara.check_simple_location;
+         paramanager.alloccgpara(list,cgpara);
          case cgpara.location^.loc of
             LOC_REGISTER,LOC_CREGISTER:
               a_load_reg_reg(list,size,cgpara.location^.size,r,cgpara.location^.register);
@@ -899,6 +909,7 @@ implementation
          ref : treference;
       begin
          cgpara.check_simple_location;
+         paramanager.alloccgpara(list,cgpara);
          case cgpara.location^.loc of
             LOC_REGISTER,LOC_CREGISTER:
               a_load_const_reg(list,cgpara.location^.size,a,cgpara.location^.register);
@@ -915,31 +926,134 @@ implementation
 
     procedure tcg.a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const cgpara : TCGPara);
       var
-         ref : treference;
-      begin
-         cgpara.check_simple_location;
-         case cgpara.location^.loc of
-            LOC_REGISTER,LOC_CREGISTER:
-              a_load_ref_reg(list,size,cgpara.location^.size,r,cgpara.location^.register);
-            LOC_REFERENCE,LOC_CREFERENCE:
-              begin
-                 reference_reset_base(ref,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
-                 if (size <> OS_NO) and
-                    (tcgsize2size[size] < sizeof(aint)) then
-                   begin
-                     if (cgpara.size = OS_NO) or
-                        assigned(cgpara.location^.next) then
-                       internalerror(2006052401);
-                     a_load_ref_ref(list,size,cgpara.size,r,ref);
-                   end
-                 else
-                   { use concatcopy, because the parameter can be larger than }
-                   { what the OS_* constants can handle                       }
-                   g_concatcopy(list,r,ref,cgpara.intsize);
-              end
-            else
-              internalerror(2002071004);
-         end;
+        tmpref, ref: treference;
+        tmpreg: tregister;
+        location: pcgparalocation;
+        orgsizeleft,
+        sizeleft: aint;
+        reghasvalue: boolean;
+      begin
+        location:=cgpara.location;
+        tmpref:=r;
+        sizeleft:=cgpara.intsize;
+        while assigned(location) do
+          begin
+            paramanager.allocparaloc(list,location);
+            case location^.loc of
+              LOC_REGISTER,LOC_CREGISTER:
+                begin
+                   { Parameter locations are often allocated in multiples of
+                     entire registers. If a parameter only occupies a part of
+                     such a register (e.g. a 16 bit int on a 32 bit
+                     architecture), the size of this parameter can only be
+                     determined by looking at the "size" parameter of this
+                     method -> if the size parameter is <= sizeof(aint), then
+                     we check that there is only one parameter location and
+                     then use this "size" to load the value into the parameter
+                     location }
+                   if (size<>OS_NO) and
+                      (tcgsize2size[size]<=sizeof(aint)) then
+                     begin
+                       cgpara.check_simple_location;
+                       a_load_ref_reg(list,size,location^.size,tmpref,location^.register);
+                     end
+                   { there's a lot more data left, and the current paraloc's
+                     register is entirely filled with part of that data }
+                   else if (sizeleft>sizeof(aint)) then
+                     begin
+                       a_load_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
+                     end
+                   { we're at the end of the data, and it can be loaded into
+                     the current location's register with a single regular
+                     load }
+                   else if (sizeleft in [1,2{$ifndef cpu16bitalu},4{$endif}{$ifdef cpu64bitalu},8{$endif}]) then
+                     begin
+                       a_load_ref_reg(list,int_cgsize(sizeleft),location^.size,tmpref,location^.register);
+                     end
+                   { we're at the end of the data, and we need multiple loads
+                     to get it in the register because it's an irregular size }
+                   else
+                     begin
+                       { should be the last part }
+                       if assigned(location^.next) then
+                         internalerror(2010052907);
+                       { load the value piecewise to get it into the register }
+                       orgsizeleft:=sizeleft;
+                       reghasvalue:=false;
+{$ifdef cpu64bitalu}
+                       if sizeleft>=4 then
+                         begin
+                           a_load_ref_reg(list,OS_32,location^.size,tmpref,location^.register);
+                           dec(sizeleft,4);
+                           if target_info.endian=endian_big then
+                             a_op_const_reg(list,OP_SHL,location^.size,sizeleft*8,location^.register);
+                           inc(tmpref.offset,4);
+                           reghasvalue:=true;
+                         end;
+{$endif cpu64bitalu}
+                       if sizeleft>=2 then
+                         begin
+                           tmpreg:=getintregister(list,location^.size);
+                           a_load_ref_reg(list,OS_16,location^.size,tmpref,tmpreg);
+                           dec(sizeleft,2);
+                           if reghasvalue then
+                             begin
+                               if target_info.endian=endian_big then
+                                 a_op_const_reg(list,OP_SHL,location^.size,sizeleft*8,tmpreg)
+                               else
+                                 a_op_const_reg(list,OP_SHL,location^.size,(orgsizeleft-(sizeleft+2))*8,tmpreg);
+                               a_op_reg_reg(list,OP_OR,location^.size,tmpreg,location^.register);
+                             end
+                           else
+                             begin
+                               if target_info.endian=endian_big then
+                                 a_op_const_reg_reg(list,OP_SHL,location^.size,sizeleft*8,tmpreg,location^.register)
+                               else
+                                 a_load_reg_reg(list,location^.size,location^.size,tmpreg,location^.register);
+                             end;
+                           inc(tmpref.offset,2);
+                           reghasvalue:=true;
+                         end;
+                       if sizeleft=1 then
+                         begin
+                           tmpreg:=getintregister(list,location^.size);
+                           a_load_ref_reg(list,OS_8,location^.size,tmpref,tmpreg);
+                           dec(sizeleft,1);
+                           if reghasvalue then
+                             begin
+                               if target_info.endian=endian_little then
+                                 a_op_const_reg(list,OP_SHL,location^.size,(orgsizeleft-(sizeleft+1))*8,tmpreg);
+                               a_op_reg_reg(list,OP_OR,location^.size,tmpreg,location^.register)
+                             end
+                           else
+                             a_load_reg_reg(list,location^.size,location^.size,tmpreg,location^.register);
+                           inc(tmpref.offset);
+                         end;
+                       { the loop will already adjust the offset and sizeleft }
+                       dec(tmpref.offset,orgsizeleft);
+                       sizeleft:=orgsizeleft;
+                     end;
+                end;
+              LOC_REFERENCE,LOC_CREFERENCE:
+                begin
+                   if assigned(location^.next) then
+                     internalerror(2010052906);
+                   reference_reset_base(ref,location^.reference.index,location^.reference.offset,newalignment(cgpara.alignment,cgpara.intsize-sizeleft));
+                   if (size <> OS_NO) and
+                      (tcgsize2size[size] <= sizeof(aint)) then
+                     a_load_ref_ref(list,size,location^.size,tmpref,ref)
+                   else
+                     { use concatcopy, because the parameter can be larger than }
+                     { what the OS_* constants can handle                       }
+                     g_concatcopy(list,tmpref,ref,sizeleft);
+                end
+              else
+                internalerror(2002071004);
+            end;
+            inc(tmpref.offset,tcgsize2size[location^.size]);
+            dec(sizeleft,tcgsize2size[location^.size]);
+            location:=location^.next;
+          end;
       end;
 
 
@@ -966,7 +1080,10 @@ implementation
       begin
          cgpara.check_simple_location;
          if cgpara.location^.loc in [LOC_CREGISTER,LOC_REGISTER] then
-           a_loadaddr_ref_reg(list,r,cgpara.location^.register)
+           begin
+             paramanager.allocparaloc(list,cgpara.location);
+             a_loadaddr_ref_reg(list,r,cgpara.location^.register)
+           end
          else
            begin
              hr:=getaddressregister(list);
@@ -2603,6 +2720,7 @@ implementation
       var
          ref : treference;
       begin
+        paramanager.alloccgpara(list,cgpara);
          case cgpara.location^.loc of
             LOC_FPUREGISTER,LOC_CFPUREGISTER:
               begin
@@ -2638,6 +2756,7 @@ implementation
           LOC_FPUREGISTER,LOC_CFPUREGISTER:
             begin
               cgpara.check_simple_location;
+              paramanager.alloccgpara(list,cgpara);
               a_loadfpu_ref_reg(list,size,size,ref,cgpara.location^.register);
             end;
           LOC_REFERENCE,LOC_CREFERENCE:
@@ -3059,6 +3178,7 @@ implementation
             (size<>OS_F64) then
 {$endif not cpu64bitalu}
            cgpara.check_simple_location;
+         paramanager.alloccgpara(list,cgpara);
          case cgpara.location^.loc of
           LOC_MMREGISTER,LOC_CMMREGISTER:
             a_loadmm_reg_reg(list,size,cgpara.location^.size,reg,cgpara.location^.register,shuffle);
@@ -3248,15 +3368,12 @@ implementation
         paramanager.getintparaloc(pocall_default,1,cgpara1);
         paramanager.getintparaloc(pocall_default,2,cgpara2);
         paramanager.getintparaloc(pocall_default,3,cgpara3);
-        paramanager.allocparaloc(list,cgpara3);
         a_loadaddr_ref_cgpara(list,dest,cgpara3);
-        paramanager.allocparaloc(list,cgpara2);
         a_loadaddr_ref_cgpara(list,source,cgpara2);
-        paramanager.allocparaloc(list,cgpara1);
         a_load_const_cgpara(list,OS_INT,len,cgpara1);
-        paramanager.freeparaloc(list,cgpara3);
-        paramanager.freeparaloc(list,cgpara2);
-        paramanager.freeparaloc(list,cgpara1);
+        paramanager.freecgpara(list,cgpara3);
+        paramanager.freecgpara(list,cgpara2);
+        paramanager.freecgpara(list,cgpara1);
         allocallcpuregisters(list);
         a_call_name(list,'FPC_SHORTSTR_ASSIGN',false);
         deallocallcpuregisters(list);
@@ -3274,12 +3391,10 @@ implementation
         cgpara2.init;
         paramanager.getintparaloc(pocall_default,1,cgpara1);
         paramanager.getintparaloc(pocall_default,2,cgpara2);
-        paramanager.allocparaloc(list,cgpara2);
         a_loadaddr_ref_cgpara(list,dest,cgpara2);
-        paramanager.allocparaloc(list,cgpara1);
         a_loadaddr_ref_cgpara(list,source,cgpara1);
-        paramanager.freeparaloc(list,cgpara2);
-        paramanager.freeparaloc(list,cgpara1);
+        paramanager.freecgpara(list,cgpara2);
+        paramanager.freecgpara(list,cgpara1);
         allocallcpuregisters(list);
         a_call_name(list,'FPC_VARIANT_COPY_OVERWRITE',false);
         deallocallcpuregisters(list);
@@ -3313,7 +3428,6 @@ implementation
          { call the special incr function or the generic addref }
          if incrfunc<>'' then
           begin
-            paramanager.allocparaloc(list,cgpara1);
             { widestrings aren't ref. counted on all platforms so we need the address
               to create a real copy }
             if is_widestring(t) then
@@ -3321,7 +3435,7 @@ implementation
             else
               { these functions get the pointer by value }
               a_load_ref_cgpara(list,OS_ADDR,ref,cgpara1);
-            paramanager.freeparaloc(list,cgpara1);
+            paramanager.freecgpara(list,cgpara1);
             allocallcpuregisters(list);
             a_call_name(list,incrfunc,false);
             deallocallcpuregisters(list);
@@ -3329,12 +3443,10 @@ implementation
          else
           begin
             reference_reset_symbol(href,RTTIWriter.get_rtti_label(t,initrtti),0,sizeof(pint));
-            paramanager.allocparaloc(list,cgpara2);
             a_loadaddr_ref_cgpara(list,href,cgpara2);
-            paramanager.allocparaloc(list,cgpara1);
             a_loadaddr_ref_cgpara(list,ref,cgpara1);
-            paramanager.freeparaloc(list,cgpara1);
-            paramanager.freeparaloc(list,cgpara2);
+            paramanager.freecgpara(list,cgpara1);
+            paramanager.freecgpara(list,cgpara2);
             allocallcpuregisters(list);
             a_call_name(list,'FPC_ADDREF',false);
             deallocallcpuregisters(list);
@@ -3384,14 +3496,11 @@ implementation
             tempreg1:=getaddressregister(list);
             a_loadaddr_ref_reg(list,ref,tempreg1);
             if needrtti then
-              begin
-                paramanager.allocparaloc(list,cgpara2);
-                a_load_reg_cgpara(list,OS_ADDR,tempreg2,cgpara2);
-                paramanager.freeparaloc(list,cgpara2);
-              end;
-            paramanager.allocparaloc(list,cgpara1);
+              a_load_reg_cgpara(list,OS_ADDR,tempreg2,cgpara2);
             a_load_reg_cgpara(list,OS_ADDR,tempreg1,cgpara1);
-            paramanager.freeparaloc(list,cgpara1);
+            paramanager.freecgpara(list,cgpara1);
+            if needrtti then
+              paramanager.freecgpara(list,cgpara2);
             allocallcpuregisters(list);
             a_call_name(list,decrfunc,false);
             deallocallcpuregisters(list);
@@ -3399,12 +3508,10 @@ implementation
          else
           begin
             reference_reset_symbol(href,RTTIWriter.get_rtti_label(t,initrtti),0,sizeof(pint));
-            paramanager.allocparaloc(list,cgpara2);
             a_loadaddr_ref_cgpara(list,href,cgpara2);
-            paramanager.allocparaloc(list,cgpara1);
             a_loadaddr_ref_cgpara(list,ref,cgpara1);
-            paramanager.freeparaloc(list,cgpara1);
-            paramanager.freeparaloc(list,cgpara2);
+            paramanager.freecgpara(list,cgpara1);
+            paramanager.freecgpara(list,cgpara2);
             allocallcpuregisters(list);
             a_call_name(list,'FPC_DECREF',false);
             deallocallcpuregisters(list);
@@ -3432,12 +3539,10 @@ implementation
          else
            begin
               reference_reset_symbol(href,RTTIWriter.get_rtti_label(t,initrtti),0,sizeof(pint));
-              paramanager.allocparaloc(list,cgpara2);
               a_loadaddr_ref_cgpara(list,href,cgpara2);
-              paramanager.allocparaloc(list,cgpara1);
               a_loadaddr_ref_cgpara(list,ref,cgpara1);
-              paramanager.freeparaloc(list,cgpara1);
-              paramanager.freeparaloc(list,cgpara2);
+              paramanager.freecgpara(list,cgpara1);
+              paramanager.freecgpara(list,cgpara2);
               allocallcpuregisters(list);
               a_call_name(list,'FPC_INITIALIZE',false);
               deallocallcpuregisters(list);
@@ -3467,12 +3572,10 @@ implementation
          else
            begin
               reference_reset_symbol(href,RTTIWriter.get_rtti_label(t,initrtti),0,sizeof(pint));
-              paramanager.allocparaloc(list,cgpara2);
               a_loadaddr_ref_cgpara(list,href,cgpara2);
-              paramanager.allocparaloc(list,cgpara1);
               a_loadaddr_ref_cgpara(list,ref,cgpara1);
-              paramanager.freeparaloc(list,cgpara1);
-              paramanager.freeparaloc(list,cgpara2);
+              paramanager.freecgpara(list,cgpara1);
+              paramanager.freecgpara(list,cgpara2);
               allocallcpuregisters(list);
               a_call_name(list,'FPC_FINALIZE',false);
               deallocallcpuregisters(list);
@@ -3703,9 +3806,8 @@ implementation
            a_cmp_const_reg_label(list,OS_ADDR,OC_NE,0,reg,oklabel);
            cgpara1.init;
            paramanager.getintparaloc(pocall_default,1,cgpara1);
-           paramanager.allocparaloc(list,cgpara1);
            a_load_const_cgpara(list,OS_INT,210,cgpara1);
-           paramanager.freeparaloc(list,cgpara1);
+           paramanager.freecgpara(list,cgpara1);
            a_call_name(list,'FPC_HANDLEERROR',false);
            a_label(list,oklabel);
            cgpara1.done;
@@ -3725,12 +3827,10 @@ implementation
         if (cs_check_object in current_settings.localswitches) then
          begin
            reference_reset_symbol(hrefvmt,current_asmdata.RefAsmSymbol(objdef.vmt_mangledname),0,sizeof(pint));
-           paramanager.allocparaloc(list,cgpara2);
            a_loadaddr_ref_cgpara(list,hrefvmt,cgpara2);
-           paramanager.allocparaloc(list,cgpara1);
            a_load_reg_cgpara(list,OS_ADDR,reg,cgpara1);
-           paramanager.freeparaloc(list,cgpara1);
-           paramanager.freeparaloc(list,cgpara2);
+           paramanager.freecgpara(list,cgpara1);
+           paramanager.freecgpara(list,cgpara2);
            allocallcpuregisters(list);
            a_call_name(list,'FPC_CHECK_OBJECT_EXT',false);
            deallocallcpuregisters(list);
@@ -3738,9 +3838,8 @@ implementation
         else
          if (cs_check_range in current_settings.localswitches) then
           begin
-            paramanager.allocparaloc(list,cgpara1);
             a_load_reg_cgpara(list,OS_ADDR,reg,cgpara1);
-            paramanager.freeparaloc(list,cgpara1);
+            paramanager.freecgpara(list,cgpara1);
             allocallcpuregisters(list);
             a_call_name(list,'FPC_CHECK_OBJECT',false);
             deallocallcpuregisters(list);
@@ -3784,9 +3883,8 @@ implementation
         { do getmem call }
         cgpara1.init;
         paramanager.getintparaloc(pocall_default,1,cgpara1);
-        paramanager.allocparaloc(list,cgpara1);
         a_load_reg_cgpara(list,OS_INT,sizereg,cgpara1);
-        paramanager.freeparaloc(list,cgpara1);
+        paramanager.freecgpara(list,cgpara1);
         allocallcpuregisters(list);
         a_call_name(list,'FPC_GETMEM',false);
         deallocallcpuregisters(list);
@@ -3802,17 +3900,14 @@ implementation
         paramanager.getintparaloc(pocall_default,2,cgpara2);
         paramanager.getintparaloc(pocall_default,3,cgpara3);
         { load size }
-        paramanager.allocparaloc(list,cgpara3);
         a_load_reg_cgpara(list,OS_INT,sizereg,cgpara3);
         { load destination }
-        paramanager.allocparaloc(list,cgpara2);
         a_load_reg_cgpara(list,OS_ADDR,destreg,cgpara2);
         { load source }
-        paramanager.allocparaloc(list,cgpara1);
         a_load_reg_cgpara(list,OS_ADDR,sourcereg,cgpara1);
-        paramanager.freeparaloc(list,cgpara3);
-        paramanager.freeparaloc(list,cgpara2);
-        paramanager.freeparaloc(list,cgpara1);
+        paramanager.freecgpara(list,cgpara3);
+        paramanager.freecgpara(list,cgpara2);
+        paramanager.freecgpara(list,cgpara1);
         allocallcpuregisters(list);
         a_call_name(list,'FPC_MOVE',false);
         deallocallcpuregisters(list);
@@ -3830,9 +3925,8 @@ implementation
         cgpara1.init;
         paramanager.getintparaloc(pocall_default,1,cgpara1);
         { load source }
-        paramanager.allocparaloc(list,cgpara1);
         a_load_loc_cgpara(list,l,cgpara1);
-        paramanager.freeparaloc(list,cgpara1);
+        paramanager.freecgpara(list,cgpara1);
         allocallcpuregisters(list);
         a_call_name(list,'FPC_FREEMEM',false);
         deallocallcpuregisters(list);

+ 18 - 6
compiler/i386/cgcpu.pas

@@ -289,17 +289,29 @@ unit cgcpu;
            { this messes up stack alignment }
            (target_info.system <> system_i386_darwin) then
           begin
-            if (current_procinfo.procdef.funcretloc[calleeside].loc<>LOC_VOID) and
-               (current_procinfo.procdef.funcretloc[calleeside].loc=LOC_REGISTER) then
-              list.concat(Taicpu.Op_const_reg(A_ADD,S_L,4,NR_ESP))
+            if assigned(current_procinfo.procdef.funcretloc[calleeside].location) and
+               (current_procinfo.procdef.funcretloc[calleeside].location^.loc=LOC_REGISTER) then
+              begin
+                if (getsupreg(current_procinfo.procdef.funcretloc[calleeside].location^.register)=RS_EAX) then
+                  list.concat(Taicpu.Op_const_reg(A_ADD,S_L,4,NR_ESP))
+                else
+                  internalerror(2010053001);
+              end
             else
               list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EAX));
             list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EBX));
             list.concat(Taicpu.Op_reg(A_POP,S_L,NR_ECX));
 
-            if (current_procinfo.procdef.funcretloc[calleeside].loc=LOC_REGISTER) and
-               (current_procinfo.procdef.funcretloc[calleeside].size in [OS_64,OS_S64]) then
-              list.concat(Taicpu.Op_const_reg(A_ADD,S_L,4,NR_ESP))
+            if (current_procinfo.procdef.funcretloc[calleeside].size in [OS_64,OS_S64]) and
+               assigned(current_procinfo.procdef.funcretloc[calleeside].location) and
+               assigned(current_procinfo.procdef.funcretloc[calleeside].location^.next) and
+               (current_procinfo.procdef.funcretloc[calleeside].location^.next^.loc=LOC_REGISTER) then
+              begin
+                if (getsupreg(current_procinfo.procdef.funcretloc[calleeside].location^.next^.register)=RS_EDX) then
+                  list.concat(Taicpu.Op_const_reg(A_ADD,S_L,4,NR_ESP))
+                else
+                  internalerror(2010053002);
+              end
             else
               list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EDX));
 

+ 43 - 25
compiler/i386/cpupara.pas

@@ -49,7 +49,7 @@ unit cpupara;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
           procedure createtempparaloc(list: TAsmList;calloption : tproccalloption;parasym : tparavarsym;var cgpara:TCGPara);override;
-          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;override;
+          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): TCGPara;override;
        private
           procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
           procedure create_stdcall_paraloc_info(p : tabstractprocdef; side: tcallercallee;paras:tparalist;var parasize:longint);
@@ -312,63 +312,81 @@ unit cpupara;
       end;
 
 
-    function  ti386paramanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;
+    function  ti386paramanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): TCGPara;
       var
         retcgsize  : tcgsize;
+        paraloc : pcgparalocation;
       begin
-        { Constructors return self instead of a boolean }
-        if (p.proctypeoption=potype_constructor) then
-          retcgsize:=OS_ADDR
-        else
-          retcgsize:=def_cgsize(def);
-
-        location_reset(result,LOC_INVALID,OS_NO);
+        result.init;
+        result.alignment:=get_para_align(p.proccalloption);
         { void has no location }
         if is_void(def) then
           begin
-            location_reset(result,LOC_VOID,OS_NO);
+            paraloc:=result.add_location;
+            result.size:=OS_NO;
+            result.intsize:=0;
+            paraloc^.size:=OS_NO;
+            paraloc^.loc:=LOC_VOID;
             exit;
           end;
+        { Constructors return self instead of a boolean }
+        if (p.proctypeoption=potype_constructor) then
+          begin
+            retcgsize:=OS_ADDR;
+            result.intsize:=sizeof(pint);
+          end
+        else
+          begin
+            retcgsize:=def_cgsize(def);
+            result.intsize:=def.size;
+          end;
+        result.size:=retcgsize;
         { Return is passed as var parameter }
         if ret_in_param(def,p.proccalloption) then
           begin
-            result.loc:=LOC_REFERENCE;
-            result.size:=retcgsize;
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_REFERENCE;
+            paraloc^.size:=retcgsize;
             exit;
           end;
         { Return in FPU register? }
         if def.typ=floatdef then
           begin
-            result.loc:=LOC_FPUREGISTER;
-            result.register:=NR_FPU_RESULT_REG;
-            result.size:=retcgsize;
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_FPUREGISTER;
+            paraloc^.register:=NR_FPU_RESULT_REG;
+            paraloc^.size:=retcgsize;
           end
         else
          { Return in register }
           begin
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_REGISTER;
             if retcgsize in [OS_64,OS_S64] then
              begin
                { low 32bits }
-               result.loc:=LOC_REGISTER;
-               result.size:=OS_64;
                if side=callerside then
-                 result.register64.reglo:=NR_FUNCTION_RESULT64_LOW_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
                else
-                 result.register64.reglo:=NR_FUNCTION_RETURN64_LOW_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_LOW_REG;
+               paraloc^.size:=OS_32;
+
                { high 32bits }
+               paraloc:=result.add_location;
+               paraloc^.loc:=LOC_REGISTER;
                if side=callerside then
-                 result.register64.reghi:=NR_FUNCTION_RESULT64_HIGH_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
                else
-                 result.register64.reghi:=NR_FUNCTION_RETURN64_HIGH_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_HIGH_REG;
+               paraloc^.size:=OS_32;
              end
             else
              begin
-               result.loc:=LOC_REGISTER;
-               result.size:=retcgsize;
+               paraloc^.size:=retcgsize;
                if side=callerside then
-                 result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
                else
-                 result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
              end;
           end;
       end;

+ 51 - 36
compiler/m68k/cpupara.pas

@@ -44,8 +44,9 @@ unit cpupara;
           procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara : TCGPara);override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
-          procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
+          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;override;
           procedure createtempparaloc(list: TAsmList;calloption : tproccalloption;parasym : tparavarsym;var cgpara:TCGPara);override;
+          procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
          private
           procedure init_values(var curintreg, curfloatreg: tsuperregister; var cur_stack_offset: aword);
           function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
@@ -184,45 +185,56 @@ unit cpupara;
       end;
 
     procedure tm68kparamanager.create_funcretloc_info(p: tabstractprocdef; side: tcallercallee);
-      var
-        retcgsize: tcgsize;
       begin
-        { Constructors return self instead of a boolean }
-        if (p.proctypeoption=potype_constructor) then
-          retcgsize:=OS_ADDR
-        else
-          retcgsize:=def_cgsize(p.returndef);
-
-        location_reset(p.funcretloc[side],LOC_INVALID,OS_NO);
+        p.funcretloc[side]:=get_funcretloc(p,side,p.returndef);
+      end;
 
-        { explicit paraloc specified? }
-        if po_explicitparaloc in p.procoptions then
-         begin
-           p.funcretloc[side].loc:=LOC_REGISTER;
-           p.funcretloc[side].register:=p.exp_funcretloc;
-           p.funcretloc[side].size:=retcgsize;
-           exit;
-         end;
 
+    function tm68kparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;
+      var
+        paraloc : pcgparalocation;
+        retcgsize  : tcgsize;
+      begin
+        result.init;
+        result.alignment:=get_para_align(p.proccalloption);
         { void has no location }
-        if is_void(p.returndef) then
+        if is_void(def) then
           begin
-            location_reset(p.funcretloc[side],LOC_VOID,OS_NO);
+            paraloc:=result.add_location;
+            result.size:=OS_NO;
+            result.intsize:=0;
+            paraloc^.size:=OS_NO;
+            paraloc^.loc:=LOC_VOID;
             exit;
           end;
+        { Constructors return self instead of a boolean }
+        if (p.proctypeoption=potype_constructor) then
+          begin
+            retcgsize:=OS_ADDR;
+            result.intsize:=sizeof(pint);
+          end
+        else
+          begin
+            retcgsize:=def_cgsize(def);
+            result.intsize:=def.size;
+          end;
+        result.size:=retcgsize;
         { Return is passed as var parameter }
-        if ret_in_param(p.returndef,p.proccalloption) then
+        if ret_in_param(def,p.proccalloption) then
           begin
-            p.funcretloc[side].loc:=LOC_REFERENCE;
-            p.funcretloc[side].size:=retcgsize;
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_REFERENCE;
+            paraloc^.size:=retcgsize;
             exit;
           end;
+
+        paraloc:=result.add_location;
         { Return in FPU register? }
         if not(cs_fp_emulation in current_settings.moduleswitches) and (p.returndef.typ=floatdef) then
           begin
-            p.funcretloc[side].loc:=LOC_FPUREGISTER;
-            p.funcretloc[side].register:=NR_FPU_RESULT_REG;
-            p.funcretloc[side].size:=retcgsize;
+            paraloc^.loc:=LOC_FPUREGISTER;
+            paraloc^.register:=NR_FPU_RESULT_REG;
+            paraloc^.size:=retcgsize;
           end
         else
          { Return in register }
@@ -230,26 +242,29 @@ unit cpupara;
             if retcgsize in [OS_64,OS_S64] then
              begin
                { low 32bits }
-               p.funcretloc[side].loc:=LOC_REGISTER;
-               p.funcretloc[side].size:=OS_64;
+               paraloc^.loc:=LOC_REGISTER;
+               paraloc^.size:=OS_32;
                if side=callerside then
-                 p.funcretloc[side].register64.reglo:=NR_FUNCTION_RESULT64_LOW_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
                else
-                 p.funcretloc[side].register64.reglo:=NR_FUNCTION_RETURN64_LOW_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_LOW_REG;
                { high 32bits }
+               paraloc:=result.add_location;
+               paraloc^.loc:=LOC_REGISTER;
+               paraloc^.size:=OS_32;
                if side=calleeside then
-                 p.funcretloc[side].register64.reghi:=NR_FUNCTION_RESULT64_HIGH_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
                else
-                 p.funcretloc[side].register64.reghi:=NR_FUNCTION_RETURN64_HIGH_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_HIGH_REG;
              end
             else
              begin
-               p.funcretloc[side].loc:=LOC_REGISTER;
-               p.funcretloc[side].size:=retcgsize;
+               paraloc^.loc:=LOC_REGISTER;
+               paraloc^.size:=retcgsize;
                if side=callerside then
-                 p.funcretloc[side].register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
                else
-                 p.funcretloc[side].register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
              end;
           end;
       end;

+ 7 - 6
compiler/mips/cgcpu.pas

@@ -545,6 +545,7 @@ var
   Ref: TReference;
 begin
   paraloc.check_simple_location;
+  paramanager.allocparaloc(list,paraloc.location);
   case paraloc.location^.loc of
     LOC_REGISTER, LOC_CREGISTER:
       a_load_const_reg(list, size, a, paraloc.location^.Register);
@@ -570,6 +571,7 @@ var
   tmpreg: TRegister;
 begin
   paraloc.check_simple_location;
+  paramanager.allocparaloc(list,paraloc.location);
   with paraloc.location^ do
   begin
     case loc of
@@ -600,6 +602,7 @@ var
   TmpReg: TRegister;
 begin
   paraloc.check_simple_location;
+  paramanager.allocparaloc(list,paraloc.location);
   with paraloc.location^ do
   begin
     case loc of
@@ -630,6 +633,7 @@ begin
   hloc := paraloc.location;
   while assigned(hloc) do
   begin
+    paramanager.allocparaloc(list,hloc);
     case hloc^.loc of
       LOC_REGISTER:
         a_load_ref_reg(list, hloc^.size, hloc^.size, href, hloc^.Register);
@@ -1467,15 +1471,12 @@ begin
   paramanager.getintparaloc(pocall_default, 1, paraloc1);
   paramanager.getintparaloc(pocall_default, 2, paraloc2);
   paramanager.getintparaloc(pocall_default, 3, paraloc3);
-  paramanager.allocparaloc(list, paraloc3);
   a_load_const_cgpara(list, OS_INT, len, paraloc3);
-  paramanager.allocparaloc(list, paraloc2);
   a_loadaddr_ref_cgpara(list, dest, paraloc2);
-  paramanager.allocparaloc(list, paraloc2);
   a_loadaddr_ref_cgpara(list, Source, paraloc1);
-  paramanager.freeparaloc(list, paraloc3);
-  paramanager.freeparaloc(list, paraloc2);
-  paramanager.freeparaloc(list, paraloc1);
+  paramanager.freecgpara(list, paraloc3);
+  paramanager.freecgpara(list, paraloc2);
+  paramanager.freecgpara(list, paraloc1);
   alloccpuregisters(list, R_INTREGISTER, paramanager.get_volatile_registers_int(pocall_default));
   alloccpuregisters(list, R_FPUREGISTER, paramanager.get_volatile_registers_fpu(pocall_default));
   a_call_name(list, 'FPC_MOVE', false);

+ 53 - 28
compiler/mips/cpupara.pas

@@ -41,6 +41,7 @@ interface
         procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara : TCGPara);override;
         function  create_paraloc_info(p : TAbstractProcDef; side: tcallercallee):longint;override;
         function  create_varargs_paraloc_info(p : TAbstractProcDef; varargspara:tvarargsparalist):longint;override;
+        function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;override;
       private
         procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
         procedure create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
@@ -134,68 +135,92 @@ implementation
 
 
     procedure tMIPSELparamanager.create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
+      begin
+        p.funcretloc[side]:=get_funcretloc(p,side,p.returndef);
+      end;
+
+
+    function tMIPSELparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;
       var
+        paraloc : pcgparalocation;
         retcgsize  : tcgsize;
       begin
+        result.init;
+        result.alignment:=get_para_align(p.proccalloption);
+        { void has no location }
+        if is_void(def) then
+          begin
+            paraloc:=result.add_location;
+            result.size:=OS_NO;
+            result.intsize:=0;
+            paraloc^.size:=OS_NO;
+            paraloc^.loc:=LOC_VOID;
+            exit;
+          end;
         { Constructors return self instead of a boolean }
         if (p.proctypeoption=potype_constructor) then
-          retcgsize:=OS_ADDR
+          begin
+            retcgsize:=OS_ADDR;
+            result.intsize:=sizeof(pint);
+          end
         else
-          retcgsize:=def_cgsize(p.returndef);
-
-        location_reset(p.funcretloc[side],LOC_INVALID,OS_NO);
-        p.funcretloc[side].size:=retcgsize;
-        { void has no location }
-        if is_void(p.returndef) then
           begin
-            p.funcretloc[side].loc:=LOC_VOID;
+            retcgsize:=def_cgsize(def);
+            result.intsize:=def.size;
+          end;
+        result.size:=retcgsize;
+        { Return is passed as var parameter }
+        if ret_in_param(def,p.proccalloption) then
+          begin
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_REFERENCE;
+            paraloc^.size:=retcgsize;
             exit;
           end;
 
+        paraloc:=result.add_location;
         { Return in FPU register? }
         if p.returndef.typ=floatdef then
           begin
-            p.funcretloc[side].loc:=LOC_FPUREGISTER;
-            p.funcretloc[side].register:=NR_FPU_RESULT_REG;
+            paraloc^.loc:=LOC_FPUREGISTER;
+            paraloc^.register:=NR_FPU_RESULT_REG;
             if retcgsize=OS_F64 then
-              setsubreg(p.funcretloc[side].register,R_SUBFD);
-            p.funcretloc[side].size:=retcgsize;
+              setsubreg(paraloc^.register,R_SUBFD);
+            paraloc^.size:=retcgsize;
           end
         else
-         { Return in register? }
-         if not ret_in_param(p.returndef,p.proccalloption) then
+         { Return in register }
           begin
 {$ifndef cpu64bit}
             if retcgsize in [OS_64,OS_S64] then
              begin
-               p.funcretloc[side].loc:=LOC_REGISTER;
                { high }
+               paraloc^.loc:=LOC_REGISTER;
                if side=callerside then
-                 p.funcretloc[side].register64.reghi:=NR_FUNCTION_RESULT64_HIGH_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
                else
-                 p.funcretloc[side].register64.reghi:=NR_FUNCTION_RETURN64_HIGH_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_HIGH_REG;
+               paraloc^.size:=OS_32;
                { low }
+               paraloc:=result.add_location;
+               paraloc^.loc:=LOC_REGISTER;
                if side=callerside then
-                 p.funcretloc[side].register64.reglo:=NR_FUNCTION_RESULT64_LOW_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
                else
-                 p.funcretloc[side].register64.reglo:=NR_FUNCTION_RETURN64_LOW_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_LOW_REG;
+               paraloc^.size:=OS_32;
              end
             else
 {$endif cpu64bit}
              begin
-               p.funcretloc[side].loc:=LOC_REGISTER;
-               p.funcretloc[side].size:=retcgsize;
+               paraloc^.loc:=LOC_REGISTER;
+               paraloc^.size:=retcgsize;
                if side=callerside then
-                 p.funcretloc[side].register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
                else
-                 p.funcretloc[side].register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
              end;
           end
-        else
-          begin
-            p.funcretloc[side].loc:=LOC_REFERENCE;
-            p.funcretloc[side].size:=retcgsize;
-          end;
       end;
 
     var

+ 23 - 3
compiler/ncal.pas

@@ -28,7 +28,7 @@ interface
     uses
        cutils,cclasses,
        globtype,constexp,
-       paramgr,parabase,
+       paramgr,parabase,cgbase,
        node,nbas,nutils,
        {$ifdef state_tracking}
        nstate,
@@ -75,6 +75,7 @@ interface
           procedure check_inlining;
           function  pass1_normal:tnode;
           procedure register_created_object_types;
+          function get_expect_loc: tcgloc;
        protected
           procedure objc_convert_to_message_send;virtual;
 
@@ -222,7 +223,6 @@ implementation
       ncnv,nld,ninl,nadd,ncon,nmem,nset,nobjc,
       objcutil,
       procinfo,cpuinfo,
-      cgbase,
       wpobase
       ;
 
@@ -1762,6 +1762,26 @@ implementation
        end;
 
 
+    function tcallnode.get_expect_loc: tcgloc;
+      var
+        realresdef: tstoreddef;
+      begin
+        if not assigned(typedef) then
+          realresdef:=tstoreddef(resultdef)
+        else
+          realresdef:=tstoreddef(typedef);
+        if realresdef.is_intregable then
+          result:=LOC_REGISTER
+        else if realresdef.is_fpuregable then
+          if use_vectorfpu(realresdef) then
+            result:=LOC_MMREGISTER
+          else
+            result:=LOC_FPUREGISTER
+        else
+          result:=LOC_REFERENCE
+      end;
+
+
     procedure tcallnode.objc_convert_to_message_send;
       var
         block,
@@ -3221,7 +3241,7 @@ implementation
              else
              { we have only to handle the result if it is used }
               if (cnf_return_value_used in callnodeflags) then
-               expectloc:=procdefinition.funcretloc[callerside].loc
+                expectloc:=get_expect_loc
              else
                expectloc:=LOC_VOID;
            end

+ 2 - 36
compiler/ncgbas.pas

@@ -70,7 +70,7 @@ interface
       cutils,verbose,
       aasmbase,aasmtai,aasmdata,aasmcpu,
       symsym,symconst,symdef,defutil,
-      nflw,pass_2,
+      nflw,pass_2,ncgutil,
       cgbase,cgobj,
       procinfo,
       tgobj
@@ -406,41 +406,7 @@ interface
           end
         else if (ti_may_be_in_reg in tempinfo^.flags) then
           begin
-            if tempinfo^.typedef.typ=floatdef then
-              begin
-                if use_vectorfpu(tempinfo^.typedef) then
-                  begin
-                    if (tempinfo^.temptype = tt_persistent) then
-                      location_reset(tempinfo^.location,LOC_CMMREGISTER,def_cgsize(tempinfo^.typedef))
-                    else
-                      location_reset(tempinfo^.location,LOC_MMREGISTER,def_cgsize(tempinfo^.typedef));
-                    tempinfo^.location.register:=cg.getmmregister(current_asmdata.CurrAsmList,tempinfo^.location.size);
-                  end
-                else
-                  begin
-                    if (tempinfo^.temptype = tt_persistent) then
-                      location_reset(tempinfo^.location,LOC_CFPUREGISTER,def_cgsize(tempinfo^.typedef))
-                    else
-                      location_reset(tempinfo^.location,LOC_FPUREGISTER,def_cgsize(tempinfo^.typedef));
-                    tempinfo^.location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,tempinfo^.location.size);
-                  end;
-              end
-            else
-              begin
-                if (tempinfo^.temptype = tt_persistent) then
-                  location_reset(tempinfo^.location,LOC_CREGISTER,def_cgsize(tempinfo^.typedef))
-                else
-                  location_reset(tempinfo^.location,LOC_REGISTER,def_cgsize(tempinfo^.typedef));
-{$ifndef cpu64bitalu}
-                if tempinfo^.location.size in [OS_64,OS_S64] then
-                  begin
-                    tempinfo^.location.register64.reglo:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
-                    tempinfo^.location.register64.reghi:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
-                  end
-                else
-{$endif not cpu64bitalu}
-                  tempinfo^.location.register:=cg.getintregister(current_asmdata.CurrAsmList,tempinfo^.location.size);
-              end;
+            location_allocate_register(current_asmdata.CurrAsmList,tempinfo^.location,tempinfo^.typedef,tempinfo^.temptype = tt_persistent);
           end
         else
           begin

+ 130 - 235
compiler/ncgcal.pas

@@ -45,7 +45,6 @@ interface
 
        tcgcallnode = class(tcallnode)
        private
-          retloc: tlocation;
 
           procedure handle_return_value;
           procedure release_unused_return_value;
@@ -53,6 +52,8 @@ interface
           procedure pushparas;
           procedure freeparas;
        protected
+          retloc: tcgpara;
+
           framepointer_paraloc : tcgpara;
           {# This routine is used to push the current frame pointer
              on the stack. This is used in nested routines where the
@@ -68,8 +69,15 @@ interface
           procedure extra_call_code;virtual;
           procedure extra_post_call_code;virtual;
           procedure do_syscall;virtual;abstract;
+
+          { The function result is returned in a tcgpara. This tcgpara has to
+            be translated into a tlocation so the rest of the code generator
+            can work with it. This routine decides what the most appropriate
+            tlocation is and sets self.location based on that. }
+          procedure set_result_location(realresdef: tstoreddef);virtual;
        public
           procedure pass_generate_code;override;
+          destructor destroy;override;
        end;
 
 
@@ -271,6 +279,27 @@ implementation
       end;
 
 
+    procedure tcgcallnode.set_result_location(realresdef: tstoreddef);
+      begin
+        if realresdef.is_intregable or
+           realresdef.is_fpuregable or
+           { avoid temporarily storing pointer-sized entities that can't be
+             regvars, such as reference-counted pointers, to memory --
+             no exception can occur right now (except in case of existing
+             memory corruption), and we'd store them to a regular temp
+             anyway and that is not safer than keeping them in a register }
+           ((realresdef.size=sizeof(aint)) and
+            (retloc.location^.loc=LOC_REGISTER) and
+            not assigned(retloc.location^.next)) then
+          location_allocate_register(current_asmdata.CurrAsmList,location,realresdef,false)
+        else
+          begin
+            location_reset_ref(location,LOC_REFERENCE,def_cgsize(realresdef),0);
+            tg.GetTemp(current_asmdata.CurrAsmList,retloc.intsize,retloc.Alignment,tt_normal,location.reference);
+          end;
+      end;
+
+
     procedure tcgcallnode.pop_parasize(pop_size:longint);
       begin
       end;
@@ -278,165 +307,61 @@ implementation
 
     procedure tcgcallnode.handle_return_value;
       var
-        tmpcgsize,
-        cgsize    : tcgsize;
-{$ifdef cpu64bitaddr}
-        ref       : treference;
-{$endif cpu64bitaddr}
-{$ifndef x86}
-        hregister : tregister;
-{$endif not x86}
+        realresdef: tstoreddef;
       begin
         { Check that the return location is set when the result is passed in
           a parameter }
         if (procdefinition.proctypeoption<>potype_constructor) and
            paramanager.ret_in_param(resultdef,procdefinition.proccalloption) then
           begin
+            { self.location is set near the end of secondcallparan so it
+              refers to the implicit result parameter }
             if location.loc<>LOC_REFERENCE then
               internalerror(200304241);
             exit;
           end;
 
-        { Load normal (ordinal,float,pointer) result value from accumulator }
-        cgsize:=retloc.size;
-        case retloc.loc of
-           LOC_FPUREGISTER:
-             begin
-               location_reset(location,LOC_FPUREGISTER,cgsize);
-               location.register:=retloc.register;
+        if not assigned(typedef) then
+          realresdef:=tstoreddef(resultdef)
+        else
+          realresdef:=tstoreddef(typedef);
+
 {$ifdef x86}
-               tcgx86(cg).inc_fpu_stack;
-{$else x86}
-               { Do not move the physical register to a virtual one in case
-                 the return value is not used, because if the virtual one is
-                 then mapped to the same register as the physical one, we will
-                 end up with two deallocs of this register (one inserted here,
-                 one inserted by the register allocator), which unbalances the
-                 register allocation information.  The return register(s) will
-                 be freed by location_free() in release_unused_return_value
-                 (mantis #13536).  }
-               if (cnf_return_value_used in callnodeflags) then
-                 begin
-                   if getsupreg(retloc.register)<first_fpu_imreg then
-                     cg.ungetcpuregister(current_asmdata.CurrAsmList,retloc.register);
-                   hregister:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
-                   cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,location.size,location.size,location.register,hregister);
-                   location.register:=hregister;
-                 end;
+        if (retloc.location^.loc=LOC_FPUREGISTER) then
+          begin
+            tcgx86(cg).inc_fpu_stack;
+            location_reset(location,LOC_FPUREGISTER,retloc.location^.size);
+            location.register:=retloc.location^.register;
+          end
+        else
 {$endif x86}
-             end;
-
-           LOC_REGISTER:
-             begin
-               if cgsize<>OS_NO then
-                begin
-                  location_reset(location,LOC_REGISTER,cgsize);
-{$ifdef cpu64bitaddr}
-                  { x86-64 system v abi:
-                    structs with up to 16 bytes are returned in registers }
-                  if cgsize in [OS_128,OS_S128] then
-                    begin
-                      if retloc.loc<>LOC_REGISTER then
-                        internalerror(2009042001);
-                      { See #13536 comment above.  }
-                      if (cnf_return_value_used in callnodeflags) then
-                        begin
-                          tg.GetTemp(current_asmdata.CurrAsmList,16,8,tt_normal,ref);
-                          location_reset_ref(location,LOC_REFERENCE,OS_NO,0);
-                          location.reference:=ref;
-                          if getsupreg(retloc.register)<first_int_imreg then
-                            cg.ungetcpuregister(current_asmdata.CurrAsmList,retloc.register);
-                          cg.a_load_reg_ref(current_asmdata.CurrAsmList,OS_64,OS_64,retloc.register,ref);
-                          inc(ref.offset,8);
-                          if getsupreg(retloc.registerhi)<first_int_imreg then
-                            cg.ungetcpuregister(current_asmdata.CurrAsmList,retloc.registerhi);
-                          cg.a_load_reg_ref(current_asmdata.CurrAsmList,OS_64,OS_64,retloc.registerhi,ref);
-                        end
-                      else
-                        location:=retloc;
-                    end
-                  else
-{$else cpu64bitaddr}
-                  if cgsize in [OS_64,OS_S64] then
-                    begin
-                      if retloc.loc<>LOC_REGISTER then
-                        internalerror(200409141);
-                      { See #13536 comment above.  }
-                      if (cnf_return_value_used in callnodeflags) then
-                        begin
-                          { the function result registers are already allocated }
-                          if getsupreg(retloc.register64.reglo)<first_int_imreg then
-                            cg.ungetcpuregister(current_asmdata.CurrAsmList,retloc.register64.reglo);
-                          location.register64.reglo:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
-                          cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_32,OS_32,retloc.register64.reglo,location.register64.reglo);
-                          if getsupreg(retloc.register64.reghi)<first_int_imreg then
-                            cg.ungetcpuregister(current_asmdata.CurrAsmList,retloc.register64.reghi);
-                          location.register64.reghi:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
-                          cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_32,OS_32,retloc.register64.reghi,location.register64.reghi);
-                        end
-                      else
-                        location:=retloc;
-                    end
-                  else
-{$endif not cpu64bitaddr}
-                    begin
-                      { change register size after the unget because the
-                        getregister was done for the full register
-                        def_cgsize(resultdef) is used here because
-                        it could be a constructor call }
-
-                      { See #13536 comment above.  }
-                      if (cnf_return_value_used in callnodeflags) then
-                        begin
-                          if getsupreg(retloc.register)<first_int_imreg then
-                            cg.ungetcpuregister(current_asmdata.CurrAsmList,retloc.register);
-
-                          { but use def_size only if it returns something valid because in
-                            case of odd sized structured results in registers def_cgsize(resultdef)
-                            could return OS_NO }
-                          if def_cgsize(resultdef)<>OS_NO then
-                            tmpcgsize:=def_cgsize(resultdef)
-                          else
-                            tmpcgsize:=cgsize;
-
-                          location.register:=cg.getintregister(current_asmdata.CurrAsmList,tmpcgsize);
-                          cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,tmpcgsize,retloc.register,location.register);
-                        end
-                      else
-                        location:=retloc;
-                    end;
+          begin
+            { get a tlocation that can hold the return value that's currently in
+            the the return value's tcgpara }
+            set_result_location(realresdef);
+
+            { Do not move the physical register to a virtual one in case
+              the return value is not used, because if the virtual one is
+              then mapped to the same register as the physical one, we will
+              end up with two deallocs of this register (one inserted here,
+              one inserted by the register allocator), which unbalances the
+              register allocation information.  The return register(s) will
+              be freed by location_free() in release_unused_return_value
+              (mantis #13536).  }
+            if (cnf_return_value_used in callnodeflags) or
+               assigned(funcretnode) then
+              begin
+                gen_load_cgpara_loc(current_asmdata.CurrAsmList,realresdef,retloc,location,false);
 {$ifdef arm}
-                  if (resultdef.typ=floatdef) and (current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11]) then
-                    begin
-                      location_force_mem(current_asmdata.CurrAsmList,location);
-                    end;
+                if (resultdef.typ=floatdef) and
+                   (location.loc=LOC_REGISTER) and
+                   (current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11]) then
+                  begin
+                    location_force_mem(current_asmdata.CurrAsmList,location);
+                  end;
 {$endif arm}
-                end
-               else
-                begin
-                  if resultdef.size>0 then
-                    internalerror(200305131);
-                end;
-             end;
-
-           LOC_MMREGISTER:
-             begin
-               { See #13536 comment above.  }
-               if (cnf_return_value_used in callnodeflags) then
-                 begin
-                   location_reset(location,LOC_MMREGISTER,cgsize);
-                   if getsupreg(retloc.register)<first_mm_imreg then
-                     cg.ungetcpuregister(current_asmdata.CurrAsmList,retloc.register);
-                   location.register:=cg.getmmregister(current_asmdata.CurrAsmList,cgsize);
-                   cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,retloc.register,location.register,mms_movescalar);
-                 end
-               else
-                 location:=retloc;
-             end;
-
-           else
-             internalerror(200405023);
-        end;
+              end;
+          end;
 
         { copy value to the final location if this was already provided to the
           callnode. This must be done after the call node, because the location can
@@ -455,16 +380,16 @@ implementation
             case location.loc of
               LOC_REGISTER :
 {$ifndef cpu64bitalu}
-                if cgsize in [OS_64,OS_S64] then
+                if location.size in [OS_64,OS_S64] then
                   cg64.a_load64_reg_loc(current_asmdata.CurrAsmList,location.register64,funcretnode.location)
                 else
 {$endif}
-                  cg.a_load_reg_loc(current_asmdata.CurrAsmList,cgsize,location.register,funcretnode.location);
+                  cg.a_load_reg_loc(current_asmdata.CurrAsmList,location.size,location.register,funcretnode.location);
               LOC_REFERENCE:
                 begin
                   case funcretnode.location.loc of
                     LOC_REGISTER:
-                      cg.a_load_ref_reg(current_asmdata.CurrAsmList,cgsize,cgsize,location.reference,funcretnode.location.register);
+                      cg.a_load_ref_reg(current_asmdata.CurrAsmList,location.size,location.size,location.reference,funcretnode.location.register);
                     LOC_REFERENCE:
                       cg.g_concatcopy(current_asmdata.CurrAsmList,location.reference,funcretnode.location.reference,resultdef.size);
                     else
@@ -486,24 +411,24 @@ implementation
           tree is generated, because that converts the temp from persistent to normal }
         if not(cnf_return_value_used in callnodeflags) then
           begin
-           case location.loc of
-             LOC_REFERENCE :
-               begin
-                 if is_managed_type(resultdef) then
-                    cg.g_finalize(current_asmdata.CurrAsmList,resultdef,location.reference);
-                  tg.ungetiftemp(current_asmdata.CurrAsmList,location.reference);
-               end;
-{$ifdef x86}
-             LOC_FPUREGISTER :
+            case location.loc of
+              LOC_REFERENCE :
                 begin
-                  { release FPU stack }
-                  emit_reg(A_FSTP,S_NO,NR_FPU_RESULT_REG);
-                  tcgx86(cg).dec_fpu_stack;
+                  if is_managed_type(resultdef) then
+                     cg.g_finalize(current_asmdata.CurrAsmList,resultdef,location.reference);
+                   tg.ungetiftemp(current_asmdata.CurrAsmList,location.reference);
                 end;
+{$ifdef x86}
+              LOC_FPUREGISTER :
+                 begin
+                   { release FPU stack }
+                   emit_reg(A_FSTP,S_NO,NR_FPU_RESULT_REG);
+                   tcgx86(cg).dec_fpu_stack;
+                 end;
 {$endif x86}
-           end;
-            if retloc.size<>OS_NO then
-              location_free(current_asmdata.CurrAsmList,retloc);
+            end;
+            if (retloc.intsize<>0) then
+              paramanager.freecgpara(current_asmdata.CurrAsmList,retloc);
             location_reset(location,LOC_VOID,OS_NO);
           end;
       end;
@@ -570,7 +495,7 @@ implementation
                  { better check for the real location of the parameter here, when stack passed parameters
                    are saved temporary in registers, checking for the tmpparaloc.loc is wrong
                  }
-                 paramanager.freeparaloc(current_asmdata.CurrAsmList,ppn.tempcgpara);
+                 paramanager.freecgpara(current_asmdata.CurrAsmList,ppn.tempcgpara);
                  tmpparaloc:=ppn.tempcgpara.location;
                  sizeleft:=ppn.tempcgpara.intsize;
                  calleralignment:=ppn.parasym.paraloc[callerside].alignment;
@@ -670,7 +595,7 @@ implementation
              if (ppn.left.nodetype<>nothingn) then
                begin
                  if (ppn.parasym.paraloc[callerside].location^.loc <> LOC_REFERENCE) then
-                   paramanager.freeparaloc(current_asmdata.CurrAsmList,ppn.parasym.paraloc[callerside]);
+                   paramanager.freecgpara(current_asmdata.CurrAsmList,ppn.parasym.paraloc[callerside]);
                end;
              ppn:=tcgcallparanode(ppn.right);
            end;
@@ -690,6 +615,7 @@ implementation
         pvreg,
         vmtreg : tregister;
         oldaktcallnode : tcallnode;
+        retlocitem: pcgparalocation;
 {$ifdef vtentry}
         sym : tasmsymbol;
 {$endif vtentry}
@@ -717,48 +643,23 @@ implementation
               retloc:=procdefinition.funcretloc[callerside]
             else
               retloc:=paramanager.get_funcretloc(procdefinition,callerside,typedef);
-            case retloc.loc of
-              LOC_REGISTER,
-              LOC_CREGISTER:
-                begin
-{$ifdef cpu64bitaddr}
-                  { x86-64 system v abi:
-                    structs with up to 16 bytes are returned in registers }
-                  if retloc.size in [OS_128,OS_S128] then
-                    begin
-                      include(regs_to_save_int,getsupreg(retloc.register));
-                      include(regs_to_save_int,getsupreg(retloc.registerhi));
-                    end
+            retlocitem:=retloc.location;
+            while assigned(retlocitem) do
+              begin
+                case retlocitem^.loc of
+                  LOC_REGISTER:
+                    include(regs_to_save_int,getsupreg(retlocitem^.register));
+                  LOC_FPUREGISTER:
+                    include(regs_to_save_fpu,getsupreg(retlocitem^.register));
+                  LOC_MMREGISTER:
+                    include(regs_to_save_mm,getsupreg(retlocitem^.register));
+                  LOC_REFERENCE,
+                  LOC_VOID:
+                    ;
                   else
-{$else cpu64bitaddr}
-                  if retloc.size in [OS_64,OS_S64] then
-                    begin
-                      include(regs_to_save_int,getsupreg(retloc.register64.reglo));
-                      include(regs_to_save_int,getsupreg(retloc.register64.reghi));
-                    end
-                  else
-{$endif not cpu64bitaddr}
-                    include(regs_to_save_int,getsupreg(retloc.register));
+                    internalerror(2004110213);
                 end;
-              LOC_FPUREGISTER,
-              LOC_CFPUREGISTER:
-                begin
-                  include(regs_to_save_fpu,getsupreg(retloc.register));
-{$ifdef SPARC}
-                  { SPARC uses two successive single precision fpu registers
-                    for double-precision values  }
-                  if retloc.size=OS_F64 then
-                    include(regs_to_save_fpu,succ(getsupreg(retloc.register)));
-{$endif SPARC}
-                end;
-              LOC_MMREGISTER,
-              LOC_CMMREGISTER:
-                include(regs_to_save_mm,getsupreg(retloc.register));
-              LOC_REFERENCE,
-              LOC_VOID:
-                ;
-              else
-                internalerror(2004110213);
+                retlocitem:=retlocitem^.next;
               end;
           end;
 
@@ -948,38 +849,24 @@ implementation
            function result }
          if (not is_void(resultdef)) then
            begin
-             case retloc.loc of
-               LOC_REGISTER,
-               LOC_CREGISTER:
-                 begin
-{$ifndef cpu64bitalu}
-                   if retloc.size in [OS_64,OS_S64] then
-                     begin
-                       exclude(regs_to_save_int,getsupreg(retloc.register64.reghi));
-                       exclude(regs_to_save_int,getsupreg(retloc.register64.reglo));
-                     end
-{$else not cpu64bitalu}
-                   if retloc.size in [OS_128,OS_S128] then
-                     begin
-                       exclude(regs_to_save_int,getsupreg(retloc.register));
-                       exclude(regs_to_save_int,getsupreg(retloc.registerhi));
-                     end
-{$endif not cpu64bitalu}
+             retlocitem:=retloc.location;
+             while assigned(retlocitem) do
+               begin
+                 case retlocitem^.loc of
+                   LOC_REGISTER:
+                     exclude(regs_to_save_int,getsupreg(retlocitem^.register));
+                   LOC_FPUREGISTER:
+                     exclude(regs_to_save_fpu,getsupreg(retlocitem^.register));
+                   LOC_MMREGISTER:
+                     exclude(regs_to_save_mm,getsupreg(retlocitem^.register));
+                   LOC_REFERENCE,
+                   LOC_VOID:
+                     ;
                    else
-                     exclude(regs_to_save_int,getsupreg(retloc.register));
+                     internalerror(2004110214);
                  end;
-               LOC_FPUREGISTER,
-               LOC_CFPUREGISTER:
-                 exclude(regs_to_save_fpu,getsupreg(retloc.register));
-               LOC_MMREGISTER,
-               LOC_CMMREGISTER:
-                 exclude(regs_to_save_mm,getsupreg(retloc.register));
-               LOC_REFERENCE,
-               LOC_VOID:
-                 ;
-               else
-                 internalerror(2004110214);
-              end;
+                 retlocitem:=retlocitem^.next;
+               end;
            end;
 
 {$if defined(x86) or defined(arm)}
@@ -1037,6 +924,14 @@ implementation
       end;
 
 
+    destructor tcgcallnode.destroy;
+      begin
+        if assigned(typedef) then
+          retloc.done;
+        inherited destroy;
+      end;
+
+
 begin
    ccallparanode:=tcgcallparanode;
    ccallnode:=tcgcallnode;

+ 8 - 18
compiler/ncgflw.pas

@@ -977,14 +977,12 @@ implementation
               { Push parameters }
               if assigned(right) then
                 begin
-                  paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc3);
                   { frame tree }
                   if assigned(third) then
                     cg.a_load_loc_cgpara(current_asmdata.CurrAsmList,third.location,paraloc3)
                   else
                     cg.a_load_const_cgpara(current_asmdata.CurrAsmList,OS_INT,0,paraloc3);
                   { push address }
-                  paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc2);
                   cg.a_load_loc_cgpara(current_asmdata.CurrAsmList,right.location,paraloc2);
                 end
               else
@@ -994,20 +992,17 @@ implementation
                    cg.a_label(current_asmdata.CurrAsmList,a);
                    reference_reset_symbol(href2,a,0,1);
                    { push current frame }
-                   paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc3);
                    cg.a_load_reg_cgpara(current_asmdata.CurrAsmList,OS_ADDR,NR_FRAME_POINTER_REG,paraloc3);
                    { push current address }
-                   paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc2);
                    if target_info.system <> system_powerpc_macos then
                      cg.a_loadaddr_ref_cgpara(current_asmdata.CurrAsmList,href2,paraloc2)
                    else
                      cg.a_load_const_cgpara(current_asmdata.CurrAsmList,OS_INT,0,paraloc2);
                 end;
-              paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
               cg.a_load_loc_cgpara(current_asmdata.CurrAsmList,left.location,paraloc1);
-              paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
-              paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc2);
-              paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc3);
+              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
+              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc2);
+              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc3);
               cg.allocallcpuregisters(current_asmdata.CurrAsmList);
               cg.a_call_name(current_asmdata.CurrAsmList,'FPC_RAISEEXCEPTION',false);
               cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -1045,10 +1040,9 @@ implementation
          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
          paraloc1.init;
          paramanager.getintparaloc(pocall_default,1,paraloc1);
-         paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
          cg.a_load_reg_cgpara(current_asmdata.CurrAsmList,OS_ADDR,NR_FUNCTION_RESULT_REG,paraloc1);
-         paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+         paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
          cg.allocallcpuregisters(current_asmdata.CurrAsmList);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_DESTROYEXCEPTION',false);
          cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -1169,9 +1163,8 @@ implementation
               }
               paraloc1.init;
               paramanager.getintparaloc(pocall_default,1,paraloc1);
-              paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
               cg.a_load_const_cgpara(current_asmdata.CurrAsmList,OS_ADDR,-1,paraloc1);
-              paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
               cg.allocallcpuregisters(current_asmdata.CurrAsmList);
               cg.a_call_name(current_asmdata.CurrAsmList,'FPC_CATCHES',false);
               cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -1202,9 +1195,8 @@ implementation
               paraloc1.init;
               paramanager.getintparaloc(pocall_default,1,paraloc1);
               cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
-              paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
               cg.a_load_reg_cgpara(current_asmdata.CurrAsmList, OS_ADDR, NR_FUNCTION_RESULT_REG, paraloc1);
-              paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
               cg.allocallcpuregisters(current_asmdata.CurrAsmList);
               cg.a_call_name(current_asmdata.CurrAsmList,'FPC_DESTROYEXCEPTION',false);
               cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -1358,9 +1350,8 @@ implementation
          { send the vmt parameter }
          reference_reset_symbol(href2,current_asmdata.RefAsmSymbol(excepttype.vmt_mangledname),0,sizeof(pint));
          paramanager.getintparaloc(pocall_default,1,paraloc1);
-         paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
          cg.a_loadaddr_ref_cgpara(current_asmdata.CurrAsmList,href2,paraloc1);
-         paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+         paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
          cg.allocallcpuregisters(current_asmdata.CurrAsmList);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_CATCHES',false);
          cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -1431,9 +1422,8 @@ implementation
          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
          paramanager.getintparaloc(pocall_default,1,paraloc1);
          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
-         paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
          cg.a_load_reg_cgpara(current_asmdata.CurrAsmList, OS_ADDR, NR_FUNCTION_RESULT_REG, paraloc1);
-         paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+         paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
          cg.allocallcpuregisters(current_asmdata.CurrAsmList);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_DESTROYEXCEPTION',false);
          cg.deallocallcpuregisters(current_asmdata.CurrAsmList);

+ 4 - 8
compiler/ncginl.pas

@@ -216,22 +216,18 @@ implementation
        if codegenerror then
           exit;
        { push erroraddr }
-       paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc4);
        cg.a_load_reg_cgpara(current_asmdata.CurrAsmList,OS_ADDR,NR_FRAME_POINTER_REG,paraloc4);
        { push lineno }
-       paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc3);
        cg.a_load_const_cgpara(current_asmdata.CurrAsmList,OS_INT,current_filepos.line,paraloc3);
        { push filename }
-       paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc2);
        cg.a_loadaddr_ref_cgpara(current_asmdata.CurrAsmList,hp2.location.reference,paraloc2);
        { push msg }
-       paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
        cg.a_loadaddr_ref_cgpara(current_asmdata.CurrAsmList,hp3.location.reference,paraloc1);
        { call }
-       paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
-       paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc2);
-       paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc3);
-       paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc4);
+       paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
+       paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc2);
+       paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc3);
+       paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc4);
        cg.allocallcpuregisters(current_asmdata.CurrAsmList);
        cg.a_call_name(current_asmdata.CurrAsmList,'FPC_ASSERT',false);
        cg.deallocallcpuregisters(current_asmdata.CurrAsmList);

+ 1 - 2
compiler/ncgld.pas

@@ -349,9 +349,8 @@ implementation
                           reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname),0,sizeof(pint))
                         else
                           reference_reset_symbol(href,current_asmdata.WeakRefAsmSymbol(gvs.mangledname),0,sizeof(pint));
-                        paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
                         cg.a_load_ref_cgpara(current_asmdata.CurrAsmList,OS_32,href,paraloc1);
-                        paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+                        paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
                         paraloc1.done;
                         cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                         cg.a_call_reg(current_asmdata.CurrAsmList,hregister);

+ 1 - 2
compiler/ncgmat.pas

@@ -368,9 +368,8 @@ implementation
                   cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_NE,0,hdenom,hl);
                   paraloc1.init;
                   paramanager.getintparaloc(pocall_default,1,paraloc1);
-                  paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
                   cg.a_load_const_cgpara(current_asmdata.CurrAsmList,OS_S32,200,paraloc1);
-                  paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+                  paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
                   cg.a_call_name(current_asmdata.CurrAsmList,'FPC_HANDLEERROR',false);
                   paraloc1.done;
                   cg.a_label(current_asmdata.CurrAsmList,hl);

+ 10 - 20
compiler/ncgmem.pas

@@ -265,9 +265,8 @@ implementation
           begin
             paraloc1.init;
             paramanager.getintparaloc(pocall_default,1,paraloc1);
-            paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
             cg.a_load_reg_cgpara(current_asmdata.CurrAsmList, OS_ADDR,location.reference.base,paraloc1);
-            paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+            paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
             paraloc1.done;
             cg.allocallcpuregisters(current_asmdata.CurrAsmList);
             cg.a_call_name(current_asmdata.CurrAsmList,'FPC_CHECKPOINTER',false);
@@ -332,9 +331,8 @@ implementation
                 not(cs_compilesystem in current_settings.moduleswitches) then
               begin
                 paramanager.getintparaloc(pocall_default,1,paraloc1);
-                paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
                 cg.a_load_reg_cgpara(current_asmdata.CurrAsmList, OS_ADDR,location.reference.base,paraloc1);
-                paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+                paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
                 cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                 cg.a_call_name(current_asmdata.CurrAsmList,'FPC_CHECKPOINTER',false);
                 cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -351,9 +349,8 @@ implementation
                 not(cs_compilesystem in current_settings.moduleswitches) then
               begin
                 paramanager.getintparaloc(pocall_default,1,paraloc1);
-                paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
                 cg.a_load_reg_cgpara(current_asmdata.CurrAsmList, OS_ADDR,location.reference.base,paraloc1);
-                paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+                paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
                 cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                 cg.a_call_name(current_asmdata.CurrAsmList,'FPC_CHECKPOINTER',false);
                 cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -685,12 +682,10 @@ implementation
             begin
                paramanager.getintparaloc(pocall_default,1,paraloc1);
                paramanager.getintparaloc(pocall_default,2,paraloc2);
-               paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc2);
                cg.a_load_loc_cgpara(current_asmdata.CurrAsmList,right.location,paraloc2);
-               paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
                cg.a_load_loc_cgpara(current_asmdata.CurrAsmList,left.location,paraloc1);
-               paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
-               paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc2);
+               paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
+               paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc2);
                cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                cg.a_call_name(current_asmdata.CurrAsmList,'FPC_DYNARRAY_RANGECHECK',false);
                cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -771,9 +766,8 @@ implementation
               if (cs_check_range in current_settings.localswitches) then
                 begin
                    paramanager.getintparaloc(pocall_default,1,paraloc1);
-                   paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
                    cg.a_load_reg_cgpara(current_asmdata.CurrAsmList,OS_ADDR,location.reference.base,paraloc1);
-                   paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+                   paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
                    cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                    cg.a_call_name(current_asmdata.CurrAsmList,'FPC_'+upper(tstringdef(left.resultdef).stringtypname)+'_CHECKZERO',false);
                    cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -876,10 +870,8 @@ implementation
                            begin
                               paramanager.getintparaloc(pocall_default,1,paraloc1);
                               paramanager.getintparaloc(pocall_default,2,paraloc2);
-                              paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc2);
                               cg.a_load_const_cgpara(current_asmdata.CurrAsmList,OS_INT,tordconstnode(right).value.svalue,paraloc2);
                               href:=location.reference;
-                              paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
                               if not(tf_winlikewidestring in target_info.flags) or
                                  (tstringdef(left.resultdef).stringtype<>st_widestring) then
                                 begin
@@ -892,8 +884,8 @@ implementation
                                   dec(href.offset,4-offsetdec);
                                   cg.a_load_ref_cgpara(current_asmdata.CurrAsmList,OS_32,href,paraloc1);
                                 end;
-                              paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
-                              paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc2);
+                              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
+                              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc2);
                               cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                               cg.a_call_name(current_asmdata.CurrAsmList,'FPC_'+upper(tstringdef(left.resultdef).stringtypname)+'_RANGECHECK',false);
                               cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -1038,13 +1030,11 @@ implementation
                            begin
                               paramanager.getintparaloc(pocall_default,1,paraloc1);
                               paramanager.getintparaloc(pocall_default,2,paraloc2);
-                              paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc2);
                               cg.a_load_reg_cgpara(current_asmdata.CurrAsmList,OS_INT,right.location.register,paraloc2);
                               href:=location.reference;
                               dec(href.offset,sizeof(pint)-offsetdec);
 
                               href:=location.reference;
-                              paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
                               if not(tf_winlikewidestring in target_info.flags) or
                                  (tstringdef(left.resultdef).stringtype<>st_widestring) then
                                 begin
@@ -1058,8 +1048,8 @@ implementation
                                   cg.a_load_ref_cgpara(current_asmdata.CurrAsmList,OS_32,href,paraloc1);
                                 end;
 
-                              paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
-                              paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc2);
+                              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
+                              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc2);
                               cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                               cg.a_call_name(current_asmdata.CurrAsmList,'FPC_'+upper(tstringdef(left.resultdef).stringtypname)+'_RANGECHECK',false);
                               cg.deallocallcpuregisters(current_asmdata.CurrAsmList);

+ 158 - 272
compiler/ncgutil.pas

@@ -65,12 +65,13 @@ interface
     procedure location_force_mem(list:TAsmList;var l:tlocation);
     procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;maybeconst:boolean);
     procedure location_force_mmreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
+    procedure location_allocate_register(list:TAsmList;out l: tlocation;def: tdef;constant: boolean);
 
     { load a tlocation into a cgpara }
     procedure gen_load_loc_cgpara(list: TAsmList; vardef: tdef; const l: tlocation; const cgpara: tcgpara);
     { loads a cgpara into a tlocation; assumes that loc.loc is already
       initialised }
-    procedure gen_load_cgpara_loc(list: TAsmList; vardef: tdef; const para: TCGPara; var destloc: tlocation);
+    procedure gen_load_cgpara_loc(list: TAsmList; vardef: tdef; const para: TCGPara; var destloc: tlocation; reusepara: boolean);
 
     { allocate registers for a tlocation; assumes that loc.loc is already
       set to LOC_CREGISTER/LOC_CFPUREGISTER/... }
@@ -418,24 +419,20 @@ implementation
         paramanager.getintparaloc(pocall_default,1,paraloc1);
         paramanager.getintparaloc(pocall_default,2,paraloc2);
         paramanager.getintparaloc(pocall_default,3,paraloc3);
-        paramanager.allocparaloc(list,paraloc3);
         cg.a_loadaddr_ref_cgpara(list,t.envbuf,paraloc3);
-        paramanager.allocparaloc(list,paraloc2);
         cg.a_loadaddr_ref_cgpara(list,t.jmpbuf,paraloc2);
         { push type of exceptionframe }
-        paramanager.allocparaloc(list,paraloc1);
         cg.a_load_const_cgpara(list,OS_S32,1,paraloc1);
-        paramanager.freeparaloc(list,paraloc3);
-        paramanager.freeparaloc(list,paraloc2);
-        paramanager.freeparaloc(list,paraloc1);
+        paramanager.freecgpara(list,paraloc3);
+        paramanager.freecgpara(list,paraloc2);
+        paramanager.freecgpara(list,paraloc1);
         cg.allocallcpuregisters(list);
         cg.a_call_name(list,'FPC_PUSHEXCEPTADDR',false);
         cg.deallocallcpuregisters(list);
 
         paramanager.getintparaloc(pocall_default,1,paraloc1);
-        paramanager.allocparaloc(list,paraloc1);
         cg.a_load_reg_cgpara(list,OS_ADDR,NR_FUNCTION_RESULT_REG,paraloc1);
-        paramanager.freeparaloc(list,paraloc1);
+        paramanager.freecgpara(list,paraloc1);
         cg.allocallcpuregisters(list);
         cg.a_call_name(list,'FPC_SETJMP',false);
         cg.deallocallcpuregisters(list);
@@ -791,56 +788,89 @@ implementation
 {$ifdef i386}
         href   : treference;
         size   : longint;
-{$else i386}
-        tmploc : tlocation;
 {$endif i386}
+        tmploc : tlocation;
       begin
 {$ifdef i386}
-           if cgpara.location^.loc<>LOC_REFERENCE then
-             internalerror(200309291);
            case l.loc of
              LOC_FPUREGISTER,
              LOC_CFPUREGISTER:
                begin
-                 size:=align(locintsize,cgpara.alignment);
-                 if (not use_fixed_stack) and
-                    (cgpara.location^.reference.index=NR_STACK_POINTER_REG) then
-                   begin
-                     cg.g_stackpointer_alloc(list,size);
-                     reference_reset_base(href,NR_STACK_POINTER_REG,0,sizeof(pint));
-                   end
-                 else
-                   reference_reset_base(href,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
-                 cg.a_loadfpu_reg_ref(list,l.size,l.size,l.register,href);
+                 case cgpara.location^.loc of
+                   LOC_REFERENCE:
+                     begin
+                       size:=align(locintsize,cgpara.alignment);
+                       if (not use_fixed_stack) and
+                          (cgpara.location^.reference.index=NR_STACK_POINTER_REG) then
+                         begin
+                           cg.g_stackpointer_alloc(list,size);
+                           reference_reset_base(href,NR_STACK_POINTER_REG,0,sizeof(pint));
+                         end
+                       else
+                         reference_reset_base(href,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
+                       cg.a_loadfpu_reg_ref(list,l.size,l.size,l.register,href);
+                     end;
+                   LOC_FPUREGISTER:
+                     begin
+                       cg.a_loadfpu_reg_reg(list,l.size,cgpara.location^.size,l.register,cgpara.location^.register);
+                     end;
+                   else
+                     internalerror(2010053003);
+                 end;
                end;
              LOC_MMREGISTER,
              LOC_CMMREGISTER:
                begin
-                 { can't use TCGSize2Size[l.size], because the size of an
-                   80 bit extended parameter can be either 10 or 12 bytes }
-                 size:=align(locintsize,cgpara.alignment);
-                 if (not use_fixed_stack) and
-                    (cgpara.location^.reference.index=NR_STACK_POINTER_REG) then
-                   begin
-                     cg.g_stackpointer_alloc(list,size);
-                     reference_reset_base(href,NR_STACK_POINTER_REG,0,sizeof(pint));
-                   end
-                 else
-                   reference_reset_base(href,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
-                 cg.a_loadmm_reg_ref(list,l.size,l.size,l.register,href,mms_movescalar);
+                 case cgpara.location^.loc of
+                   LOC_REFERENCE:
+                     begin
+                       { can't use TCGSize2Size[l.size], because the size of an
+                         80 bit extended parameter can be either 10 or 12 bytes }
+                       size:=align(locintsize,cgpara.alignment);
+                       if (not use_fixed_stack) and
+                          (cgpara.location^.reference.index=NR_STACK_POINTER_REG) then
+                         begin
+                           cg.g_stackpointer_alloc(list,size);
+                           reference_reset_base(href,NR_STACK_POINTER_REG,0,sizeof(pint));
+                         end
+                       else
+                         reference_reset_base(href,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
+                       cg.a_loadmm_reg_ref(list,l.size,l.size,l.register,href,mms_movescalar);
+                     end;
+                   LOC_FPUREGISTER:
+                     begin
+                       tmploc:=l;
+                       location_force_mem(list,tmploc);
+                       cg.a_loadfpu_ref_cgpara(list,tmploc.size,tmploc.reference,cgpara);
+                       location_freetemp(list,tmploc);
+                     end;
+                   else
+                     internalerror(2010053004);
+                 end;
                end;
              LOC_REFERENCE,
              LOC_CREFERENCE :
                begin
-                 size:=align(locintsize,cgpara.alignment);
-                 if (not use_fixed_stack) and
-                    (cgpara.location^.reference.index=NR_STACK_POINTER_REG) then
-                   cg.a_load_ref_cgpara(list,l.size,l.reference,cgpara)
-                 else
-                   begin
-                     reference_reset_base(href,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
-                     cg.g_concatcopy(list,l.reference,href,size);
-                   end;
+                 case cgpara.location^.loc of
+                   LOC_REFERENCE:
+                     begin
+                       size:=align(locintsize,cgpara.alignment);
+                       if (not use_fixed_stack) and
+                          (cgpara.location^.reference.index=NR_STACK_POINTER_REG) then
+                         cg.a_load_ref_cgpara(list,l.size,l.reference,cgpara)
+                       else
+                         begin
+                           reference_reset_base(href,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
+                           cg.g_concatcopy(list,l.reference,href,size);
+                         end;
+                     end;
+                   LOC_FPUREGISTER:
+                     begin
+                       cg.a_loadfpu_ref_cgpara(list,l.size,l.reference,cgpara);
+                     end;
+                   else
+                     internalerror(2010053005);
+                 end;
                end;
              else
                internalerror(2002042430);
@@ -913,13 +943,16 @@ implementation
                     value is still a const or in a register then write it
                     to a reference first. This situation can be triggered
                     by typecasting an int64 constant to a record of 8 bytes }
-                  tmploc:=l;
-                  if tmploc.size in [OS_64,OS_S64] then
-                    location_force_mem(list,tmploc);
-                  cg.a_load_loc_cgpara(list,tmploc,cgpara);
-{$else not cpu64bitalu}
-                  cg.a_load_loc_cgpara(list,l,cgpara);
+                  if l.size in [OS_64,OS_S64] then
+                    begin
+                      tmploc:=l;
+                      location_force_mem(list,tmploc);
+                      cg.a_load_loc_cgpara(list,tmploc,cgpara);
+                      location_freetemp(list,tmploc);
+                    end
+                  else
 {$endif not cpu64bitalu}
+                    cg.a_load_loc_cgpara(list,l,cgpara);
                end;
              else
                internalerror(2002042432);
@@ -963,13 +996,16 @@ implementation
                     value is still a const or in a register then write it
                     to a reference first. This situation can be triggered
                     by typecasting an int64 constant to a record of 8 bytes }
-                  tmploc:=l;
-                  if tmploc.size in [OS_64,OS_S64] then
-                    location_force_mem(list,tmploc);
-                  cg.a_load_loc_cgpara(list,tmploc,cgpara);
-{$else not cpu64bitalu}
-                  cg.a_load_loc_cgpara(list,l,cgpara);
+                  if l.size in [OS_64,OS_S64] then
+                    begin
+                      tmploc:=l;
+                      location_force_mem(list,tmploc);
+                      cg.a_load_loc_cgpara(list,tmploc,cgpara);
+                      location_freetemp(list,tmploc);
+                    end
+                  else
 {$endif not cpu64bitalu}
+                    cg.a_load_loc_cgpara(list,l,cgpara);
                 end;
             end;
 {$ifdef SUPPORT_MMX}
@@ -1025,6 +1061,48 @@ implementation
       end;
 
 
+    procedure location_allocate_register(list: TAsmList;out l: tlocation;def: tdef;constant: boolean);
+      begin
+        l.size:=def_cgsize(def);
+        if (def.typ=floatdef) and
+           not(cs_fp_emulation in current_settings.moduleswitches) then
+          begin
+            if use_vectorfpu(def) then
+              begin
+                if constant then
+                  location_reset(l,LOC_CMMREGISTER,l.size)
+                else
+                  location_reset(l,LOC_MMREGISTER,l.size);
+                l.register:=cg.getmmregister(list,l.size);
+              end
+            else
+              begin
+                if constant then
+                  location_reset(l,LOC_CFPUREGISTER,l.size)
+                else
+                  location_reset(l,LOC_FPUREGISTER,l.size);
+                l.register:=cg.getfpuregister(list,l.size);
+              end;
+          end
+        else
+          begin
+            if constant then
+              location_reset(l,LOC_CREGISTER,l.size)
+            else
+              location_reset(l,LOC_REGISTER,l.size);
+{$ifndef cpu64bitalu}
+            if l.size in [OS_64,OS_S64,OS_F64] then
+              begin
+                l.register64.reglo:=cg.getintregister(list,OS_32);
+                l.register64.reghi:=cg.getintregister(list,OS_32);
+              end
+            else
+{$endif not cpu64bitalu}
+              l.register:=cg.getintregister(list,l.size);
+          end;
+      end;
+
+
     procedure location_force_mem(list:TAsmList;var l:tlocation);
       var
         r : treference;
@@ -1573,12 +1651,8 @@ implementation
 
     procedure gen_load_return_value(list:TAsmList);
       var
-        href   : treference;
         ressym : tabstractnormalvarsym;
-        resloc,
-        restmploc : tlocation;
-        hreg   : tregister;
-        funcretloc : tlocation;
+        funcretloc : TCGPara;
       begin
         { Is the loading needed? }
         if is_void(current_procinfo.procdef.returndef) or
@@ -1599,210 +1673,16 @@ implementation
         if (ressym.refs>0) or
            is_managed_type(ressym.vardef) then
           begin
-            restmploc:=ressym.localloc;
-
-            { Here, we return the function result. In most architectures, the value is
-              passed into the FUNCTION_RETURN_REG, but in a windowed architecure like sparc a
-              function returns in a register and the caller receives it in an other one }
-            case funcretloc.loc of
-              LOC_REGISTER:
-                begin
-{$ifdef cpu64bitalu}
-                  if current_procinfo.procdef.funcretloc[calleeside].size in [OS_128,OS_S128] then
-                    begin
-                      resloc:=current_procinfo.procdef.funcretloc[calleeside];
-                      if resloc.loc<>LOC_REGISTER then
-                        internalerror(200409141);
-                      { Load low and high register separate to generate better register
-                        allocation info }
-                      if getsupreg(resloc.register)<first_int_imreg then
-                        begin
-                          cg.getcpuregister(list,resloc.register);
-                        end;
-                      case restmploc.loc of
-                        LOC_REFERENCE :
-                          begin
-                            href:=restmploc.reference;
-                            if target_info.endian=ENDIAN_BIG then
-                              inc(href.offset,8);
-                            cg.a_load_ref_reg(list,OS_64,OS_64,href,resloc.register);
-                          end;
-                        LOC_CREGISTER :
-                          cg.a_load_reg_reg(list,OS_64,OS_64,restmploc.register,resloc.register);
-                        else
-                          internalerror(200409203);
-                      end;
-                      if getsupreg(resloc.registerhi)<first_int_imreg then
-                        begin
-                          cg.getcpuregister(list,resloc.registerhi);
-                        end;
-                      case restmploc.loc of
-                        LOC_REFERENCE :
-                          begin
-                            href:=restmploc.reference;
-                            if target_info.endian=ENDIAN_LITTLE then
-                              inc(href.offset,8);
-                            cg.a_load_ref_reg(list,OS_64,OS_64,href,resloc.registerhi);
-                          end;
-                        LOC_CREGISTER :
-                          cg.a_load_reg_reg(list,OS_64,OS_64,restmploc.registerhi,resloc.registerhi);
-                        else
-                          internalerror(200409204);
-                      end;
-                    end
-                  else
-                    { this code is for structures etc. being returned in registers and having odd sizes }
-                    if (current_procinfo.procdef.funcretloc[calleeside].size=OS_64) and
-                      (restmploc.size<>OS_64) then
-                      begin
-                        resloc:=current_procinfo.procdef.funcretloc[calleeside];
-                        if resloc.loc<>LOC_REGISTER then
-                          internalerror(200409141);
-                        { Load low and high register separate to generate better register
-                          allocation info }
-                        if getsupreg(resloc.register)<first_int_imreg then
-                          begin
-                            cg.getcpuregister(list,resloc.register);
-                          end;
-                        case restmploc.loc of
-                          LOC_REFERENCE :
-                            begin
-                              href:=restmploc.reference;
-                              cg.a_load_ref_reg(list,OS_64,OS_64,href,resloc.register);
-                            end;
-                          LOC_CREGISTER :
-                            cg.a_load_reg_reg(list,OS_64,OS_64,restmploc.register,resloc.register);
-                          else
-                            internalerror(200409203);
-                        end;
-                      end
-                    else
-{$else cpu64bitalu}
-                  if current_procinfo.procdef.funcretloc[calleeside].size in [OS_64,OS_S64] then
-                    begin
-                      resloc:=current_procinfo.procdef.funcretloc[calleeside];
-                      if resloc.loc<>LOC_REGISTER then
-                        internalerror(200409141);
-                      { Load low and high register separate to generate better register
-                        allocation info }
-                      if getsupreg(resloc.register64.reglo)<first_int_imreg then
-                        begin
-                          cg.getcpuregister(list,resloc.register64.reglo);
-                        end;
-                      case restmploc.loc of
-                        LOC_REFERENCE :
-                          begin
-                            href:=restmploc.reference;
-                            if target_info.endian=ENDIAN_BIG then
-                              inc(href.offset,4);
-                            cg.a_load_ref_reg(list,OS_32,OS_32,href,resloc.register64.reglo);
-                          end;
-                        LOC_CREGISTER :
-                          cg.a_load_reg_reg(list,OS_32,OS_32,restmploc.register64.reglo,resloc.register64.reglo);
-                        LOC_CMMREGISTER :
-                          { perform the whole move at once below, both result
-                            registers are required (and since restmploc is an mmreg
-                            and resloc intregs, they don't conflict anyway) }
-                          ;
-                        else
-                          internalerror(200409203);
-                      end;
-                      if getsupreg(resloc.register64.reghi)<first_int_imreg then
-                        begin
-                          cg.getcpuregister(list,resloc.register64.reghi);
-                        end;
-                      case restmploc.loc of
-                        LOC_REFERENCE :
-                          begin
-                            href:=restmploc.reference;
-                            if target_info.endian=ENDIAN_LITTLE then
-                              inc(href.offset,4);
-                            cg.a_load_ref_reg(list,OS_32,OS_32,href,resloc.register64.reghi);
-                          end;
-                        LOC_CREGISTER :
-                          cg.a_load_reg_reg(list,OS_32,OS_32,restmploc.register64.reghi,resloc.register64.reghi);
-                        LOC_CMMREGISTER :
-                          cg64.a_loadmm_reg_intreg64(list,restmploc.size,restmploc.register,resloc.register64);
-                        else
-                          internalerror(200409204);
-                      end;
-                    end
-                  else
-{$endif cpu64bitalu}
-                  { this code is for structures etc. being returned in registers and having odd sizes }
-                  if (current_procinfo.procdef.funcretloc[calleeside].size=OS_32) and
-                    not(restmploc.size in [OS_S32,OS_32]) then
-                    begin
-                      resloc:=current_procinfo.procdef.funcretloc[calleeside];
-                      if resloc.loc<>LOC_REGISTER then
-                        internalerror(200409141);
-                      { Load low and high register separate to generate better register
-                        allocation info }
-                      if getsupreg(resloc.register)<first_int_imreg then
-                        begin
-                          cg.getcpuregister(list,resloc.register);
-                        end;
-                      case restmploc.loc of
-                        LOC_REFERENCE :
-                          begin
-                            href:=restmploc.reference;
-                            resloc.register:=cg.makeregsize(list,resloc.register,OS_32);
-                            cg.a_load_ref_reg(list,OS_32,OS_32,href,resloc.register);
-                          end;
-                        LOC_CREGISTER :
-                          cg.a_load_reg_reg(list,OS_32,OS_32,restmploc.register,resloc.register);
-                        LOC_CMMREGISTER :
-                          cg.a_loadmm_reg_intreg(list,restmploc.size,resloc.size,restmploc.register,resloc.register,mms_movescalar);
-                        else
-                          internalerror(200409203);
-                      end;
-                    end
-                  else
-                    begin
-                      hreg:=cg.makeregsize(list,funcretloc.register,funcretloc.size);
-                      if getsupreg(funcretloc.register)<first_int_imreg then
-                        begin
-                          cg.getcpuregister(list,funcretloc.register);
-                        end;
-                      { it could be that a structure is passed in memory but the function is expected to
-                        return a pointer to this memory }
-                      if paramanager.ret_in_param(current_procinfo.procdef.returndef,current_procinfo.procdef.proccalloption) then
-                        cg.a_load_loc_reg(list,OS_ADDR,restmploc,hreg)
-                      else
-                        cg.a_load_loc_reg(list,restmploc.size,restmploc,hreg);
-                    end;
-                end;
-              LOC_FPUREGISTER:
-                begin
-                  if getsupreg(funcretloc.register)<first_fpu_imreg then
-                    begin
-                      cg.getcpuregister(list,funcretloc.register);
-                    end;
-                  { we can't do direct moves between fpu and mm registers }
-                  if restmploc.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
-                    location_force_fpureg(list,restmploc,false);
-                  cg.a_loadfpu_loc_reg(list,funcretloc.size,restmploc,funcretloc.register);
-                end;
-              LOC_MMREGISTER:
-                begin
-                  if getsupreg(funcretloc.register)<first_mm_imreg then
-                    begin
-                      cg.getcpuregister(list,funcretloc.register);
-                    end;
-                  cg.a_loadmm_loc_reg(list,restmploc.size,restmploc,funcretloc.register,mms_movescalar);
-                end;
-              LOC_INVALID,
-              LOC_REFERENCE:
-                ;
-              else
-                internalerror(200405025);
-            end;
+            { was: don't do anything if funcretloc.loc in [LOC_INVALID,LOC_REFERENCE] }
+            if not paramanager.ret_in_param(current_procinfo.procdef.returndef,current_procinfo.procdef.proccalloption) then
+              gen_load_loc_cgpara(list,ressym.vardef,ressym.localloc,funcretloc);
           end
 {$ifdef x86}
          else
           begin
             { the caller will pop a value from the fpu stack }
-            if (funcretloc.loc = LOC_FPUREGISTER) then
+            if assigned(funcretloc.location) and
+               (funcretloc.location^.loc = LOC_FPUREGISTER) then
               list.concat(taicpu.op_none(A_FLDZ));
           end;
 {$endif x86}
@@ -1858,7 +1738,7 @@ implementation
       end;
 
 
-    procedure gen_load_cgpara_loc(list: TAsmList; vardef: tdef; const para: TCGPara; var destloc: tlocation);
+    procedure gen_load_cgpara_loc(list: TAsmList; vardef: tdef; const para: TCGPara; var destloc: tlocation; reusepara: boolean);
 
       procedure unget_para(const paraloc:TCGParaLocation);
         begin
@@ -1893,9 +1773,9 @@ implementation
 {$endif not cpu64bitalu}
       begin
         paraloc:=para.location;
-        { skip e.g. empty records }
         if not assigned(paraloc) then
           internalerror(200408203);
+        { skip e.g. empty records }
         if (paraloc^.loc = LOC_VOID) then
           exit;
         case destloc.loc of
@@ -1903,7 +1783,7 @@ implementation
             begin
               { If the parameter location is reused we don't need to copy
                 anything }
-              if not paramanager.param_use_paraloc(para) then
+              if not reusepara then
                 begin
                   href:=destloc.reference;
                   sizeleft:=para.intsize;
@@ -1931,11 +1811,15 @@ implementation
                     end;
                 end;
             end;
+          LOC_REGISTER,
           LOC_CREGISTER :
             begin
 {$ifndef cpu64bitalu}
-              if (para.size in [OS_64,OS_S64]) and
-                 is_64bit(vardef) then
+              if (para.size in [OS_64,OS_S64,OS_F64]) and
+                 (is_64bit(vardef) or
+                  { in case of fpu emulation, or abi's that pass fpu values
+                    via integer registers }
+                  (vardef.typ=floatdef)) then
                 begin
                   case paraloc^.loc of
                     LOC_REGISTER:
@@ -1985,6 +1869,7 @@ implementation
                   cg.a_load_cgparaloc_anyreg(list,destloc.size,paraloc^,destloc.register,sizeof(aint));
                 end;
             end;
+          LOC_FPUREGISTER,
           LOC_CFPUREGISTER :
             begin
 {$if defined(sparc) or defined(arm)}
@@ -2013,6 +1898,7 @@ implementation
                 internalerror(200410109);
 {$endif sparc}
             end;
+          LOC_MMREGISTER,
           LOC_CMMREGISTER :
             begin
 {$ifndef cpu64bitalu}
@@ -2050,6 +1936,8 @@ implementation
                   }
                 end;
             end;
+          else
+            internalerror(2010052903);
         end;
       end;
 
@@ -2103,7 +1991,7 @@ implementation
         for i:=0 to current_procinfo.procdef.paras.count-1 do
           begin
             currpara:=tparavarsym(current_procinfo.procdef.paras[i]);
-            gen_load_cgpara_loc(list,currpara.vardef,currpara.paraloc[calleeside],currpara.initialloc);
+            gen_load_cgpara_loc(list,currpara.vardef,currpara.paraloc[calleeside],currpara.initialloc,paramanager.param_use_paraloc(currpara.paraloc[calleeside]));
             { gen_load_cgpara_loc() already allocated the initialloc
               -> don't allocate again }
             if currpara.initialloc.loc in [LOC_CREGISTER,LOC_CFPUREGISTER,LOC_CMMREGISTER] then
@@ -2399,7 +2287,7 @@ implementation
 
         { release return registers, needed for optimizer }
         if not is_void(current_procinfo.procdef.returndef) then
-          location_free(list,current_procinfo.procdef.funcretloc[calleeside]);
+          paramanager.freecgpara(list,current_procinfo.procdef.funcretloc[calleeside]);
 
         { end of frame marker for call frame info }
         current_asmdata.asmcfi.end_frame(list);
@@ -2412,9 +2300,8 @@ implementation
       begin
         paraloc1.init;
         paramanager.getintparaloc(pocall_default,1,paraloc1);
-        paramanager.allocparaloc(list,paraloc1);
         cg.a_load_const_cgpara(list,OS_INT,current_procinfo.calc_stackframe_size,paraloc1);
-        paramanager.freeparaloc(list,paraloc1);
+        paramanager.freecgpara(list,paraloc1);
         paraloc1.done;
       end;
 
@@ -2426,8 +2313,7 @@ implementation
         paraloc1.init;
         { Also alloc the register needed for the parameter }
         paramanager.getintparaloc(pocall_default,1,paraloc1);
-        paramanager.allocparaloc(list,paraloc1);
-        paramanager.freeparaloc(list,paraloc1);
+        paramanager.freecgpara(list,paraloc1);
         { Call the helper }
         cg.allocallcpuregisters(list);
         cg.a_call_name(list,'FPC_STACKCHECK',false);
@@ -2885,7 +2771,7 @@ implementation
             exit;
         end;
 
-        if (current_procinfo.procdef.funcretloc[calleeside].loc<>LOC_VOID) and
+        if not is_void(current_procinfo.procdef.returndef) and
            assigned(current_procinfo.procdef.funcretsym) and
            (tabstractvarsym(current_procinfo.procdef.funcretsym).refs <> 0) then
           if (current_procinfo.procdef.proctypeoption=potype_constructor) then

+ 90 - 2
compiler/parabase.pas

@@ -26,7 +26,8 @@ unit parabase;
 
     uses
        cclasses,globtype,
-       cpubase,cgbase,cgutils;
+       cpubase,cgbase,cgutils,
+       symtype, ppu;
 
     type
        TCGParaReference = record
@@ -62,9 +63,9 @@ unit parabase;
 
        TCGPara = object
           Location  : PCGParalocation;
+          IntSize   : aint; { size of the total location in bytes }
           Alignment : ShortInt;
           Size      : TCGSize;  { Size of the parameter included in all locations }
-          IntSize: aint; { size of the total location in bytes }
 {$ifdef powerpc}
           composite: boolean; { under the AIX abi, how certain parameters are passed depends on whether they are composite or not }
 {$endif powerpc}
@@ -75,6 +76,9 @@ unit parabase;
           procedure   check_simple_location;
           function    add_location:pcgparalocation;
           procedure   get_location(var newloc:tlocation);
+
+          procedure   ppuwrite(ppufile:tcompilerppufile);
+          procedure   ppuload(ppufile:tcompilerppufile);
        end;
 
        tvarargsinfo = (
@@ -240,6 +244,90 @@ implementation
       end;
 
 
+    procedure TCGPara.ppuwrite(ppufile: tcompilerppufile);
+      var
+        hparaloc: PCGParaLocation;
+        nparaloc: byte;
+      begin
+        ppufile.putbyte(byte(Alignment));
+        ppufile.putbyte(ord(Size));
+        ppufile.putaint(IntSize);
+{$ifdef powerpc}
+        ppufile.putbyte(byte(composite));
+{$endif}
+        nparaloc:=0;
+        hparaloc:=location;
+        while assigned(hparaloc) do
+          begin
+            inc(nparaloc);
+            hparaloc:=hparaloc^.Next;
+          end;
+        ppufile.putbyte(nparaloc);
+        hparaloc:=location;
+        while assigned(hparaloc) do
+          begin
+            ppufile.putbyte(byte(hparaloc^.Size));
+            ppufile.putbyte(byte(hparaloc^.loc));
+            if hparaloc^.loc=LOC_REFERENCE then
+              begin
+                ppufile.putlongint(longint(hparaloc^.reference.index));
+                ppufile.putaint(hparaloc^.reference.offset);
+              end
+            else
+              begin
+{$ifdef powerpc64}
+                ppufile.putbyte(hparaloc^.shiftval);
+{$endif}
+                ppufile.putlongint(longint(hparaloc^.register));
+              end;
+          end;
+      end;
+
+
+    procedure TCGPara.ppuload(ppufile: tcompilerppufile);
+      var
+        hparaloc: PCGParaLocation;
+        nparaloc: byte;
+      begin
+        reset;
+        Alignment:=shortint(ppufile.getbyte);
+        Size:=TCgSize(ppufile.getbyte);
+        IntSize:=ppufile.getaint;
+{$ifdef powerpc}
+        composite:=boolean(ppufile.getbyte);
+{$endif}
+        nparaloc:=ppufile.getbyte;
+        while nparaloc>0 do
+          begin
+            hparaloc:=add_location;
+            hparaloc^.size:=TCGSize(ppufile.getbyte);
+            hparaloc^.loc:=TCGLoc(ppufile.getbyte);
+            case hparaloc^.loc of
+              LOC_REFERENCE:
+                begin
+                  hparaloc^.reference.index:=tregister(ppufile.getlongint);
+                  hparaloc^.reference.offset:=ppufile.getaint;
+                end;
+              LOC_FPUREGISTER,
+              LOC_CFPUREGISTER,
+              LOC_MMREGISTER,
+              LOC_CMMREGISTER,
+              LOC_REGISTER,
+              LOC_CREGISTER :
+                begin
+{$ifdef powerpc64}
+                  hparaloc^.shiftval:=ppufile.getbyte;
+{$endif}
+                  hparaloc^.register:=tregister(ppufile.getlongint);
+                end
+              else
+                internalerror(2010051301);
+            end;
+            dec(nparaloc);
+          end;
+      end;
+
+
 {****************************************************************************
                           TParaList
 ****************************************************************************}

+ 42 - 26
compiler/paramgr.pas

@@ -76,19 +76,26 @@ unit paramgr;
 
           procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara:TCGPara);virtual;abstract;
 
+          {# allocate an individual pcgparalocation that's part of a tcgpara
+
+            @param(list Current assembler list)
+            @param(loc Parameter location element)
+          }
+          procedure allocparaloc(list: TAsmList; const paraloc: pcgparalocation);
+
           {# allocate a parameter location created with create_paraloc_info
 
             @param(list Current assembler list)
             @param(loc Parameter location)
           }
-          procedure allocparaloc(list: TAsmList; const cgpara: TCGPara); virtual;
+          procedure alloccgpara(list: TAsmList; const cgpara: TCGPara); virtual;
 
           {# free a parameter location allocated with alloccgpara
 
             @param(list Current assembler list)
             @param(loc Parameter location)
           }
-          procedure freeparaloc(list: TAsmList; const cgpara: TCGPara); virtual;
+          procedure freecgpara(list: TAsmList; const cgpara: TCGPara); virtual;
 
           { This is used to populate the location information on all parameters
             for the routine as seen in either the caller or the callee. It returns
@@ -100,7 +107,7 @@ unit paramgr;
             function result instead of its actual result. Used if the compiler
             forces the function result to something different than the real
             result.  }
-          function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;virtual;abstract;
+          function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;virtual;abstract;
 
           { This is used to populate the location information on all parameters
             for the routine when it is being inlined. It returns
@@ -223,39 +230,48 @@ implementation
       end;
 
 
-    procedure tparamanager.allocparaloc(list: TAsmList; const cgpara: TCGPara);
+    procedure tparamanager.allocparaloc(list: TAsmList; const paraloc: pcgparalocation);
+      begin
+        case paraloc^.loc of
+          LOC_REGISTER,
+          LOC_CREGISTER:
+            begin
+              if getsupreg(paraloc^.register)<first_int_imreg then
+                cg.getcpuregister(list,paraloc^.register);
+            end;
+{$ifndef x86}
+{ don't allocate ST(x), they're not handled by the register allocator }
+          LOC_FPUREGISTER,
+          LOC_CFPUREGISTER:
+            begin
+              if getsupreg(paraloc^.register)<first_fpu_imreg then
+                cg.getcpuregister(list,paraloc^.register);
+            end;
+{$endif not x86}
+          LOC_MMREGISTER,
+          LOC_CMMREGISTER :
+            begin
+              if getsupreg(paraloc^.register)<first_mm_imreg then
+                cg.getcpuregister(list,paraloc^.register);
+            end;
+        end;
+      end;
+
+
+    procedure tparamanager.alloccgpara(list: TAsmList; const cgpara: TCGPara);
       var
         paraloc : pcgparalocation;
       begin
         paraloc:=cgpara.location;
         while assigned(paraloc) do
           begin
-            case paraloc^.loc of
-              LOC_REGISTER,
-              LOC_CREGISTER:
-                begin
-                  if getsupreg(paraloc^.register)<first_int_imreg then
-                    cg.getcpuregister(list,paraloc^.register);
-                end;
-              LOC_FPUREGISTER,
-              LOC_CFPUREGISTER:
-                begin
-                  if getsupreg(paraloc^.register)<first_fpu_imreg then
-                    cg.getcpuregister(list,paraloc^.register);
-                end;
-              LOC_MMREGISTER,
-              LOC_CMMREGISTER :
-                begin
-                  if getsupreg(paraloc^.register)<first_mm_imreg then
-                    cg.getcpuregister(list,paraloc^.register);
-                end;
-            end;
+            allocparaloc(list,paraloc);
             paraloc:=paraloc^.next;
           end;
       end;
 
 
-    procedure tparamanager.freeparaloc(list: TAsmList; const cgpara: TCGPara);
+    procedure tparamanager.freecgpara(list: TAsmList; const cgpara: TCGPara);
       var
         paraloc : Pcgparalocation;
         href : treference;
@@ -293,7 +309,7 @@ implementation
                       fillchar(href,sizeof(href),0);
                       href.base:=paraloc^.reference.index;
                       href.offset:=paraloc^.reference.offset;
-                      tg.ungettemp(list,href);
+                      tg.ungetiftemp(list,href);
                     end;
                 end;
               else

+ 0 - 73
compiler/powerpc/cgcpu.pas

@@ -37,14 +37,6 @@ unit cgcpu;
         procedure init_register_allocators;override;
         procedure done_register_allocators;override;
 
-        { passing parameters, per default the parameter is pushed }
-        { nr gives the number of the parameter (enumerated from   }
-        { left to right), this allows to move the parameter to    }
-        { register, if the cpu supports register calling          }
-        { conventions                                             }
-        procedure a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : tcgpara);override;
-
-
         procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
         procedure a_call_reg(list : TAsmList;reg: tregister); override;
 
@@ -183,71 +175,6 @@ const
         inherited done_register_allocators;
       end;
 
-
-    procedure tcgppc.a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : tcgpara);
-
-      var
-        tmpref, ref: treference;
-        location: pcgparalocation;
-        sizeleft: aint;
-
-      begin
-        location := paraloc.location;
-        tmpref := r;
-        sizeleft := paraloc.intsize;
-        while assigned(location) do
-          begin
-            case location^.loc of
-              LOC_REGISTER,LOC_CREGISTER:
-                begin
-{$ifndef cpu64bitaddr}
-                  if (sizeleft <> 3) then
-                    begin
-                      a_load_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
-                    end
-                  else
-                    begin
-                      a_load_ref_reg(list,OS_16,OS_16,tmpref,location^.register);
-                      a_reg_alloc(list,NR_R0);
-                      inc(tmpref.offset,2);
-                      a_load_ref_reg(list,OS_8,OS_8,tmpref,newreg(R_INTREGISTER,RS_R0,R_SUBNONE));
-                      a_op_const_reg(list,OP_SHL,OS_INT,16,location^.register);
-                      list.concat(taicpu.op_reg_reg_const_const_const(A_RLWIMI,location^.register,newreg(R_INTREGISTER,RS_R0,R_SUBNONE),8,16,31-8));
-                      a_reg_dealloc(list,NR_R0);
-                      dec(tmpref.offset,2);
-                    end;
-{$else not cpu64bitaddr}
-{$error add 64 bit support for non power of 2 loads in a_load_ref_cgpara}
-{$endif not cpu64bitaddr}
-                end;
-              LOC_REFERENCE:
-                begin
-                   reference_reset_base(ref,location^.reference.index,location^.reference.offset,paraloc.alignment);
-                   g_concatcopy(list,tmpref,ref,sizeleft);
-                   if assigned(location^.next) then
-                     internalerror(2005010710);
-                end;
-              LOC_FPUREGISTER,LOC_CFPUREGISTER:
-                case location^.size of
-                   OS_F32, OS_F64:
-                     a_loadfpu_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
-                   else
-                     internalerror(2002072801);
-                end;
-              LOC_VOID:
-                begin
-                  // nothing to do
-                end;
-              else
-                internalerror(2002081103);
-            end;
-            inc(tmpref.offset,tcgsize2size[location^.size]);
-            dec(sizeleft,tcgsize2size[location^.size]);
-            location := location^.next;
-          end;
-      end;
-
-
     { calling a procedure by name }
     procedure tcgppc.a_call_name(list : TAsmList;const s : string; weak: boolean);
       begin

+ 43 - 27
compiler/powerpc/cpupara.pas

@@ -40,7 +40,7 @@ unit cpupara;
           procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara:TCGPara);override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
-          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;override;
+          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;override;
           procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
          private
           procedure init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword);
@@ -250,65 +250,81 @@ unit cpupara;
       end;
 
 
-    function tppcparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;
+    function tppcparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;
       var
+        paraloc : pcgparalocation;
         retcgsize  : tcgsize;
       begin
-        { Constructors return self instead of a boolean }
-        if (p.proctypeoption=potype_constructor) then
-          retcgsize:=OS_ADDR
-        else
-          retcgsize:=def_cgsize(def);
-
-        location_reset(result,LOC_INVALID,OS_NO);
-        result.size:=retcgsize;
+        result.init;
+        result.alignment:=get_para_align(p.proccalloption);
         { void has no location }
         if is_void(def) then
           begin
-            result.loc:=LOC_VOID;
+            paraloc:=result.add_location;
+            result.size:=OS_NO;
+            result.intsize:=0;
+            paraloc^.size:=OS_NO;
+            paraloc^.loc:=LOC_VOID;
             exit;
           end;
+        { Constructors return self instead of a boolean }
+        if (p.proctypeoption=potype_constructor) then
+          begin
+            retcgsize:=OS_ADDR;
+            result.intsize:=sizeof(pint);
+          end
+        else
+          begin
+            retcgsize:=def_cgsize(def);
+            result.intsize:=def.size;
+          end;
+        result.size:=retcgsize;
         { Return is passed as var parameter }
         if ret_in_param(def,p.proccalloption) then
           begin
-            result.loc:=LOC_REFERENCE;
-            result.size:=retcgsize;
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_REFERENCE;
+            paraloc^.size:=retcgsize;
             exit;
           end;
+
+        paraloc:=result.add_location;
         { Return in FPU register? }
         if def.typ=floatdef then
           begin
-            result.loc:=LOC_FPUREGISTER;
-            result.register:=NR_FPU_RESULT_REG;
-            result.size:=retcgsize;
+            paraloc^.loc:=LOC_FPUREGISTER;
+            paraloc^.register:=NR_FPU_RESULT_REG;
+            paraloc^.size:=retcgsize;
           end
         else
          { Return in register }
           begin
-{$ifndef cpu64bitaddr}
             if retcgsize in [OS_64,OS_S64] then
              begin
                { low 32bits }
-               result.loc:=LOC_REGISTER;
+               paraloc^.loc:=LOC_REGISTER;
                if side=callerside then
-                 result.register64.reghi:=NR_FUNCTION_RESULT64_HIGH_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
                else
-                 result.register64.reghi:=NR_FUNCTION_RETURN64_HIGH_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_HIGH_REG;
+               paraloc^.size:=OS_32;
                { high 32bits }
+               paraloc:=result.add_location;
+               paraloc^.loc:=LOC_REGISTER;
                if side=callerside then
-                 result.register64.reglo:=NR_FUNCTION_RESULT64_LOW_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
                else
-                 result.register64.reglo:=NR_FUNCTION_RETURN64_LOW_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_LOW_REG;
+               paraloc^.size:=OS_32;
              end
             else
-{$endif cpu64bitaddr}
              begin
-               result.loc:=LOC_REGISTER;
-               result.size:=retcgsize;
+               paraloc^.loc:=LOC_REGISTER;
                if side=callerside then
-                 result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
                else
-                 result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
+               paraloc^.size:=retcgsize;
              end;
           end;
       end;

+ 1 - 0
compiler/powerpc64/cgcpu.pas

@@ -409,6 +409,7 @@ begin
   sizeleft := paraloc.intsize;
   adjusttail := false;
   while assigned(location) do begin
+    paramanager.allocparaloc(list,location);
     case location^.loc of
       LOC_REGISTER, LOC_CREGISTER:
         begin

+ 47 - 32
compiler/powerpc64/cpupara.pas

@@ -45,7 +45,7 @@ type
     function create_paraloc_info(p: tabstractprocdef; side: tcallercallee): longint; override;
     function create_varargs_paraloc_info(p: tabstractprocdef; varargspara:
       tvarargsparalist): longint; override;
-    function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;override;
+    function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;override;
     procedure create_funcretloc_info(p: tabstractprocdef; side: tcallercallee);
 
   private
@@ -210,47 +210,62 @@ begin
 end;
 
 function tppcparamanager.get_funcretloc(p : tabstractprocdef; side:
-  tcallercallee; def: tdef): tlocation;
+  tcallercallee; def: tdef): tcgpara;
 var
-  retcgsize: tcgsize;
+  paraloc : pcgparalocation;
+  retcgsize  : tcgsize;
 begin
+  result.init;
+  result.alignment:=get_para_align(p.proccalloption);
+  { void has no location }
+  if is_void(def) then
+    begin
+      paraloc:=result.add_location;
+      result.size:=OS_NO;
+      result.intsize:=0;
+      paraloc^.size:=OS_NO;
+      paraloc^.loc:=LOC_VOID;
+      exit;
+    end;
   { Constructors return self instead of a boolean }
-  if (p.proctypeoption = potype_constructor) then
-    retcgsize := OS_ADDR
+  if (p.proctypeoption=potype_constructor) then
+    begin
+      retcgsize:=OS_ADDR;
+      result.intsize:=sizeof(pint);
+    end
   else
-    retcgsize := def_cgsize(def);
-
-  location_reset(result, LOC_INVALID, OS_NO);
-  result.size := retcgsize;
-  { void has no location }
-  if is_void(def) then begin
-    result.loc := LOC_VOID;
-    exit;
-  end;
+    begin
+      retcgsize:=def_cgsize(def);
+      result.intsize:=def.size;
+    end;
+  result.size:=retcgsize;
   { Return is passed as var parameter }
-  if ret_in_param(def, p.proccalloption) then
+  if ret_in_param(def,p.proccalloption) then
     begin
-      result.loc := LOC_REFERENCE;
-      result.size := retcgsize;
+      paraloc:=result.add_location;
+      paraloc^.loc:=LOC_REFERENCE;
+      paraloc^.size:=retcgsize;
       exit;
     end;
+
+  paraloc:=result.add_location;
   { Return in FPU register? }
-  if def.typ = floatdef then begin
-    result.loc := LOC_FPUREGISTER;
-    result.register := NR_FPU_RESULT_REG;
-    result.size := retcgsize;
-  end else
-    { Return in register }
+  if def.typ=floatdef then
     begin
-      result.loc := LOC_REGISTER;
-      result.size := retcgsize;
-      if side = callerside then
-        result.register := newreg(R_INTREGISTER,
-          RS_FUNCTION_RESULT_REG, cgsize2subreg(R_INTREGISTER, retcgsize))
-      else
-        result.register := newreg(R_INTREGISTER,
-          RS_FUNCTION_RETURN_REG, cgsize2subreg(R_INTREGISTER, retcgsize));
-    end;
+      paraloc^.loc:=LOC_FPUREGISTER;
+      paraloc^.register:=NR_FPU_RESULT_REG;
+      paraloc^.size:=retcgsize;
+    end
+  else
+   { Return in register }
+    begin
+       paraloc^.loc:=LOC_REGISTER;
+       if side=callerside then
+         paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
+       else
+         paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
+       paraloc^.size:=retcgsize;
+     end;
 end;
 
 function tppcparamanager.create_paraloc_info(p: tabstractprocdef; side:

+ 3 - 1
compiler/ppcgen/cgppc.pas

@@ -163,6 +163,7 @@ unit cgppc;
       ref: treference;
     begin
       paraloc.check_simple_location;
+      paramanager.allocparaloc(list,paraloc.location);
       case paraloc.location^.loc of
         LOC_REGISTER, LOC_CREGISTER:
           a_load_const_reg(list, size, a, paraloc.location^.register);
@@ -186,6 +187,7 @@ unit cgppc;
 
       begin
         paraloc.check_simple_location;
+        paramanager.allocparaloc(list,paraloc.location);
         case paraloc.location^.loc of
            LOC_REGISTER,LOC_CREGISTER:
              a_loadaddr_ref_reg(list,r,paraloc.location^.register);
@@ -616,7 +618,7 @@ unit cgppc;
           paraloc1.init;
           paramanager.getintparaloc(pocall_cdecl,1,paraloc1);
           a_load_reg_cgpara(list,OS_ADDR,NR_R0,paraloc1);
-          paramanager.freeparaloc(list,paraloc1);
+          paramanager.freecgpara(list,paraloc1);
           paraloc1.done;
           allocallcpuregisters(list);
           a_call_name(list,'mcount',false);

+ 25 - 12
compiler/sparc/cgcpu.pas

@@ -327,6 +327,7 @@ implementation
         Ref:TReference;
       begin
         paraloc.check_simple_location;
+        paramanager.alloccgpara(list,paraloc);
         case paraloc.location^.loc of
           LOC_REGISTER,LOC_CREGISTER:
             a_load_const_reg(list,size,a,paraloc.location^.register);
@@ -353,11 +354,12 @@ implementation
         tmpreg:TRegister;
       begin
         paraloc.check_simple_location;
+        paramanager.alloccgpara(list,paraloc);
         with paraloc.location^ do
           begin
             case loc of
               LOC_REGISTER,LOC_CREGISTER :
-                a_load_ref_reg(list,sz,sz,r,Register);
+                a_load_ref_reg(list,sz,paraloc.location^.size,r,Register);
               LOC_REFERENCE:
                 begin
                   { Code conventions need the parameters being allocated in %o6+92 }
@@ -392,6 +394,7 @@ implementation
         TmpReg:TRegister;
       begin
         paraloc.check_simple_location;
+        paramanager.alloccgpara(list,paraloc);
         with paraloc.location^ do
           begin
             case loc of
@@ -422,14 +425,17 @@ implementation
         hloc:=paraloc.location;
         while assigned(hloc) do
           begin
+            paramanager.allocparaloc(list,hloc);
             case hloc^.loc of
-              LOC_REGISTER :
+              LOC_REGISTER,LOC_CREGISTER :
                 a_load_ref_reg(list,hloc^.size,hloc^.size,href,hloc^.register);
               LOC_REFERENCE :
                 begin
                   reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,paraloc.alignment);
                   a_load_ref_ref(list,hloc^.size,hloc^.size,href,href2);
                 end;
+              LOC_FPUREGISTER,LOC_CFPUREGISTER :
+                a_loadfpu_ref_reg(list,hloc^.size,hloc^.size,href,hloc^.register);
               else
                 internalerror(200408241);
            end;
@@ -443,10 +449,20 @@ implementation
       var
         href : treference;
       begin
-        tg.GetTemp(list,TCGSize2Size[size],TCGSize2Size[size],tt_normal,href);
-        a_loadfpu_reg_ref(list,size,size,r,href);
-        a_loadfpu_ref_cgpara(list,size,href,paraloc);
-        tg.Ungettemp(list,href);
+        { happens for function result loc }
+        if paraloc.location^.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
+          begin
+            paraloc.check_simple_location;
+            paramanager.allocparaloc(list,paraloc.location);
+            a_loadfpu_reg_reg(list,size,paraloc.location^.size,r,paraloc.location^.register);
+          end
+        else
+          begin
+            tg.GetTemp(list,TCGSize2Size[size],TCGSize2Size[size],tt_normal,href);
+            a_loadfpu_reg_ref(list,size,size,r,href);
+            a_loadfpu_ref_cgpara(list,size,href,paraloc);
+            tg.Ungettemp(list,href);
+          end;
       end;
 
 
@@ -1160,15 +1176,12 @@ implementation
         paramanager.getintparaloc(pocall_default,1,paraloc1);
         paramanager.getintparaloc(pocall_default,2,paraloc2);
         paramanager.getintparaloc(pocall_default,3,paraloc3);
-        paramanager.allocparaloc(list,paraloc3);
         a_load_const_cgpara(list,OS_INT,len,paraloc3);
-        paramanager.allocparaloc(list,paraloc2);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
-        paramanager.allocparaloc(list,paraloc2);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
-        paramanager.freeparaloc(list,paraloc3);
-        paramanager.freeparaloc(list,paraloc2);
-        paramanager.freeparaloc(list,paraloc1);
+        paramanager.freecgpara(list,paraloc3);
+        paramanager.freecgpara(list,paraloc2);
+        paramanager.freecgpara(list,paraloc1);
         alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
         alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
         a_call_name(list,'FPC_MOVE',false);

+ 44 - 26
compiler/sparc/cpupara.pas

@@ -41,7 +41,7 @@ interface
         procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara : TCGPara);override;
         function  create_paraloc_info(p : TAbstractProcDef; side: tcallercallee):longint;override;
         function  create_varargs_paraloc_info(p : TAbstractProcDef; varargspara:tvarargsparalist):longint;override;
-        function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;override;
+        function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;override;
       private
         procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
         procedure create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
@@ -145,39 +145,53 @@ implementation
       end;
 
 
-    function tsparcparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;
+    function tsparcparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;
       var
+        paraloc : pcgparalocation;
         retcgsize  : tcgsize;
       begin
-        { Constructors return self instead of a boolean }
-        if (p.proctypeoption=potype_constructor) then
-          retcgsize:=OS_ADDR
-        else
-          retcgsize:=def_cgsize(def);
-
-        location_reset(result,LOC_INVALID,OS_NO);
-        result.size:=retcgsize;
+        result.init;
+        result.alignment:=get_para_align(p.proccalloption);
         { void has no location }
         if is_void(def) then
           begin
-            result.loc:=LOC_VOID;
+            paraloc:=result.add_location;
+            result.size:=OS_NO;
+            result.intsize:=0;
+            paraloc^.size:=OS_NO;
+            paraloc^.loc:=LOC_VOID;
             exit;
           end;
+        { Constructors return self instead of a boolean }
+        if (p.proctypeoption=potype_constructor) then
+          begin
+            retcgsize:=OS_ADDR;
+            result.intsize:=sizeof(pint);
+          end
+        else
+          begin
+            retcgsize:=def_cgsize(def);
+            result.intsize:=def.size;
+          end;
+        result.size:=retcgsize;
         { Return is passed as var parameter }
         if ret_in_param(def,p.proccalloption) then
           begin
-            result.loc:=LOC_REFERENCE;
-            result.size:=retcgsize;
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_REFERENCE;
+            paraloc^.size:=retcgsize;
             exit;
           end;
+
+        paraloc:=result.add_location;
         { Return in FPU register? }
         if def.typ=floatdef then
           begin
-            result.loc:=LOC_FPUREGISTER;
-            result.register:=NR_FPU_RESULT_REG;
+            paraloc^.loc:=LOC_FPUREGISTER;
+            paraloc^.register:=NR_FPU_RESULT_REG;
             if retcgsize=OS_F64 then
-              setsubreg(result.register,R_SUBFD);
-            result.size:=retcgsize;
+              setsubreg(paraloc^.register,R_SUBFD);
+            paraloc^.size:=retcgsize;
           end
         else
          { Return in register }
@@ -185,27 +199,31 @@ implementation
 {$ifndef cpu64bitaddr}
             if retcgsize in [OS_64,OS_S64] then
              begin
-               result.loc:=LOC_REGISTER;
+               paraloc^.loc:=LOC_REGISTER;
                { high }
                if (side=callerside) or (po_inline in p.procoptions) then
-                 result.register64.reghi:=NR_FUNCTION_RESULT64_HIGH_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
                else
-                 result.register64.reghi:=NR_FUNCTION_RETURN64_HIGH_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_HIGH_REG;
+               paraloc^.size:=OS_32;
                { low }
+               paraloc:=result.add_location;
+               paraloc^.loc:=LOC_REGISTER;
                if (side=callerside) or (po_inline in p.procoptions) then
-                 result.register64.reglo:=NR_FUNCTION_RESULT64_LOW_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
                else
-                 result.register64.reglo:=NR_FUNCTION_RETURN64_LOW_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_LOW_REG;
+               paraloc^.size:=OS_32;
              end
             else
 {$endif not cpu64bitaddr}
              begin
-               result.loc:=LOC_REGISTER;
-               result.size:=retcgsize;
+               paraloc^.loc:=LOC_REGISTER;
+               paraloc^.size:=retcgsize;
                if (side=callerside) then
-                 result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
                else
-                 result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
              end;
           end;
       end;

+ 9 - 17
compiler/symdef.pas

@@ -413,7 +413,7 @@ interface
 {$ifdef m68k}
           exp_funcretloc : tregister;   { explicit funcretloc for AmigaOS }
 {$endif}
-          funcretloc : array[tcallercallee] of TLocation;
+          funcretloc : array[tcallercallee] of TCGPara;
           has_paraloc_info : boolean; { paraloc info is available }
           constructor create(dt:tdeftyp;level:byte);
           constructor ppuload(dt:tdeftyp;ppufile:tcompilerppufile);
@@ -2755,8 +2755,8 @@ implementation
          savesize:=sizeof(pint);
          requiredargarea:=0;
          has_paraloc_info:=false;
-         location_reset(funcretloc[callerside],LOC_INVALID,OS_NO);
-         location_reset(funcretloc[calleeside],LOC_INVALID,OS_NO);
+         funcretloc[callerside].init;
+         funcretloc[calleeside].init;
       end;
 
 
@@ -2784,6 +2784,8 @@ implementation
             memprocparast.stop;
 {$endif MEMDEBUG}
           end;
+         funcretloc[callerside].done;
+         funcretloc[calleeside].done;
          inherited destroy;
       end;
 
@@ -2870,15 +2872,9 @@ implementation
          proccalloption:=tproccalloption(ppufile.getbyte);
          ppufile.getnormalset(procoptions);
 
-         location_reset(funcretloc[callerside],LOC_INVALID,OS_NO);
-         location_reset(funcretloc[calleeside],LOC_INVALID,OS_NO);
+         funcretloc[callerside].init;
          if po_explicitparaloc in procoptions then
-           begin
-             b:=ppufile.getbyte;
-             if b<>sizeof(funcretloc[callerside]) then
-               internalerror(200411155);
-             ppufile.getdata(funcretloc[callerside],sizeof(funcretloc[callerside]));
-           end;
+           funcretloc[callerside].ppuload(ppufile);
 
          savesize:=sizeof(pint);
          has_paraloc_info:=(po_explicitparaloc in procoptions);
@@ -2903,11 +2899,7 @@ implementation
          ppufile.do_interface_crc:=oldintfcrc;
 
          if (po_explicitparaloc in procoptions) then
-           begin
-             { Make a 'valid' funcretloc for procedures }
-             ppufile.putbyte(sizeof(funcretloc[callerside]));
-             ppufile.putdata(funcretloc[callerside],sizeof(funcretloc[callerside]));
-           end;
+           funcretloc[callerside].ppuwrite(ppufile);
       end;
 
 
@@ -3758,7 +3750,7 @@ implementation
         tprocvardef(result).maxparacount:=maxparacount;
         tprocvardef(result).minparacount:=minparacount;
         for i:=low(tcallercallee) to high(tcallercallee) do
-          location_copy(tprocvardef(result).funcretloc[i],funcretloc[i]);
+          tprocvardef(result).funcretloc[i]:=funcretloc[i].getcopy;
         tprocvardef(result).has_paraloc_info:=has_paraloc_info;
 {$ifdef m68k}
         tprocvardef(result).exp_funcretloc:=exp_funcretloc;

+ 0 - 48
compiler/x86_64/cgcpu.pas

@@ -40,8 +40,6 @@ unit cgcpu;
         procedure g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);override;
         procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
 
-        procedure a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);override;
-
         procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
         procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister;shuffle : pmmshuffle); override;
       end;
@@ -108,52 +106,6 @@ unit cgcpu;
       end;
 
 
-    procedure tcgx86_64.a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);
-      var
-        tmpref, ref: treference;
-        location: pcgparalocation;
-        sizeleft: aint;
-        sourcesize: tcgsize;
-      begin
-        location := paraloc.location;
-        tmpref := r;
-        { make sure we handle passing a 32 bit value in memory to a }
-        { 64 bit register location etc. correctly                   }
-        if (size<>OS_NO) and
-           (tcgsize2size[size]<paraloc.intsize) then
-          begin
-            paraloc.check_simple_location;
-            if not(location^.loc in [LOC_REGISTER,LOC_CREGISTER]) then
-              internalerror(2008031801);
-            sizeleft:=tcgsize2size[size]
-          end
-        else
-          sizeleft:=paraloc.intsize;
-        while assigned(location) do
-          begin
-            case location^.loc of
-              LOC_REGISTER,LOC_CREGISTER:
-                begin
-                  sourcesize:=int_cgsize(sizeleft);
-                  if (sourcesize=OS_NO) then
-                    sourcesize:=location^.size;
-                  a_load_ref_reg(list,sourcesize,location^.size,tmpref,location^.register);
-                end;
-              LOC_REFERENCE:
-                begin
-                  reference_reset_base(ref,location^.reference.index,location^.reference.offset,paraloc.alignment);
-                  g_concatcopy(list,tmpref,ref,sizeleft);
-                end;
-              else
-                internalerror(2002081103);
-            end;
-            inc(tmpref.offset,tcgsize2size[location^.size]);
-            dec(sizeleft,tcgsize2size[location^.size]);
-            location := location^.next;
-          end;
-      end;
-
-
     procedure tcgx86_64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
       var
         stacksize : longint;

+ 62 - 32
compiler/x86_64/cpupara.pas

@@ -48,7 +48,7 @@ unit cpupara;
           function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
-          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;override;
+          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;override;
        end;
 
   implementation
@@ -404,47 +404,71 @@ unit cpupara;
       end;
 
 
-    function tx86_64paramanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;
+    function tx86_64paramanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;
       var
         retcgsize : tcgsize;
+        paraloc : pcgparalocation;
       begin
-        { Constructors return self instead of a boolean }
-        if (p.proctypeoption=potype_constructor) then
-          retcgsize:=OS_ADDR
-        else
-          retcgsize:=def_cgsize(def);
-        location_reset(result,LOC_INVALID,OS_NO);
+        result.init;
+        result.alignment:=get_para_align(p.proccalloption);
         { void has no location }
         if is_void(def) then
           begin
-            location_reset(result,LOC_VOID,OS_NO);
+            paraloc:=result.add_location;
+            result.size:=OS_NO;
+            result.intsize:=0;
+            paraloc^.size:=OS_NO;
+            paraloc^.loc:=LOC_VOID;
             exit;
           end;
+        { Constructors return self instead of a boolean }
+        if (p.proctypeoption=potype_constructor) then
+          begin
+            retcgsize:=OS_ADDR;
+            result.intsize:=sizeof(pint);
+          end
+        else
+          begin
+            retcgsize:=def_cgsize(def);
+            result.intsize:=def.size;
+          end;
+        result.size:=retcgsize;
         { Return is passed as var parameter }
         if ret_in_param(def,p.proccalloption) then
           begin
-            result.loc:=LOC_REFERENCE;
-            result.size:=retcgsize;
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_REFERENCE;
+            paraloc^.size:=retcgsize;
             exit;
           end;
+
+        paraloc:=result.add_location;
         { Return in FPU register? }
         if def.typ=floatdef then
           begin
             case tfloatdef(def).floattype of
-              s32real,s64real:
+              s32real:
+                begin
+                  paraloc^.loc:=LOC_MMREGISTER;
+                  paraloc^.register:=newreg(R_MMREGISTER,RS_MM_RESULT_REG,R_SUBMMS);
+                  paraloc^.size:=retcgsize;
+                end;
+              s64real:
                 begin
-                  result.loc:=LOC_MMREGISTER;
-                  result.register:=NR_MM_RESULT_REG;
-                  result.size:=retcgsize;
+                  paraloc^.loc:=LOC_MMREGISTER;
+                  paraloc^.register:=newreg(R_MMREGISTER,RS_MM_RESULT_REG,R_SUBMMD);
+                  paraloc^.size:=retcgsize;
                 end;
+              { the first two only exist on targets with an x87, on others
+                they are replace by int64 }
               s64currency,
               s64comp,
               s80real,
               sc80real:
                 begin
-                  result.loc:=LOC_FPUREGISTER;
-                  result.register:=NR_FPU_RESULT_REG;
-                  result.size:=retcgsize;
+                  paraloc^.loc:=LOC_FPUREGISTER;
+                  paraloc^.register:=NR_FPU_RESULT_REG;
+                  paraloc^.size:=retcgsize;
                 end;
               else
                 internalerror(200405034);
@@ -453,41 +477,47 @@ unit cpupara;
         else
          { Return in register }
           begin
-            result.loc:=LOC_REGISTER;
+            paraloc^.loc:=LOC_REGISTER;
             if retcgsize=OS_NO then
               begin
                 case def.size of
                   0..4:
                     begin
-                      result.size:=OS_32;
-                      result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,R_SUBD);
+                      paraloc^.size:=OS_32;
+                      paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,R_SUBD);
                     end;
                   5..8:
                     begin
-                      result.size:=OS_64;
-                      result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,R_SUBQ);
+                      paraloc^.size:=OS_64;
+                      paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,R_SUBQ);
                     end;
                   9..16:
                     begin
-                      result.size:=OS_128;
-                      result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,R_SUBWHOLE);
-                      result.registerhi:=newreg(R_INTREGISTER,RS_RDX,R_SUBWHOLE);
+                      paraloc^.size:=OS_64;
+                      paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,R_SUBWHOLE);
+                      paraloc:=result.add_location;
+                      paraloc^.size:=OS_64;
+                      paraloc^.loc:=LOC_REGISTER;
+                      paraloc^.register:=newreg(R_INTREGISTER,RS_RDX,R_SUBWHOLE);
                     end;
                 end;
               end
             else if retcgsize in [OS_128,OS_S128] then
               begin
-                result.size:=retcgsize;
-                result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,R_SUBWHOLE);
-                result.registerhi:=newreg(R_INTREGISTER,RS_RDX,R_SUBWHOLE);
+                paraloc^.size:=OS_64;
+                paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,R_SUBWHOLE);
+                paraloc:=result.add_location;
+                paraloc^.size:=OS_64;
+                paraloc^.loc:=LOC_REGISTER;
+                paraloc^.register:=newreg(R_INTREGISTER,RS_RDX,R_SUBWHOLE);
               end
             else
               begin
-                result.size:=retcgsize;
+                paraloc^.size:=retcgsize;
                 if side=callerside then
-                  result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
+                  paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
                 else
-                  result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
+                  paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
               end;
           end;
       end;