Browse Source

* changed tprocdef.funcretloc[] from a tlocation into a tcgpara so it can
represent complex locations (required for full x86-64 ABI support,
which is not yet implemented) -> lots of special result handling
code has been removed and replaced by the parameter handling
routines
+ added support for composite parameters (and hence function
results) to tcg.a_load_ref_cgpara() (so it can be used for
handling, e.g., 64 bit parameters on 32 bit platforms)
* the above fixed writing past the end of allocated memory when
handling records returned in registers on x86-64 whose size is
not a multiple of 8 bytes (mantis #16357)
- removed the x86-64 and PPC specific versions of a_load_ref_cgpara(),
as they are now handled correctly by the generic version
* moved the responsibility of allocating tcgpara cpu registers
(using paramanager.allocparaloc()) from the callers of
cg.a_load*_cgpara() to the cg.a_load*_cgpara() methods
themselves (so the register allocation can be done efficiently
when dealing with function results)
* for the above, renamed paramanager.alloc/freeparaloc() to
paramanager.alloc/freecgpara(), and use paramanager.allocparaloc()
to allocate individual pcgparalocations instead
* fixed the register size of SSE2 function result registers for
x86-64 (when used for floating point), which results in removing
a few superfluous "movs? %xmm0,%xmm0" instructions
* fixed compilation of paramanagers of avr, m68k and mips after r13695
and also updated them for these new changes

git-svn-id: trunk@15350 -

Jonas Maebe 15 years ago
parent
commit
283018a3bf

+ 14 - 8
compiler/arm/cgcpu.pas

@@ -418,6 +418,7 @@ unit cgcpu;
         ref: treference;
         ref: treference;
       begin
       begin
         paraloc.check_simple_location;
         paraloc.check_simple_location;
+        paramanager.allocparaloc(list,paraloc.location);
         case paraloc.location^.loc of
         case paraloc.location^.loc of
           LOC_REGISTER,LOC_CREGISTER:
           LOC_REGISTER,LOC_CREGISTER:
             a_load_const_reg(list,size,a,paraloc.location^.register);
             a_load_const_reg(list,size,a,paraloc.location^.register);
@@ -445,6 +446,7 @@ unit cgcpu;
         sizeleft := paraloc.intsize;
         sizeleft := paraloc.intsize;
         while assigned(location) do
         while assigned(location) do
           begin
           begin
+            paramanager.allocparaloc(list,location);
             case location^.loc of
             case location^.loc of
               LOC_REGISTER,LOC_CREGISTER:
               LOC_REGISTER,LOC_CREGISTER:
                 a_load_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
                 a_load_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
@@ -488,6 +490,7 @@ unit cgcpu;
         tmpreg: tregister;
         tmpreg: tregister;
       begin
       begin
         paraloc.check_simple_location;
         paraloc.check_simple_location;
+        paramanager.allocparaloc(list,paraloc.location);
         case paraloc.location^.loc of
         case paraloc.location^.loc of
           LOC_REGISTER,LOC_CREGISTER:
           LOC_REGISTER,LOC_CREGISTER:
             a_loadaddr_ref_reg(list,r,paraloc.location^.register);
             a_loadaddr_ref_reg(list,r,paraloc.location^.register);
@@ -1238,12 +1241,18 @@ unit cgcpu;
           begin
           begin
             case hloc^.loc of
             case hloc^.loc of
               LOC_FPUREGISTER,LOC_CFPUREGISTER:
               LOC_FPUREGISTER,LOC_CFPUREGISTER:
-                a_loadfpu_ref_reg(list,size,size,ref,hloc^.register);
+                begin
+                  paramanager.allocparaloc(list,paraloc.location);
+                  a_loadfpu_ref_reg(list,size,size,ref,hloc^.register);
+                end;
               LOC_REGISTER :
               LOC_REGISTER :
                 case hloc^.size of
                 case hloc^.size of
                   OS_32,
                   OS_32,
                   OS_F32:
                   OS_F32:
-                    a_load_ref_reg(list,OS_32,OS_32,href,hloc^.register);
+                    begin
+                      paramanager.allocparaloc(list,paraloc.location);
+                      a_load_ref_reg(list,OS_32,OS_32,href,hloc^.register);
+                    end;
                   OS_64,
                   OS_64,
                   OS_F64:
                   OS_F64:
                     cg64.a_load64_ref_cgpara(list,href,paraloc);
                     cg64.a_load64_ref_cgpara(list,href,paraloc);
@@ -1813,15 +1822,12 @@ unit cgcpu;
         paramanager.getintparaloc(pocall_default,1,paraloc1);
         paramanager.getintparaloc(pocall_default,1,paraloc1);
         paramanager.getintparaloc(pocall_default,2,paraloc2);
         paramanager.getintparaloc(pocall_default,2,paraloc2);
         paramanager.getintparaloc(pocall_default,3,paraloc3);
         paramanager.getintparaloc(pocall_default,3,paraloc3);
-        paramanager.allocparaloc(list,paraloc3);
         a_load_const_cgpara(list,OS_INT,len,paraloc3);
         a_load_const_cgpara(list,OS_INT,len,paraloc3);
-        paramanager.allocparaloc(list,paraloc2);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
-        paramanager.allocparaloc(list,paraloc2);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
-        paramanager.freeparaloc(list,paraloc3);
-        paramanager.freeparaloc(list,paraloc2);
-        paramanager.freeparaloc(list,paraloc1);
+        paramanager.freecgpara(list,paraloc3);
+        paramanager.freecgpara(list,paraloc2);
+        paramanager.freecgpara(list,paraloc1);
         alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
         alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
         alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
         alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
         a_call_name(list,'FPC_MOVE',false);
         a_call_name(list,'FPC_MOVE',false);

+ 50 - 30
compiler/arm/cpupara.pas

@@ -42,7 +42,7 @@ unit cpupara;
           procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara:TCGPara);override;
           procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara:TCGPara);override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
-          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;override;
+          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;override;
          private
          private
           procedure init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword);
           procedure init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword);
           function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
           function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
@@ -451,32 +451,45 @@ unit cpupara;
       end;
       end;
 
 
 
 
-    function  tarmparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;
+    function  tarmparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;
       var
       var
+        paraloc : pcgparalocation;
         retcgsize  : tcgsize;
         retcgsize  : tcgsize;
       begin
       begin
-        { Constructors return self instead of a boolean }
-        if (p.proctypeoption=potype_constructor) then
-          retcgsize:=OS_ADDR
-        else
-          retcgsize:=def_cgsize(def);
-
-        location_reset(result,LOC_INVALID,OS_NO);
-        result.size:=retcgsize;
-
+        result.init;
+        result.alignment:=get_para_align(p.proccalloption);
         { void has no location }
         { void has no location }
         if is_void(def) then
         if is_void(def) then
           begin
           begin
-            location_reset(result,LOC_VOID,OS_NO);
+            paraloc:=result.add_location;
+            result.size:=OS_NO;
+            result.intsize:=0;
+            paraloc^.size:=OS_NO;
+            paraloc^.loc:=LOC_VOID;
             exit;
             exit;
           end;
           end;
+        { Constructors return self instead of a boolean }
+        if (p.proctypeoption=potype_constructor) then
+          begin
+            retcgsize:=OS_ADDR;
+            result.intsize:=sizeof(pint);
+          end
+        else
+          begin
+            retcgsize:=def_cgsize(def);
+            result.intsize:=def.size;
+          end;
+        result.size:=retcgsize;
         { Return is passed as var parameter }
         { Return is passed as var parameter }
         if ret_in_param(def,p.proccalloption) then
         if ret_in_param(def,p.proccalloption) then
           begin
           begin
-            result.loc:=LOC_REFERENCE;
-            result.size:=retcgsize;
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_REFERENCE;
+            paraloc^.size:=retcgsize;
             exit;
             exit;
           end;
           end;
+
+        paraloc:=result.add_location;
         { Return in FPU register? }
         { Return in FPU register? }
         if def.typ=floatdef then
         if def.typ=floatdef then
           begin
           begin
@@ -488,18 +501,20 @@ unit cpupara;
                   OS_64,
                   OS_64,
                   OS_F64:
                   OS_F64:
                     begin
                     begin
-                      { low }
-                      result.loc:=LOC_REGISTER;
-                      result.register64.reglo:=NR_FUNCTION_RESULT64_LOW_REG;
-                      result.register64.reghi:=NR_FUNCTION_RESULT64_HIGH_REG;
-                      result.size:=OS_64;
+                      paraloc^.loc:=LOC_REGISTER;
+                      paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
+                      paraloc^.size:=OS_32;
+                      paraloc:=result.add_location;
+                      paraloc^.loc:=LOC_REGISTER;
+                      paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
+                      paraloc^.size:=OS_32;
                     end;
                     end;
                   OS_32,
                   OS_32,
                   OS_F32:
                   OS_F32:
                     begin
                     begin
-                      result.loc:=LOC_REGISTER;
-                      result.register:=NR_FUNCTION_RETURN_REG;
-                      result.size:=OS_32;
+                      paraloc^.loc:=LOC_REGISTER;
+                      paraloc^.register:=NR_FUNCTION_RETURN_REG;
+                      paraloc^.size:=OS_32;
                     end;
                     end;
                   else
                   else
                     internalerror(2005082603);
                     internalerror(2005082603);
@@ -507,8 +522,9 @@ unit cpupara;
               end
               end
             else
             else
               begin
               begin
-                result.loc:=LOC_FPUREGISTER;
-                result.register:=NR_FPU_RESULT_REG;
+                paraloc^.loc:=LOC_FPUREGISTER;
+                paraloc^.register:=NR_FPU_RESULT_REG;
+                paraloc^.size:=retcgsize;
               end;
               end;
           end
           end
           { Return in register }
           { Return in register }
@@ -516,15 +532,19 @@ unit cpupara;
           begin
           begin
             if retcgsize in [OS_64,OS_S64] then
             if retcgsize in [OS_64,OS_S64] then
               begin
               begin
-                { low }
-                result.loc:=LOC_REGISTER;
-                result.register64.reglo:=NR_FUNCTION_RESULT64_LOW_REG;
-                result.register64.reghi:=NR_FUNCTION_RESULT64_HIGH_REG;
+                paraloc^.loc:=LOC_REGISTER;
+                paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
+                paraloc^.size:=OS_32;
+                paraloc:=result.add_location;
+                paraloc^.loc:=LOC_REGISTER;
+                paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
+                paraloc^.size:=OS_32;
               end
               end
             else
             else
               begin
               begin
-                result.loc:=LOC_REGISTER;
-                result.register:=NR_FUNCTION_RETURN_REG;
+                paraloc^.loc:=LOC_REGISTER;
+                paraloc^.register:=NR_FUNCTION_RETURN_REG;
+                paraloc^.size:=retcgsize;
               end;
               end;
           end;
           end;
       end;
       end;

+ 29 - 5
compiler/arm/narmcal.pas

@@ -30,20 +30,44 @@ interface
 
 
     type
     type
        tarmcallnode = class(tcgcallnode)
        tarmcallnode = class(tcgcallnode)
-          // procedure push_framepointer;override;
+         procedure set_result_location(realresdef: tstoreddef);override;
        end;
        end;
 
 
 implementation
 implementation
 
 
   uses
   uses
+    verbose,globtype,globals,aasmdata,
+    symconst,
+    cgbase,
+    cpubase,cpuinfo,
+    ncgutil,
     paramgr;
     paramgr;
 
 
-(*
-  procedure tarmcallnode.push_framepointer;
+  procedure tarmcallnode.set_result_location(realresdef: tstoreddef);
     begin
     begin
-      framepointer_paraloc:=paramanager.getintparaloc(procdefinition.proccalloption,1);
+      if (realresdef.typ=floatdef) and
+         ((cs_fp_emulation in current_settings.moduleswitches) or
+          (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3])) then
+        begin
+          { keep the fpu values in integer registers for now, the code
+            generator will move them to memory or an mmregister when necessary
+            (avoids double moves in case a function result is assigned to
+             another function result, or passed as a parameter) }
+          case retloc.size of
+            OS_32,
+            OS_F32:
+              location_allocate_register(current_asmdata.CurrAsmList,location,s32inttype,false);
+            OS_64,
+            OS_F64:
+              location_allocate_register(current_asmdata.CurrAsmList,location,s64inttype,false);
+            else
+              internalerror(2010053008);
+          end
+        end
+      else
+        inherited;
     end;
     end;
-*)
+
 
 
 begin
 begin
    ccallnode:=tarmcallnode;
    ccallnode:=tarmcallnode;

+ 6 - 6
compiler/avr/cgcpu.pas

@@ -151,6 +151,7 @@ unit cgcpu;
         ref: treference;
         ref: treference;
       begin
       begin
         paraloc.check_simple_location;
         paraloc.check_simple_location;
+        paramanager.allocparaloc(list,paraloc.location);
         case paraloc.location^.loc of
         case paraloc.location^.loc of
           LOC_REGISTER,LOC_CREGISTER:
           LOC_REGISTER,LOC_CREGISTER:
             a_load_const_reg(list,size,a,paraloc.location^.register);
             a_load_const_reg(list,size,a,paraloc.location^.register);
@@ -178,6 +179,7 @@ unit cgcpu;
         sizeleft := paraloc.intsize;
         sizeleft := paraloc.intsize;
         while assigned(location) do
         while assigned(location) do
           begin
           begin
+            paramanager.allocparaloc(list,location);
             case location^.loc of
             case location^.loc of
               LOC_REGISTER,LOC_CREGISTER:
               LOC_REGISTER,LOC_CREGISTER:
                 a_load_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
                 a_load_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
@@ -214,6 +216,7 @@ unit cgcpu;
         tmpreg: tregister;
         tmpreg: tregister;
       begin
       begin
         paraloc.check_simple_location;
         paraloc.check_simple_location;
+        paramanager.allocparaloc(list,paraloc.location);
         case paraloc.location^.loc of
         case paraloc.location^.loc of
           LOC_REGISTER,LOC_CREGISTER:
           LOC_REGISTER,LOC_CREGISTER:
             a_loadaddr_ref_reg(list,r,paraloc.location^.register);
             a_loadaddr_ref_reg(list,r,paraloc.location^.register);
@@ -714,15 +717,12 @@ unit cgcpu;
         paramanager.getintparaloc(pocall_default,1,paraloc1);
         paramanager.getintparaloc(pocall_default,1,paraloc1);
         paramanager.getintparaloc(pocall_default,2,paraloc2);
         paramanager.getintparaloc(pocall_default,2,paraloc2);
         paramanager.getintparaloc(pocall_default,3,paraloc3);
         paramanager.getintparaloc(pocall_default,3,paraloc3);
-        paramanager.allocparaloc(list,paraloc3);
         a_load_const_cgpara(list,OS_INT,len,paraloc3);
         a_load_const_cgpara(list,OS_INT,len,paraloc3);
-        paramanager.allocparaloc(list,paraloc2);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
-        paramanager.allocparaloc(list,paraloc2);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
-        paramanager.freeparaloc(list,paraloc3);
-        paramanager.freeparaloc(list,paraloc2);
-        paramanager.freeparaloc(list,paraloc1);
+        paramanager.freecgpara(list,paraloc3);
+        paramanager.freecgpara(list,paraloc2);
+        paramanager.freecgpara(list,paraloc1);
         alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
         alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
         a_call_name_static(list,'FPC_MOVE');
         a_call_name_static(list,'FPC_MOVE');
         dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
         dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));

+ 52 - 31
compiler/avr/cpupara.pas

@@ -41,10 +41,12 @@ unit cpupara;
           procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara:TCGPara);override;
           procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara:TCGPara);override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
+          function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;override;
          private
          private
           procedure init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword);
           procedure init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword);
           function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
           function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
             var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword):longint;
             var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword):longint;
+          procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
        end;
        end;
 
 
   implementation
   implementation
@@ -405,32 +407,45 @@ unit cpupara;
       end;
       end;
 
 
 
 
-    function  tavrparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;
+    function  tavrparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;
       var
       var
-        retcgsize  : tcgsize;
+        retcgsize : tcgsize;
+        paraloc : pcgparalocation;
       begin
       begin
-        { Constructors return self instead of a boolean }
-        if (p.proctypeoption=potype_constructor) then
-          retcgsize:=OS_ADDR
-        else
-          retcgsize:=def_cgsize(def);
-
-        location_reset(result,LOC_INVALID,OS_NO);
-        result.size:=retcgsize;
-
+        result.init;
+        result.alignment:=get_para_align(p.proccalloption);
         { void has no location }
         { void has no location }
         if is_void(def) then
         if is_void(def) then
           begin
           begin
-            location_reset(result,LOC_VOID,OS_NO);
+            paraloc:=result.add_location;
+            result.size:=OS_NO;
+            result.intsize:=0;
+            paraloc^.size:=OS_NO;
+            paraloc^.loc:=LOC_VOID;
             exit;
             exit;
           end;
           end;
+        { Constructors return self instead of a boolean }
+        if (p.proctypeoption=potype_constructor) then
+          begin
+            retcgsize:=OS_ADDR;
+            result.intsize:=sizeof(pint);
+          end
+        else
+          begin
+            retcgsize:=def_cgsize(def);
+            result.intsize:=def.size;
+          end;
+        result.size:=retcgsize;
         { Return is passed as var parameter }
         { Return is passed as var parameter }
         if ret_in_param(def,p.proccalloption) then
         if ret_in_param(def,p.proccalloption) then
           begin
           begin
-            result.loc:=LOC_REFERENCE;
-            result.size:=retcgsize;
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_REFERENCE;
+            paraloc^.size:=retcgsize;
             exit;
             exit;
           end;
           end;
+
+        paraloc:=result.add_location;
         { Return in FPU register? }
         { Return in FPU register? }
         if def.typ=floatdef then
         if def.typ=floatdef then
           begin
           begin
@@ -440,18 +455,20 @@ unit cpupara;
                   OS_64,
                   OS_64,
                   OS_F64:
                   OS_F64:
                     begin
                     begin
-                      { low }
-                      result.loc:=LOC_REGISTER;
-                      result.register64.reglo:=NR_FUNCTION_RESULT64_LOW_REG;
-                      result.register64.reghi:=NR_FUNCTION_RESULT64_HIGH_REG;
-                      result.size:=OS_64;
+                      paraloc^.loc:=LOC_REGISTER;
+                      paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
+                      paraloc^.size:=OS_32;
+                      paraloc:=result.add_location;
+                      paraloc^.loc:=LOC_REGISTER;
+                      paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
+                      paraloc^.size:=OS_32;
                     end;
                     end;
                   OS_32,
                   OS_32,
                   OS_F32:
                   OS_F32:
                     begin
                     begin
-                      result.loc:=LOC_REGISTER;
-                      result.register:=NR_FUNCTION_RETURN_REG;
-                      result.size:=OS_32;
+                      paraloc^.loc:=LOC_REGISTER;
+                      paraloc^.register:=NR_FUNCTION_RETURN_REG;
+                      paraloc^.size:=OS_32;
                     end;
                     end;
                   else
                   else
                     internalerror(2005082603);
                     internalerror(2005082603);
@@ -459,8 +476,9 @@ unit cpupara;
               end
               end
             else
             else
               begin
               begin
-                result.loc:=LOC_FPUREGISTER;
-                result.register:=NR_FPU_RESULT_REG;
+                paraloc^.loc:=LOC_FPUREGISTER;
+                paraloc^.register:=NR_FPU_RESULT_REG;
+                paraloc^.size:=retcgsize;
               end;
               end;
           end
           end
           { Return in register }
           { Return in register }
@@ -468,17 +486,20 @@ unit cpupara;
           begin
           begin
             if retcgsize in [OS_64,OS_S64] then
             if retcgsize in [OS_64,OS_S64] then
               begin
               begin
-                { low }
-                result.loc:=LOC_REGISTER;
-                result.register64.reglo:=NR_FUNCTION_RESULT64_LOW_REG;
-                result.register64.reghi:=NR_FUNCTION_RESULT64_HIGH_REG;
+                paraloc^.loc:=LOC_REGISTER;
+                paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
+                paraloc^.size:=OS_32;
+                paraloc:=result.add_location;
+                paraloc^.loc:=LOC_REGISTER;
+                paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
+                paraloc^.size:=OS_32;
               end
               end
             else
             else
               begin
               begin
-                result.loc:=LOC_REGISTER;
-                result.register:=NR_FUNCTION_RETURN_REG;
+                paraloc^.loc:=LOC_REGISTER;
+                paraloc^.register:=NR_FUNCTION_RETURN_REG;
+                paraloc^.size:=OS_32;
               end;
               end;
-
           end;
           end;
       end;
       end;
 
 

+ 174 - 80
compiler/cgobj.pas

@@ -120,7 +120,8 @@ unit cgobj;
 
 
              This routine should push/send the parameter to the routine, as
              This routine should push/send the parameter to the routine, as
              required by the specific processor ABI and routine modifiers.
              required by the specific processor ABI and routine modifiers.
-             This must be overriden for each CPU target.
+             It must generate register allocation information for the cgpara in
+             case it consists of cpuregisters.
 
 
              @param(size size of the operand in the register)
              @param(size size of the operand in the register)
              @param(r register source of the operand)
              @param(r register source of the operand)
@@ -132,6 +133,8 @@ unit cgobj;
              A generic version is provided. This routine should
              A generic version is provided. This routine should
              be overriden for optimization purposes if the cpu
              be overriden for optimization purposes if the cpu
              permits directly sending this type of parameter.
              permits directly sending this type of parameter.
+             It must generate register allocation information for the cgpara in
+             case it consists of cpuregisters.
 
 
              @param(size size of the operand in constant)
              @param(size size of the operand in constant)
              @param(a value of constant to send)
              @param(a value of constant to send)
@@ -143,6 +146,8 @@ unit cgobj;
              A generic version is provided. This routine should
              A generic version is provided. This routine should
              be overriden for optimization purposes if the cpu
              be overriden for optimization purposes if the cpu
              permits directly sending this type of parameter.
              permits directly sending this type of parameter.
+             It must generate register allocation information for the cgpara in
+             case it consists of cpuregisters.
 
 
              @param(size size of the operand in constant)
              @param(size size of the operand in constant)
              @param(r Memory reference of value to send)
              @param(r Memory reference of value to send)
@@ -162,6 +167,8 @@ unit cgobj;
           {# Pass the address of a reference to a routine. This routine
           {# Pass the address of a reference to a routine. This routine
              will calculate the address of the reference, and pass this
              will calculate the address of the reference, and pass this
              calculated address as a parameter.
              calculated address as a parameter.
+             It must generate register allocation information for the cgpara in
+             case it consists of cpuregisters.
 
 
              A generic version is provided. This routine should
              A generic version is provided. This routine should
              be overriden for optimization purposes if the cpu
              be overriden for optimization purposes if the cpu
@@ -173,6 +180,8 @@ unit cgobj;
           procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const cgpara : TCGPara);virtual;
           procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const cgpara : TCGPara);virtual;
 
 
           {# Load a cgparaloc into a memory reference.
           {# Load a cgparaloc into a memory reference.
+             It must generate register allocation information for the cgpara in
+             case it consists of cpuregisters.
 
 
            @param(paraloc the source parameter sublocation)
            @param(paraloc the source parameter sublocation)
            @param(ref the destination reference)
            @param(ref the destination reference)
@@ -880,6 +889,7 @@ implementation
          ref : treference;
          ref : treference;
       begin
       begin
          cgpara.check_simple_location;
          cgpara.check_simple_location;
+         paramanager.alloccgpara(list,cgpara);
          case cgpara.location^.loc of
          case cgpara.location^.loc of
             LOC_REGISTER,LOC_CREGISTER:
             LOC_REGISTER,LOC_CREGISTER:
               a_load_reg_reg(list,size,cgpara.location^.size,r,cgpara.location^.register);
               a_load_reg_reg(list,size,cgpara.location^.size,r,cgpara.location^.register);
@@ -899,6 +909,7 @@ implementation
          ref : treference;
          ref : treference;
       begin
       begin
          cgpara.check_simple_location;
          cgpara.check_simple_location;
+         paramanager.alloccgpara(list,cgpara);
          case cgpara.location^.loc of
          case cgpara.location^.loc of
             LOC_REGISTER,LOC_CREGISTER:
             LOC_REGISTER,LOC_CREGISTER:
               a_load_const_reg(list,cgpara.location^.size,a,cgpara.location^.register);
               a_load_const_reg(list,cgpara.location^.size,a,cgpara.location^.register);
@@ -915,31 +926,134 @@ implementation
 
 
     procedure tcg.a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const cgpara : TCGPara);
     procedure tcg.a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const cgpara : TCGPara);
       var
       var
-         ref : treference;
-      begin
-         cgpara.check_simple_location;
-         case cgpara.location^.loc of
-            LOC_REGISTER,LOC_CREGISTER:
-              a_load_ref_reg(list,size,cgpara.location^.size,r,cgpara.location^.register);
-            LOC_REFERENCE,LOC_CREFERENCE:
-              begin
-                 reference_reset_base(ref,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
-                 if (size <> OS_NO) and
-                    (tcgsize2size[size] < sizeof(aint)) then
-                   begin
-                     if (cgpara.size = OS_NO) or
-                        assigned(cgpara.location^.next) then
-                       internalerror(2006052401);
-                     a_load_ref_ref(list,size,cgpara.size,r,ref);
-                   end
-                 else
-                   { use concatcopy, because the parameter can be larger than }
-                   { what the OS_* constants can handle                       }
-                   g_concatcopy(list,r,ref,cgpara.intsize);
-              end
-            else
-              internalerror(2002071004);
-         end;
+        tmpref, ref: treference;
+        tmpreg: tregister;
+        location: pcgparalocation;
+        orgsizeleft,
+        sizeleft: aint;
+        reghasvalue: boolean;
+      begin
+        location:=cgpara.location;
+        tmpref:=r;
+        sizeleft:=cgpara.intsize;
+        while assigned(location) do
+          begin
+            paramanager.allocparaloc(list,location);
+            case location^.loc of
+              LOC_REGISTER,LOC_CREGISTER:
+                begin
+                   { Parameter locations are often allocated in multiples of
+                     entire registers. If a parameter only occupies a part of
+                     such a register (e.g. a 16 bit int on a 32 bit
+                     architecture), the size of this parameter can only be
+                     determined by looking at the "size" parameter of this
+                     method -> if the size parameter is <= sizeof(aint), then
+                     we check that there is only one parameter location and
+                     then use this "size" to load the value into the parameter
+                     location }
+                   if (size<>OS_NO) and
+                      (tcgsize2size[size]<=sizeof(aint)) then
+                     begin
+                       cgpara.check_simple_location;
+                       a_load_ref_reg(list,size,location^.size,tmpref,location^.register);
+                     end
+                   { there's a lot more data left, and the current paraloc's
+                     register is entirely filled with part of that data }
+                   else if (sizeleft>sizeof(aint)) then
+                     begin
+                       a_load_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
+                     end
+                   { we're at the end of the data, and it can be loaded into
+                     the current location's register with a single regular
+                     load }
+                   else if (sizeleft in [1,2{$ifndef cpu16bitalu},4{$endif}{$ifdef cpu64bitalu},8{$endif}]) then
+                     begin
+                       a_load_ref_reg(list,int_cgsize(sizeleft),location^.size,tmpref,location^.register);
+                     end
+                   { we're at the end of the data, and we need multiple loads
+                     to get it in the register because it's an irregular size }
+                   else
+                     begin
+                       { should be the last part }
+                       if assigned(location^.next) then
+                         internalerror(2010052907);
+                       { load the value piecewise to get it into the register }
+                       orgsizeleft:=sizeleft;
+                       reghasvalue:=false;
+{$ifdef cpu64bitalu}
+                       if sizeleft>=4 then
+                         begin
+                           a_load_ref_reg(list,OS_32,location^.size,tmpref,location^.register);
+                           dec(sizeleft,4);
+                           if target_info.endian=endian_big then
+                             a_op_const_reg(list,OP_SHL,location^.size,sizeleft*8,location^.register);
+                           inc(tmpref.offset,4);
+                           reghasvalue:=true;
+                         end;
+{$endif cpu64bitalu}
+                       if sizeleft>=2 then
+                         begin
+                           tmpreg:=getintregister(list,location^.size);
+                           a_load_ref_reg(list,OS_16,location^.size,tmpref,tmpreg);
+                           dec(sizeleft,2);
+                           if reghasvalue then
+                             begin
+                               if target_info.endian=endian_big then
+                                 a_op_const_reg(list,OP_SHL,location^.size,sizeleft*8,tmpreg)
+                               else
+                                 a_op_const_reg(list,OP_SHL,location^.size,(orgsizeleft-(sizeleft+2))*8,tmpreg);
+                               a_op_reg_reg(list,OP_OR,location^.size,tmpreg,location^.register);
+                             end
+                           else
+                             begin
+                               if target_info.endian=endian_big then
+                                 a_op_const_reg_reg(list,OP_SHL,location^.size,sizeleft*8,tmpreg,location^.register)
+                               else
+                                 a_load_reg_reg(list,location^.size,location^.size,tmpreg,location^.register);
+                             end;
+                           inc(tmpref.offset,2);
+                           reghasvalue:=true;
+                         end;
+                       if sizeleft=1 then
+                         begin
+                           tmpreg:=getintregister(list,location^.size);
+                           a_load_ref_reg(list,OS_8,location^.size,tmpref,tmpreg);
+                           dec(sizeleft,1);
+                           if reghasvalue then
+                             begin
+                               if target_info.endian=endian_little then
+                                 a_op_const_reg(list,OP_SHL,location^.size,(orgsizeleft-(sizeleft+1))*8,tmpreg);
+                               a_op_reg_reg(list,OP_OR,location^.size,tmpreg,location^.register)
+                             end
+                           else
+                             a_load_reg_reg(list,location^.size,location^.size,tmpreg,location^.register);
+                           inc(tmpref.offset);
+                         end;
+                       { the loop will already adjust the offset and sizeleft }
+                       dec(tmpref.offset,orgsizeleft);
+                       sizeleft:=orgsizeleft;
+                     end;
+                end;
+              LOC_REFERENCE,LOC_CREFERENCE:
+                begin
+                   if assigned(location^.next) then
+                     internalerror(2010052906);
+                   reference_reset_base(ref,location^.reference.index,location^.reference.offset,newalignment(cgpara.alignment,cgpara.intsize-sizeleft));
+                   if (size <> OS_NO) and
+                      (tcgsize2size[size] <= sizeof(aint)) then
+                     a_load_ref_ref(list,size,location^.size,tmpref,ref)
+                   else
+                     { use concatcopy, because the parameter can be larger than }
+                     { what the OS_* constants can handle                       }
+                     g_concatcopy(list,tmpref,ref,sizeleft);
+                end
+              else
+                internalerror(2002071004);
+            end;
+            inc(tmpref.offset,tcgsize2size[location^.size]);
+            dec(sizeleft,tcgsize2size[location^.size]);
+            location:=location^.next;
+          end;
       end;
       end;
 
 
 
 
@@ -966,7 +1080,10 @@ implementation
       begin
       begin
          cgpara.check_simple_location;
          cgpara.check_simple_location;
          if cgpara.location^.loc in [LOC_CREGISTER,LOC_REGISTER] then
          if cgpara.location^.loc in [LOC_CREGISTER,LOC_REGISTER] then
-           a_loadaddr_ref_reg(list,r,cgpara.location^.register)
+           begin
+             paramanager.allocparaloc(list,cgpara.location);
+             a_loadaddr_ref_reg(list,r,cgpara.location^.register)
+           end
          else
          else
            begin
            begin
              hr:=getaddressregister(list);
              hr:=getaddressregister(list);
@@ -2603,6 +2720,7 @@ implementation
       var
       var
          ref : treference;
          ref : treference;
       begin
       begin
+        paramanager.alloccgpara(list,cgpara);
          case cgpara.location^.loc of
          case cgpara.location^.loc of
             LOC_FPUREGISTER,LOC_CFPUREGISTER:
             LOC_FPUREGISTER,LOC_CFPUREGISTER:
               begin
               begin
@@ -2638,6 +2756,7 @@ implementation
           LOC_FPUREGISTER,LOC_CFPUREGISTER:
           LOC_FPUREGISTER,LOC_CFPUREGISTER:
             begin
             begin
               cgpara.check_simple_location;
               cgpara.check_simple_location;
+              paramanager.alloccgpara(list,cgpara);
               a_loadfpu_ref_reg(list,size,size,ref,cgpara.location^.register);
               a_loadfpu_ref_reg(list,size,size,ref,cgpara.location^.register);
             end;
             end;
           LOC_REFERENCE,LOC_CREFERENCE:
           LOC_REFERENCE,LOC_CREFERENCE:
@@ -3059,6 +3178,7 @@ implementation
             (size<>OS_F64) then
             (size<>OS_F64) then
 {$endif not cpu64bitalu}
 {$endif not cpu64bitalu}
            cgpara.check_simple_location;
            cgpara.check_simple_location;
+         paramanager.alloccgpara(list,cgpara);
          case cgpara.location^.loc of
          case cgpara.location^.loc of
           LOC_MMREGISTER,LOC_CMMREGISTER:
           LOC_MMREGISTER,LOC_CMMREGISTER:
             a_loadmm_reg_reg(list,size,cgpara.location^.size,reg,cgpara.location^.register,shuffle);
             a_loadmm_reg_reg(list,size,cgpara.location^.size,reg,cgpara.location^.register,shuffle);
@@ -3248,15 +3368,12 @@ implementation
         paramanager.getintparaloc(pocall_default,1,cgpara1);
         paramanager.getintparaloc(pocall_default,1,cgpara1);
         paramanager.getintparaloc(pocall_default,2,cgpara2);
         paramanager.getintparaloc(pocall_default,2,cgpara2);
         paramanager.getintparaloc(pocall_default,3,cgpara3);
         paramanager.getintparaloc(pocall_default,3,cgpara3);
-        paramanager.allocparaloc(list,cgpara3);
         a_loadaddr_ref_cgpara(list,dest,cgpara3);
         a_loadaddr_ref_cgpara(list,dest,cgpara3);
-        paramanager.allocparaloc(list,cgpara2);
         a_loadaddr_ref_cgpara(list,source,cgpara2);
         a_loadaddr_ref_cgpara(list,source,cgpara2);
-        paramanager.allocparaloc(list,cgpara1);
         a_load_const_cgpara(list,OS_INT,len,cgpara1);
         a_load_const_cgpara(list,OS_INT,len,cgpara1);
-        paramanager.freeparaloc(list,cgpara3);
-        paramanager.freeparaloc(list,cgpara2);
-        paramanager.freeparaloc(list,cgpara1);
+        paramanager.freecgpara(list,cgpara3);
+        paramanager.freecgpara(list,cgpara2);
+        paramanager.freecgpara(list,cgpara1);
         allocallcpuregisters(list);
         allocallcpuregisters(list);
         a_call_name(list,'FPC_SHORTSTR_ASSIGN',false);
         a_call_name(list,'FPC_SHORTSTR_ASSIGN',false);
         deallocallcpuregisters(list);
         deallocallcpuregisters(list);
@@ -3274,12 +3391,10 @@ implementation
         cgpara2.init;
         cgpara2.init;
         paramanager.getintparaloc(pocall_default,1,cgpara1);
         paramanager.getintparaloc(pocall_default,1,cgpara1);
         paramanager.getintparaloc(pocall_default,2,cgpara2);
         paramanager.getintparaloc(pocall_default,2,cgpara2);
-        paramanager.allocparaloc(list,cgpara2);
         a_loadaddr_ref_cgpara(list,dest,cgpara2);
         a_loadaddr_ref_cgpara(list,dest,cgpara2);
-        paramanager.allocparaloc(list,cgpara1);
         a_loadaddr_ref_cgpara(list,source,cgpara1);
         a_loadaddr_ref_cgpara(list,source,cgpara1);
-        paramanager.freeparaloc(list,cgpara2);
-        paramanager.freeparaloc(list,cgpara1);
+        paramanager.freecgpara(list,cgpara2);
+        paramanager.freecgpara(list,cgpara1);
         allocallcpuregisters(list);
         allocallcpuregisters(list);
         a_call_name(list,'FPC_VARIANT_COPY_OVERWRITE',false);
         a_call_name(list,'FPC_VARIANT_COPY_OVERWRITE',false);
         deallocallcpuregisters(list);
         deallocallcpuregisters(list);
@@ -3313,7 +3428,6 @@ implementation
          { call the special incr function or the generic addref }
          { call the special incr function or the generic addref }
          if incrfunc<>'' then
          if incrfunc<>'' then
           begin
           begin
-            paramanager.allocparaloc(list,cgpara1);
             { widestrings aren't ref. counted on all platforms so we need the address
             { widestrings aren't ref. counted on all platforms so we need the address
               to create a real copy }
               to create a real copy }
             if is_widestring(t) then
             if is_widestring(t) then
@@ -3321,7 +3435,7 @@ implementation
             else
             else
               { these functions get the pointer by value }
               { these functions get the pointer by value }
               a_load_ref_cgpara(list,OS_ADDR,ref,cgpara1);
               a_load_ref_cgpara(list,OS_ADDR,ref,cgpara1);
-            paramanager.freeparaloc(list,cgpara1);
+            paramanager.freecgpara(list,cgpara1);
             allocallcpuregisters(list);
             allocallcpuregisters(list);
             a_call_name(list,incrfunc,false);
             a_call_name(list,incrfunc,false);
             deallocallcpuregisters(list);
             deallocallcpuregisters(list);
@@ -3329,12 +3443,10 @@ implementation
          else
          else
           begin
           begin
             reference_reset_symbol(href,RTTIWriter.get_rtti_label(t,initrtti),0,sizeof(pint));
             reference_reset_symbol(href,RTTIWriter.get_rtti_label(t,initrtti),0,sizeof(pint));
-            paramanager.allocparaloc(list,cgpara2);
             a_loadaddr_ref_cgpara(list,href,cgpara2);
             a_loadaddr_ref_cgpara(list,href,cgpara2);
-            paramanager.allocparaloc(list,cgpara1);
             a_loadaddr_ref_cgpara(list,ref,cgpara1);
             a_loadaddr_ref_cgpara(list,ref,cgpara1);
-            paramanager.freeparaloc(list,cgpara1);
-            paramanager.freeparaloc(list,cgpara2);
+            paramanager.freecgpara(list,cgpara1);
+            paramanager.freecgpara(list,cgpara2);
             allocallcpuregisters(list);
             allocallcpuregisters(list);
             a_call_name(list,'FPC_ADDREF',false);
             a_call_name(list,'FPC_ADDREF',false);
             deallocallcpuregisters(list);
             deallocallcpuregisters(list);
@@ -3384,14 +3496,11 @@ implementation
             tempreg1:=getaddressregister(list);
             tempreg1:=getaddressregister(list);
             a_loadaddr_ref_reg(list,ref,tempreg1);
             a_loadaddr_ref_reg(list,ref,tempreg1);
             if needrtti then
             if needrtti then
-              begin
-                paramanager.allocparaloc(list,cgpara2);
-                a_load_reg_cgpara(list,OS_ADDR,tempreg2,cgpara2);
-                paramanager.freeparaloc(list,cgpara2);
-              end;
-            paramanager.allocparaloc(list,cgpara1);
+              a_load_reg_cgpara(list,OS_ADDR,tempreg2,cgpara2);
             a_load_reg_cgpara(list,OS_ADDR,tempreg1,cgpara1);
             a_load_reg_cgpara(list,OS_ADDR,tempreg1,cgpara1);
-            paramanager.freeparaloc(list,cgpara1);
+            paramanager.freecgpara(list,cgpara1);
+            if needrtti then
+              paramanager.freecgpara(list,cgpara2);
             allocallcpuregisters(list);
             allocallcpuregisters(list);
             a_call_name(list,decrfunc,false);
             a_call_name(list,decrfunc,false);
             deallocallcpuregisters(list);
             deallocallcpuregisters(list);
@@ -3399,12 +3508,10 @@ implementation
          else
          else
           begin
           begin
             reference_reset_symbol(href,RTTIWriter.get_rtti_label(t,initrtti),0,sizeof(pint));
             reference_reset_symbol(href,RTTIWriter.get_rtti_label(t,initrtti),0,sizeof(pint));
-            paramanager.allocparaloc(list,cgpara2);
             a_loadaddr_ref_cgpara(list,href,cgpara2);
             a_loadaddr_ref_cgpara(list,href,cgpara2);
-            paramanager.allocparaloc(list,cgpara1);
             a_loadaddr_ref_cgpara(list,ref,cgpara1);
             a_loadaddr_ref_cgpara(list,ref,cgpara1);
-            paramanager.freeparaloc(list,cgpara1);
-            paramanager.freeparaloc(list,cgpara2);
+            paramanager.freecgpara(list,cgpara1);
+            paramanager.freecgpara(list,cgpara2);
             allocallcpuregisters(list);
             allocallcpuregisters(list);
             a_call_name(list,'FPC_DECREF',false);
             a_call_name(list,'FPC_DECREF',false);
             deallocallcpuregisters(list);
             deallocallcpuregisters(list);
@@ -3432,12 +3539,10 @@ implementation
          else
          else
            begin
            begin
               reference_reset_symbol(href,RTTIWriter.get_rtti_label(t,initrtti),0,sizeof(pint));
               reference_reset_symbol(href,RTTIWriter.get_rtti_label(t,initrtti),0,sizeof(pint));
-              paramanager.allocparaloc(list,cgpara2);
               a_loadaddr_ref_cgpara(list,href,cgpara2);
               a_loadaddr_ref_cgpara(list,href,cgpara2);
-              paramanager.allocparaloc(list,cgpara1);
               a_loadaddr_ref_cgpara(list,ref,cgpara1);
               a_loadaddr_ref_cgpara(list,ref,cgpara1);
-              paramanager.freeparaloc(list,cgpara1);
-              paramanager.freeparaloc(list,cgpara2);
+              paramanager.freecgpara(list,cgpara1);
+              paramanager.freecgpara(list,cgpara2);
               allocallcpuregisters(list);
               allocallcpuregisters(list);
               a_call_name(list,'FPC_INITIALIZE',false);
               a_call_name(list,'FPC_INITIALIZE',false);
               deallocallcpuregisters(list);
               deallocallcpuregisters(list);
@@ -3467,12 +3572,10 @@ implementation
          else
          else
            begin
            begin
               reference_reset_symbol(href,RTTIWriter.get_rtti_label(t,initrtti),0,sizeof(pint));
               reference_reset_symbol(href,RTTIWriter.get_rtti_label(t,initrtti),0,sizeof(pint));
-              paramanager.allocparaloc(list,cgpara2);
               a_loadaddr_ref_cgpara(list,href,cgpara2);
               a_loadaddr_ref_cgpara(list,href,cgpara2);
-              paramanager.allocparaloc(list,cgpara1);
               a_loadaddr_ref_cgpara(list,ref,cgpara1);
               a_loadaddr_ref_cgpara(list,ref,cgpara1);
-              paramanager.freeparaloc(list,cgpara1);
-              paramanager.freeparaloc(list,cgpara2);
+              paramanager.freecgpara(list,cgpara1);
+              paramanager.freecgpara(list,cgpara2);
               allocallcpuregisters(list);
               allocallcpuregisters(list);
               a_call_name(list,'FPC_FINALIZE',false);
               a_call_name(list,'FPC_FINALIZE',false);
               deallocallcpuregisters(list);
               deallocallcpuregisters(list);
@@ -3703,9 +3806,8 @@ implementation
            a_cmp_const_reg_label(list,OS_ADDR,OC_NE,0,reg,oklabel);
            a_cmp_const_reg_label(list,OS_ADDR,OC_NE,0,reg,oklabel);
            cgpara1.init;
            cgpara1.init;
            paramanager.getintparaloc(pocall_default,1,cgpara1);
            paramanager.getintparaloc(pocall_default,1,cgpara1);
-           paramanager.allocparaloc(list,cgpara1);
            a_load_const_cgpara(list,OS_INT,210,cgpara1);
            a_load_const_cgpara(list,OS_INT,210,cgpara1);
-           paramanager.freeparaloc(list,cgpara1);
+           paramanager.freecgpara(list,cgpara1);
            a_call_name(list,'FPC_HANDLEERROR',false);
            a_call_name(list,'FPC_HANDLEERROR',false);
            a_label(list,oklabel);
            a_label(list,oklabel);
            cgpara1.done;
            cgpara1.done;
@@ -3725,12 +3827,10 @@ implementation
         if (cs_check_object in current_settings.localswitches) then
         if (cs_check_object in current_settings.localswitches) then
          begin
          begin
            reference_reset_symbol(hrefvmt,current_asmdata.RefAsmSymbol(objdef.vmt_mangledname),0,sizeof(pint));
            reference_reset_symbol(hrefvmt,current_asmdata.RefAsmSymbol(objdef.vmt_mangledname),0,sizeof(pint));
-           paramanager.allocparaloc(list,cgpara2);
            a_loadaddr_ref_cgpara(list,hrefvmt,cgpara2);
            a_loadaddr_ref_cgpara(list,hrefvmt,cgpara2);
-           paramanager.allocparaloc(list,cgpara1);
            a_load_reg_cgpara(list,OS_ADDR,reg,cgpara1);
            a_load_reg_cgpara(list,OS_ADDR,reg,cgpara1);
-           paramanager.freeparaloc(list,cgpara1);
-           paramanager.freeparaloc(list,cgpara2);
+           paramanager.freecgpara(list,cgpara1);
+           paramanager.freecgpara(list,cgpara2);
            allocallcpuregisters(list);
            allocallcpuregisters(list);
            a_call_name(list,'FPC_CHECK_OBJECT_EXT',false);
            a_call_name(list,'FPC_CHECK_OBJECT_EXT',false);
            deallocallcpuregisters(list);
            deallocallcpuregisters(list);
@@ -3738,9 +3838,8 @@ implementation
         else
         else
          if (cs_check_range in current_settings.localswitches) then
          if (cs_check_range in current_settings.localswitches) then
           begin
           begin
-            paramanager.allocparaloc(list,cgpara1);
             a_load_reg_cgpara(list,OS_ADDR,reg,cgpara1);
             a_load_reg_cgpara(list,OS_ADDR,reg,cgpara1);
-            paramanager.freeparaloc(list,cgpara1);
+            paramanager.freecgpara(list,cgpara1);
             allocallcpuregisters(list);
             allocallcpuregisters(list);
             a_call_name(list,'FPC_CHECK_OBJECT',false);
             a_call_name(list,'FPC_CHECK_OBJECT',false);
             deallocallcpuregisters(list);
             deallocallcpuregisters(list);
@@ -3784,9 +3883,8 @@ implementation
         { do getmem call }
         { do getmem call }
         cgpara1.init;
         cgpara1.init;
         paramanager.getintparaloc(pocall_default,1,cgpara1);
         paramanager.getintparaloc(pocall_default,1,cgpara1);
-        paramanager.allocparaloc(list,cgpara1);
         a_load_reg_cgpara(list,OS_INT,sizereg,cgpara1);
         a_load_reg_cgpara(list,OS_INT,sizereg,cgpara1);
-        paramanager.freeparaloc(list,cgpara1);
+        paramanager.freecgpara(list,cgpara1);
         allocallcpuregisters(list);
         allocallcpuregisters(list);
         a_call_name(list,'FPC_GETMEM',false);
         a_call_name(list,'FPC_GETMEM',false);
         deallocallcpuregisters(list);
         deallocallcpuregisters(list);
@@ -3802,17 +3900,14 @@ implementation
         paramanager.getintparaloc(pocall_default,2,cgpara2);
         paramanager.getintparaloc(pocall_default,2,cgpara2);
         paramanager.getintparaloc(pocall_default,3,cgpara3);
         paramanager.getintparaloc(pocall_default,3,cgpara3);
         { load size }
         { load size }
-        paramanager.allocparaloc(list,cgpara3);
         a_load_reg_cgpara(list,OS_INT,sizereg,cgpara3);
         a_load_reg_cgpara(list,OS_INT,sizereg,cgpara3);
         { load destination }
         { load destination }
-        paramanager.allocparaloc(list,cgpara2);
         a_load_reg_cgpara(list,OS_ADDR,destreg,cgpara2);
         a_load_reg_cgpara(list,OS_ADDR,destreg,cgpara2);
         { load source }
         { load source }
-        paramanager.allocparaloc(list,cgpara1);
         a_load_reg_cgpara(list,OS_ADDR,sourcereg,cgpara1);
         a_load_reg_cgpara(list,OS_ADDR,sourcereg,cgpara1);
-        paramanager.freeparaloc(list,cgpara3);
-        paramanager.freeparaloc(list,cgpara2);
-        paramanager.freeparaloc(list,cgpara1);
+        paramanager.freecgpara(list,cgpara3);
+        paramanager.freecgpara(list,cgpara2);
+        paramanager.freecgpara(list,cgpara1);
         allocallcpuregisters(list);
         allocallcpuregisters(list);
         a_call_name(list,'FPC_MOVE',false);
         a_call_name(list,'FPC_MOVE',false);
         deallocallcpuregisters(list);
         deallocallcpuregisters(list);
@@ -3830,9 +3925,8 @@ implementation
         cgpara1.init;
         cgpara1.init;
         paramanager.getintparaloc(pocall_default,1,cgpara1);
         paramanager.getintparaloc(pocall_default,1,cgpara1);
         { load source }
         { load source }
-        paramanager.allocparaloc(list,cgpara1);
         a_load_loc_cgpara(list,l,cgpara1);
         a_load_loc_cgpara(list,l,cgpara1);
-        paramanager.freeparaloc(list,cgpara1);
+        paramanager.freecgpara(list,cgpara1);
         allocallcpuregisters(list);
         allocallcpuregisters(list);
         a_call_name(list,'FPC_FREEMEM',false);
         a_call_name(list,'FPC_FREEMEM',false);
         deallocallcpuregisters(list);
         deallocallcpuregisters(list);

+ 18 - 6
compiler/i386/cgcpu.pas

@@ -289,17 +289,29 @@ unit cgcpu;
            { this messes up stack alignment }
            { this messes up stack alignment }
            (target_info.system <> system_i386_darwin) then
            (target_info.system <> system_i386_darwin) then
           begin
           begin
-            if (current_procinfo.procdef.funcretloc[calleeside].loc<>LOC_VOID) and
-               (current_procinfo.procdef.funcretloc[calleeside].loc=LOC_REGISTER) then
-              list.concat(Taicpu.Op_const_reg(A_ADD,S_L,4,NR_ESP))
+            if assigned(current_procinfo.procdef.funcretloc[calleeside].location) and
+               (current_procinfo.procdef.funcretloc[calleeside].location^.loc=LOC_REGISTER) then
+              begin
+                if (getsupreg(current_procinfo.procdef.funcretloc[calleeside].location^.register)=RS_EAX) then
+                  list.concat(Taicpu.Op_const_reg(A_ADD,S_L,4,NR_ESP))
+                else
+                  internalerror(2010053001);
+              end
             else
             else
               list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EAX));
               list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EAX));
             list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EBX));
             list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EBX));
             list.concat(Taicpu.Op_reg(A_POP,S_L,NR_ECX));
             list.concat(Taicpu.Op_reg(A_POP,S_L,NR_ECX));
 
 
-            if (current_procinfo.procdef.funcretloc[calleeside].loc=LOC_REGISTER) and
-               (current_procinfo.procdef.funcretloc[calleeside].size in [OS_64,OS_S64]) then
-              list.concat(Taicpu.Op_const_reg(A_ADD,S_L,4,NR_ESP))
+            if (current_procinfo.procdef.funcretloc[calleeside].size in [OS_64,OS_S64]) and
+               assigned(current_procinfo.procdef.funcretloc[calleeside].location) and
+               assigned(current_procinfo.procdef.funcretloc[calleeside].location^.next) and
+               (current_procinfo.procdef.funcretloc[calleeside].location^.next^.loc=LOC_REGISTER) then
+              begin
+                if (getsupreg(current_procinfo.procdef.funcretloc[calleeside].location^.next^.register)=RS_EDX) then
+                  list.concat(Taicpu.Op_const_reg(A_ADD,S_L,4,NR_ESP))
+                else
+                  internalerror(2010053002);
+              end
             else
             else
               list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EDX));
               list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EDX));
 
 

+ 43 - 25
compiler/i386/cpupara.pas

@@ -49,7 +49,7 @@ unit cpupara;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
           procedure createtempparaloc(list: TAsmList;calloption : tproccalloption;parasym : tparavarsym;var cgpara:TCGPara);override;
           procedure createtempparaloc(list: TAsmList;calloption : tproccalloption;parasym : tparavarsym;var cgpara:TCGPara);override;
-          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;override;
+          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): TCGPara;override;
        private
        private
           procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
           procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
           procedure create_stdcall_paraloc_info(p : tabstractprocdef; side: tcallercallee;paras:tparalist;var parasize:longint);
           procedure create_stdcall_paraloc_info(p : tabstractprocdef; side: tcallercallee;paras:tparalist;var parasize:longint);
@@ -312,63 +312,81 @@ unit cpupara;
       end;
       end;
 
 
 
 
-    function  ti386paramanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;
+    function  ti386paramanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): TCGPara;
       var
       var
         retcgsize  : tcgsize;
         retcgsize  : tcgsize;
+        paraloc : pcgparalocation;
       begin
       begin
-        { Constructors return self instead of a boolean }
-        if (p.proctypeoption=potype_constructor) then
-          retcgsize:=OS_ADDR
-        else
-          retcgsize:=def_cgsize(def);
-
-        location_reset(result,LOC_INVALID,OS_NO);
+        result.init;
+        result.alignment:=get_para_align(p.proccalloption);
         { void has no location }
         { void has no location }
         if is_void(def) then
         if is_void(def) then
           begin
           begin
-            location_reset(result,LOC_VOID,OS_NO);
+            paraloc:=result.add_location;
+            result.size:=OS_NO;
+            result.intsize:=0;
+            paraloc^.size:=OS_NO;
+            paraloc^.loc:=LOC_VOID;
             exit;
             exit;
           end;
           end;
+        { Constructors return self instead of a boolean }
+        if (p.proctypeoption=potype_constructor) then
+          begin
+            retcgsize:=OS_ADDR;
+            result.intsize:=sizeof(pint);
+          end
+        else
+          begin
+            retcgsize:=def_cgsize(def);
+            result.intsize:=def.size;
+          end;
+        result.size:=retcgsize;
         { Return is passed as var parameter }
         { Return is passed as var parameter }
         if ret_in_param(def,p.proccalloption) then
         if ret_in_param(def,p.proccalloption) then
           begin
           begin
-            result.loc:=LOC_REFERENCE;
-            result.size:=retcgsize;
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_REFERENCE;
+            paraloc^.size:=retcgsize;
             exit;
             exit;
           end;
           end;
         { Return in FPU register? }
         { Return in FPU register? }
         if def.typ=floatdef then
         if def.typ=floatdef then
           begin
           begin
-            result.loc:=LOC_FPUREGISTER;
-            result.register:=NR_FPU_RESULT_REG;
-            result.size:=retcgsize;
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_FPUREGISTER;
+            paraloc^.register:=NR_FPU_RESULT_REG;
+            paraloc^.size:=retcgsize;
           end
           end
         else
         else
          { Return in register }
          { Return in register }
           begin
           begin
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_REGISTER;
             if retcgsize in [OS_64,OS_S64] then
             if retcgsize in [OS_64,OS_S64] then
              begin
              begin
                { low 32bits }
                { low 32bits }
-               result.loc:=LOC_REGISTER;
-               result.size:=OS_64;
                if side=callerside then
                if side=callerside then
-                 result.register64.reglo:=NR_FUNCTION_RESULT64_LOW_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
                else
                else
-                 result.register64.reglo:=NR_FUNCTION_RETURN64_LOW_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_LOW_REG;
+               paraloc^.size:=OS_32;
+
                { high 32bits }
                { high 32bits }
+               paraloc:=result.add_location;
+               paraloc^.loc:=LOC_REGISTER;
                if side=callerside then
                if side=callerside then
-                 result.register64.reghi:=NR_FUNCTION_RESULT64_HIGH_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
                else
                else
-                 result.register64.reghi:=NR_FUNCTION_RETURN64_HIGH_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_HIGH_REG;
+               paraloc^.size:=OS_32;
              end
              end
             else
             else
              begin
              begin
-               result.loc:=LOC_REGISTER;
-               result.size:=retcgsize;
+               paraloc^.size:=retcgsize;
                if side=callerside then
                if side=callerside then
-                 result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
                else
                else
-                 result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
              end;
              end;
           end;
           end;
       end;
       end;

+ 51 - 36
compiler/m68k/cpupara.pas

@@ -44,8 +44,9 @@ unit cpupara;
           procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara : TCGPara);override;
           procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara : TCGPara);override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
           function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
-          procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
+          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;override;
           procedure createtempparaloc(list: TAsmList;calloption : tproccalloption;parasym : tparavarsym;var cgpara:TCGPara);override;
           procedure createtempparaloc(list: TAsmList;calloption : tproccalloption;parasym : tparavarsym;var cgpara:TCGPara);override;
+          procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
          private
          private
           procedure init_values(var curintreg, curfloatreg: tsuperregister; var cur_stack_offset: aword);
           procedure init_values(var curintreg, curfloatreg: tsuperregister; var cur_stack_offset: aword);
           function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
           function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
@@ -184,45 +185,56 @@ unit cpupara;
       end;
       end;
 
 
     procedure tm68kparamanager.create_funcretloc_info(p: tabstractprocdef; side: tcallercallee);
     procedure tm68kparamanager.create_funcretloc_info(p: tabstractprocdef; side: tcallercallee);
-      var
-        retcgsize: tcgsize;
       begin
       begin
-        { Constructors return self instead of a boolean }
-        if (p.proctypeoption=potype_constructor) then
-          retcgsize:=OS_ADDR
-        else
-          retcgsize:=def_cgsize(p.returndef);
-
-        location_reset(p.funcretloc[side],LOC_INVALID,OS_NO);
+        p.funcretloc[side]:=get_funcretloc(p,side,p.returndef);
+      end;
 
 
-        { explicit paraloc specified? }
-        if po_explicitparaloc in p.procoptions then
-         begin
-           p.funcretloc[side].loc:=LOC_REGISTER;
-           p.funcretloc[side].register:=p.exp_funcretloc;
-           p.funcretloc[side].size:=retcgsize;
-           exit;
-         end;
 
 
+    function tm68kparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;
+      var
+        paraloc : pcgparalocation;
+        retcgsize  : tcgsize;
+      begin
+        result.init;
+        result.alignment:=get_para_align(p.proccalloption);
         { void has no location }
         { void has no location }
-        if is_void(p.returndef) then
+        if is_void(def) then
           begin
           begin
-            location_reset(p.funcretloc[side],LOC_VOID,OS_NO);
+            paraloc:=result.add_location;
+            result.size:=OS_NO;
+            result.intsize:=0;
+            paraloc^.size:=OS_NO;
+            paraloc^.loc:=LOC_VOID;
             exit;
             exit;
           end;
           end;
+        { Constructors return self instead of a boolean }
+        if (p.proctypeoption=potype_constructor) then
+          begin
+            retcgsize:=OS_ADDR;
+            result.intsize:=sizeof(pint);
+          end
+        else
+          begin
+            retcgsize:=def_cgsize(def);
+            result.intsize:=def.size;
+          end;
+        result.size:=retcgsize;
         { Return is passed as var parameter }
         { Return is passed as var parameter }
-        if ret_in_param(p.returndef,p.proccalloption) then
+        if ret_in_param(def,p.proccalloption) then
           begin
           begin
-            p.funcretloc[side].loc:=LOC_REFERENCE;
-            p.funcretloc[side].size:=retcgsize;
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_REFERENCE;
+            paraloc^.size:=retcgsize;
             exit;
             exit;
           end;
           end;
+
+        paraloc:=result.add_location;
         { Return in FPU register? }
         { Return in FPU register? }
         if not(cs_fp_emulation in current_settings.moduleswitches) and (p.returndef.typ=floatdef) then
         if not(cs_fp_emulation in current_settings.moduleswitches) and (p.returndef.typ=floatdef) then
           begin
           begin
-            p.funcretloc[side].loc:=LOC_FPUREGISTER;
-            p.funcretloc[side].register:=NR_FPU_RESULT_REG;
-            p.funcretloc[side].size:=retcgsize;
+            paraloc^.loc:=LOC_FPUREGISTER;
+            paraloc^.register:=NR_FPU_RESULT_REG;
+            paraloc^.size:=retcgsize;
           end
           end
         else
         else
          { Return in register }
          { Return in register }
@@ -230,26 +242,29 @@ unit cpupara;
             if retcgsize in [OS_64,OS_S64] then
             if retcgsize in [OS_64,OS_S64] then
              begin
              begin
                { low 32bits }
                { low 32bits }
-               p.funcretloc[side].loc:=LOC_REGISTER;
-               p.funcretloc[side].size:=OS_64;
+               paraloc^.loc:=LOC_REGISTER;
+               paraloc^.size:=OS_32;
                if side=callerside then
                if side=callerside then
-                 p.funcretloc[side].register64.reglo:=NR_FUNCTION_RESULT64_LOW_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
                else
                else
-                 p.funcretloc[side].register64.reglo:=NR_FUNCTION_RETURN64_LOW_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_LOW_REG;
                { high 32bits }
                { high 32bits }
+               paraloc:=result.add_location;
+               paraloc^.loc:=LOC_REGISTER;
+               paraloc^.size:=OS_32;
                if side=calleeside then
                if side=calleeside then
-                 p.funcretloc[side].register64.reghi:=NR_FUNCTION_RESULT64_HIGH_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
                else
                else
-                 p.funcretloc[side].register64.reghi:=NR_FUNCTION_RETURN64_HIGH_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_HIGH_REG;
              end
              end
             else
             else
              begin
              begin
-               p.funcretloc[side].loc:=LOC_REGISTER;
-               p.funcretloc[side].size:=retcgsize;
+               paraloc^.loc:=LOC_REGISTER;
+               paraloc^.size:=retcgsize;
                if side=callerside then
                if side=callerside then
-                 p.funcretloc[side].register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
                else
                else
-                 p.funcretloc[side].register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
              end;
              end;
           end;
           end;
       end;
       end;

+ 7 - 6
compiler/mips/cgcpu.pas

@@ -545,6 +545,7 @@ var
   Ref: TReference;
   Ref: TReference;
 begin
 begin
   paraloc.check_simple_location;
   paraloc.check_simple_location;
+  paramanager.allocparaloc(list,paraloc.location);
   case paraloc.location^.loc of
   case paraloc.location^.loc of
     LOC_REGISTER, LOC_CREGISTER:
     LOC_REGISTER, LOC_CREGISTER:
       a_load_const_reg(list, size, a, paraloc.location^.Register);
       a_load_const_reg(list, size, a, paraloc.location^.Register);
@@ -570,6 +571,7 @@ var
   tmpreg: TRegister;
   tmpreg: TRegister;
 begin
 begin
   paraloc.check_simple_location;
   paraloc.check_simple_location;
+  paramanager.allocparaloc(list,paraloc.location);
   with paraloc.location^ do
   with paraloc.location^ do
   begin
   begin
     case loc of
     case loc of
@@ -600,6 +602,7 @@ var
   TmpReg: TRegister;
   TmpReg: TRegister;
 begin
 begin
   paraloc.check_simple_location;
   paraloc.check_simple_location;
+  paramanager.allocparaloc(list,paraloc.location);
   with paraloc.location^ do
   with paraloc.location^ do
   begin
   begin
     case loc of
     case loc of
@@ -630,6 +633,7 @@ begin
   hloc := paraloc.location;
   hloc := paraloc.location;
   while assigned(hloc) do
   while assigned(hloc) do
   begin
   begin
+    paramanager.allocparaloc(list,hloc);
     case hloc^.loc of
     case hloc^.loc of
       LOC_REGISTER:
       LOC_REGISTER:
         a_load_ref_reg(list, hloc^.size, hloc^.size, href, hloc^.Register);
         a_load_ref_reg(list, hloc^.size, hloc^.size, href, hloc^.Register);
@@ -1467,15 +1471,12 @@ begin
   paramanager.getintparaloc(pocall_default, 1, paraloc1);
   paramanager.getintparaloc(pocall_default, 1, paraloc1);
   paramanager.getintparaloc(pocall_default, 2, paraloc2);
   paramanager.getintparaloc(pocall_default, 2, paraloc2);
   paramanager.getintparaloc(pocall_default, 3, paraloc3);
   paramanager.getintparaloc(pocall_default, 3, paraloc3);
-  paramanager.allocparaloc(list, paraloc3);
   a_load_const_cgpara(list, OS_INT, len, paraloc3);
   a_load_const_cgpara(list, OS_INT, len, paraloc3);
-  paramanager.allocparaloc(list, paraloc2);
   a_loadaddr_ref_cgpara(list, dest, paraloc2);
   a_loadaddr_ref_cgpara(list, dest, paraloc2);
-  paramanager.allocparaloc(list, paraloc2);
   a_loadaddr_ref_cgpara(list, Source, paraloc1);
   a_loadaddr_ref_cgpara(list, Source, paraloc1);
-  paramanager.freeparaloc(list, paraloc3);
-  paramanager.freeparaloc(list, paraloc2);
-  paramanager.freeparaloc(list, paraloc1);
+  paramanager.freecgpara(list, paraloc3);
+  paramanager.freecgpara(list, paraloc2);
+  paramanager.freecgpara(list, paraloc1);
   alloccpuregisters(list, R_INTREGISTER, paramanager.get_volatile_registers_int(pocall_default));
   alloccpuregisters(list, R_INTREGISTER, paramanager.get_volatile_registers_int(pocall_default));
   alloccpuregisters(list, R_FPUREGISTER, paramanager.get_volatile_registers_fpu(pocall_default));
   alloccpuregisters(list, R_FPUREGISTER, paramanager.get_volatile_registers_fpu(pocall_default));
   a_call_name(list, 'FPC_MOVE', false);
   a_call_name(list, 'FPC_MOVE', false);

+ 53 - 28
compiler/mips/cpupara.pas

@@ -41,6 +41,7 @@ interface
         procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara : TCGPara);override;
         procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara : TCGPara);override;
         function  create_paraloc_info(p : TAbstractProcDef; side: tcallercallee):longint;override;
         function  create_paraloc_info(p : TAbstractProcDef; side: tcallercallee):longint;override;
         function  create_varargs_paraloc_info(p : TAbstractProcDef; varargspara:tvarargsparalist):longint;override;
         function  create_varargs_paraloc_info(p : TAbstractProcDef; varargspara:tvarargsparalist):longint;override;
+        function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;override;
       private
       private
         procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
         procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
         procedure create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
         procedure create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
@@ -134,68 +135,92 @@ implementation
 
 
 
 
     procedure tMIPSELparamanager.create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
     procedure tMIPSELparamanager.create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
+      begin
+        p.funcretloc[side]:=get_funcretloc(p,side,p.returndef);
+      end;
+
+
+    function tMIPSELparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;
       var
       var
+        paraloc : pcgparalocation;
         retcgsize  : tcgsize;
         retcgsize  : tcgsize;
       begin
       begin
+        result.init;
+        result.alignment:=get_para_align(p.proccalloption);
+        { void has no location }
+        if is_void(def) then
+          begin
+            paraloc:=result.add_location;
+            result.size:=OS_NO;
+            result.intsize:=0;
+            paraloc^.size:=OS_NO;
+            paraloc^.loc:=LOC_VOID;
+            exit;
+          end;
         { Constructors return self instead of a boolean }
         { Constructors return self instead of a boolean }
         if (p.proctypeoption=potype_constructor) then
         if (p.proctypeoption=potype_constructor) then
-          retcgsize:=OS_ADDR
+          begin
+            retcgsize:=OS_ADDR;
+            result.intsize:=sizeof(pint);
+          end
         else
         else
-          retcgsize:=def_cgsize(p.returndef);
-
-        location_reset(p.funcretloc[side],LOC_INVALID,OS_NO);
-        p.funcretloc[side].size:=retcgsize;
-        { void has no location }
-        if is_void(p.returndef) then
           begin
           begin
-            p.funcretloc[side].loc:=LOC_VOID;
+            retcgsize:=def_cgsize(def);
+            result.intsize:=def.size;
+          end;
+        result.size:=retcgsize;
+        { Return is passed as var parameter }
+        if ret_in_param(def,p.proccalloption) then
+          begin
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_REFERENCE;
+            paraloc^.size:=retcgsize;
             exit;
             exit;
           end;
           end;
 
 
+        paraloc:=result.add_location;
         { Return in FPU register? }
         { Return in FPU register? }
         if p.returndef.typ=floatdef then
         if p.returndef.typ=floatdef then
           begin
           begin
-            p.funcretloc[side].loc:=LOC_FPUREGISTER;
-            p.funcretloc[side].register:=NR_FPU_RESULT_REG;
+            paraloc^.loc:=LOC_FPUREGISTER;
+            paraloc^.register:=NR_FPU_RESULT_REG;
             if retcgsize=OS_F64 then
             if retcgsize=OS_F64 then
-              setsubreg(p.funcretloc[side].register,R_SUBFD);
-            p.funcretloc[side].size:=retcgsize;
+              setsubreg(paraloc^.register,R_SUBFD);
+            paraloc^.size:=retcgsize;
           end
           end
         else
         else
-         { Return in register? }
-         if not ret_in_param(p.returndef,p.proccalloption) then
+         { Return in register }
           begin
           begin
 {$ifndef cpu64bit}
 {$ifndef cpu64bit}
             if retcgsize in [OS_64,OS_S64] then
             if retcgsize in [OS_64,OS_S64] then
              begin
              begin
-               p.funcretloc[side].loc:=LOC_REGISTER;
                { high }
                { high }
+               paraloc^.loc:=LOC_REGISTER;
                if side=callerside then
                if side=callerside then
-                 p.funcretloc[side].register64.reghi:=NR_FUNCTION_RESULT64_HIGH_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
                else
                else
-                 p.funcretloc[side].register64.reghi:=NR_FUNCTION_RETURN64_HIGH_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_HIGH_REG;
+               paraloc^.size:=OS_32;
                { low }
                { low }
+               paraloc:=result.add_location;
+               paraloc^.loc:=LOC_REGISTER;
                if side=callerside then
                if side=callerside then
-                 p.funcretloc[side].register64.reglo:=NR_FUNCTION_RESULT64_LOW_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
                else
                else
-                 p.funcretloc[side].register64.reglo:=NR_FUNCTION_RETURN64_LOW_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_LOW_REG;
+               paraloc^.size:=OS_32;
              end
              end
             else
             else
 {$endif cpu64bit}
 {$endif cpu64bit}
              begin
              begin
-               p.funcretloc[side].loc:=LOC_REGISTER;
-               p.funcretloc[side].size:=retcgsize;
+               paraloc^.loc:=LOC_REGISTER;
+               paraloc^.size:=retcgsize;
                if side=callerside then
                if side=callerside then
-                 p.funcretloc[side].register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
                else
                else
-                 p.funcretloc[side].register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
              end;
              end;
           end
           end
-        else
-          begin
-            p.funcretloc[side].loc:=LOC_REFERENCE;
-            p.funcretloc[side].size:=retcgsize;
-          end;
       end;
       end;
 
 
     var
     var

+ 23 - 3
compiler/ncal.pas

@@ -28,7 +28,7 @@ interface
     uses
     uses
        cutils,cclasses,
        cutils,cclasses,
        globtype,constexp,
        globtype,constexp,
-       paramgr,parabase,
+       paramgr,parabase,cgbase,
        node,nbas,nutils,
        node,nbas,nutils,
        {$ifdef state_tracking}
        {$ifdef state_tracking}
        nstate,
        nstate,
@@ -75,6 +75,7 @@ interface
           procedure check_inlining;
           procedure check_inlining;
           function  pass1_normal:tnode;
           function  pass1_normal:tnode;
           procedure register_created_object_types;
           procedure register_created_object_types;
+          function get_expect_loc: tcgloc;
        protected
        protected
           procedure objc_convert_to_message_send;virtual;
           procedure objc_convert_to_message_send;virtual;
 
 
@@ -222,7 +223,6 @@ implementation
       ncnv,nld,ninl,nadd,ncon,nmem,nset,nobjc,
       ncnv,nld,ninl,nadd,ncon,nmem,nset,nobjc,
       objcutil,
       objcutil,
       procinfo,cpuinfo,
       procinfo,cpuinfo,
-      cgbase,
       wpobase
       wpobase
       ;
       ;
 
 
@@ -1762,6 +1762,26 @@ implementation
        end;
        end;
 
 
 
 
+    function tcallnode.get_expect_loc: tcgloc;
+      var
+        realresdef: tstoreddef;
+      begin
+        if not assigned(typedef) then
+          realresdef:=tstoreddef(resultdef)
+        else
+          realresdef:=tstoreddef(typedef);
+        if realresdef.is_intregable then
+          result:=LOC_REGISTER
+        else if realresdef.is_fpuregable then
+          if use_vectorfpu(realresdef) then
+            result:=LOC_MMREGISTER
+          else
+            result:=LOC_FPUREGISTER
+        else
+          result:=LOC_REFERENCE
+      end;
+
+
     procedure tcallnode.objc_convert_to_message_send;
     procedure tcallnode.objc_convert_to_message_send;
       var
       var
         block,
         block,
@@ -3221,7 +3241,7 @@ implementation
              else
              else
              { we have only to handle the result if it is used }
              { we have only to handle the result if it is used }
               if (cnf_return_value_used in callnodeflags) then
               if (cnf_return_value_used in callnodeflags) then
-               expectloc:=procdefinition.funcretloc[callerside].loc
+                expectloc:=get_expect_loc
              else
              else
                expectloc:=LOC_VOID;
                expectloc:=LOC_VOID;
            end
            end

+ 2 - 36
compiler/ncgbas.pas

@@ -70,7 +70,7 @@ interface
       cutils,verbose,
       cutils,verbose,
       aasmbase,aasmtai,aasmdata,aasmcpu,
       aasmbase,aasmtai,aasmdata,aasmcpu,
       symsym,symconst,symdef,defutil,
       symsym,symconst,symdef,defutil,
-      nflw,pass_2,
+      nflw,pass_2,ncgutil,
       cgbase,cgobj,
       cgbase,cgobj,
       procinfo,
       procinfo,
       tgobj
       tgobj
@@ -406,41 +406,7 @@ interface
           end
           end
         else if (ti_may_be_in_reg in tempinfo^.flags) then
         else if (ti_may_be_in_reg in tempinfo^.flags) then
           begin
           begin
-            if tempinfo^.typedef.typ=floatdef then
-              begin
-                if use_vectorfpu(tempinfo^.typedef) then
-                  begin
-                    if (tempinfo^.temptype = tt_persistent) then
-                      location_reset(tempinfo^.location,LOC_CMMREGISTER,def_cgsize(tempinfo^.typedef))
-                    else
-                      location_reset(tempinfo^.location,LOC_MMREGISTER,def_cgsize(tempinfo^.typedef));
-                    tempinfo^.location.register:=cg.getmmregister(current_asmdata.CurrAsmList,tempinfo^.location.size);
-                  end
-                else
-                  begin
-                    if (tempinfo^.temptype = tt_persistent) then
-                      location_reset(tempinfo^.location,LOC_CFPUREGISTER,def_cgsize(tempinfo^.typedef))
-                    else
-                      location_reset(tempinfo^.location,LOC_FPUREGISTER,def_cgsize(tempinfo^.typedef));
-                    tempinfo^.location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,tempinfo^.location.size);
-                  end;
-              end
-            else
-              begin
-                if (tempinfo^.temptype = tt_persistent) then
-                  location_reset(tempinfo^.location,LOC_CREGISTER,def_cgsize(tempinfo^.typedef))
-                else
-                  location_reset(tempinfo^.location,LOC_REGISTER,def_cgsize(tempinfo^.typedef));
-{$ifndef cpu64bitalu}
-                if tempinfo^.location.size in [OS_64,OS_S64] then
-                  begin
-                    tempinfo^.location.register64.reglo:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
-                    tempinfo^.location.register64.reghi:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
-                  end
-                else
-{$endif not cpu64bitalu}
-                  tempinfo^.location.register:=cg.getintregister(current_asmdata.CurrAsmList,tempinfo^.location.size);
-              end;
+            location_allocate_register(current_asmdata.CurrAsmList,tempinfo^.location,tempinfo^.typedef,tempinfo^.temptype = tt_persistent);
           end
           end
         else
         else
           begin
           begin

+ 130 - 235
compiler/ncgcal.pas

@@ -45,7 +45,6 @@ interface
 
 
        tcgcallnode = class(tcallnode)
        tcgcallnode = class(tcallnode)
        private
        private
-          retloc: tlocation;
 
 
           procedure handle_return_value;
           procedure handle_return_value;
           procedure release_unused_return_value;
           procedure release_unused_return_value;
@@ -53,6 +52,8 @@ interface
           procedure pushparas;
           procedure pushparas;
           procedure freeparas;
           procedure freeparas;
        protected
        protected
+          retloc: tcgpara;
+
           framepointer_paraloc : tcgpara;
           framepointer_paraloc : tcgpara;
           {# This routine is used to push the current frame pointer
           {# This routine is used to push the current frame pointer
              on the stack. This is used in nested routines where the
              on the stack. This is used in nested routines where the
@@ -68,8 +69,15 @@ interface
           procedure extra_call_code;virtual;
           procedure extra_call_code;virtual;
           procedure extra_post_call_code;virtual;
           procedure extra_post_call_code;virtual;
           procedure do_syscall;virtual;abstract;
           procedure do_syscall;virtual;abstract;
+
+          { The function result is returned in a tcgpara. This tcgpara has to
+            be translated into a tlocation so the rest of the code generator
+            can work with it. This routine decides what the most appropriate
+            tlocation is and sets self.location based on that. }
+          procedure set_result_location(realresdef: tstoreddef);virtual;
        public
        public
           procedure pass_generate_code;override;
           procedure pass_generate_code;override;
+          destructor destroy;override;
        end;
        end;
 
 
 
 
@@ -271,6 +279,27 @@ implementation
       end;
       end;
 
 
 
 
+    procedure tcgcallnode.set_result_location(realresdef: tstoreddef);
+      begin
+        if realresdef.is_intregable or
+           realresdef.is_fpuregable or
+           { avoid temporarily storing pointer-sized entities that can't be
+             regvars, such as reference-counted pointers, to memory --
+             no exception can occur right now (except in case of existing
+             memory corruption), and we'd store them to a regular temp
+             anyway and that is not safer than keeping them in a register }
+           ((realresdef.size=sizeof(aint)) and
+            (retloc.location^.loc=LOC_REGISTER) and
+            not assigned(retloc.location^.next)) then
+          location_allocate_register(current_asmdata.CurrAsmList,location,realresdef,false)
+        else
+          begin
+            location_reset_ref(location,LOC_REFERENCE,def_cgsize(realresdef),0);
+            tg.GetTemp(current_asmdata.CurrAsmList,retloc.intsize,retloc.Alignment,tt_normal,location.reference);
+          end;
+      end;
+
+
     procedure tcgcallnode.pop_parasize(pop_size:longint);
     procedure tcgcallnode.pop_parasize(pop_size:longint);
       begin
       begin
       end;
       end;
@@ -278,165 +307,61 @@ implementation
 
 
     procedure tcgcallnode.handle_return_value;
     procedure tcgcallnode.handle_return_value;
       var
       var
-        tmpcgsize,
-        cgsize    : tcgsize;
-{$ifdef cpu64bitaddr}
-        ref       : treference;
-{$endif cpu64bitaddr}
-{$ifndef x86}
-        hregister : tregister;
-{$endif not x86}
+        realresdef: tstoreddef;
       begin
       begin
         { Check that the return location is set when the result is passed in
         { Check that the return location is set when the result is passed in
           a parameter }
           a parameter }
         if (procdefinition.proctypeoption<>potype_constructor) and
         if (procdefinition.proctypeoption<>potype_constructor) and
            paramanager.ret_in_param(resultdef,procdefinition.proccalloption) then
            paramanager.ret_in_param(resultdef,procdefinition.proccalloption) then
           begin
           begin
+            { self.location is set near the end of secondcallparan so it
+              refers to the implicit result parameter }
             if location.loc<>LOC_REFERENCE then
             if location.loc<>LOC_REFERENCE then
               internalerror(200304241);
               internalerror(200304241);
             exit;
             exit;
           end;
           end;
 
 
-        { Load normal (ordinal,float,pointer) result value from accumulator }
-        cgsize:=retloc.size;
-        case retloc.loc of
-           LOC_FPUREGISTER:
-             begin
-               location_reset(location,LOC_FPUREGISTER,cgsize);
-               location.register:=retloc.register;
+        if not assigned(typedef) then
+          realresdef:=tstoreddef(resultdef)
+        else
+          realresdef:=tstoreddef(typedef);
+
 {$ifdef x86}
 {$ifdef x86}
-               tcgx86(cg).inc_fpu_stack;
-{$else x86}
-               { Do not move the physical register to a virtual one in case
-                 the return value is not used, because if the virtual one is
-                 then mapped to the same register as the physical one, we will
-                 end up with two deallocs of this register (one inserted here,
-                 one inserted by the register allocator), which unbalances the
-                 register allocation information.  The return register(s) will
-                 be freed by location_free() in release_unused_return_value
-                 (mantis #13536).  }
-               if (cnf_return_value_used in callnodeflags) then
-                 begin
-                   if getsupreg(retloc.register)<first_fpu_imreg then
-                     cg.ungetcpuregister(current_asmdata.CurrAsmList,retloc.register);
-                   hregister:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
-                   cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,location.size,location.size,location.register,hregister);
-                   location.register:=hregister;
-                 end;
+        if (retloc.location^.loc=LOC_FPUREGISTER) then
+          begin
+            tcgx86(cg).inc_fpu_stack;
+            location_reset(location,LOC_FPUREGISTER,retloc.location^.size);
+            location.register:=retloc.location^.register;
+          end
+        else
 {$endif x86}
 {$endif x86}
-             end;
-
-           LOC_REGISTER:
-             begin
-               if cgsize<>OS_NO then
-                begin
-                  location_reset(location,LOC_REGISTER,cgsize);
-{$ifdef cpu64bitaddr}
-                  { x86-64 system v abi:
-                    structs with up to 16 bytes are returned in registers }
-                  if cgsize in [OS_128,OS_S128] then
-                    begin
-                      if retloc.loc<>LOC_REGISTER then
-                        internalerror(2009042001);
-                      { See #13536 comment above.  }
-                      if (cnf_return_value_used in callnodeflags) then
-                        begin
-                          tg.GetTemp(current_asmdata.CurrAsmList,16,8,tt_normal,ref);
-                          location_reset_ref(location,LOC_REFERENCE,OS_NO,0);
-                          location.reference:=ref;
-                          if getsupreg(retloc.register)<first_int_imreg then
-                            cg.ungetcpuregister(current_asmdata.CurrAsmList,retloc.register);
-                          cg.a_load_reg_ref(current_asmdata.CurrAsmList,OS_64,OS_64,retloc.register,ref);
-                          inc(ref.offset,8);
-                          if getsupreg(retloc.registerhi)<first_int_imreg then
-                            cg.ungetcpuregister(current_asmdata.CurrAsmList,retloc.registerhi);
-                          cg.a_load_reg_ref(current_asmdata.CurrAsmList,OS_64,OS_64,retloc.registerhi,ref);
-                        end
-                      else
-                        location:=retloc;
-                    end
-                  else
-{$else cpu64bitaddr}
-                  if cgsize in [OS_64,OS_S64] then
-                    begin
-                      if retloc.loc<>LOC_REGISTER then
-                        internalerror(200409141);
-                      { See #13536 comment above.  }
-                      if (cnf_return_value_used in callnodeflags) then
-                        begin
-                          { the function result registers are already allocated }
-                          if getsupreg(retloc.register64.reglo)<first_int_imreg then
-                            cg.ungetcpuregister(current_asmdata.CurrAsmList,retloc.register64.reglo);
-                          location.register64.reglo:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
-                          cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_32,OS_32,retloc.register64.reglo,location.register64.reglo);
-                          if getsupreg(retloc.register64.reghi)<first_int_imreg then
-                            cg.ungetcpuregister(current_asmdata.CurrAsmList,retloc.register64.reghi);
-                          location.register64.reghi:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
-                          cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_32,OS_32,retloc.register64.reghi,location.register64.reghi);
-                        end
-                      else
-                        location:=retloc;
-                    end
-                  else
-{$endif not cpu64bitaddr}
-                    begin
-                      { change register size after the unget because the
-                        getregister was done for the full register
-                        def_cgsize(resultdef) is used here because
-                        it could be a constructor call }
-
-                      { See #13536 comment above.  }
-                      if (cnf_return_value_used in callnodeflags) then
-                        begin
-                          if getsupreg(retloc.register)<first_int_imreg then
-                            cg.ungetcpuregister(current_asmdata.CurrAsmList,retloc.register);
-
-                          { but use def_size only if it returns something valid because in
-                            case of odd sized structured results in registers def_cgsize(resultdef)
-                            could return OS_NO }
-                          if def_cgsize(resultdef)<>OS_NO then
-                            tmpcgsize:=def_cgsize(resultdef)
-                          else
-                            tmpcgsize:=cgsize;
-
-                          location.register:=cg.getintregister(current_asmdata.CurrAsmList,tmpcgsize);
-                          cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,tmpcgsize,retloc.register,location.register);
-                        end
-                      else
-                        location:=retloc;
-                    end;
+          begin
+            { get a tlocation that can hold the return value that's currently in
+            the the return value's tcgpara }
+            set_result_location(realresdef);
+
+            { Do not move the physical register to a virtual one in case
+              the return value is not used, because if the virtual one is
+              then mapped to the same register as the physical one, we will
+              end up with two deallocs of this register (one inserted here,
+              one inserted by the register allocator), which unbalances the
+              register allocation information.  The return register(s) will
+              be freed by location_free() in release_unused_return_value
+              (mantis #13536).  }
+            if (cnf_return_value_used in callnodeflags) or
+               assigned(funcretnode) then
+              begin
+                gen_load_cgpara_loc(current_asmdata.CurrAsmList,realresdef,retloc,location,false);
 {$ifdef arm}
 {$ifdef arm}
-                  if (resultdef.typ=floatdef) and (current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11]) then
-                    begin
-                      location_force_mem(current_asmdata.CurrAsmList,location);
-                    end;
+                if (resultdef.typ=floatdef) and
+                   (location.loc=LOC_REGISTER) and
+                   (current_settings.fputype in [fpu_fpa,fpu_fpa10,fpu_fpa11]) then
+                  begin
+                    location_force_mem(current_asmdata.CurrAsmList,location);
+                  end;
 {$endif arm}
 {$endif arm}
-                end
-               else
-                begin
-                  if resultdef.size>0 then
-                    internalerror(200305131);
-                end;
-             end;
-
-           LOC_MMREGISTER:
-             begin
-               { See #13536 comment above.  }
-               if (cnf_return_value_used in callnodeflags) then
-                 begin
-                   location_reset(location,LOC_MMREGISTER,cgsize);
-                   if getsupreg(retloc.register)<first_mm_imreg then
-                     cg.ungetcpuregister(current_asmdata.CurrAsmList,retloc.register);
-                   location.register:=cg.getmmregister(current_asmdata.CurrAsmList,cgsize);
-                   cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,retloc.register,location.register,mms_movescalar);
-                 end
-               else
-                 location:=retloc;
-             end;
-
-           else
-             internalerror(200405023);
-        end;
+              end;
+          end;
 
 
         { copy value to the final location if this was already provided to the
         { copy value to the final location if this was already provided to the
           callnode. This must be done after the call node, because the location can
           callnode. This must be done after the call node, because the location can
@@ -455,16 +380,16 @@ implementation
             case location.loc of
             case location.loc of
               LOC_REGISTER :
               LOC_REGISTER :
 {$ifndef cpu64bitalu}
 {$ifndef cpu64bitalu}
-                if cgsize in [OS_64,OS_S64] then
+                if location.size in [OS_64,OS_S64] then
                   cg64.a_load64_reg_loc(current_asmdata.CurrAsmList,location.register64,funcretnode.location)
                   cg64.a_load64_reg_loc(current_asmdata.CurrAsmList,location.register64,funcretnode.location)
                 else
                 else
 {$endif}
 {$endif}
-                  cg.a_load_reg_loc(current_asmdata.CurrAsmList,cgsize,location.register,funcretnode.location);
+                  cg.a_load_reg_loc(current_asmdata.CurrAsmList,location.size,location.register,funcretnode.location);
               LOC_REFERENCE:
               LOC_REFERENCE:
                 begin
                 begin
                   case funcretnode.location.loc of
                   case funcretnode.location.loc of
                     LOC_REGISTER:
                     LOC_REGISTER:
-                      cg.a_load_ref_reg(current_asmdata.CurrAsmList,cgsize,cgsize,location.reference,funcretnode.location.register);
+                      cg.a_load_ref_reg(current_asmdata.CurrAsmList,location.size,location.size,location.reference,funcretnode.location.register);
                     LOC_REFERENCE:
                     LOC_REFERENCE:
                       cg.g_concatcopy(current_asmdata.CurrAsmList,location.reference,funcretnode.location.reference,resultdef.size);
                       cg.g_concatcopy(current_asmdata.CurrAsmList,location.reference,funcretnode.location.reference,resultdef.size);
                     else
                     else
@@ -486,24 +411,24 @@ implementation
           tree is generated, because that converts the temp from persistent to normal }
           tree is generated, because that converts the temp from persistent to normal }
         if not(cnf_return_value_used in callnodeflags) then
         if not(cnf_return_value_used in callnodeflags) then
           begin
           begin
-           case location.loc of
-             LOC_REFERENCE :
-               begin
-                 if is_managed_type(resultdef) then
-                    cg.g_finalize(current_asmdata.CurrAsmList,resultdef,location.reference);
-                  tg.ungetiftemp(current_asmdata.CurrAsmList,location.reference);
-               end;
-{$ifdef x86}
-             LOC_FPUREGISTER :
+            case location.loc of
+              LOC_REFERENCE :
                 begin
                 begin
-                  { release FPU stack }
-                  emit_reg(A_FSTP,S_NO,NR_FPU_RESULT_REG);
-                  tcgx86(cg).dec_fpu_stack;
+                  if is_managed_type(resultdef) then
+                     cg.g_finalize(current_asmdata.CurrAsmList,resultdef,location.reference);
+                   tg.ungetiftemp(current_asmdata.CurrAsmList,location.reference);
                 end;
                 end;
+{$ifdef x86}
+              LOC_FPUREGISTER :
+                 begin
+                   { release FPU stack }
+                   emit_reg(A_FSTP,S_NO,NR_FPU_RESULT_REG);
+                   tcgx86(cg).dec_fpu_stack;
+                 end;
 {$endif x86}
 {$endif x86}
-           end;
-            if retloc.size<>OS_NO then
-              location_free(current_asmdata.CurrAsmList,retloc);
+            end;
+            if (retloc.intsize<>0) then
+              paramanager.freecgpara(current_asmdata.CurrAsmList,retloc);
             location_reset(location,LOC_VOID,OS_NO);
             location_reset(location,LOC_VOID,OS_NO);
           end;
           end;
       end;
       end;
@@ -570,7 +495,7 @@ implementation
                  { better check for the real location of the parameter here, when stack passed parameters
                  { better check for the real location of the parameter here, when stack passed parameters
                    are saved temporary in registers, checking for the tmpparaloc.loc is wrong
                    are saved temporary in registers, checking for the tmpparaloc.loc is wrong
                  }
                  }
-                 paramanager.freeparaloc(current_asmdata.CurrAsmList,ppn.tempcgpara);
+                 paramanager.freecgpara(current_asmdata.CurrAsmList,ppn.tempcgpara);
                  tmpparaloc:=ppn.tempcgpara.location;
                  tmpparaloc:=ppn.tempcgpara.location;
                  sizeleft:=ppn.tempcgpara.intsize;
                  sizeleft:=ppn.tempcgpara.intsize;
                  calleralignment:=ppn.parasym.paraloc[callerside].alignment;
                  calleralignment:=ppn.parasym.paraloc[callerside].alignment;
@@ -670,7 +595,7 @@ implementation
              if (ppn.left.nodetype<>nothingn) then
              if (ppn.left.nodetype<>nothingn) then
                begin
                begin
                  if (ppn.parasym.paraloc[callerside].location^.loc <> LOC_REFERENCE) then
                  if (ppn.parasym.paraloc[callerside].location^.loc <> LOC_REFERENCE) then
-                   paramanager.freeparaloc(current_asmdata.CurrAsmList,ppn.parasym.paraloc[callerside]);
+                   paramanager.freecgpara(current_asmdata.CurrAsmList,ppn.parasym.paraloc[callerside]);
                end;
                end;
              ppn:=tcgcallparanode(ppn.right);
              ppn:=tcgcallparanode(ppn.right);
            end;
            end;
@@ -690,6 +615,7 @@ implementation
         pvreg,
         pvreg,
         vmtreg : tregister;
         vmtreg : tregister;
         oldaktcallnode : tcallnode;
         oldaktcallnode : tcallnode;
+        retlocitem: pcgparalocation;
 {$ifdef vtentry}
 {$ifdef vtentry}
         sym : tasmsymbol;
         sym : tasmsymbol;
 {$endif vtentry}
 {$endif vtentry}
@@ -717,48 +643,23 @@ implementation
               retloc:=procdefinition.funcretloc[callerside]
               retloc:=procdefinition.funcretloc[callerside]
             else
             else
               retloc:=paramanager.get_funcretloc(procdefinition,callerside,typedef);
               retloc:=paramanager.get_funcretloc(procdefinition,callerside,typedef);
-            case retloc.loc of
-              LOC_REGISTER,
-              LOC_CREGISTER:
-                begin
-{$ifdef cpu64bitaddr}
-                  { x86-64 system v abi:
-                    structs with up to 16 bytes are returned in registers }
-                  if retloc.size in [OS_128,OS_S128] then
-                    begin
-                      include(regs_to_save_int,getsupreg(retloc.register));
-                      include(regs_to_save_int,getsupreg(retloc.registerhi));
-                    end
+            retlocitem:=retloc.location;
+            while assigned(retlocitem) do
+              begin
+                case retlocitem^.loc of
+                  LOC_REGISTER:
+                    include(regs_to_save_int,getsupreg(retlocitem^.register));
+                  LOC_FPUREGISTER:
+                    include(regs_to_save_fpu,getsupreg(retlocitem^.register));
+                  LOC_MMREGISTER:
+                    include(regs_to_save_mm,getsupreg(retlocitem^.register));
+                  LOC_REFERENCE,
+                  LOC_VOID:
+                    ;
                   else
                   else
-{$else cpu64bitaddr}
-                  if retloc.size in [OS_64,OS_S64] then
-                    begin
-                      include(regs_to_save_int,getsupreg(retloc.register64.reglo));
-                      include(regs_to_save_int,getsupreg(retloc.register64.reghi));
-                    end
-                  else
-{$endif not cpu64bitaddr}
-                    include(regs_to_save_int,getsupreg(retloc.register));
+                    internalerror(2004110213);
                 end;
                 end;
-              LOC_FPUREGISTER,
-              LOC_CFPUREGISTER:
-                begin
-                  include(regs_to_save_fpu,getsupreg(retloc.register));
-{$ifdef SPARC}
-                  { SPARC uses two successive single precision fpu registers
-                    for double-precision values  }
-                  if retloc.size=OS_F64 then
-                    include(regs_to_save_fpu,succ(getsupreg(retloc.register)));
-{$endif SPARC}
-                end;
-              LOC_MMREGISTER,
-              LOC_CMMREGISTER:
-                include(regs_to_save_mm,getsupreg(retloc.register));
-              LOC_REFERENCE,
-              LOC_VOID:
-                ;
-              else
-                internalerror(2004110213);
+                retlocitem:=retlocitem^.next;
               end;
               end;
           end;
           end;
 
 
@@ -948,38 +849,24 @@ implementation
            function result }
            function result }
          if (not is_void(resultdef)) then
          if (not is_void(resultdef)) then
            begin
            begin
-             case retloc.loc of
-               LOC_REGISTER,
-               LOC_CREGISTER:
-                 begin
-{$ifndef cpu64bitalu}
-                   if retloc.size in [OS_64,OS_S64] then
-                     begin
-                       exclude(regs_to_save_int,getsupreg(retloc.register64.reghi));
-                       exclude(regs_to_save_int,getsupreg(retloc.register64.reglo));
-                     end
-{$else not cpu64bitalu}
-                   if retloc.size in [OS_128,OS_S128] then
-                     begin
-                       exclude(regs_to_save_int,getsupreg(retloc.register));
-                       exclude(regs_to_save_int,getsupreg(retloc.registerhi));
-                     end
-{$endif not cpu64bitalu}
+             retlocitem:=retloc.location;
+             while assigned(retlocitem) do
+               begin
+                 case retlocitem^.loc of
+                   LOC_REGISTER:
+                     exclude(regs_to_save_int,getsupreg(retlocitem^.register));
+                   LOC_FPUREGISTER:
+                     exclude(regs_to_save_fpu,getsupreg(retlocitem^.register));
+                   LOC_MMREGISTER:
+                     exclude(regs_to_save_mm,getsupreg(retlocitem^.register));
+                   LOC_REFERENCE,
+                   LOC_VOID:
+                     ;
                    else
                    else
-                     exclude(regs_to_save_int,getsupreg(retloc.register));
+                     internalerror(2004110214);
                  end;
                  end;
-               LOC_FPUREGISTER,
-               LOC_CFPUREGISTER:
-                 exclude(regs_to_save_fpu,getsupreg(retloc.register));
-               LOC_MMREGISTER,
-               LOC_CMMREGISTER:
-                 exclude(regs_to_save_mm,getsupreg(retloc.register));
-               LOC_REFERENCE,
-               LOC_VOID:
-                 ;
-               else
-                 internalerror(2004110214);
-              end;
+                 retlocitem:=retlocitem^.next;
+               end;
            end;
            end;
 
 
 {$if defined(x86) or defined(arm)}
 {$if defined(x86) or defined(arm)}
@@ -1037,6 +924,14 @@ implementation
       end;
       end;
 
 
 
 
+    destructor tcgcallnode.destroy;
+      begin
+        if assigned(typedef) then
+          retloc.done;
+        inherited destroy;
+      end;
+
+
 begin
 begin
    ccallparanode:=tcgcallparanode;
    ccallparanode:=tcgcallparanode;
    ccallnode:=tcgcallnode;
    ccallnode:=tcgcallnode;

+ 8 - 18
compiler/ncgflw.pas

@@ -977,14 +977,12 @@ implementation
               { Push parameters }
               { Push parameters }
               if assigned(right) then
               if assigned(right) then
                 begin
                 begin
-                  paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc3);
                   { frame tree }
                   { frame tree }
                   if assigned(third) then
                   if assigned(third) then
                     cg.a_load_loc_cgpara(current_asmdata.CurrAsmList,third.location,paraloc3)
                     cg.a_load_loc_cgpara(current_asmdata.CurrAsmList,third.location,paraloc3)
                   else
                   else
                     cg.a_load_const_cgpara(current_asmdata.CurrAsmList,OS_INT,0,paraloc3);
                     cg.a_load_const_cgpara(current_asmdata.CurrAsmList,OS_INT,0,paraloc3);
                   { push address }
                   { push address }
-                  paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc2);
                   cg.a_load_loc_cgpara(current_asmdata.CurrAsmList,right.location,paraloc2);
                   cg.a_load_loc_cgpara(current_asmdata.CurrAsmList,right.location,paraloc2);
                 end
                 end
               else
               else
@@ -994,20 +992,17 @@ implementation
                    cg.a_label(current_asmdata.CurrAsmList,a);
                    cg.a_label(current_asmdata.CurrAsmList,a);
                    reference_reset_symbol(href2,a,0,1);
                    reference_reset_symbol(href2,a,0,1);
                    { push current frame }
                    { push current frame }
-                   paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc3);
                    cg.a_load_reg_cgpara(current_asmdata.CurrAsmList,OS_ADDR,NR_FRAME_POINTER_REG,paraloc3);
                    cg.a_load_reg_cgpara(current_asmdata.CurrAsmList,OS_ADDR,NR_FRAME_POINTER_REG,paraloc3);
                    { push current address }
                    { push current address }
-                   paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc2);
                    if target_info.system <> system_powerpc_macos then
                    if target_info.system <> system_powerpc_macos then
                      cg.a_loadaddr_ref_cgpara(current_asmdata.CurrAsmList,href2,paraloc2)
                      cg.a_loadaddr_ref_cgpara(current_asmdata.CurrAsmList,href2,paraloc2)
                    else
                    else
                      cg.a_load_const_cgpara(current_asmdata.CurrAsmList,OS_INT,0,paraloc2);
                      cg.a_load_const_cgpara(current_asmdata.CurrAsmList,OS_INT,0,paraloc2);
                 end;
                 end;
-              paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
               cg.a_load_loc_cgpara(current_asmdata.CurrAsmList,left.location,paraloc1);
               cg.a_load_loc_cgpara(current_asmdata.CurrAsmList,left.location,paraloc1);
-              paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
-              paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc2);
-              paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc3);
+              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
+              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc2);
+              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc3);
               cg.allocallcpuregisters(current_asmdata.CurrAsmList);
               cg.allocallcpuregisters(current_asmdata.CurrAsmList);
               cg.a_call_name(current_asmdata.CurrAsmList,'FPC_RAISEEXCEPTION',false);
               cg.a_call_name(current_asmdata.CurrAsmList,'FPC_RAISEEXCEPTION',false);
               cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
               cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -1045,10 +1040,9 @@ implementation
          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
          paraloc1.init;
          paraloc1.init;
          paramanager.getintparaloc(pocall_default,1,paraloc1);
          paramanager.getintparaloc(pocall_default,1,paraloc1);
-         paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
          cg.a_load_reg_cgpara(current_asmdata.CurrAsmList,OS_ADDR,NR_FUNCTION_RESULT_REG,paraloc1);
          cg.a_load_reg_cgpara(current_asmdata.CurrAsmList,OS_ADDR,NR_FUNCTION_RESULT_REG,paraloc1);
-         paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+         paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
          cg.allocallcpuregisters(current_asmdata.CurrAsmList);
          cg.allocallcpuregisters(current_asmdata.CurrAsmList);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_DESTROYEXCEPTION',false);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_DESTROYEXCEPTION',false);
          cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
          cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -1169,9 +1163,8 @@ implementation
               }
               }
               paraloc1.init;
               paraloc1.init;
               paramanager.getintparaloc(pocall_default,1,paraloc1);
               paramanager.getintparaloc(pocall_default,1,paraloc1);
-              paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
               cg.a_load_const_cgpara(current_asmdata.CurrAsmList,OS_ADDR,-1,paraloc1);
               cg.a_load_const_cgpara(current_asmdata.CurrAsmList,OS_ADDR,-1,paraloc1);
-              paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
               cg.allocallcpuregisters(current_asmdata.CurrAsmList);
               cg.allocallcpuregisters(current_asmdata.CurrAsmList);
               cg.a_call_name(current_asmdata.CurrAsmList,'FPC_CATCHES',false);
               cg.a_call_name(current_asmdata.CurrAsmList,'FPC_CATCHES',false);
               cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
               cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -1202,9 +1195,8 @@ implementation
               paraloc1.init;
               paraloc1.init;
               paramanager.getintparaloc(pocall_default,1,paraloc1);
               paramanager.getintparaloc(pocall_default,1,paraloc1);
               cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
               cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
-              paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
               cg.a_load_reg_cgpara(current_asmdata.CurrAsmList, OS_ADDR, NR_FUNCTION_RESULT_REG, paraloc1);
               cg.a_load_reg_cgpara(current_asmdata.CurrAsmList, OS_ADDR, NR_FUNCTION_RESULT_REG, paraloc1);
-              paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
               cg.allocallcpuregisters(current_asmdata.CurrAsmList);
               cg.allocallcpuregisters(current_asmdata.CurrAsmList);
               cg.a_call_name(current_asmdata.CurrAsmList,'FPC_DESTROYEXCEPTION',false);
               cg.a_call_name(current_asmdata.CurrAsmList,'FPC_DESTROYEXCEPTION',false);
               cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
               cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -1358,9 +1350,8 @@ implementation
          { send the vmt parameter }
          { send the vmt parameter }
          reference_reset_symbol(href2,current_asmdata.RefAsmSymbol(excepttype.vmt_mangledname),0,sizeof(pint));
          reference_reset_symbol(href2,current_asmdata.RefAsmSymbol(excepttype.vmt_mangledname),0,sizeof(pint));
          paramanager.getintparaloc(pocall_default,1,paraloc1);
          paramanager.getintparaloc(pocall_default,1,paraloc1);
-         paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
          cg.a_loadaddr_ref_cgpara(current_asmdata.CurrAsmList,href2,paraloc1);
          cg.a_loadaddr_ref_cgpara(current_asmdata.CurrAsmList,href2,paraloc1);
-         paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+         paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
          cg.allocallcpuregisters(current_asmdata.CurrAsmList);
          cg.allocallcpuregisters(current_asmdata.CurrAsmList);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_CATCHES',false);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_CATCHES',false);
          cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
          cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -1431,9 +1422,8 @@ implementation
          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
          paramanager.getintparaloc(pocall_default,1,paraloc1);
          paramanager.getintparaloc(pocall_default,1,paraloc1);
          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
-         paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
          cg.a_load_reg_cgpara(current_asmdata.CurrAsmList, OS_ADDR, NR_FUNCTION_RESULT_REG, paraloc1);
          cg.a_load_reg_cgpara(current_asmdata.CurrAsmList, OS_ADDR, NR_FUNCTION_RESULT_REG, paraloc1);
-         paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+         paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
          cg.allocallcpuregisters(current_asmdata.CurrAsmList);
          cg.allocallcpuregisters(current_asmdata.CurrAsmList);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_DESTROYEXCEPTION',false);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_DESTROYEXCEPTION',false);
          cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
          cg.deallocallcpuregisters(current_asmdata.CurrAsmList);

+ 4 - 8
compiler/ncginl.pas

@@ -216,22 +216,18 @@ implementation
        if codegenerror then
        if codegenerror then
           exit;
           exit;
        { push erroraddr }
        { push erroraddr }
-       paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc4);
        cg.a_load_reg_cgpara(current_asmdata.CurrAsmList,OS_ADDR,NR_FRAME_POINTER_REG,paraloc4);
        cg.a_load_reg_cgpara(current_asmdata.CurrAsmList,OS_ADDR,NR_FRAME_POINTER_REG,paraloc4);
        { push lineno }
        { push lineno }
-       paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc3);
        cg.a_load_const_cgpara(current_asmdata.CurrAsmList,OS_INT,current_filepos.line,paraloc3);
        cg.a_load_const_cgpara(current_asmdata.CurrAsmList,OS_INT,current_filepos.line,paraloc3);
        { push filename }
        { push filename }
-       paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc2);
        cg.a_loadaddr_ref_cgpara(current_asmdata.CurrAsmList,hp2.location.reference,paraloc2);
        cg.a_loadaddr_ref_cgpara(current_asmdata.CurrAsmList,hp2.location.reference,paraloc2);
        { push msg }
        { push msg }
-       paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
        cg.a_loadaddr_ref_cgpara(current_asmdata.CurrAsmList,hp3.location.reference,paraloc1);
        cg.a_loadaddr_ref_cgpara(current_asmdata.CurrAsmList,hp3.location.reference,paraloc1);
        { call }
        { call }
-       paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
-       paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc2);
-       paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc3);
-       paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc4);
+       paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
+       paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc2);
+       paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc3);
+       paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc4);
        cg.allocallcpuregisters(current_asmdata.CurrAsmList);
        cg.allocallcpuregisters(current_asmdata.CurrAsmList);
        cg.a_call_name(current_asmdata.CurrAsmList,'FPC_ASSERT',false);
        cg.a_call_name(current_asmdata.CurrAsmList,'FPC_ASSERT',false);
        cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
        cg.deallocallcpuregisters(current_asmdata.CurrAsmList);

+ 1 - 2
compiler/ncgld.pas

@@ -349,9 +349,8 @@ implementation
                           reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname),0,sizeof(pint))
                           reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname),0,sizeof(pint))
                         else
                         else
                           reference_reset_symbol(href,current_asmdata.WeakRefAsmSymbol(gvs.mangledname),0,sizeof(pint));
                           reference_reset_symbol(href,current_asmdata.WeakRefAsmSymbol(gvs.mangledname),0,sizeof(pint));
-                        paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
                         cg.a_load_ref_cgpara(current_asmdata.CurrAsmList,OS_32,href,paraloc1);
                         cg.a_load_ref_cgpara(current_asmdata.CurrAsmList,OS_32,href,paraloc1);
-                        paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+                        paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
                         paraloc1.done;
                         paraloc1.done;
                         cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                         cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                         cg.a_call_reg(current_asmdata.CurrAsmList,hregister);
                         cg.a_call_reg(current_asmdata.CurrAsmList,hregister);

+ 1 - 2
compiler/ncgmat.pas

@@ -368,9 +368,8 @@ implementation
                   cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_NE,0,hdenom,hl);
                   cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_NE,0,hdenom,hl);
                   paraloc1.init;
                   paraloc1.init;
                   paramanager.getintparaloc(pocall_default,1,paraloc1);
                   paramanager.getintparaloc(pocall_default,1,paraloc1);
-                  paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
                   cg.a_load_const_cgpara(current_asmdata.CurrAsmList,OS_S32,200,paraloc1);
                   cg.a_load_const_cgpara(current_asmdata.CurrAsmList,OS_S32,200,paraloc1);
-                  paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+                  paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
                   cg.a_call_name(current_asmdata.CurrAsmList,'FPC_HANDLEERROR',false);
                   cg.a_call_name(current_asmdata.CurrAsmList,'FPC_HANDLEERROR',false);
                   paraloc1.done;
                   paraloc1.done;
                   cg.a_label(current_asmdata.CurrAsmList,hl);
                   cg.a_label(current_asmdata.CurrAsmList,hl);

+ 10 - 20
compiler/ncgmem.pas

@@ -265,9 +265,8 @@ implementation
           begin
           begin
             paraloc1.init;
             paraloc1.init;
             paramanager.getintparaloc(pocall_default,1,paraloc1);
             paramanager.getintparaloc(pocall_default,1,paraloc1);
-            paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
             cg.a_load_reg_cgpara(current_asmdata.CurrAsmList, OS_ADDR,location.reference.base,paraloc1);
             cg.a_load_reg_cgpara(current_asmdata.CurrAsmList, OS_ADDR,location.reference.base,paraloc1);
-            paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+            paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
             paraloc1.done;
             paraloc1.done;
             cg.allocallcpuregisters(current_asmdata.CurrAsmList);
             cg.allocallcpuregisters(current_asmdata.CurrAsmList);
             cg.a_call_name(current_asmdata.CurrAsmList,'FPC_CHECKPOINTER',false);
             cg.a_call_name(current_asmdata.CurrAsmList,'FPC_CHECKPOINTER',false);
@@ -332,9 +331,8 @@ implementation
                 not(cs_compilesystem in current_settings.moduleswitches) then
                 not(cs_compilesystem in current_settings.moduleswitches) then
               begin
               begin
                 paramanager.getintparaloc(pocall_default,1,paraloc1);
                 paramanager.getintparaloc(pocall_default,1,paraloc1);
-                paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
                 cg.a_load_reg_cgpara(current_asmdata.CurrAsmList, OS_ADDR,location.reference.base,paraloc1);
                 cg.a_load_reg_cgpara(current_asmdata.CurrAsmList, OS_ADDR,location.reference.base,paraloc1);
-                paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+                paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
                 cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                 cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                 cg.a_call_name(current_asmdata.CurrAsmList,'FPC_CHECKPOINTER',false);
                 cg.a_call_name(current_asmdata.CurrAsmList,'FPC_CHECKPOINTER',false);
                 cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
                 cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -351,9 +349,8 @@ implementation
                 not(cs_compilesystem in current_settings.moduleswitches) then
                 not(cs_compilesystem in current_settings.moduleswitches) then
               begin
               begin
                 paramanager.getintparaloc(pocall_default,1,paraloc1);
                 paramanager.getintparaloc(pocall_default,1,paraloc1);
-                paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
                 cg.a_load_reg_cgpara(current_asmdata.CurrAsmList, OS_ADDR,location.reference.base,paraloc1);
                 cg.a_load_reg_cgpara(current_asmdata.CurrAsmList, OS_ADDR,location.reference.base,paraloc1);
-                paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+                paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
                 cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                 cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                 cg.a_call_name(current_asmdata.CurrAsmList,'FPC_CHECKPOINTER',false);
                 cg.a_call_name(current_asmdata.CurrAsmList,'FPC_CHECKPOINTER',false);
                 cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
                 cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -685,12 +682,10 @@ implementation
             begin
             begin
                paramanager.getintparaloc(pocall_default,1,paraloc1);
                paramanager.getintparaloc(pocall_default,1,paraloc1);
                paramanager.getintparaloc(pocall_default,2,paraloc2);
                paramanager.getintparaloc(pocall_default,2,paraloc2);
-               paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc2);
                cg.a_load_loc_cgpara(current_asmdata.CurrAsmList,right.location,paraloc2);
                cg.a_load_loc_cgpara(current_asmdata.CurrAsmList,right.location,paraloc2);
-               paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
                cg.a_load_loc_cgpara(current_asmdata.CurrAsmList,left.location,paraloc1);
                cg.a_load_loc_cgpara(current_asmdata.CurrAsmList,left.location,paraloc1);
-               paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
-               paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc2);
+               paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
+               paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc2);
                cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                cg.a_call_name(current_asmdata.CurrAsmList,'FPC_DYNARRAY_RANGECHECK',false);
                cg.a_call_name(current_asmdata.CurrAsmList,'FPC_DYNARRAY_RANGECHECK',false);
                cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
                cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -771,9 +766,8 @@ implementation
               if (cs_check_range in current_settings.localswitches) then
               if (cs_check_range in current_settings.localswitches) then
                 begin
                 begin
                    paramanager.getintparaloc(pocall_default,1,paraloc1);
                    paramanager.getintparaloc(pocall_default,1,paraloc1);
-                   paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
                    cg.a_load_reg_cgpara(current_asmdata.CurrAsmList,OS_ADDR,location.reference.base,paraloc1);
                    cg.a_load_reg_cgpara(current_asmdata.CurrAsmList,OS_ADDR,location.reference.base,paraloc1);
-                   paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
+                   paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
                    cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                    cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                    cg.a_call_name(current_asmdata.CurrAsmList,'FPC_'+upper(tstringdef(left.resultdef).stringtypname)+'_CHECKZERO',false);
                    cg.a_call_name(current_asmdata.CurrAsmList,'FPC_'+upper(tstringdef(left.resultdef).stringtypname)+'_CHECKZERO',false);
                    cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
                    cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -876,10 +870,8 @@ implementation
                            begin
                            begin
                               paramanager.getintparaloc(pocall_default,1,paraloc1);
                               paramanager.getintparaloc(pocall_default,1,paraloc1);
                               paramanager.getintparaloc(pocall_default,2,paraloc2);
                               paramanager.getintparaloc(pocall_default,2,paraloc2);
-                              paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc2);
                               cg.a_load_const_cgpara(current_asmdata.CurrAsmList,OS_INT,tordconstnode(right).value.svalue,paraloc2);
                               cg.a_load_const_cgpara(current_asmdata.CurrAsmList,OS_INT,tordconstnode(right).value.svalue,paraloc2);
                               href:=location.reference;
                               href:=location.reference;
-                              paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
                               if not(tf_winlikewidestring in target_info.flags) or
                               if not(tf_winlikewidestring in target_info.flags) or
                                  (tstringdef(left.resultdef).stringtype<>st_widestring) then
                                  (tstringdef(left.resultdef).stringtype<>st_widestring) then
                                 begin
                                 begin
@@ -892,8 +884,8 @@ implementation
                                   dec(href.offset,4-offsetdec);
                                   dec(href.offset,4-offsetdec);
                                   cg.a_load_ref_cgpara(current_asmdata.CurrAsmList,OS_32,href,paraloc1);
                                   cg.a_load_ref_cgpara(current_asmdata.CurrAsmList,OS_32,href,paraloc1);
                                 end;
                                 end;
-                              paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
-                              paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc2);
+                              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
+                              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc2);
                               cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                               cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                               cg.a_call_name(current_asmdata.CurrAsmList,'FPC_'+upper(tstringdef(left.resultdef).stringtypname)+'_RANGECHECK',false);
                               cg.a_call_name(current_asmdata.CurrAsmList,'FPC_'+upper(tstringdef(left.resultdef).stringtypname)+'_RANGECHECK',false);
                               cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
                               cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
@@ -1038,13 +1030,11 @@ implementation
                            begin
                            begin
                               paramanager.getintparaloc(pocall_default,1,paraloc1);
                               paramanager.getintparaloc(pocall_default,1,paraloc1);
                               paramanager.getintparaloc(pocall_default,2,paraloc2);
                               paramanager.getintparaloc(pocall_default,2,paraloc2);
-                              paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc2);
                               cg.a_load_reg_cgpara(current_asmdata.CurrAsmList,OS_INT,right.location.register,paraloc2);
                               cg.a_load_reg_cgpara(current_asmdata.CurrAsmList,OS_INT,right.location.register,paraloc2);
                               href:=location.reference;
                               href:=location.reference;
                               dec(href.offset,sizeof(pint)-offsetdec);
                               dec(href.offset,sizeof(pint)-offsetdec);
 
 
                               href:=location.reference;
                               href:=location.reference;
-                              paramanager.allocparaloc(current_asmdata.CurrAsmList,paraloc1);
                               if not(tf_winlikewidestring in target_info.flags) or
                               if not(tf_winlikewidestring in target_info.flags) or
                                  (tstringdef(left.resultdef).stringtype<>st_widestring) then
                                  (tstringdef(left.resultdef).stringtype<>st_widestring) then
                                 begin
                                 begin
@@ -1058,8 +1048,8 @@ implementation
                                   cg.a_load_ref_cgpara(current_asmdata.CurrAsmList,OS_32,href,paraloc1);
                                   cg.a_load_ref_cgpara(current_asmdata.CurrAsmList,OS_32,href,paraloc1);
                                 end;
                                 end;
 
 
-                              paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc1);
-                              paramanager.freeparaloc(current_asmdata.CurrAsmList,paraloc2);
+                              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
+                              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc2);
                               cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                               cg.allocallcpuregisters(current_asmdata.CurrAsmList);
                               cg.a_call_name(current_asmdata.CurrAsmList,'FPC_'+upper(tstringdef(left.resultdef).stringtypname)+'_RANGECHECK',false);
                               cg.a_call_name(current_asmdata.CurrAsmList,'FPC_'+upper(tstringdef(left.resultdef).stringtypname)+'_RANGECHECK',false);
                               cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
                               cg.deallocallcpuregisters(current_asmdata.CurrAsmList);

+ 158 - 272
compiler/ncgutil.pas

@@ -65,12 +65,13 @@ interface
     procedure location_force_mem(list:TAsmList;var l:tlocation);
     procedure location_force_mem(list:TAsmList;var l:tlocation);
     procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;maybeconst:boolean);
     procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;maybeconst:boolean);
     procedure location_force_mmreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
     procedure location_force_mmreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
+    procedure location_allocate_register(list:TAsmList;out l: tlocation;def: tdef;constant: boolean);
 
 
     { load a tlocation into a cgpara }
     { load a tlocation into a cgpara }
     procedure gen_load_loc_cgpara(list: TAsmList; vardef: tdef; const l: tlocation; const cgpara: tcgpara);
     procedure gen_load_loc_cgpara(list: TAsmList; vardef: tdef; const l: tlocation; const cgpara: tcgpara);
     { loads a cgpara into a tlocation; assumes that loc.loc is already
     { loads a cgpara into a tlocation; assumes that loc.loc is already
       initialised }
       initialised }
-    procedure gen_load_cgpara_loc(list: TAsmList; vardef: tdef; const para: TCGPara; var destloc: tlocation);
+    procedure gen_load_cgpara_loc(list: TAsmList; vardef: tdef; const para: TCGPara; var destloc: tlocation; reusepara: boolean);
 
 
     { allocate registers for a tlocation; assumes that loc.loc is already
     { allocate registers for a tlocation; assumes that loc.loc is already
       set to LOC_CREGISTER/LOC_CFPUREGISTER/... }
       set to LOC_CREGISTER/LOC_CFPUREGISTER/... }
@@ -418,24 +419,20 @@ implementation
         paramanager.getintparaloc(pocall_default,1,paraloc1);
         paramanager.getintparaloc(pocall_default,1,paraloc1);
         paramanager.getintparaloc(pocall_default,2,paraloc2);
         paramanager.getintparaloc(pocall_default,2,paraloc2);
         paramanager.getintparaloc(pocall_default,3,paraloc3);
         paramanager.getintparaloc(pocall_default,3,paraloc3);
-        paramanager.allocparaloc(list,paraloc3);
         cg.a_loadaddr_ref_cgpara(list,t.envbuf,paraloc3);
         cg.a_loadaddr_ref_cgpara(list,t.envbuf,paraloc3);
-        paramanager.allocparaloc(list,paraloc2);
         cg.a_loadaddr_ref_cgpara(list,t.jmpbuf,paraloc2);
         cg.a_loadaddr_ref_cgpara(list,t.jmpbuf,paraloc2);
         { push type of exceptionframe }
         { push type of exceptionframe }
-        paramanager.allocparaloc(list,paraloc1);
         cg.a_load_const_cgpara(list,OS_S32,1,paraloc1);
         cg.a_load_const_cgpara(list,OS_S32,1,paraloc1);
-        paramanager.freeparaloc(list,paraloc3);
-        paramanager.freeparaloc(list,paraloc2);
-        paramanager.freeparaloc(list,paraloc1);
+        paramanager.freecgpara(list,paraloc3);
+        paramanager.freecgpara(list,paraloc2);
+        paramanager.freecgpara(list,paraloc1);
         cg.allocallcpuregisters(list);
         cg.allocallcpuregisters(list);
         cg.a_call_name(list,'FPC_PUSHEXCEPTADDR',false);
         cg.a_call_name(list,'FPC_PUSHEXCEPTADDR',false);
         cg.deallocallcpuregisters(list);
         cg.deallocallcpuregisters(list);
 
 
         paramanager.getintparaloc(pocall_default,1,paraloc1);
         paramanager.getintparaloc(pocall_default,1,paraloc1);
-        paramanager.allocparaloc(list,paraloc1);
         cg.a_load_reg_cgpara(list,OS_ADDR,NR_FUNCTION_RESULT_REG,paraloc1);
         cg.a_load_reg_cgpara(list,OS_ADDR,NR_FUNCTION_RESULT_REG,paraloc1);
-        paramanager.freeparaloc(list,paraloc1);
+        paramanager.freecgpara(list,paraloc1);
         cg.allocallcpuregisters(list);
         cg.allocallcpuregisters(list);
         cg.a_call_name(list,'FPC_SETJMP',false);
         cg.a_call_name(list,'FPC_SETJMP',false);
         cg.deallocallcpuregisters(list);
         cg.deallocallcpuregisters(list);
@@ -791,56 +788,89 @@ implementation
 {$ifdef i386}
 {$ifdef i386}
         href   : treference;
         href   : treference;
         size   : longint;
         size   : longint;
-{$else i386}
-        tmploc : tlocation;
 {$endif i386}
 {$endif i386}
+        tmploc : tlocation;
       begin
       begin
 {$ifdef i386}
 {$ifdef i386}
-           if cgpara.location^.loc<>LOC_REFERENCE then
-             internalerror(200309291);
            case l.loc of
            case l.loc of
              LOC_FPUREGISTER,
              LOC_FPUREGISTER,
              LOC_CFPUREGISTER:
              LOC_CFPUREGISTER:
                begin
                begin
-                 size:=align(locintsize,cgpara.alignment);
-                 if (not use_fixed_stack) and
-                    (cgpara.location^.reference.index=NR_STACK_POINTER_REG) then
-                   begin
-                     cg.g_stackpointer_alloc(list,size);
-                     reference_reset_base(href,NR_STACK_POINTER_REG,0,sizeof(pint));
-                   end
-                 else
-                   reference_reset_base(href,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
-                 cg.a_loadfpu_reg_ref(list,l.size,l.size,l.register,href);
+                 case cgpara.location^.loc of
+                   LOC_REFERENCE:
+                     begin
+                       size:=align(locintsize,cgpara.alignment);
+                       if (not use_fixed_stack) and
+                          (cgpara.location^.reference.index=NR_STACK_POINTER_REG) then
+                         begin
+                           cg.g_stackpointer_alloc(list,size);
+                           reference_reset_base(href,NR_STACK_POINTER_REG,0,sizeof(pint));
+                         end
+                       else
+                         reference_reset_base(href,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
+                       cg.a_loadfpu_reg_ref(list,l.size,l.size,l.register,href);
+                     end;
+                   LOC_FPUREGISTER:
+                     begin
+                       cg.a_loadfpu_reg_reg(list,l.size,cgpara.location^.size,l.register,cgpara.location^.register);
+                     end;
+                   else
+                     internalerror(2010053003);
+                 end;
                end;
                end;
              LOC_MMREGISTER,
              LOC_MMREGISTER,
              LOC_CMMREGISTER:
              LOC_CMMREGISTER:
                begin
                begin
-                 { can't use TCGSize2Size[l.size], because the size of an
-                   80 bit extended parameter can be either 10 or 12 bytes }
-                 size:=align(locintsize,cgpara.alignment);
-                 if (not use_fixed_stack) and
-                    (cgpara.location^.reference.index=NR_STACK_POINTER_REG) then
-                   begin
-                     cg.g_stackpointer_alloc(list,size);
-                     reference_reset_base(href,NR_STACK_POINTER_REG,0,sizeof(pint));
-                   end
-                 else
-                   reference_reset_base(href,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
-                 cg.a_loadmm_reg_ref(list,l.size,l.size,l.register,href,mms_movescalar);
+                 case cgpara.location^.loc of
+                   LOC_REFERENCE:
+                     begin
+                       { can't use TCGSize2Size[l.size], because the size of an
+                         80 bit extended parameter can be either 10 or 12 bytes }
+                       size:=align(locintsize,cgpara.alignment);
+                       if (not use_fixed_stack) and
+                          (cgpara.location^.reference.index=NR_STACK_POINTER_REG) then
+                         begin
+                           cg.g_stackpointer_alloc(list,size);
+                           reference_reset_base(href,NR_STACK_POINTER_REG,0,sizeof(pint));
+                         end
+                       else
+                         reference_reset_base(href,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
+                       cg.a_loadmm_reg_ref(list,l.size,l.size,l.register,href,mms_movescalar);
+                     end;
+                   LOC_FPUREGISTER:
+                     begin
+                       tmploc:=l;
+                       location_force_mem(list,tmploc);
+                       cg.a_loadfpu_ref_cgpara(list,tmploc.size,tmploc.reference,cgpara);
+                       location_freetemp(list,tmploc);
+                     end;
+                   else
+                     internalerror(2010053004);
+                 end;
                end;
                end;
              LOC_REFERENCE,
              LOC_REFERENCE,
              LOC_CREFERENCE :
              LOC_CREFERENCE :
                begin
                begin
-                 size:=align(locintsize,cgpara.alignment);
-                 if (not use_fixed_stack) and
-                    (cgpara.location^.reference.index=NR_STACK_POINTER_REG) then
-                   cg.a_load_ref_cgpara(list,l.size,l.reference,cgpara)
-                 else
-                   begin
-                     reference_reset_base(href,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
-                     cg.g_concatcopy(list,l.reference,href,size);
-                   end;
+                 case cgpara.location^.loc of
+                   LOC_REFERENCE:
+                     begin
+                       size:=align(locintsize,cgpara.alignment);
+                       if (not use_fixed_stack) and
+                          (cgpara.location^.reference.index=NR_STACK_POINTER_REG) then
+                         cg.a_load_ref_cgpara(list,l.size,l.reference,cgpara)
+                       else
+                         begin
+                           reference_reset_base(href,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
+                           cg.g_concatcopy(list,l.reference,href,size);
+                         end;
+                     end;
+                   LOC_FPUREGISTER:
+                     begin
+                       cg.a_loadfpu_ref_cgpara(list,l.size,l.reference,cgpara);
+                     end;
+                   else
+                     internalerror(2010053005);
+                 end;
                end;
                end;
              else
              else
                internalerror(2002042430);
                internalerror(2002042430);
@@ -913,13 +943,16 @@ implementation
                     value is still a const or in a register then write it
                     value is still a const or in a register then write it
                     to a reference first. This situation can be triggered
                     to a reference first. This situation can be triggered
                     by typecasting an int64 constant to a record of 8 bytes }
                     by typecasting an int64 constant to a record of 8 bytes }
-                  tmploc:=l;
-                  if tmploc.size in [OS_64,OS_S64] then
-                    location_force_mem(list,tmploc);
-                  cg.a_load_loc_cgpara(list,tmploc,cgpara);
-{$else not cpu64bitalu}
-                  cg.a_load_loc_cgpara(list,l,cgpara);
+                  if l.size in [OS_64,OS_S64] then
+                    begin
+                      tmploc:=l;
+                      location_force_mem(list,tmploc);
+                      cg.a_load_loc_cgpara(list,tmploc,cgpara);
+                      location_freetemp(list,tmploc);
+                    end
+                  else
 {$endif not cpu64bitalu}
 {$endif not cpu64bitalu}
+                    cg.a_load_loc_cgpara(list,l,cgpara);
                end;
                end;
              else
              else
                internalerror(2002042432);
                internalerror(2002042432);
@@ -963,13 +996,16 @@ implementation
                     value is still a const or in a register then write it
                     value is still a const or in a register then write it
                     to a reference first. This situation can be triggered
                     to a reference first. This situation can be triggered
                     by typecasting an int64 constant to a record of 8 bytes }
                     by typecasting an int64 constant to a record of 8 bytes }
-                  tmploc:=l;
-                  if tmploc.size in [OS_64,OS_S64] then
-                    location_force_mem(list,tmploc);
-                  cg.a_load_loc_cgpara(list,tmploc,cgpara);
-{$else not cpu64bitalu}
-                  cg.a_load_loc_cgpara(list,l,cgpara);
+                  if l.size in [OS_64,OS_S64] then
+                    begin
+                      tmploc:=l;
+                      location_force_mem(list,tmploc);
+                      cg.a_load_loc_cgpara(list,tmploc,cgpara);
+                      location_freetemp(list,tmploc);
+                    end
+                  else
 {$endif not cpu64bitalu}
 {$endif not cpu64bitalu}
+                    cg.a_load_loc_cgpara(list,l,cgpara);
                 end;
                 end;
             end;
             end;
 {$ifdef SUPPORT_MMX}
 {$ifdef SUPPORT_MMX}
@@ -1025,6 +1061,48 @@ implementation
       end;
       end;
 
 
 
 
+    procedure location_allocate_register(list: TAsmList;out l: tlocation;def: tdef;constant: boolean);
+      begin
+        l.size:=def_cgsize(def);
+        if (def.typ=floatdef) and
+           not(cs_fp_emulation in current_settings.moduleswitches) then
+          begin
+            if use_vectorfpu(def) then
+              begin
+                if constant then
+                  location_reset(l,LOC_CMMREGISTER,l.size)
+                else
+                  location_reset(l,LOC_MMREGISTER,l.size);
+                l.register:=cg.getmmregister(list,l.size);
+              end
+            else
+              begin
+                if constant then
+                  location_reset(l,LOC_CFPUREGISTER,l.size)
+                else
+                  location_reset(l,LOC_FPUREGISTER,l.size);
+                l.register:=cg.getfpuregister(list,l.size);
+              end;
+          end
+        else
+          begin
+            if constant then
+              location_reset(l,LOC_CREGISTER,l.size)
+            else
+              location_reset(l,LOC_REGISTER,l.size);
+{$ifndef cpu64bitalu}
+            if l.size in [OS_64,OS_S64,OS_F64] then
+              begin
+                l.register64.reglo:=cg.getintregister(list,OS_32);
+                l.register64.reghi:=cg.getintregister(list,OS_32);
+              end
+            else
+{$endif not cpu64bitalu}
+              l.register:=cg.getintregister(list,l.size);
+          end;
+      end;
+
+
     procedure location_force_mem(list:TAsmList;var l:tlocation);
     procedure location_force_mem(list:TAsmList;var l:tlocation);
       var
       var
         r : treference;
         r : treference;
@@ -1573,12 +1651,8 @@ implementation
 
 
     procedure gen_load_return_value(list:TAsmList);
     procedure gen_load_return_value(list:TAsmList);
       var
       var
-        href   : treference;
         ressym : tabstractnormalvarsym;
         ressym : tabstractnormalvarsym;
-        resloc,
-        restmploc : tlocation;
-        hreg   : tregister;
-        funcretloc : tlocation;
+        funcretloc : TCGPara;
       begin
       begin
         { Is the loading needed? }
         { Is the loading needed? }
         if is_void(current_procinfo.procdef.returndef) or
         if is_void(current_procinfo.procdef.returndef) or
@@ -1599,210 +1673,16 @@ implementation
         if (ressym.refs>0) or
         if (ressym.refs>0) or
            is_managed_type(ressym.vardef) then
            is_managed_type(ressym.vardef) then
           begin
           begin
-            restmploc:=ressym.localloc;
-
-            { Here, we return the function result. In most architectures, the value is
-              passed into the FUNCTION_RETURN_REG, but in a windowed architecure like sparc a
-              function returns in a register and the caller receives it in an other one }
-            case funcretloc.loc of
-              LOC_REGISTER:
-                begin
-{$ifdef cpu64bitalu}
-                  if current_procinfo.procdef.funcretloc[calleeside].size in [OS_128,OS_S128] then
-                    begin
-                      resloc:=current_procinfo.procdef.funcretloc[calleeside];
-                      if resloc.loc<>LOC_REGISTER then
-                        internalerror(200409141);
-                      { Load low and high register separate to generate better register
-                        allocation info }
-                      if getsupreg(resloc.register)<first_int_imreg then
-                        begin
-                          cg.getcpuregister(list,resloc.register);
-                        end;
-                      case restmploc.loc of
-                        LOC_REFERENCE :
-                          begin
-                            href:=restmploc.reference;
-                            if target_info.endian=ENDIAN_BIG then
-                              inc(href.offset,8);
-                            cg.a_load_ref_reg(list,OS_64,OS_64,href,resloc.register);
-                          end;
-                        LOC_CREGISTER :
-                          cg.a_load_reg_reg(list,OS_64,OS_64,restmploc.register,resloc.register);
-                        else
-                          internalerror(200409203);
-                      end;
-                      if getsupreg(resloc.registerhi)<first_int_imreg then
-                        begin
-                          cg.getcpuregister(list,resloc.registerhi);
-                        end;
-                      case restmploc.loc of
-                        LOC_REFERENCE :
-                          begin
-                            href:=restmploc.reference;
-                            if target_info.endian=ENDIAN_LITTLE then
-                              inc(href.offset,8);
-                            cg.a_load_ref_reg(list,OS_64,OS_64,href,resloc.registerhi);
-                          end;
-                        LOC_CREGISTER :
-                          cg.a_load_reg_reg(list,OS_64,OS_64,restmploc.registerhi,resloc.registerhi);
-                        else
-                          internalerror(200409204);
-                      end;
-                    end
-                  else
-                    { this code is for structures etc. being returned in registers and having odd sizes }
-                    if (current_procinfo.procdef.funcretloc[calleeside].size=OS_64) and
-                      (restmploc.size<>OS_64) then
-                      begin
-                        resloc:=current_procinfo.procdef.funcretloc[calleeside];
-                        if resloc.loc<>LOC_REGISTER then
-                          internalerror(200409141);
-                        { Load low and high register separate to generate better register
-                          allocation info }
-                        if getsupreg(resloc.register)<first_int_imreg then
-                          begin
-                            cg.getcpuregister(list,resloc.register);
-                          end;
-                        case restmploc.loc of
-                          LOC_REFERENCE :
-                            begin
-                              href:=restmploc.reference;
-                              cg.a_load_ref_reg(list,OS_64,OS_64,href,resloc.register);
-                            end;
-                          LOC_CREGISTER :
-                            cg.a_load_reg_reg(list,OS_64,OS_64,restmploc.register,resloc.register);
-                          else
-                            internalerror(200409203);
-                        end;
-                      end
-                    else
-{$else cpu64bitalu}
-                  if current_procinfo.procdef.funcretloc[calleeside].size in [OS_64,OS_S64] then
-                    begin
-                      resloc:=current_procinfo.procdef.funcretloc[calleeside];
-                      if resloc.loc<>LOC_REGISTER then
-                        internalerror(200409141);
-                      { Load low and high register separate to generate better register
-                        allocation info }
-                      if getsupreg(resloc.register64.reglo)<first_int_imreg then
-                        begin
-                          cg.getcpuregister(list,resloc.register64.reglo);
-                        end;
-                      case restmploc.loc of
-                        LOC_REFERENCE :
-                          begin
-                            href:=restmploc.reference;
-                            if target_info.endian=ENDIAN_BIG then
-                              inc(href.offset,4);
-                            cg.a_load_ref_reg(list,OS_32,OS_32,href,resloc.register64.reglo);
-                          end;
-                        LOC_CREGISTER :
-                          cg.a_load_reg_reg(list,OS_32,OS_32,restmploc.register64.reglo,resloc.register64.reglo);
-                        LOC_CMMREGISTER :
-                          { perform the whole move at once below, both result
-                            registers are required (and since restmploc is an mmreg
-                            and resloc intregs, they don't conflict anyway) }
-                          ;
-                        else
-                          internalerror(200409203);
-                      end;
-                      if getsupreg(resloc.register64.reghi)<first_int_imreg then
-                        begin
-                          cg.getcpuregister(list,resloc.register64.reghi);
-                        end;
-                      case restmploc.loc of
-                        LOC_REFERENCE :
-                          begin
-                            href:=restmploc.reference;
-                            if target_info.endian=ENDIAN_LITTLE then
-                              inc(href.offset,4);
-                            cg.a_load_ref_reg(list,OS_32,OS_32,href,resloc.register64.reghi);
-                          end;
-                        LOC_CREGISTER :
-                          cg.a_load_reg_reg(list,OS_32,OS_32,restmploc.register64.reghi,resloc.register64.reghi);
-                        LOC_CMMREGISTER :
-                          cg64.a_loadmm_reg_intreg64(list,restmploc.size,restmploc.register,resloc.register64);
-                        else
-                          internalerror(200409204);
-                      end;
-                    end
-                  else
-{$endif cpu64bitalu}
-                  { this code is for structures etc. being returned in registers and having odd sizes }
-                  if (current_procinfo.procdef.funcretloc[calleeside].size=OS_32) and
-                    not(restmploc.size in [OS_S32,OS_32]) then
-                    begin
-                      resloc:=current_procinfo.procdef.funcretloc[calleeside];
-                      if resloc.loc<>LOC_REGISTER then
-                        internalerror(200409141);
-                      { Load low and high register separate to generate better register
-                        allocation info }
-                      if getsupreg(resloc.register)<first_int_imreg then
-                        begin
-                          cg.getcpuregister(list,resloc.register);
-                        end;
-                      case restmploc.loc of
-                        LOC_REFERENCE :
-                          begin
-                            href:=restmploc.reference;
-                            resloc.register:=cg.makeregsize(list,resloc.register,OS_32);
-                            cg.a_load_ref_reg(list,OS_32,OS_32,href,resloc.register);
-                          end;
-                        LOC_CREGISTER :
-                          cg.a_load_reg_reg(list,OS_32,OS_32,restmploc.register,resloc.register);
-                        LOC_CMMREGISTER :
-                          cg.a_loadmm_reg_intreg(list,restmploc.size,resloc.size,restmploc.register,resloc.register,mms_movescalar);
-                        else
-                          internalerror(200409203);
-                      end;
-                    end
-                  else
-                    begin
-                      hreg:=cg.makeregsize(list,funcretloc.register,funcretloc.size);
-                      if getsupreg(funcretloc.register)<first_int_imreg then
-                        begin
-                          cg.getcpuregister(list,funcretloc.register);
-                        end;
-                      { it could be that a structure is passed in memory but the function is expected to
-                        return a pointer to this memory }
-                      if paramanager.ret_in_param(current_procinfo.procdef.returndef,current_procinfo.procdef.proccalloption) then
-                        cg.a_load_loc_reg(list,OS_ADDR,restmploc,hreg)
-                      else
-                        cg.a_load_loc_reg(list,restmploc.size,restmploc,hreg);
-                    end;
-                end;
-              LOC_FPUREGISTER:
-                begin
-                  if getsupreg(funcretloc.register)<first_fpu_imreg then
-                    begin
-                      cg.getcpuregister(list,funcretloc.register);
-                    end;
-                  { we can't do direct moves between fpu and mm registers }
-                  if restmploc.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
-                    location_force_fpureg(list,restmploc,false);
-                  cg.a_loadfpu_loc_reg(list,funcretloc.size,restmploc,funcretloc.register);
-                end;
-              LOC_MMREGISTER:
-                begin
-                  if getsupreg(funcretloc.register)<first_mm_imreg then
-                    begin
-                      cg.getcpuregister(list,funcretloc.register);
-                    end;
-                  cg.a_loadmm_loc_reg(list,restmploc.size,restmploc,funcretloc.register,mms_movescalar);
-                end;
-              LOC_INVALID,
-              LOC_REFERENCE:
-                ;
-              else
-                internalerror(200405025);
-            end;
+            { was: don't do anything if funcretloc.loc in [LOC_INVALID,LOC_REFERENCE] }
+            if not paramanager.ret_in_param(current_procinfo.procdef.returndef,current_procinfo.procdef.proccalloption) then
+              gen_load_loc_cgpara(list,ressym.vardef,ressym.localloc,funcretloc);
           end
           end
 {$ifdef x86}
 {$ifdef x86}
          else
          else
           begin
           begin
             { the caller will pop a value from the fpu stack }
             { the caller will pop a value from the fpu stack }
-            if (funcretloc.loc = LOC_FPUREGISTER) then
+            if assigned(funcretloc.location) and
+               (funcretloc.location^.loc = LOC_FPUREGISTER) then
               list.concat(taicpu.op_none(A_FLDZ));
               list.concat(taicpu.op_none(A_FLDZ));
           end;
           end;
 {$endif x86}
 {$endif x86}
@@ -1858,7 +1738,7 @@ implementation
       end;
       end;
 
 
 
 
-    procedure gen_load_cgpara_loc(list: TAsmList; vardef: tdef; const para: TCGPara; var destloc: tlocation);
+    procedure gen_load_cgpara_loc(list: TAsmList; vardef: tdef; const para: TCGPara; var destloc: tlocation; reusepara: boolean);
 
 
       procedure unget_para(const paraloc:TCGParaLocation);
       procedure unget_para(const paraloc:TCGParaLocation);
         begin
         begin
@@ -1893,9 +1773,9 @@ implementation
 {$endif not cpu64bitalu}
 {$endif not cpu64bitalu}
       begin
       begin
         paraloc:=para.location;
         paraloc:=para.location;
-        { skip e.g. empty records }
         if not assigned(paraloc) then
         if not assigned(paraloc) then
           internalerror(200408203);
           internalerror(200408203);
+        { skip e.g. empty records }
         if (paraloc^.loc = LOC_VOID) then
         if (paraloc^.loc = LOC_VOID) then
           exit;
           exit;
         case destloc.loc of
         case destloc.loc of
@@ -1903,7 +1783,7 @@ implementation
             begin
             begin
               { If the parameter location is reused we don't need to copy
               { If the parameter location is reused we don't need to copy
                 anything }
                 anything }
-              if not paramanager.param_use_paraloc(para) then
+              if not reusepara then
                 begin
                 begin
                   href:=destloc.reference;
                   href:=destloc.reference;
                   sizeleft:=para.intsize;
                   sizeleft:=para.intsize;
@@ -1931,11 +1811,15 @@ implementation
                     end;
                     end;
                 end;
                 end;
             end;
             end;
+          LOC_REGISTER,
           LOC_CREGISTER :
           LOC_CREGISTER :
             begin
             begin
 {$ifndef cpu64bitalu}
 {$ifndef cpu64bitalu}
-              if (para.size in [OS_64,OS_S64]) and
-                 is_64bit(vardef) then
+              if (para.size in [OS_64,OS_S64,OS_F64]) and
+                 (is_64bit(vardef) or
+                  { in case of fpu emulation, or abi's that pass fpu values
+                    via integer registers }
+                  (vardef.typ=floatdef)) then
                 begin
                 begin
                   case paraloc^.loc of
                   case paraloc^.loc of
                     LOC_REGISTER:
                     LOC_REGISTER:
@@ -1985,6 +1869,7 @@ implementation
                   cg.a_load_cgparaloc_anyreg(list,destloc.size,paraloc^,destloc.register,sizeof(aint));
                   cg.a_load_cgparaloc_anyreg(list,destloc.size,paraloc^,destloc.register,sizeof(aint));
                 end;
                 end;
             end;
             end;
+          LOC_FPUREGISTER,
           LOC_CFPUREGISTER :
           LOC_CFPUREGISTER :
             begin
             begin
 {$if defined(sparc) or defined(arm)}
 {$if defined(sparc) or defined(arm)}
@@ -2013,6 +1898,7 @@ implementation
                 internalerror(200410109);
                 internalerror(200410109);
 {$endif sparc}
 {$endif sparc}
             end;
             end;
+          LOC_MMREGISTER,
           LOC_CMMREGISTER :
           LOC_CMMREGISTER :
             begin
             begin
 {$ifndef cpu64bitalu}
 {$ifndef cpu64bitalu}
@@ -2050,6 +1936,8 @@ implementation
                   }
                   }
                 end;
                 end;
             end;
             end;
+          else
+            internalerror(2010052903);
         end;
         end;
       end;
       end;
 
 
@@ -2103,7 +1991,7 @@ implementation
         for i:=0 to current_procinfo.procdef.paras.count-1 do
         for i:=0 to current_procinfo.procdef.paras.count-1 do
           begin
           begin
             currpara:=tparavarsym(current_procinfo.procdef.paras[i]);
             currpara:=tparavarsym(current_procinfo.procdef.paras[i]);
-            gen_load_cgpara_loc(list,currpara.vardef,currpara.paraloc[calleeside],currpara.initialloc);
+            gen_load_cgpara_loc(list,currpara.vardef,currpara.paraloc[calleeside],currpara.initialloc,paramanager.param_use_paraloc(currpara.paraloc[calleeside]));
             { gen_load_cgpara_loc() already allocated the initialloc
             { gen_load_cgpara_loc() already allocated the initialloc
               -> don't allocate again }
               -> don't allocate again }
             if currpara.initialloc.loc in [LOC_CREGISTER,LOC_CFPUREGISTER,LOC_CMMREGISTER] then
             if currpara.initialloc.loc in [LOC_CREGISTER,LOC_CFPUREGISTER,LOC_CMMREGISTER] then
@@ -2399,7 +2287,7 @@ implementation
 
 
         { release return registers, needed for optimizer }
         { release return registers, needed for optimizer }
         if not is_void(current_procinfo.procdef.returndef) then
         if not is_void(current_procinfo.procdef.returndef) then
-          location_free(list,current_procinfo.procdef.funcretloc[calleeside]);
+          paramanager.freecgpara(list,current_procinfo.procdef.funcretloc[calleeside]);
 
 
         { end of frame marker for call frame info }
         { end of frame marker for call frame info }
         current_asmdata.asmcfi.end_frame(list);
         current_asmdata.asmcfi.end_frame(list);
@@ -2412,9 +2300,8 @@ implementation
       begin
       begin
         paraloc1.init;
         paraloc1.init;
         paramanager.getintparaloc(pocall_default,1,paraloc1);
         paramanager.getintparaloc(pocall_default,1,paraloc1);
-        paramanager.allocparaloc(list,paraloc1);
         cg.a_load_const_cgpara(list,OS_INT,current_procinfo.calc_stackframe_size,paraloc1);
         cg.a_load_const_cgpara(list,OS_INT,current_procinfo.calc_stackframe_size,paraloc1);
-        paramanager.freeparaloc(list,paraloc1);
+        paramanager.freecgpara(list,paraloc1);
         paraloc1.done;
         paraloc1.done;
       end;
       end;
 
 
@@ -2426,8 +2313,7 @@ implementation
         paraloc1.init;
         paraloc1.init;
         { Also alloc the register needed for the parameter }
         { Also alloc the register needed for the parameter }
         paramanager.getintparaloc(pocall_default,1,paraloc1);
         paramanager.getintparaloc(pocall_default,1,paraloc1);
-        paramanager.allocparaloc(list,paraloc1);
-        paramanager.freeparaloc(list,paraloc1);
+        paramanager.freecgpara(list,paraloc1);
         { Call the helper }
         { Call the helper }
         cg.allocallcpuregisters(list);
         cg.allocallcpuregisters(list);
         cg.a_call_name(list,'FPC_STACKCHECK',false);
         cg.a_call_name(list,'FPC_STACKCHECK',false);
@@ -2885,7 +2771,7 @@ implementation
             exit;
             exit;
         end;
         end;
 
 
-        if (current_procinfo.procdef.funcretloc[calleeside].loc<>LOC_VOID) and
+        if not is_void(current_procinfo.procdef.returndef) and
            assigned(current_procinfo.procdef.funcretsym) and
            assigned(current_procinfo.procdef.funcretsym) and
            (tabstractvarsym(current_procinfo.procdef.funcretsym).refs <> 0) then
            (tabstractvarsym(current_procinfo.procdef.funcretsym).refs <> 0) then
           if (current_procinfo.procdef.proctypeoption=potype_constructor) then
           if (current_procinfo.procdef.proctypeoption=potype_constructor) then

+ 90 - 2
compiler/parabase.pas

@@ -26,7 +26,8 @@ unit parabase;
 
 
     uses
     uses
        cclasses,globtype,
        cclasses,globtype,
-       cpubase,cgbase,cgutils;
+       cpubase,cgbase,cgutils,
+       symtype, ppu;
 
 
     type
     type
        TCGParaReference = record
        TCGParaReference = record
@@ -62,9 +63,9 @@ unit parabase;
 
 
        TCGPara = object
        TCGPara = object
           Location  : PCGParalocation;
           Location  : PCGParalocation;
+          IntSize   : aint; { size of the total location in bytes }
           Alignment : ShortInt;
           Alignment : ShortInt;
           Size      : TCGSize;  { Size of the parameter included in all locations }
           Size      : TCGSize;  { Size of the parameter included in all locations }
-          IntSize: aint; { size of the total location in bytes }
 {$ifdef powerpc}
 {$ifdef powerpc}
           composite: boolean; { under the AIX abi, how certain parameters are passed depends on whether they are composite or not }
           composite: boolean; { under the AIX abi, how certain parameters are passed depends on whether they are composite or not }
 {$endif powerpc}
 {$endif powerpc}
@@ -75,6 +76,9 @@ unit parabase;
           procedure   check_simple_location;
           procedure   check_simple_location;
           function    add_location:pcgparalocation;
           function    add_location:pcgparalocation;
           procedure   get_location(var newloc:tlocation);
           procedure   get_location(var newloc:tlocation);
+
+          procedure   ppuwrite(ppufile:tcompilerppufile);
+          procedure   ppuload(ppufile:tcompilerppufile);
        end;
        end;
 
 
        tvarargsinfo = (
        tvarargsinfo = (
@@ -240,6 +244,90 @@ implementation
       end;
       end;
 
 
 
 
+    procedure TCGPara.ppuwrite(ppufile: tcompilerppufile);
+      var
+        hparaloc: PCGParaLocation;
+        nparaloc: byte;
+      begin
+        ppufile.putbyte(byte(Alignment));
+        ppufile.putbyte(ord(Size));
+        ppufile.putaint(IntSize);
+{$ifdef powerpc}
+        ppufile.putbyte(byte(composite));
+{$endif}
+        nparaloc:=0;
+        hparaloc:=location;
+        while assigned(hparaloc) do
+          begin
+            inc(nparaloc);
+            hparaloc:=hparaloc^.Next;
+          end;
+        ppufile.putbyte(nparaloc);
+        hparaloc:=location;
+        while assigned(hparaloc) do
+          begin
+            ppufile.putbyte(byte(hparaloc^.Size));
+            ppufile.putbyte(byte(hparaloc^.loc));
+            if hparaloc^.loc=LOC_REFERENCE then
+              begin
+                ppufile.putlongint(longint(hparaloc^.reference.index));
+                ppufile.putaint(hparaloc^.reference.offset);
+              end
+            else
+              begin
+{$ifdef powerpc64}
+                ppufile.putbyte(hparaloc^.shiftval);
+{$endif}
+                ppufile.putlongint(longint(hparaloc^.register));
+              end;
+          end;
+      end;
+
+
+    procedure TCGPara.ppuload(ppufile: tcompilerppufile);
+      var
+        hparaloc: PCGParaLocation;
+        nparaloc: byte;
+      begin
+        reset;
+        Alignment:=shortint(ppufile.getbyte);
+        Size:=TCgSize(ppufile.getbyte);
+        IntSize:=ppufile.getaint;
+{$ifdef powerpc}
+        composite:=boolean(ppufile.getbyte);
+{$endif}
+        nparaloc:=ppufile.getbyte;
+        while nparaloc>0 do
+          begin
+            hparaloc:=add_location;
+            hparaloc^.size:=TCGSize(ppufile.getbyte);
+            hparaloc^.loc:=TCGLoc(ppufile.getbyte);
+            case hparaloc^.loc of
+              LOC_REFERENCE:
+                begin
+                  hparaloc^.reference.index:=tregister(ppufile.getlongint);
+                  hparaloc^.reference.offset:=ppufile.getaint;
+                end;
+              LOC_FPUREGISTER,
+              LOC_CFPUREGISTER,
+              LOC_MMREGISTER,
+              LOC_CMMREGISTER,
+              LOC_REGISTER,
+              LOC_CREGISTER :
+                begin
+{$ifdef powerpc64}
+                  hparaloc^.shiftval:=ppufile.getbyte;
+{$endif}
+                  hparaloc^.register:=tregister(ppufile.getlongint);
+                end
+              else
+                internalerror(2010051301);
+            end;
+            dec(nparaloc);
+          end;
+      end;
+
+
 {****************************************************************************
 {****************************************************************************
                           TParaList
                           TParaList
 ****************************************************************************}
 ****************************************************************************}

+ 42 - 26
compiler/paramgr.pas

@@ -76,19 +76,26 @@ unit paramgr;
 
 
           procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara:TCGPara);virtual;abstract;
           procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara:TCGPara);virtual;abstract;
 
 
+          {# allocate an individual pcgparalocation that's part of a tcgpara
+
+            @param(list Current assembler list)
+            @param(loc Parameter location element)
+          }
+          procedure allocparaloc(list: TAsmList; const paraloc: pcgparalocation);
+
           {# allocate a parameter location created with create_paraloc_info
           {# allocate a parameter location created with create_paraloc_info
 
 
             @param(list Current assembler list)
             @param(list Current assembler list)
             @param(loc Parameter location)
             @param(loc Parameter location)
           }
           }
-          procedure allocparaloc(list: TAsmList; const cgpara: TCGPara); virtual;
+          procedure alloccgpara(list: TAsmList; const cgpara: TCGPara); virtual;
 
 
           {# free a parameter location allocated with alloccgpara
           {# free a parameter location allocated with alloccgpara
 
 
             @param(list Current assembler list)
             @param(list Current assembler list)
             @param(loc Parameter location)
             @param(loc Parameter location)
           }
           }
-          procedure freeparaloc(list: TAsmList; const cgpara: TCGPara); virtual;
+          procedure freecgpara(list: TAsmList; const cgpara: TCGPara); virtual;
 
 
           { This is used to populate the location information on all parameters
           { This is used to populate the location information on all parameters
             for the routine as seen in either the caller or the callee. It returns
             for the routine as seen in either the caller or the callee. It returns
@@ -100,7 +107,7 @@ unit paramgr;
             function result instead of its actual result. Used if the compiler
             function result instead of its actual result. Used if the compiler
             forces the function result to something different than the real
             forces the function result to something different than the real
             result.  }
             result.  }
-          function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;virtual;abstract;
+          function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;virtual;abstract;
 
 
           { This is used to populate the location information on all parameters
           { This is used to populate the location information on all parameters
             for the routine when it is being inlined. It returns
             for the routine when it is being inlined. It returns
@@ -223,39 +230,48 @@ implementation
       end;
       end;
 
 
 
 
-    procedure tparamanager.allocparaloc(list: TAsmList; const cgpara: TCGPara);
+    procedure tparamanager.allocparaloc(list: TAsmList; const paraloc: pcgparalocation);
+      begin
+        case paraloc^.loc of
+          LOC_REGISTER,
+          LOC_CREGISTER:
+            begin
+              if getsupreg(paraloc^.register)<first_int_imreg then
+                cg.getcpuregister(list,paraloc^.register);
+            end;
+{$ifndef x86}
+{ don't allocate ST(x), they're not handled by the register allocator }
+          LOC_FPUREGISTER,
+          LOC_CFPUREGISTER:
+            begin
+              if getsupreg(paraloc^.register)<first_fpu_imreg then
+                cg.getcpuregister(list,paraloc^.register);
+            end;
+{$endif not x86}
+          LOC_MMREGISTER,
+          LOC_CMMREGISTER :
+            begin
+              if getsupreg(paraloc^.register)<first_mm_imreg then
+                cg.getcpuregister(list,paraloc^.register);
+            end;
+        end;
+      end;
+
+
+    procedure tparamanager.alloccgpara(list: TAsmList; const cgpara: TCGPara);
       var
       var
         paraloc : pcgparalocation;
         paraloc : pcgparalocation;
       begin
       begin
         paraloc:=cgpara.location;
         paraloc:=cgpara.location;
         while assigned(paraloc) do
         while assigned(paraloc) do
           begin
           begin
-            case paraloc^.loc of
-              LOC_REGISTER,
-              LOC_CREGISTER:
-                begin
-                  if getsupreg(paraloc^.register)<first_int_imreg then
-                    cg.getcpuregister(list,paraloc^.register);
-                end;
-              LOC_FPUREGISTER,
-              LOC_CFPUREGISTER:
-                begin
-                  if getsupreg(paraloc^.register)<first_fpu_imreg then
-                    cg.getcpuregister(list,paraloc^.register);
-                end;
-              LOC_MMREGISTER,
-              LOC_CMMREGISTER :
-                begin
-                  if getsupreg(paraloc^.register)<first_mm_imreg then
-                    cg.getcpuregister(list,paraloc^.register);
-                end;
-            end;
+            allocparaloc(list,paraloc);
             paraloc:=paraloc^.next;
             paraloc:=paraloc^.next;
           end;
           end;
       end;
       end;
 
 
 
 
-    procedure tparamanager.freeparaloc(list: TAsmList; const cgpara: TCGPara);
+    procedure tparamanager.freecgpara(list: TAsmList; const cgpara: TCGPara);
       var
       var
         paraloc : Pcgparalocation;
         paraloc : Pcgparalocation;
         href : treference;
         href : treference;
@@ -293,7 +309,7 @@ implementation
                       fillchar(href,sizeof(href),0);
                       fillchar(href,sizeof(href),0);
                       href.base:=paraloc^.reference.index;
                       href.base:=paraloc^.reference.index;
                       href.offset:=paraloc^.reference.offset;
                       href.offset:=paraloc^.reference.offset;
-                      tg.ungettemp(list,href);
+                      tg.ungetiftemp(list,href);
                     end;
                     end;
                 end;
                 end;
               else
               else

+ 0 - 73
compiler/powerpc/cgcpu.pas

@@ -37,14 +37,6 @@ unit cgcpu;
         procedure init_register_allocators;override;
         procedure init_register_allocators;override;
         procedure done_register_allocators;override;
         procedure done_register_allocators;override;
 
 
-        { passing parameters, per default the parameter is pushed }
-        { nr gives the number of the parameter (enumerated from   }
-        { left to right), this allows to move the parameter to    }
-        { register, if the cpu supports register calling          }
-        { conventions                                             }
-        procedure a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : tcgpara);override;
-
-
         procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
         procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
         procedure a_call_reg(list : TAsmList;reg: tregister); override;
         procedure a_call_reg(list : TAsmList;reg: tregister); override;
 
 
@@ -183,71 +175,6 @@ const
         inherited done_register_allocators;
         inherited done_register_allocators;
       end;
       end;
 
 
-
-    procedure tcgppc.a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : tcgpara);
-
-      var
-        tmpref, ref: treference;
-        location: pcgparalocation;
-        sizeleft: aint;
-
-      begin
-        location := paraloc.location;
-        tmpref := r;
-        sizeleft := paraloc.intsize;
-        while assigned(location) do
-          begin
-            case location^.loc of
-              LOC_REGISTER,LOC_CREGISTER:
-                begin
-{$ifndef cpu64bitaddr}
-                  if (sizeleft <> 3) then
-                    begin
-                      a_load_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
-                    end
-                  else
-                    begin
-                      a_load_ref_reg(list,OS_16,OS_16,tmpref,location^.register);
-                      a_reg_alloc(list,NR_R0);
-                      inc(tmpref.offset,2);
-                      a_load_ref_reg(list,OS_8,OS_8,tmpref,newreg(R_INTREGISTER,RS_R0,R_SUBNONE));
-                      a_op_const_reg(list,OP_SHL,OS_INT,16,location^.register);
-                      list.concat(taicpu.op_reg_reg_const_const_const(A_RLWIMI,location^.register,newreg(R_INTREGISTER,RS_R0,R_SUBNONE),8,16,31-8));
-                      a_reg_dealloc(list,NR_R0);
-                      dec(tmpref.offset,2);
-                    end;
-{$else not cpu64bitaddr}
-{$error add 64 bit support for non power of 2 loads in a_load_ref_cgpara}
-{$endif not cpu64bitaddr}
-                end;
-              LOC_REFERENCE:
-                begin
-                   reference_reset_base(ref,location^.reference.index,location^.reference.offset,paraloc.alignment);
-                   g_concatcopy(list,tmpref,ref,sizeleft);
-                   if assigned(location^.next) then
-                     internalerror(2005010710);
-                end;
-              LOC_FPUREGISTER,LOC_CFPUREGISTER:
-                case location^.size of
-                   OS_F32, OS_F64:
-                     a_loadfpu_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
-                   else
-                     internalerror(2002072801);
-                end;
-              LOC_VOID:
-                begin
-                  // nothing to do
-                end;
-              else
-                internalerror(2002081103);
-            end;
-            inc(tmpref.offset,tcgsize2size[location^.size]);
-            dec(sizeleft,tcgsize2size[location^.size]);
-            location := location^.next;
-          end;
-      end;
-
-
     { calling a procedure by name }
     { calling a procedure by name }
     procedure tcgppc.a_call_name(list : TAsmList;const s : string; weak: boolean);
     procedure tcgppc.a_call_name(list : TAsmList;const s : string; weak: boolean);
       begin
       begin

+ 43 - 27
compiler/powerpc/cpupara.pas

@@ -40,7 +40,7 @@ unit cpupara;
           procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara:TCGPara);override;
           procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara:TCGPara);override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
-          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;override;
+          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;override;
           procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
           procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
          private
          private
           procedure init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword);
           procedure init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword);
@@ -250,65 +250,81 @@ unit cpupara;
       end;
       end;
 
 
 
 
-    function tppcparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;
+    function tppcparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;
       var
       var
+        paraloc : pcgparalocation;
         retcgsize  : tcgsize;
         retcgsize  : tcgsize;
       begin
       begin
-        { Constructors return self instead of a boolean }
-        if (p.proctypeoption=potype_constructor) then
-          retcgsize:=OS_ADDR
-        else
-          retcgsize:=def_cgsize(def);
-
-        location_reset(result,LOC_INVALID,OS_NO);
-        result.size:=retcgsize;
+        result.init;
+        result.alignment:=get_para_align(p.proccalloption);
         { void has no location }
         { void has no location }
         if is_void(def) then
         if is_void(def) then
           begin
           begin
-            result.loc:=LOC_VOID;
+            paraloc:=result.add_location;
+            result.size:=OS_NO;
+            result.intsize:=0;
+            paraloc^.size:=OS_NO;
+            paraloc^.loc:=LOC_VOID;
             exit;
             exit;
           end;
           end;
+        { Constructors return self instead of a boolean }
+        if (p.proctypeoption=potype_constructor) then
+          begin
+            retcgsize:=OS_ADDR;
+            result.intsize:=sizeof(pint);
+          end
+        else
+          begin
+            retcgsize:=def_cgsize(def);
+            result.intsize:=def.size;
+          end;
+        result.size:=retcgsize;
         { Return is passed as var parameter }
         { Return is passed as var parameter }
         if ret_in_param(def,p.proccalloption) then
         if ret_in_param(def,p.proccalloption) then
           begin
           begin
-            result.loc:=LOC_REFERENCE;
-            result.size:=retcgsize;
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_REFERENCE;
+            paraloc^.size:=retcgsize;
             exit;
             exit;
           end;
           end;
+
+        paraloc:=result.add_location;
         { Return in FPU register? }
         { Return in FPU register? }
         if def.typ=floatdef then
         if def.typ=floatdef then
           begin
           begin
-            result.loc:=LOC_FPUREGISTER;
-            result.register:=NR_FPU_RESULT_REG;
-            result.size:=retcgsize;
+            paraloc^.loc:=LOC_FPUREGISTER;
+            paraloc^.register:=NR_FPU_RESULT_REG;
+            paraloc^.size:=retcgsize;
           end
           end
         else
         else
          { Return in register }
          { Return in register }
           begin
           begin
-{$ifndef cpu64bitaddr}
             if retcgsize in [OS_64,OS_S64] then
             if retcgsize in [OS_64,OS_S64] then
              begin
              begin
                { low 32bits }
                { low 32bits }
-               result.loc:=LOC_REGISTER;
+               paraloc^.loc:=LOC_REGISTER;
                if side=callerside then
                if side=callerside then
-                 result.register64.reghi:=NR_FUNCTION_RESULT64_HIGH_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
                else
                else
-                 result.register64.reghi:=NR_FUNCTION_RETURN64_HIGH_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_HIGH_REG;
+               paraloc^.size:=OS_32;
                { high 32bits }
                { high 32bits }
+               paraloc:=result.add_location;
+               paraloc^.loc:=LOC_REGISTER;
                if side=callerside then
                if side=callerside then
-                 result.register64.reglo:=NR_FUNCTION_RESULT64_LOW_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
                else
                else
-                 result.register64.reglo:=NR_FUNCTION_RETURN64_LOW_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_LOW_REG;
+               paraloc^.size:=OS_32;
              end
              end
             else
             else
-{$endif cpu64bitaddr}
              begin
              begin
-               result.loc:=LOC_REGISTER;
-               result.size:=retcgsize;
+               paraloc^.loc:=LOC_REGISTER;
                if side=callerside then
                if side=callerside then
-                 result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
                else
                else
-                 result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
+               paraloc^.size:=retcgsize;
              end;
              end;
           end;
           end;
       end;
       end;

+ 1 - 0
compiler/powerpc64/cgcpu.pas

@@ -409,6 +409,7 @@ begin
   sizeleft := paraloc.intsize;
   sizeleft := paraloc.intsize;
   adjusttail := false;
   adjusttail := false;
   while assigned(location) do begin
   while assigned(location) do begin
+    paramanager.allocparaloc(list,location);
     case location^.loc of
     case location^.loc of
       LOC_REGISTER, LOC_CREGISTER:
       LOC_REGISTER, LOC_CREGISTER:
         begin
         begin

+ 47 - 32
compiler/powerpc64/cpupara.pas

@@ -45,7 +45,7 @@ type
     function create_paraloc_info(p: tabstractprocdef; side: tcallercallee): longint; override;
     function create_paraloc_info(p: tabstractprocdef; side: tcallercallee): longint; override;
     function create_varargs_paraloc_info(p: tabstractprocdef; varargspara:
     function create_varargs_paraloc_info(p: tabstractprocdef; varargspara:
       tvarargsparalist): longint; override;
       tvarargsparalist): longint; override;
-    function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;override;
+    function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;override;
     procedure create_funcretloc_info(p: tabstractprocdef; side: tcallercallee);
     procedure create_funcretloc_info(p: tabstractprocdef; side: tcallercallee);
 
 
   private
   private
@@ -210,47 +210,62 @@ begin
 end;
 end;
 
 
 function tppcparamanager.get_funcretloc(p : tabstractprocdef; side:
 function tppcparamanager.get_funcretloc(p : tabstractprocdef; side:
-  tcallercallee; def: tdef): tlocation;
+  tcallercallee; def: tdef): tcgpara;
 var
 var
-  retcgsize: tcgsize;
+  paraloc : pcgparalocation;
+  retcgsize  : tcgsize;
 begin
 begin
+  result.init;
+  result.alignment:=get_para_align(p.proccalloption);
+  { void has no location }
+  if is_void(def) then
+    begin
+      paraloc:=result.add_location;
+      result.size:=OS_NO;
+      result.intsize:=0;
+      paraloc^.size:=OS_NO;
+      paraloc^.loc:=LOC_VOID;
+      exit;
+    end;
   { Constructors return self instead of a boolean }
   { Constructors return self instead of a boolean }
-  if (p.proctypeoption = potype_constructor) then
-    retcgsize := OS_ADDR
+  if (p.proctypeoption=potype_constructor) then
+    begin
+      retcgsize:=OS_ADDR;
+      result.intsize:=sizeof(pint);
+    end
   else
   else
-    retcgsize := def_cgsize(def);
-
-  location_reset(result, LOC_INVALID, OS_NO);
-  result.size := retcgsize;
-  { void has no location }
-  if is_void(def) then begin
-    result.loc := LOC_VOID;
-    exit;
-  end;
+    begin
+      retcgsize:=def_cgsize(def);
+      result.intsize:=def.size;
+    end;
+  result.size:=retcgsize;
   { Return is passed as var parameter }
   { Return is passed as var parameter }
-  if ret_in_param(def, p.proccalloption) then
+  if ret_in_param(def,p.proccalloption) then
     begin
     begin
-      result.loc := LOC_REFERENCE;
-      result.size := retcgsize;
+      paraloc:=result.add_location;
+      paraloc^.loc:=LOC_REFERENCE;
+      paraloc^.size:=retcgsize;
       exit;
       exit;
     end;
     end;
+
+  paraloc:=result.add_location;
   { Return in FPU register? }
   { Return in FPU register? }
-  if def.typ = floatdef then begin
-    result.loc := LOC_FPUREGISTER;
-    result.register := NR_FPU_RESULT_REG;
-    result.size := retcgsize;
-  end else
-    { Return in register }
+  if def.typ=floatdef then
     begin
     begin
-      result.loc := LOC_REGISTER;
-      result.size := retcgsize;
-      if side = callerside then
-        result.register := newreg(R_INTREGISTER,
-          RS_FUNCTION_RESULT_REG, cgsize2subreg(R_INTREGISTER, retcgsize))
-      else
-        result.register := newreg(R_INTREGISTER,
-          RS_FUNCTION_RETURN_REG, cgsize2subreg(R_INTREGISTER, retcgsize));
-    end;
+      paraloc^.loc:=LOC_FPUREGISTER;
+      paraloc^.register:=NR_FPU_RESULT_REG;
+      paraloc^.size:=retcgsize;
+    end
+  else
+   { Return in register }
+    begin
+       paraloc^.loc:=LOC_REGISTER;
+       if side=callerside then
+         paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
+       else
+         paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
+       paraloc^.size:=retcgsize;
+     end;
 end;
 end;
 
 
 function tppcparamanager.create_paraloc_info(p: tabstractprocdef; side:
 function tppcparamanager.create_paraloc_info(p: tabstractprocdef; side:

+ 3 - 1
compiler/ppcgen/cgppc.pas

@@ -163,6 +163,7 @@ unit cgppc;
       ref: treference;
       ref: treference;
     begin
     begin
       paraloc.check_simple_location;
       paraloc.check_simple_location;
+      paramanager.allocparaloc(list,paraloc.location);
       case paraloc.location^.loc of
       case paraloc.location^.loc of
         LOC_REGISTER, LOC_CREGISTER:
         LOC_REGISTER, LOC_CREGISTER:
           a_load_const_reg(list, size, a, paraloc.location^.register);
           a_load_const_reg(list, size, a, paraloc.location^.register);
@@ -186,6 +187,7 @@ unit cgppc;
 
 
       begin
       begin
         paraloc.check_simple_location;
         paraloc.check_simple_location;
+        paramanager.allocparaloc(list,paraloc.location);
         case paraloc.location^.loc of
         case paraloc.location^.loc of
            LOC_REGISTER,LOC_CREGISTER:
            LOC_REGISTER,LOC_CREGISTER:
              a_loadaddr_ref_reg(list,r,paraloc.location^.register);
              a_loadaddr_ref_reg(list,r,paraloc.location^.register);
@@ -616,7 +618,7 @@ unit cgppc;
           paraloc1.init;
           paraloc1.init;
           paramanager.getintparaloc(pocall_cdecl,1,paraloc1);
           paramanager.getintparaloc(pocall_cdecl,1,paraloc1);
           a_load_reg_cgpara(list,OS_ADDR,NR_R0,paraloc1);
           a_load_reg_cgpara(list,OS_ADDR,NR_R0,paraloc1);
-          paramanager.freeparaloc(list,paraloc1);
+          paramanager.freecgpara(list,paraloc1);
           paraloc1.done;
           paraloc1.done;
           allocallcpuregisters(list);
           allocallcpuregisters(list);
           a_call_name(list,'mcount',false);
           a_call_name(list,'mcount',false);

+ 25 - 12
compiler/sparc/cgcpu.pas

@@ -327,6 +327,7 @@ implementation
         Ref:TReference;
         Ref:TReference;
       begin
       begin
         paraloc.check_simple_location;
         paraloc.check_simple_location;
+        paramanager.alloccgpara(list,paraloc);
         case paraloc.location^.loc of
         case paraloc.location^.loc of
           LOC_REGISTER,LOC_CREGISTER:
           LOC_REGISTER,LOC_CREGISTER:
             a_load_const_reg(list,size,a,paraloc.location^.register);
             a_load_const_reg(list,size,a,paraloc.location^.register);
@@ -353,11 +354,12 @@ implementation
         tmpreg:TRegister;
         tmpreg:TRegister;
       begin
       begin
         paraloc.check_simple_location;
         paraloc.check_simple_location;
+        paramanager.alloccgpara(list,paraloc);
         with paraloc.location^ do
         with paraloc.location^ do
           begin
           begin
             case loc of
             case loc of
               LOC_REGISTER,LOC_CREGISTER :
               LOC_REGISTER,LOC_CREGISTER :
-                a_load_ref_reg(list,sz,sz,r,Register);
+                a_load_ref_reg(list,sz,paraloc.location^.size,r,Register);
               LOC_REFERENCE:
               LOC_REFERENCE:
                 begin
                 begin
                   { Code conventions need the parameters being allocated in %o6+92 }
                   { Code conventions need the parameters being allocated in %o6+92 }
@@ -392,6 +394,7 @@ implementation
         TmpReg:TRegister;
         TmpReg:TRegister;
       begin
       begin
         paraloc.check_simple_location;
         paraloc.check_simple_location;
+        paramanager.alloccgpara(list,paraloc);
         with paraloc.location^ do
         with paraloc.location^ do
           begin
           begin
             case loc of
             case loc of
@@ -422,14 +425,17 @@ implementation
         hloc:=paraloc.location;
         hloc:=paraloc.location;
         while assigned(hloc) do
         while assigned(hloc) do
           begin
           begin
+            paramanager.allocparaloc(list,hloc);
             case hloc^.loc of
             case hloc^.loc of
-              LOC_REGISTER :
+              LOC_REGISTER,LOC_CREGISTER :
                 a_load_ref_reg(list,hloc^.size,hloc^.size,href,hloc^.register);
                 a_load_ref_reg(list,hloc^.size,hloc^.size,href,hloc^.register);
               LOC_REFERENCE :
               LOC_REFERENCE :
                 begin
                 begin
                   reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,paraloc.alignment);
                   reference_reset_base(href2,hloc^.reference.index,hloc^.reference.offset,paraloc.alignment);
                   a_load_ref_ref(list,hloc^.size,hloc^.size,href,href2);
                   a_load_ref_ref(list,hloc^.size,hloc^.size,href,href2);
                 end;
                 end;
+              LOC_FPUREGISTER,LOC_CFPUREGISTER :
+                a_loadfpu_ref_reg(list,hloc^.size,hloc^.size,href,hloc^.register);
               else
               else
                 internalerror(200408241);
                 internalerror(200408241);
            end;
            end;
@@ -443,10 +449,20 @@ implementation
       var
       var
         href : treference;
         href : treference;
       begin
       begin
-        tg.GetTemp(list,TCGSize2Size[size],TCGSize2Size[size],tt_normal,href);
-        a_loadfpu_reg_ref(list,size,size,r,href);
-        a_loadfpu_ref_cgpara(list,size,href,paraloc);
-        tg.Ungettemp(list,href);
+        { happens for function result loc }
+        if paraloc.location^.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
+          begin
+            paraloc.check_simple_location;
+            paramanager.allocparaloc(list,paraloc.location);
+            a_loadfpu_reg_reg(list,size,paraloc.location^.size,r,paraloc.location^.register);
+          end
+        else
+          begin
+            tg.GetTemp(list,TCGSize2Size[size],TCGSize2Size[size],tt_normal,href);
+            a_loadfpu_reg_ref(list,size,size,r,href);
+            a_loadfpu_ref_cgpara(list,size,href,paraloc);
+            tg.Ungettemp(list,href);
+          end;
       end;
       end;
 
 
 
 
@@ -1160,15 +1176,12 @@ implementation
         paramanager.getintparaloc(pocall_default,1,paraloc1);
         paramanager.getintparaloc(pocall_default,1,paraloc1);
         paramanager.getintparaloc(pocall_default,2,paraloc2);
         paramanager.getintparaloc(pocall_default,2,paraloc2);
         paramanager.getintparaloc(pocall_default,3,paraloc3);
         paramanager.getintparaloc(pocall_default,3,paraloc3);
-        paramanager.allocparaloc(list,paraloc3);
         a_load_const_cgpara(list,OS_INT,len,paraloc3);
         a_load_const_cgpara(list,OS_INT,len,paraloc3);
-        paramanager.allocparaloc(list,paraloc2);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
-        paramanager.allocparaloc(list,paraloc2);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
-        paramanager.freeparaloc(list,paraloc3);
-        paramanager.freeparaloc(list,paraloc2);
-        paramanager.freeparaloc(list,paraloc1);
+        paramanager.freecgpara(list,paraloc3);
+        paramanager.freecgpara(list,paraloc2);
+        paramanager.freecgpara(list,paraloc1);
         alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
         alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
         alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
         alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
         a_call_name(list,'FPC_MOVE',false);
         a_call_name(list,'FPC_MOVE',false);

+ 44 - 26
compiler/sparc/cpupara.pas

@@ -41,7 +41,7 @@ interface
         procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara : TCGPara);override;
         procedure getintparaloc(calloption : tproccalloption; nr : longint;var cgpara : TCGPara);override;
         function  create_paraloc_info(p : TAbstractProcDef; side: tcallercallee):longint;override;
         function  create_paraloc_info(p : TAbstractProcDef; side: tcallercallee):longint;override;
         function  create_varargs_paraloc_info(p : TAbstractProcDef; varargspara:tvarargsparalist):longint;override;
         function  create_varargs_paraloc_info(p : TAbstractProcDef; varargspara:tvarargsparalist):longint;override;
-        function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;override;
+        function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;override;
       private
       private
         procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
         procedure create_funcretloc_info(p : tabstractprocdef; side: tcallercallee);
         procedure create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
         procedure create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
@@ -145,39 +145,53 @@ implementation
       end;
       end;
 
 
 
 
-    function tsparcparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;
+    function tsparcparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;
       var
       var
+        paraloc : pcgparalocation;
         retcgsize  : tcgsize;
         retcgsize  : tcgsize;
       begin
       begin
-        { Constructors return self instead of a boolean }
-        if (p.proctypeoption=potype_constructor) then
-          retcgsize:=OS_ADDR
-        else
-          retcgsize:=def_cgsize(def);
-
-        location_reset(result,LOC_INVALID,OS_NO);
-        result.size:=retcgsize;
+        result.init;
+        result.alignment:=get_para_align(p.proccalloption);
         { void has no location }
         { void has no location }
         if is_void(def) then
         if is_void(def) then
           begin
           begin
-            result.loc:=LOC_VOID;
+            paraloc:=result.add_location;
+            result.size:=OS_NO;
+            result.intsize:=0;
+            paraloc^.size:=OS_NO;
+            paraloc^.loc:=LOC_VOID;
             exit;
             exit;
           end;
           end;
+        { Constructors return self instead of a boolean }
+        if (p.proctypeoption=potype_constructor) then
+          begin
+            retcgsize:=OS_ADDR;
+            result.intsize:=sizeof(pint);
+          end
+        else
+          begin
+            retcgsize:=def_cgsize(def);
+            result.intsize:=def.size;
+          end;
+        result.size:=retcgsize;
         { Return is passed as var parameter }
         { Return is passed as var parameter }
         if ret_in_param(def,p.proccalloption) then
         if ret_in_param(def,p.proccalloption) then
           begin
           begin
-            result.loc:=LOC_REFERENCE;
-            result.size:=retcgsize;
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_REFERENCE;
+            paraloc^.size:=retcgsize;
             exit;
             exit;
           end;
           end;
+
+        paraloc:=result.add_location;
         { Return in FPU register? }
         { Return in FPU register? }
         if def.typ=floatdef then
         if def.typ=floatdef then
           begin
           begin
-            result.loc:=LOC_FPUREGISTER;
-            result.register:=NR_FPU_RESULT_REG;
+            paraloc^.loc:=LOC_FPUREGISTER;
+            paraloc^.register:=NR_FPU_RESULT_REG;
             if retcgsize=OS_F64 then
             if retcgsize=OS_F64 then
-              setsubreg(result.register,R_SUBFD);
-            result.size:=retcgsize;
+              setsubreg(paraloc^.register,R_SUBFD);
+            paraloc^.size:=retcgsize;
           end
           end
         else
         else
          { Return in register }
          { Return in register }
@@ -185,27 +199,31 @@ implementation
 {$ifndef cpu64bitaddr}
 {$ifndef cpu64bitaddr}
             if retcgsize in [OS_64,OS_S64] then
             if retcgsize in [OS_64,OS_S64] then
              begin
              begin
-               result.loc:=LOC_REGISTER;
+               paraloc^.loc:=LOC_REGISTER;
                { high }
                { high }
                if (side=callerside) or (po_inline in p.procoptions) then
                if (side=callerside) or (po_inline in p.procoptions) then
-                 result.register64.reghi:=NR_FUNCTION_RESULT64_HIGH_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
                else
                else
-                 result.register64.reghi:=NR_FUNCTION_RETURN64_HIGH_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_HIGH_REG;
+               paraloc^.size:=OS_32;
                { low }
                { low }
+               paraloc:=result.add_location;
+               paraloc^.loc:=LOC_REGISTER;
                if (side=callerside) or (po_inline in p.procoptions) then
                if (side=callerside) or (po_inline in p.procoptions) then
-                 result.register64.reglo:=NR_FUNCTION_RESULT64_LOW_REG
+                 paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
                else
                else
-                 result.register64.reglo:=NR_FUNCTION_RETURN64_LOW_REG;
+                 paraloc^.register:=NR_FUNCTION_RETURN64_LOW_REG;
+               paraloc^.size:=OS_32;
              end
              end
             else
             else
 {$endif not cpu64bitaddr}
 {$endif not cpu64bitaddr}
              begin
              begin
-               result.loc:=LOC_REGISTER;
-               result.size:=retcgsize;
+               paraloc^.loc:=LOC_REGISTER;
+               paraloc^.size:=retcgsize;
                if (side=callerside) then
                if (side=callerside) then
-                 result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
                else
                else
-                 result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
+                 paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
              end;
              end;
           end;
           end;
       end;
       end;

+ 9 - 17
compiler/symdef.pas

@@ -413,7 +413,7 @@ interface
 {$ifdef m68k}
 {$ifdef m68k}
           exp_funcretloc : tregister;   { explicit funcretloc for AmigaOS }
           exp_funcretloc : tregister;   { explicit funcretloc for AmigaOS }
 {$endif}
 {$endif}
-          funcretloc : array[tcallercallee] of TLocation;
+          funcretloc : array[tcallercallee] of TCGPara;
           has_paraloc_info : boolean; { paraloc info is available }
           has_paraloc_info : boolean; { paraloc info is available }
           constructor create(dt:tdeftyp;level:byte);
           constructor create(dt:tdeftyp;level:byte);
           constructor ppuload(dt:tdeftyp;ppufile:tcompilerppufile);
           constructor ppuload(dt:tdeftyp;ppufile:tcompilerppufile);
@@ -2755,8 +2755,8 @@ implementation
          savesize:=sizeof(pint);
          savesize:=sizeof(pint);
          requiredargarea:=0;
          requiredargarea:=0;
          has_paraloc_info:=false;
          has_paraloc_info:=false;
-         location_reset(funcretloc[callerside],LOC_INVALID,OS_NO);
-         location_reset(funcretloc[calleeside],LOC_INVALID,OS_NO);
+         funcretloc[callerside].init;
+         funcretloc[calleeside].init;
       end;
       end;
 
 
 
 
@@ -2784,6 +2784,8 @@ implementation
             memprocparast.stop;
             memprocparast.stop;
 {$endif MEMDEBUG}
 {$endif MEMDEBUG}
           end;
           end;
+         funcretloc[callerside].done;
+         funcretloc[calleeside].done;
          inherited destroy;
          inherited destroy;
       end;
       end;
 
 
@@ -2870,15 +2872,9 @@ implementation
          proccalloption:=tproccalloption(ppufile.getbyte);
          proccalloption:=tproccalloption(ppufile.getbyte);
          ppufile.getnormalset(procoptions);
          ppufile.getnormalset(procoptions);
 
 
-         location_reset(funcretloc[callerside],LOC_INVALID,OS_NO);
-         location_reset(funcretloc[calleeside],LOC_INVALID,OS_NO);
+         funcretloc[callerside].init;
          if po_explicitparaloc in procoptions then
          if po_explicitparaloc in procoptions then
-           begin
-             b:=ppufile.getbyte;
-             if b<>sizeof(funcretloc[callerside]) then
-               internalerror(200411155);
-             ppufile.getdata(funcretloc[callerside],sizeof(funcretloc[callerside]));
-           end;
+           funcretloc[callerside].ppuload(ppufile);
 
 
          savesize:=sizeof(pint);
          savesize:=sizeof(pint);
          has_paraloc_info:=(po_explicitparaloc in procoptions);
          has_paraloc_info:=(po_explicitparaloc in procoptions);
@@ -2903,11 +2899,7 @@ implementation
          ppufile.do_interface_crc:=oldintfcrc;
          ppufile.do_interface_crc:=oldintfcrc;
 
 
          if (po_explicitparaloc in procoptions) then
          if (po_explicitparaloc in procoptions) then
-           begin
-             { Make a 'valid' funcretloc for procedures }
-             ppufile.putbyte(sizeof(funcretloc[callerside]));
-             ppufile.putdata(funcretloc[callerside],sizeof(funcretloc[callerside]));
-           end;
+           funcretloc[callerside].ppuwrite(ppufile);
       end;
       end;
 
 
 
 
@@ -3758,7 +3750,7 @@ implementation
         tprocvardef(result).maxparacount:=maxparacount;
         tprocvardef(result).maxparacount:=maxparacount;
         tprocvardef(result).minparacount:=minparacount;
         tprocvardef(result).minparacount:=minparacount;
         for i:=low(tcallercallee) to high(tcallercallee) do
         for i:=low(tcallercallee) to high(tcallercallee) do
-          location_copy(tprocvardef(result).funcretloc[i],funcretloc[i]);
+          tprocvardef(result).funcretloc[i]:=funcretloc[i].getcopy;
         tprocvardef(result).has_paraloc_info:=has_paraloc_info;
         tprocvardef(result).has_paraloc_info:=has_paraloc_info;
 {$ifdef m68k}
 {$ifdef m68k}
         tprocvardef(result).exp_funcretloc:=exp_funcretloc;
         tprocvardef(result).exp_funcretloc:=exp_funcretloc;

+ 0 - 48
compiler/x86_64/cgcpu.pas

@@ -40,8 +40,6 @@ unit cgcpu;
         procedure g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);override;
         procedure g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);override;
         procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
         procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
 
 
-        procedure a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);override;
-
         procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
         procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
         procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister;shuffle : pmmshuffle); override;
         procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister;shuffle : pmmshuffle); override;
       end;
       end;
@@ -108,52 +106,6 @@ unit cgcpu;
       end;
       end;
 
 
 
 
-    procedure tcgx86_64.a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);
-      var
-        tmpref, ref: treference;
-        location: pcgparalocation;
-        sizeleft: aint;
-        sourcesize: tcgsize;
-      begin
-        location := paraloc.location;
-        tmpref := r;
-        { make sure we handle passing a 32 bit value in memory to a }
-        { 64 bit register location etc. correctly                   }
-        if (size<>OS_NO) and
-           (tcgsize2size[size]<paraloc.intsize) then
-          begin
-            paraloc.check_simple_location;
-            if not(location^.loc in [LOC_REGISTER,LOC_CREGISTER]) then
-              internalerror(2008031801);
-            sizeleft:=tcgsize2size[size]
-          end
-        else
-          sizeleft:=paraloc.intsize;
-        while assigned(location) do
-          begin
-            case location^.loc of
-              LOC_REGISTER,LOC_CREGISTER:
-                begin
-                  sourcesize:=int_cgsize(sizeleft);
-                  if (sourcesize=OS_NO) then
-                    sourcesize:=location^.size;
-                  a_load_ref_reg(list,sourcesize,location^.size,tmpref,location^.register);
-                end;
-              LOC_REFERENCE:
-                begin
-                  reference_reset_base(ref,location^.reference.index,location^.reference.offset,paraloc.alignment);
-                  g_concatcopy(list,tmpref,ref,sizeleft);
-                end;
-              else
-                internalerror(2002081103);
-            end;
-            inc(tmpref.offset,tcgsize2size[location^.size]);
-            dec(sizeleft,tcgsize2size[location^.size]);
-            location := location^.next;
-          end;
-      end;
-
-
     procedure tcgx86_64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
     procedure tcgx86_64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
       var
       var
         stacksize : longint;
         stacksize : longint;

+ 62 - 32
compiler/x86_64/cpupara.pas

@@ -48,7 +48,7 @@ unit cpupara;
           function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
-          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;override;
+          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;override;
        end;
        end;
 
 
   implementation
   implementation
@@ -404,47 +404,71 @@ unit cpupara;
       end;
       end;
 
 
 
 
-    function tx86_64paramanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tlocation;
+    function tx86_64paramanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; def: tdef): tcgpara;
       var
       var
         retcgsize : tcgsize;
         retcgsize : tcgsize;
+        paraloc : pcgparalocation;
       begin
       begin
-        { Constructors return self instead of a boolean }
-        if (p.proctypeoption=potype_constructor) then
-          retcgsize:=OS_ADDR
-        else
-          retcgsize:=def_cgsize(def);
-        location_reset(result,LOC_INVALID,OS_NO);
+        result.init;
+        result.alignment:=get_para_align(p.proccalloption);
         { void has no location }
         { void has no location }
         if is_void(def) then
         if is_void(def) then
           begin
           begin
-            location_reset(result,LOC_VOID,OS_NO);
+            paraloc:=result.add_location;
+            result.size:=OS_NO;
+            result.intsize:=0;
+            paraloc^.size:=OS_NO;
+            paraloc^.loc:=LOC_VOID;
             exit;
             exit;
           end;
           end;
+        { Constructors return self instead of a boolean }
+        if (p.proctypeoption=potype_constructor) then
+          begin
+            retcgsize:=OS_ADDR;
+            result.intsize:=sizeof(pint);
+          end
+        else
+          begin
+            retcgsize:=def_cgsize(def);
+            result.intsize:=def.size;
+          end;
+        result.size:=retcgsize;
         { Return is passed as var parameter }
         { Return is passed as var parameter }
         if ret_in_param(def,p.proccalloption) then
         if ret_in_param(def,p.proccalloption) then
           begin
           begin
-            result.loc:=LOC_REFERENCE;
-            result.size:=retcgsize;
+            paraloc:=result.add_location;
+            paraloc^.loc:=LOC_REFERENCE;
+            paraloc^.size:=retcgsize;
             exit;
             exit;
           end;
           end;
+
+        paraloc:=result.add_location;
         { Return in FPU register? }
         { Return in FPU register? }
         if def.typ=floatdef then
         if def.typ=floatdef then
           begin
           begin
             case tfloatdef(def).floattype of
             case tfloatdef(def).floattype of
-              s32real,s64real:
+              s32real:
+                begin
+                  paraloc^.loc:=LOC_MMREGISTER;
+                  paraloc^.register:=newreg(R_MMREGISTER,RS_MM_RESULT_REG,R_SUBMMS);
+                  paraloc^.size:=retcgsize;
+                end;
+              s64real:
                 begin
                 begin
-                  result.loc:=LOC_MMREGISTER;
-                  result.register:=NR_MM_RESULT_REG;
-                  result.size:=retcgsize;
+                  paraloc^.loc:=LOC_MMREGISTER;
+                  paraloc^.register:=newreg(R_MMREGISTER,RS_MM_RESULT_REG,R_SUBMMD);
+                  paraloc^.size:=retcgsize;
                 end;
                 end;
+              { the first two only exist on targets with an x87, on others
+                they are replace by int64 }
               s64currency,
               s64currency,
               s64comp,
               s64comp,
               s80real,
               s80real,
               sc80real:
               sc80real:
                 begin
                 begin
-                  result.loc:=LOC_FPUREGISTER;
-                  result.register:=NR_FPU_RESULT_REG;
-                  result.size:=retcgsize;
+                  paraloc^.loc:=LOC_FPUREGISTER;
+                  paraloc^.register:=NR_FPU_RESULT_REG;
+                  paraloc^.size:=retcgsize;
                 end;
                 end;
               else
               else
                 internalerror(200405034);
                 internalerror(200405034);
@@ -453,41 +477,47 @@ unit cpupara;
         else
         else
          { Return in register }
          { Return in register }
           begin
           begin
-            result.loc:=LOC_REGISTER;
+            paraloc^.loc:=LOC_REGISTER;
             if retcgsize=OS_NO then
             if retcgsize=OS_NO then
               begin
               begin
                 case def.size of
                 case def.size of
                   0..4:
                   0..4:
                     begin
                     begin
-                      result.size:=OS_32;
-                      result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,R_SUBD);
+                      paraloc^.size:=OS_32;
+                      paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,R_SUBD);
                     end;
                     end;
                   5..8:
                   5..8:
                     begin
                     begin
-                      result.size:=OS_64;
-                      result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,R_SUBQ);
+                      paraloc^.size:=OS_64;
+                      paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,R_SUBQ);
                     end;
                     end;
                   9..16:
                   9..16:
                     begin
                     begin
-                      result.size:=OS_128;
-                      result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,R_SUBWHOLE);
-                      result.registerhi:=newreg(R_INTREGISTER,RS_RDX,R_SUBWHOLE);
+                      paraloc^.size:=OS_64;
+                      paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,R_SUBWHOLE);
+                      paraloc:=result.add_location;
+                      paraloc^.size:=OS_64;
+                      paraloc^.loc:=LOC_REGISTER;
+                      paraloc^.register:=newreg(R_INTREGISTER,RS_RDX,R_SUBWHOLE);
                     end;
                     end;
                 end;
                 end;
               end
               end
             else if retcgsize in [OS_128,OS_S128] then
             else if retcgsize in [OS_128,OS_S128] then
               begin
               begin
-                result.size:=retcgsize;
-                result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,R_SUBWHOLE);
-                result.registerhi:=newreg(R_INTREGISTER,RS_RDX,R_SUBWHOLE);
+                paraloc^.size:=OS_64;
+                paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,R_SUBWHOLE);
+                paraloc:=result.add_location;
+                paraloc^.size:=OS_64;
+                paraloc^.loc:=LOC_REGISTER;
+                paraloc^.register:=newreg(R_INTREGISTER,RS_RDX,R_SUBWHOLE);
               end
               end
             else
             else
               begin
               begin
-                result.size:=retcgsize;
+                paraloc^.size:=retcgsize;
                 if side=callerside then
                 if side=callerside then
-                  result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
+                  paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
                 else
                 else
-                  result.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
+                  paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RETURN_REG,cgsize2subreg(R_INTREGISTER,retcgsize));
               end;
               end;
           end;
           end;
       end;
       end;