浏览代码

* optimized releasing of registers

peter 22 年之前
父节点
当前提交
540691bf02
共有 10 个文件被更改,包括 649 次插入515 次删除
  1. 5 2
      compiler/i386/n386mat.pas
  2. 5 2
      compiler/i386/n386mem.pas
  3. 244 48
      compiler/ncgcal.pas
  4. 129 2
      compiler/ncgld.pas
  5. 10 11
      compiler/ncgmem.pas
  6. 194 423
      compiler/ncgutil.pas
  7. 35 13
      compiler/psub.pas
  8. 13 6
      compiler/rgobj.pas
  9. 7 4
      compiler/x86/aasmcpu.pas
  10. 7 4
      compiler/x86/cgx86.pas

+ 5 - 2
compiler/i386/n386mat.pas

@@ -510,8 +510,8 @@ implementation
              LOC_CREFERENCE :
                begin
                  location_force_reg(exprasmlist,left.location,def_cgsize(resulttype.def),true);
-                 emit_reg_reg(A_TEST,opsize,left.location.register,left.location.register);
                  location_release(exprasmlist,left.location);
+                 emit_reg_reg(A_TEST,opsize,left.location.register,left.location.register);
                  location_reset(location,LOC_FLAGS,OS_NO);
                  location.resflags:=F_E;
                end;
@@ -569,7 +569,10 @@ begin
 end.
 {
   $Log$
-  Revision 1.61  2003-09-28 21:48:20  peter
+  Revision 1.62  2003-09-29 20:58:56  peter
+    * optimized releasing of registers
+
+  Revision 1.61  2003/09/28 21:48:20  peter
     * fix register leaks
 
   Revision 1.60  2003/09/03 15:55:01  peter

+ 5 - 2
compiler/i386/n386mem.pas

@@ -107,8 +107,8 @@ implementation
           end
          else
           begin
-            cg.a_loadaddr_ref_reg(exprasmlist,location.reference,location.reference.index);
             rg.ungetregisterint(exprasmlist,location.reference.base);
+            cg.a_loadaddr_ref_reg(exprasmlist,location.reference,location.reference.index);
             reference_reset_base(location.reference,location.reference.index,0);
           end;
          { insert the new index register and scalefactor or
@@ -142,7 +142,10 @@ begin
 end.
 {
   $Log$
-  Revision 1.54  2003-09-03 15:55:01  peter
+  Revision 1.55  2003-09-29 20:58:56  peter
+    * optimized releasing of registers
+
+  Revision 1.54  2003/09/03 15:55:01  peter
     * NEWRA branch merged
 
   Revision 1.53.2.2  2003/08/31 15:46:26  peter

+ 244 - 48
compiler/ncgcal.pas

@@ -35,6 +35,9 @@ interface
        tcgcallparanode = class(tcallparanode)
        private
           tempparaloc : tparalocation;
+          procedure allocate_tempparaloc;
+          procedure push_addr_para;
+          procedure push_value_para(calloption:tproccalloption;alignment:byte);
        public
           procedure secondcallparan(calloption:tproccalloption;alignment:byte);override;
        end;
@@ -97,12 +100,231 @@ implementation
                              TCGCALLPARANODE
 *****************************************************************************}
 
+    procedure tcgcallparanode.allocate_tempparaloc;
+      begin
+         { Allocate (temporary) paralocation }
+         tempparaloc:=paraitem.paraloc[callerside];
+         if tempparaloc.loc=LOC_REGISTER then
+           paramanager.alloctempregs(exprasmlist,tempparaloc)
+         else
+           paramanager.allocparaloc(exprasmlist,tempparaloc);
+      end;
+
+
+    procedure tcgcallparanode.push_addr_para;
+      begin
+        if not(left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
+          internalerror(200304235);
+        location_release(exprasmlist,left.location);
+        allocate_tempparaloc;
+        cg.a_paramaddr_ref(exprasmlist,left.location.reference,tempparaloc);
+      end;
+
+
+    procedure tcgcallparanode.push_value_para(calloption:tproccalloption;alignment:byte);
+      var
+        href : treference;
+{$ifdef i386}
+        tempreference : treference;
+        sizetopush : longint;
+{$endif i386}
+        size : longint;
+        cgsize : tcgsize;
+      begin
+        { we've nothing to push when the size of the parameter is 0 }
+        if left.resulttype.def.size=0 then
+          exit;
+
+        { Move flags and jump in register to make it less complex }
+        if left.location.loc in [LOC_FLAGS,LOC_JUMP] then
+          location_force_reg(exprasmlist,left.location,def_cgsize(left.resulttype.def),false);
+
+        { Handle Floating point types differently }
+        if left.resulttype.def.deftype=floatdef then
+         begin
+(*
+           if calloption=pocall_inline then
+             begin
+               size:=align(tfloatdef(p.resulttype.def).size,alignment);
+               inc(pushedparasize,size);
+               reference_reset_base(href,current_procinfo.framepointer,para_offset-pushedparasize);
+               case left.location.loc of
+                 LOC_FPUREGISTER,
+                 LOC_CFPUREGISTER:
+                   cg.a_loadfpu_reg_ref(exprasmlist,def_cgsize(p.resulttype.def),left.location.register,href);
+                 LOC_REFERENCE,
+                 LOC_CREFERENCE :
+                   cg.g_concatcopy(exprasmlist,left.location.reference,href,size,false,false);
+                 else
+                   internalerror(200204243);
+               end;
+             end
+           else
+*)
+             begin
+               location_release(exprasmlist,left.location);
+               allocate_tempparaloc;
+{$ifdef i386}
+               case left.location.loc of
+                 LOC_FPUREGISTER,
+                 LOC_CFPUREGISTER:
+                   begin
+                      if tempparaloc.loc<>LOC_REFERENCE then
+                        internalerror(200309291);
+                      size:=align(tfloatdef(left.resulttype.def).size,alignment);
+                      inc(pushedparasize,size);
+                      cg.g_stackpointer_alloc(exprasmlist,size);
+                      reference_reset_base(href,NR_STACK_POINTER_REG,0);
+                      cg.a_loadfpu_reg_ref(exprasmlist,def_cgsize(left.resulttype.def),left.location.register,href);
+                   end;
+                 LOC_REFERENCE,
+                 LOC_CREFERENCE :
+                   begin
+                     sizetopush:=align(left.resulttype.def.size,alignment);
+                     tempreference:=left.location.reference;
+                     inc(tempreference.offset,sizetopush);
+                     while (sizetopush>0) do
+                      begin
+                        if sizetopush>=4 then
+                         begin
+                           cgsize:=OS_32;
+                           inc(pushedparasize,4);
+                           dec(tempreference.offset,4);
+                           dec(sizetopush,4);
+                         end
+                        else
+                         begin
+                           cgsize:=OS_16;
+                           inc(pushedparasize,2);
+                           dec(tempreference.offset,2);
+                           dec(sizetopush,2);
+                         end;
+                        cg.a_param_ref(exprasmlist,cgsize,tempreference,tempparaloc);
+                      end;
+                   end;
+                 else
+                   internalerror(200204243);
+               end;
+{$else i386}
+               case left.location.loc of
+                 LOC_FPUREGISTER,
+                 LOC_CFPUREGISTER:
+                   cg.a_paramfpu_reg(exprasmlist,def_cgsize(p.resulttype.def),left.location.register,tempparaloc);
+                 LOC_REFERENCE,
+                 LOC_CREFERENCE :
+                   cg.a_paramfpu_ref(exprasmlist,def_cgsize(p.resulttype.def),left.location.reference,tempparaloc)
+                 else
+                   internalerror(200204243);
+               end;
+{$endif i386}
+             end;
+         end
+        else
+         begin
+           { copy the value on the stack or use normal parameter push? }
+           if paramanager.copy_value_on_stack(paraitem.paratyp,left.resulttype.def,calloption) then
+            begin
+              location_release(exprasmlist,left.location);
+              allocate_tempparaloc;
+{$ifdef i386}
+              if tempparaloc.loc<>LOC_REFERENCE then
+                internalerror(200309292);
+              if not (left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
+                internalerror(200204241);
+              { push on stack }
+              size:=align(left.resulttype.def.size,alignment);
+              inc(pushedparasize,size);
+              cg.g_stackpointer_alloc(exprasmlist,size);
+              reference_reset_base(href,NR_STACK_POINTER_REG,0);
+              cg.g_concatcopy(exprasmlist,left.location.reference,href,size,false,false);
+{$else i386}
+              cg.a_param_copy_ref(exprasmlist,left.resulttype.def.size,left.location.reference,tempparaloc);
+{$endif i386}
+            end
+           else
+            begin
+              case left.location.loc of
+                LOC_CONSTANT,
+                LOC_REGISTER,
+                LOC_CREGISTER,
+                LOC_REFERENCE,
+                LOC_CREFERENCE :
+                  begin
+                    cgsize:=def_cgsize(left.resulttype.def);
+                    if cgsize in [OS_64,OS_S64] then
+                     begin
+                       inc(pushedparasize,8);
+(*
+                       if calloption=pocall_inline then
+                        begin
+                          reference_reset_base(href,current_procinfo.framepointer,para_offset-pushedparasize);
+                          if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
+                            begin
+                              size:=align(p.resulttype.def.size,alignment);
+                              cg.g_concatcopy(exprasmlist,left.location.reference,href,size,false,false)
+                            end
+                          else
+                            cg64.a_load64_loc_ref(exprasmlist,left.location,href);
+                        end
+                       else
+*)
+                        allocate_tempparaloc;
+                        cg64.a_param64_loc(exprasmlist,left.location,tempparaloc);
+                        location_release(exprasmlist,left.location);
+                     end
+                    else
+                     begin
+                       location_release(exprasmlist,left.location);
+                       allocate_tempparaloc;
+                       inc(pushedparasize,alignment);
+(*
+                       if calloption=pocall_inline then
+                        begin
+                          reference_reset_base(href,current_procinfo.framepointer,para_offset-pushedparasize);
+                          if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
+                            begin
+                              size:=align(p.resulttype.def.size,alignment);
+                              cg.g_concatcopy(exprasmlist,left.location.reference,href,size,false,false)
+                            end
+                          else
+                            cg.a_load_loc_ref(exprasmlist,left.location.size,left.location,href);
+                        end
+                       else
+*)
+                        cg.a_param_loc(exprasmlist,left.location,tempparaloc);
+                     end;
+                  end;
+{$ifdef SUPPORT_MMX}
+                LOC_MMXREGISTER,
+                LOC_CMMXREGISTER:
+                  begin
+                     location_release(exprasmlist,left.location);
+                     allocate_tempparaloc;
+                     inc(pushedparasize,8);
+(*
+                     if calloption=pocall_inline then
+                       begin
+                          reference_reset_base(href,current_procinfo.framepointer,para_offset-pushedparasize);
+                          cg.a_loadmm_reg_ref(exprasmlist,left.location.register,href);
+                       end
+                     else
+*)
+                      cg.a_parammm_reg(exprasmlist,left.location.register);
+                  end;
+{$endif SUPPORT_MMX}
+                else
+                  internalerror(200204241);
+              end;
+           end;
+         end;
+      end;
+
+
+
     procedure tcgcallparanode.secondcallparan(calloption:tproccalloption;alignment:byte);
       var
          otlabel,
          oflabel : tasmlabel;
-         tmpreg  : tregister;
-         href    : treference;
       begin
          if not(assigned(paraitem.paratype.def) or
                 assigned(paraitem.parasym)) then
@@ -119,24 +341,13 @@ implementation
          objectlibrary.getlabel(falselabel);
          secondpass(left);
 
-         { Allocate (temporary) paralocation }
-         tempparaloc:=paraitem.paraloc[callerside];
-         if tempparaloc.loc=LOC_REGISTER then
-           paramanager.alloctempregs(exprasmlist,tempparaloc)
-         else
-           paramanager.allocparaloc(exprasmlist,tempparaloc);
-
          { handle varargs first, because defcoll is not valid }
          if (nf_varargs_para in flags) then
            begin
              if paramanager.push_addr_param(vs_value,left.resulttype.def,calloption) then
-               begin
-                 inc(pushedparasize,POINTER_SIZE);
-                 location_release(exprasmlist,left.location);
-                 cg.a_paramaddr_ref(exprasmlist,left.location.reference,tempparaloc);
-               end
+               push_addr_para
              else
-               push_value_para(exprasmlist,left,vs_value,calloption,alignment,tempparaloc);
+               push_value_para(calloption,alignment);
            end
          { hidden parameters }
          else if paraitem.is_hidden then
@@ -146,18 +357,9 @@ implementation
              if (vo_is_funcret in tvarsym(paraitem.parasym).varoptions) or
                 (not(left.resulttype.def.deftype in [pointerdef,classrefdef]) and
                  paramanager.push_addr_param(paraitem.paratyp,paraitem.paratype.def,calloption)) then
-               begin
-                  if not(left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
-                    internalerror(200305071);
-
-                  inc(pushedparasize,POINTER_SIZE);
-                  location_release(exprasmlist,left.location);
-                  cg.a_paramaddr_ref(exprasmlist,left.location.reference,tempparaloc);
-               end
+               push_addr_para
              else
-               begin
-                 push_value_para(exprasmlist,left,paraitem.paratyp,calloption,alignment,tempparaloc);
-               end;
+               push_value_para(calloption,alignment);
            end
          { filter array of const c styled args }
          else if is_array_of_const(left.resulttype.def) and (nf_cargs in left.flags) then
@@ -174,21 +376,16 @@ implementation
                 location_force_mem(exprasmlist,left.location);
 
               { allow @var }
-              inc(pushedparasize,POINTER_SIZE);
               if (left.nodetype=addrn) and
                  (not(nf_procvarload in left.flags)) then
                 begin
+                  inc(pushedparasize,POINTER_SIZE);
                   location_release(exprasmlist,left.location);
+                  allocate_tempparaloc;
                   cg.a_param_loc(exprasmlist,left.location,tempparaloc);
                 end
               else
-                begin
-                   if not(left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
-                     internalerror(200304235);
-
-                   location_release(exprasmlist,left.location);
-                   cg.a_paramaddr_ref(exprasmlist,left.location.reference,tempparaloc);
-                end;
+                push_addr_para;
            end
          { Normal parameter }
          else
@@ -211,18 +408,13 @@ implementation
                             is_self_node(left)) then
                       internalerror(200106041);
                    end;
-                  { Move to memory }
+                  { Force to be in memory }
                   if not(left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
                     location_force_mem(exprasmlist,left.location);
-                  { Push address }
-                  inc(pushedparasize,POINTER_SIZE);
-                  location_release(exprasmlist,left.location);
-                  cg.a_paramaddr_ref(exprasmlist,left.location.reference,tempparaloc);
+                  push_addr_para;
                end
              else
-               begin
-                 push_value_para(exprasmlist,left,paraitem.paratyp,calloption,alignment,tempparaloc);
-               end;
+               push_value_para(calloption,alignment);
            end;
          truelabel:=otlabel;
          falselabel:=oflabel;
@@ -429,7 +621,7 @@ implementation
          { adress returned from an I/O-error }
          iolabel : tasmlabel;
          { help reference pointer }
-         href,helpref : treference;
+         href : treference;
          para_alignment,
          pop_size : longint;
          pvreg,
@@ -553,9 +745,6 @@ implementation
          oldpushedparasize:=pushedparasize;
          pushedparasize:=0;
 
-         { Align stack if required }
-         pop_size:=align_parasize;
-
          { Process parameters, register parameters will be loaded
            in imaginary registers. The actual load to the correct
            register is done just before the call }
@@ -565,6 +754,9 @@ implementation
            tcallparanode(left).secondcallparan(procdefinition.proccalloption,procdefinition.paraalign);
          aktcallnode:=oldaktcallnode;
 
+         { Align stack if required }
+         pop_size:=align_parasize;
+
          { procedure variable or normal function call ? }
          if (right=nil) then
            begin
@@ -642,11 +834,12 @@ implementation
            { now procedure variable case }
            begin
               secondpass(right);
+
               location_release(exprasmlist,right.location);
               pvreg:=rg.getabtregisterint(exprasmlist,OS_ADDR);
               rg.ungetregisterint(exprasmlist,pvreg);
               { Only load OS_ADDR from the reference }
-              if right.location.loc in  [LOC_REFERENCE,LOC_CREFERENCE] then
+              if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
                 cg.a_load_ref_reg(exprasmlist,OS_ADDR,OS_ADDR,right.location.reference,pvreg)
               else
                 cg.a_load_loc_reg(exprasmlist,OS_ADDR,right.location,pvreg);
@@ -1111,7 +1304,10 @@ begin
 end.
 {
   $Log$
-  Revision 1.119  2003-09-28 17:55:03  peter
+  Revision 1.120  2003-09-29 20:58:55  peter
+    * optimized releasing of registers
+
+  Revision 1.119  2003/09/28 17:55:03  peter
     * parent framepointer changed to hidden parameter
     * tloadparentfpnode added
 

+ 129 - 2
compiler/ncgld.pas

@@ -47,6 +47,7 @@ interface
 implementation
 
     uses
+      cutils,
       systems,
       verbose,globtype,globals,
       symconst,symtype,symdef,symsym,symtable,defutil,paramgr,
@@ -695,6 +696,129 @@ implementation
         elesize : longint;
         tmpreg  : tregister;
         paraloc : tparalocation;
+
+        procedure push_value(p:tnode);
+        var
+          href : treference;
+{$ifdef i386}
+          tempreference : treference;
+          sizetopush : longint;
+{$endif i386}
+          size : longint;
+          cgsize : tcgsize;
+        begin
+          { we've nothing to push when the size of the parameter is 0 }
+          if p.resulttype.def.size=0 then
+           exit;
+
+          if p.location.loc in [LOC_FLAGS,LOC_JUMP] then
+            internalerror(200309293);
+
+          { Handle Floating point types differently }
+          if p.resulttype.def.deftype=floatdef then
+            begin
+              location_release(exprasmlist,p.location);
+{$ifdef i386}
+              case p.location.loc of
+                LOC_FPUREGISTER,
+                LOC_CFPUREGISTER:
+                  begin
+                     size:=align(tfloatdef(p.resulttype.def).size,std_param_align);
+                     inc(pushedparasize,size);
+                     cg.g_stackpointer_alloc(exprasmlist,size);
+                     reference_reset_base(href,NR_STACK_POINTER_REG,0);
+                     cg.a_loadfpu_reg_ref(exprasmlist,def_cgsize(p.resulttype.def),p.location.register,href);
+                  end;
+                LOC_REFERENCE,
+                LOC_CREFERENCE :
+                  begin
+                    sizetopush:=align(p.resulttype.def.size,std_param_align);
+                    tempreference:=p.location.reference;
+                    inc(tempreference.offset,sizetopush);
+                    while (sizetopush>0) do
+                     begin
+                       if sizetopush>=4 then
+                        begin
+                          cgsize:=OS_32;
+                          inc(pushedparasize,4);
+                          dec(tempreference.offset,4);
+                          dec(sizetopush,4);
+                        end
+                       else
+                        begin
+                          cgsize:=OS_16;
+                          inc(pushedparasize,2);
+                          dec(tempreference.offset,2);
+                          dec(sizetopush,2);
+                        end;
+                       cg.a_param_ref(exprasmlist,cgsize,tempreference,paraloc);
+                     end;
+                  end;
+                else
+                  internalerror(200204243);
+              end;
+{$else i386}
+              case p.location.loc of
+                LOC_FPUREGISTER,
+                LOC_CFPUREGISTER:
+                  cg.a_paramfpu_reg(exprasmlist,def_cgsize(p.resulttype.def),p.location.register,locpara);
+                LOC_REFERENCE,
+                LOC_CREFERENCE :
+                  cg.a_paramfpu_ref(exprasmlist,def_cgsize(p.resulttype.def),p.location.reference,locpara)
+                else
+                  internalerror(200204243);
+              end;
+{$endif i386}
+            end
+          else
+            begin
+              { copy the value on the stack or use normal parameter push? }
+              if paramanager.copy_value_on_stack(vs_value,p.resulttype.def,pocall_cdecl) then
+               begin
+                 location_release(exprasmlist,p.location);
+                 if not (p.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
+                   internalerror(200204241);
+{$ifdef i386}
+                 { push on stack }
+                 size:=align(p.resulttype.def.size,std_param_align);
+                 inc(pushedparasize,size);
+                 cg.g_stackpointer_alloc(exprasmlist,size);
+                 reference_reset_base(href,NR_STACK_POINTER_REG,0);
+                 cg.g_concatcopy(exprasmlist,p.location.reference,href,size,false,false);
+{$else i386}
+                 cg.a_param_copy_ref(exprasmlist,p.resulttype.def.size,p.location.reference,locpara);
+{$endif i386}
+               end
+              else
+               begin
+                 case p.location.loc of
+                   LOC_CONSTANT,
+                   LOC_REGISTER,
+                   LOC_CREGISTER,
+                   LOC_REFERENCE,
+                   LOC_CREFERENCE :
+                     begin
+                       cgsize:=def_cgsize(p.resulttype.def);
+                       if cgsize in [OS_64,OS_S64] then
+                        begin
+                          inc(pushedparasize,8);
+                          cg64.a_param64_loc(exprasmlist,p.location,paraloc);
+                          location_release(exprasmlist,p.location);
+                        end
+                       else
+                        begin
+                          location_release(exprasmlist,p.location);
+                          inc(pushedparasize,std_param_align);
+                          cg.a_param_loc(exprasmlist,p.location,paraloc);
+                        end;
+                     end;
+                   else
+                     internalerror(200204241);
+                 end;
+               end;
+            end;
+        end;
+
       begin
         dovariant:=(nf_forcevaria in flags) or tarraydef(resulttype.def).isvariant;
         if dovariant then
@@ -838,7 +962,7 @@ implementation
                      end
                     else
                       if vtype in [vtInt64,vtQword,vtExtended] then
-                        push_value_para(exprasmlist,hp.left,vs_value,pocall_cdecl,std_param_align,paraloc)
+                        push_value(hp.left)
                     else
                       begin
                         cg.a_param_loc(exprasmlist,hp.left.location,paraloc);
@@ -921,7 +1045,10 @@ begin
 end.
 {
   $Log$
-  Revision 1.87  2003-09-28 21:46:18  peter
+  Revision 1.88  2003-09-29 20:58:56  peter
+    * optimized releasing of registers
+
+  Revision 1.87  2003/09/28 21:46:18  peter
     * fix allocation of threadvar parameter
 
   Revision 1.86  2003/09/28 17:55:03  peter

+ 10 - 11
compiler/ncgmem.pas

@@ -289,9 +289,8 @@ implementation
             paramanager.allocparaloc(exprasmlist,paraloc1);
             cg.a_param_reg(exprasmlist, OS_ADDR,location.reference.base,paraloc1);
             paramanager.freeparaloc(exprasmlist,paraloc1);
-            rg.allocexplicitregistersint(exprasmlist,paramanager.get_volatile_registers_int(pocall_default));
+            { FPC_CHECKPOINTER uses saveregisters }
             cg.a_call_name(exprasmlist,'FPC_CHECKPOINTER');
-            rg.deallocexplicitregistersint(exprasmlist,paramanager.get_volatile_registers_int(pocall_default));
           end;
       end;
 
@@ -342,15 +341,14 @@ implementation
                 paramanager.allocparaloc(exprasmlist,paraloc1);
                 cg.a_param_reg(exprasmlist, OS_ADDR,location.reference.base,paraloc1);
                 paramanager.freeparaloc(exprasmlist,paraloc1);
-                rg.allocexplicitregistersint(exprasmlist,paramanager.get_volatile_registers_int(pocall_default));
+                { FPC_CHECKPOINTER uses saveregisters }
                 cg.a_call_name(exprasmlist,'FPC_CHECKPOINTER');
-                rg.deallocexplicitregistersint(exprasmlist,paramanager.get_volatile_registers_int(pocall_default));
               end;
            end
          else if is_interfacecom(left.resulttype.def) then
            begin
-              tg.GetTemp(exprasmlist,pointer_size,tt_interfacecom,location.reference);
-              cg.a_load_loc_ref(exprasmlist,OS_ADDR,left.location,location.reference);
+             tg.GetTemp(exprasmlist,pointer_size,tt_interfacecom,location.reference);
+             cg.a_load_loc_ref(exprasmlist,OS_ADDR,left.location,location.reference);
              { implicit deferencing also for interfaces }
              if (cs_gdb_heaptrc in aktglobalswitches) and
                 (cs_checkpointer in aktglobalswitches) and
@@ -360,11 +358,9 @@ implementation
                 paramanager.allocparaloc(exprasmlist,paraloc1);
                 cg.a_param_reg(exprasmlist, OS_ADDR,location.reference.base,paraloc1);
                 paramanager.freeparaloc(exprasmlist,paraloc1);
-                rg.allocexplicitregistersint(exprasmlist,paramanager.get_volatile_registers_int(pocall_default));
+                { FPC_CHECKPOINTER uses saveregisters }
                 cg.a_call_name(exprasmlist,'FPC_CHECKPOINTER');
-                rg.allocexplicitregistersint(exprasmlist,paramanager.get_volatile_registers_int(pocall_default));
               end;
-
            end
          else
            location_copy(location,left.location);
@@ -537,6 +533,7 @@ implementation
                objectlibrary.getlabel(neglabel);
                objectlibrary.getlabel(poslabel);
                cg.a_cmp_const_reg_label(exprasmlist,OS_INT,OC_LT,0,hreg,poslabel);
+               location_release(exprasmlist,hightree.location);
                cg.a_cmp_loc_reg_label(exprasmlist,OS_INT,OC_BE,hightree.location,hreg,neglabel);
                if freereg then
                  rg.ungetregisterint(exprasmlist,hreg);
@@ -544,7 +541,6 @@ implementation
                cg.a_call_name(exprasmlist,'FPC_RANGEERROR');
                cg.a_label(exprasmlist,neglabel);
                { release hightree }
-               location_release(exprasmlist,hightree.location);
                hightree.free;
              end;
           end
@@ -874,7 +870,10 @@ begin
 end.
 {
   $Log$
-  Revision 1.75  2003-09-28 21:45:52  peter
+  Revision 1.76  2003-09-29 20:58:56  peter
+    * optimized releasing of registers
+
+  Revision 1.75  2003/09/28 21:45:52  peter
     * fix register leak in with debug
 
   Revision 1.74  2003/09/28 17:55:03  peter

+ 194 - 423
compiler/ncgutil.pas

@@ -50,22 +50,16 @@ interface
 
     function  maybe_pushfpu(list:taasmoutput;needed : byte;var l:tlocation) : boolean;
 
-    procedure push_value_para(list:taasmoutput;p:tnode;
-                              varspez:tvarspez;
-                              calloption:tproccalloption;
-                              alignment:byte;
-                              const locpara : tparalocation);
-
-    procedure gen_load_return_value(list:TAAsmoutput; var uses_acc,uses_acchi,uses_fpu : boolean);
-    procedure gen_initialize_code(list:TAAsmoutput;inlined:boolean);
-    procedure gen_finalize_code(list : TAAsmoutput;inlined:boolean);
-
     procedure gen_proc_symbol(list:Taasmoutput);
+    procedure gen_proc_symbol_end(list:Taasmoutput);
     procedure gen_stackalloc_code(list:Taasmoutput);
+    procedure gen_stackfree_code(list:Taasmoutput;usesacc,usesacchi:boolean);
     procedure gen_save_used_regs(list : TAAsmoutput);
     procedure gen_restore_used_regs(list : TAAsmoutput;usesacc,usesacchi,usesfpu:boolean);
-    procedure gen_entry_code(list:TAAsmoutput;inlined:boolean);
-    procedure gen_exit_code(list:TAAsmoutput;inlined,usesacc,usesacchi:boolean);
+    procedure gen_initialize_code(list:TAAsmoutput;inlined:boolean);
+    procedure gen_finalize_code(list : TAAsmoutput;inlined:boolean);
+    procedure gen_load_para_value(list:TAAsmoutput);
+    procedure gen_load_return_value(list:TAAsmoutput; var uses_acc,uses_acchi,uses_fpu : boolean);
 
 (*
     procedure geninlineentrycode(list : TAAsmoutput;stackframe:longint);
@@ -322,13 +316,17 @@ implementation
 
 {$ifndef cpu64bit}
     { 32-bit version }
-    procedure location_force(list: TAAsmoutput;var l:tlocation;dst_size:TCGSize;maybeconst:boolean);
+    procedure location_force_reg(list: TAAsmoutput;var l:tlocation;dst_size:TCGSize;maybeconst:boolean);
       var
         hregister,
         hregisterhi : tregister;
         hreg64 : tregister64;
         hl : tasmlabel;
+        oldloc : tlocation;
      begin
+        oldloc:=l;
+        if dst_size=OS_NO then
+          internalerror(200309144);
         { handle transformations to 64bit separate }
         if dst_size in [OS_64,OS_S64] then
          begin
@@ -341,7 +339,10 @@ implementation
                  cg.a_load_reg_reg(list,l.size,OS_32,l.registerlow,hregister);
                end
               else
-               hregister:=rg.getregisterint(list,OS_INT);
+               begin
+                 location_release(list,l);
+                 hregister:=rg.getregisterint(list,OS_INT);
+               end;
               { load value in low register }
               case l.loc of
                 LOC_FLAGS :
@@ -396,6 +397,7 @@ implementation
                begin
                  hregister:=rg.getregisterint(list,OS_INT);
                  hregisterhi:=rg.getregisterint(list,OS_INT);
+                 location_release(list,l);
                end;
               hreg64.reglo:=hregister;
               hreg64.reghi:=hregisterhi;
@@ -415,12 +417,11 @@ implementation
                rg.ungetregisterint(list,l.registerhigh);
                l.registerhigh:=NR_NO;
              end;
-            if l.loc=LOC_REGISTER then
-              rg.ungetregisterint(list,l.register);
            {Do not bother to recycle the existing register. The register
             allocator eliminates unnecessary moves, so it's not needed
             and trying to recycle registers can cause problems because
             the registers changes size and may need aditional constraints.}
+           location_release(list,l);
            hregister:=rg.getregisterint(list,dst_size);
            { load value in new register }
            case l.loc of
@@ -470,24 +471,34 @@ implementation
              location_reset(l,LOC_CREGISTER,dst_size);
            l.register:=hregister;
          end;
+       { Release temp when it was a reference }
+       if oldloc.loc=LOC_REFERENCE then
+         location_freetemp(list,oldloc);
      end;
 
 {$else cpu64bit}
 
     { 64-bit version }
-    procedure location_force(list: TAAsmoutput;var l:tlocation;dst_size:TCGSize;maybeconst:boolean);
+    procedure location_force_reg(list: TAAsmoutput;var l:tlocation;dst_size:TCGSize;maybeconst:boolean);
       var
         hregister : tregister;
         hl : tasmlabel;
+        oldloc : tlocation;
      begin
+        oldloc:=l;
+        if dst_size=OS_NO then
+          internalerror(200309144);
         { handle transformations to 64bit separate }
         if dst_size in [OS_64,OS_S64] then
-         begin
+          begin
               { load a smaller size to OS_64 }
               if l.loc=LOC_REGISTER then
                hregister:=rg.makeregsize(l.register,OS_INT)
               else
-               hregister:=rg.getregisterint(list,OS_INT);
+               begin
+                 location_release(list,l);
+                 hregister:=rg.getregisterint(list,OS_INT);
+               end;
               { load value in low register }
               case l.loc of
 {$ifdef cpuflags}
@@ -526,7 +537,10 @@ implementation
                  (TCGSize2Size[dst_size]=TCGSize2Size[l.size]) then
                hregister:=l.register
               else
-               hregister:=rg.getregisterint(list,OS_INT);
+               begin
+                 location_release(list,l);
+                 hregister:=rg.getregisterint(list,OS_INT);
+               end;
             end;
            hregister:=rg.makeregsize(hregister,dst_size);
            { load value in new register }
@@ -576,28 +590,13 @@ implementation
            location_reset(l,LOC_REGISTER,dst_size);
            l.register:=hregister;
          end;
+       { Release temp when it was a reference }
+       if oldloc.loc=LOC_REFERENCE then
+         location_freetemp(list,oldloc);
      end;
 {$endif cpu64bit}
 
 
-    procedure location_force_reg(list: TAAsmoutput;var l:tlocation;dst_size:TCGSize;maybeconst:boolean);
-
-    var oldloc:Tlocation;
-
-      begin
-        if dst_size=OS_NO then
-          internalerror(200309144);
-        oldloc:=l;
-        location_force(list, l, dst_size, maybeconst);
-        { release previous location before demanding a new register }
-        if (oldloc.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
-         begin
-           location_freetemp(list,oldloc);
-           location_release(list,oldloc);
-         end;
-      end;
-
-
     procedure location_force_fpureg(list: TAAsmoutput;var l: tlocation;maybeconst:boolean);
       var
         reg : tregister;
@@ -672,204 +671,6 @@ implementation
       end;
 
 
-{*****************************************************************************
-                                Push Value Para
-*****************************************************************************}
-
-    procedure push_value_para(list:taasmoutput;p:tnode;
-                              varspez:tvarspez;
-                              calloption:tproccalloption;
-                              alignment:byte;
-                              const locpara : tparalocation);
-      var
-        href : treference;
-{$ifdef i386}
-        tempreference : treference;
-        sizetopush : longint;
-{$endif i386}
-        size : longint;
-        cgsize : tcgsize;
-      begin
-        { we've nothing to push when the size of the parameter is 0 }
-        if p.resulttype.def.size=0 then
-         exit;
-
-        { Move flags and jump in register to make it less complex }
-        if p.location.loc in [LOC_FLAGS,LOC_JUMP] then
-         location_force_reg(list,p.location,def_cgsize(p.resulttype.def),false);
-
-        { Handle Floating point types differently }
-        if p.resulttype.def.deftype=floatdef then
-         begin
-(*
-           if calloption=pocall_inline then
-             begin
-               size:=align(tfloatdef(p.resulttype.def).size,alignment);
-               inc(pushedparasize,size);
-               reference_reset_base(href,current_procinfo.framepointer,para_offset-pushedparasize);
-               case p.location.loc of
-                 LOC_FPUREGISTER,
-                 LOC_CFPUREGISTER:
-                   cg.a_loadfpu_reg_ref(list,def_cgsize(p.resulttype.def),p.location.register,href);
-                 LOC_REFERENCE,
-                 LOC_CREFERENCE :
-                   cg.g_concatcopy(list,p.location.reference,href,size,false,false);
-                 else
-                   internalerror(200204243);
-               end;
-             end
-           else
-*)
-             begin
-               location_release(list,p.location);
-{$ifdef i386}
-               case p.location.loc of
-                 LOC_FPUREGISTER,
-                 LOC_CFPUREGISTER:
-                   begin
-                      size:=align(tfloatdef(p.resulttype.def).size,alignment);
-                      inc(pushedparasize,size);
-                      cg.g_stackpointer_alloc(list,size);
-                      reference_reset_base(href,NR_STACK_POINTER_REG,0);
-                      cg.a_loadfpu_reg_ref(list,def_cgsize(p.resulttype.def),p.location.register,href);
-                   end;
-                 LOC_REFERENCE,
-                 LOC_CREFERENCE :
-                   begin
-                     sizetopush:=align(p.resulttype.def.size,alignment);
-                     tempreference:=p.location.reference;
-                     inc(tempreference.offset,sizetopush);
-                     while (sizetopush>0) do
-                      begin
-                        if sizetopush>=4 then
-                         begin
-                           cgsize:=OS_32;
-                           inc(pushedparasize,4);
-                           dec(tempreference.offset,4);
-                           dec(sizetopush,4);
-                         end
-                        else
-                         begin
-                           cgsize:=OS_16;
-                           inc(pushedparasize,2);
-                           dec(tempreference.offset,2);
-                           dec(sizetopush,2);
-                         end;
-                        cg.a_param_ref(list,cgsize,tempreference,locpara);
-                      end;
-                   end;
-                 else
-                   internalerror(200204243);
-               end;
-{$else i386}
-               case p.location.loc of
-                 LOC_FPUREGISTER,
-                 LOC_CFPUREGISTER:
-                   cg.a_paramfpu_reg(list,def_cgsize(p.resulttype.def),p.location.register,locpara);
-                 LOC_REFERENCE,
-                 LOC_CREFERENCE :
-                   cg.a_paramfpu_ref(list,def_cgsize(p.resulttype.def),p.location.reference,locpara)
-                 else
-                   internalerror(200204243);
-               end;
-{$endif i386}
-             end;
-         end
-        else
-         begin
-           { copy the value on the stack or use normal parameter push? }
-           if paramanager.copy_value_on_stack(varspez,p.resulttype.def,calloption) then
-            begin
-              location_release(list,p.location);
-{$ifdef i386}
-              if not (p.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
-                internalerror(200204241);
-              { push on stack }
-              size:=align(p.resulttype.def.size,alignment);
-              inc(pushedparasize,size);
-              cg.g_stackpointer_alloc(list,size);
-              reference_reset_base(href,NR_STACK_POINTER_REG,0);
-              cg.g_concatcopy(list,p.location.reference,href,size,false,false);
-{$else i386}
-              cg.a_param_copy_ref(list,p.resulttype.def.size,p.location.reference,locpara);
-{$endif i386}
-            end
-           else
-            begin
-              case p.location.loc of
-                LOC_CONSTANT,
-                LOC_REGISTER,
-                LOC_CREGISTER,
-                LOC_REFERENCE,
-                LOC_CREFERENCE :
-                  begin
-                    cgsize:=def_cgsize(p.resulttype.def);
-                    if cgsize in [OS_64,OS_S64] then
-                     begin
-                       inc(pushedparasize,8);
-(*
-                       if calloption=pocall_inline then
-                        begin
-                          reference_reset_base(href,current_procinfo.framepointer,para_offset-pushedparasize);
-                          if p.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
-                            begin
-                              size:=align(p.resulttype.def.size,alignment);
-                              cg.g_concatcopy(list,p.location.reference,href,size,false,false)
-                            end
-                          else
-                            cg64.a_load64_loc_ref(list,p.location,href);
-                        end
-                       else
-*)
-                        cg64.a_param64_loc(list,p.location,locpara);
-                        location_release(list,p.location);
-                     end
-                    else
-                     begin
-                       location_release(list,p.location);
-                       inc(pushedparasize,alignment);
-(*
-                       if calloption=pocall_inline then
-                        begin
-                          reference_reset_base(href,current_procinfo.framepointer,para_offset-pushedparasize);
-                          if p.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
-                            begin
-                              size:=align(p.resulttype.def.size,alignment);
-                              cg.g_concatcopy(list,p.location.reference,href,size,false,false)
-                            end
-                          else
-                            cg.a_load_loc_ref(list,p.location.size,p.location,href);
-                        end
-                       else
-*)
-                        cg.a_param_loc(list,p.location,locpara);
-                     end;
-                  end;
-{$ifdef SUPPORT_MMX}
-                LOC_MMXREGISTER,
-                LOC_CMMXREGISTER:
-                  begin
-                     location_release(list,p.location);
-                     inc(pushedparasize,8);
-(*
-                     if calloption=pocall_inline then
-                       begin
-                          reference_reset_base(href,current_procinfo.framepointer,para_offset-pushedparasize);
-                          cg.a_loadmm_reg_ref(list,p.location.register,href);
-                       end
-                     else
-*)
-                      cg.a_parammm_reg(list,p.location.register);
-                  end;
-{$endif SUPPORT_MMX}
-                else
-                  internalerror(200204241);
-              end;
-           end;
-         end;
-      end;
-
-
 {****************************************************************************
                             Init/Finalize Code
 ****************************************************************************}
@@ -879,7 +680,9 @@ implementation
         href1,href2 : treference;
         list : taasmoutput;
         hsym : tvarsym;
+        l    : longint;
         loadref : boolean;
+        localcopyloc : tparalocation;
       begin
         list:=taasmoutput(arg);
         if (tsym(p).typ=varsym) and
@@ -887,15 +690,15 @@ implementation
            (paramanager.push_addr_param(tvarsym(p).varspez,tvarsym(p).vartype.def,current_procinfo.procdef.proccalloption)) then
          begin
            loadref:=true;
-           case tvarsym(p).paraitem.paraloc[calleeside].loc of
+           case tvarsym(p).localloc.loc of
              LOC_REGISTER :
                begin
-                 reference_reset_base(href1,tvarsym(p).paraitem.paraloc[calleeside].register,0);
+                 reference_reset_base(href1,tvarsym(p).localloc.register,0);
                  loadref:=false;
                end;
              LOC_REFERENCE :
-               reference_reset_base(href1,tvarsym(p).paraitem.paraloc[calleeside].reference.index,
-                   tvarsym(p).paraitem.paraloc[calleeside].reference.offset);
+               reference_reset_base(href1,tvarsym(p).localloc.reference.index,
+                   tvarsym(p).localloc.reference.offset);
              else
                internalerror(200309181);
            end;
@@ -924,11 +727,20 @@ implementation
             begin
               if tvarsym(p).localloc.loc<>LOC_REFERENCE then
                 internalerror(200309183);
-              reference_reset_base(href2,tvarsym(p).localloc.reference.index,tvarsym(p).localloc.reference.offset);
+              { Allocate space for the local copy }
+              l:=tvarsym(p).getvaluesize;
+              localcopyloc.loc:=LOC_REFERENCE;
+              localcopyloc.size:=int_cgsize(l);
+              tg.GetLocal(list,l,localcopyloc.reference);
+              { Copy data }
+              reference_reset_base(href2,localcopyloc.reference.index,localcopyloc.reference.offset);
               if is_shortstring(tvarsym(p).vartype.def) then
                 cg.g_copyshortstring(list,href1,href2,tstringdef(tvarsym(p).vartype.def).len,false,loadref)
               else
                 cg.g_concatcopy(list,href1,href2,tvarsym(p).vartype.def.size,true,loadref);
+              { update localloc of varsym }
+              tg.Ungetlocal(list,tvarsym(p).localloc.reference);
+              tvarsym(p).localloc:=localcopyloc;
             end;
          end;
       end;
@@ -1274,16 +1086,13 @@ implementation
       end;
 
 
-    procedure gen_initialize_code(list:TAAsmoutput;inlined:boolean);
+    procedure gen_load_para_value(list:TAAsmoutput);
       var
         hp : tparaitem;
         href : treference;
-        paraloc1,
-        paraloc2 : tparalocation;
-        hregister : tregister;
         gotregvarparas : boolean;
       begin
-        { Save register parameters }
+        { Store register parameters in reference or in register variable }
         if assigned(current_procinfo.procdef.parast) and
            not (po_assembler in current_procinfo.procdef.procoptions) then
           begin
@@ -1294,20 +1103,6 @@ implementation
             gotregvarparas := false;
             while assigned(hp) do
               begin
-                if hp.paraloc[calleeside].loc=LOC_REGISTER then
-                  begin
-                    hregister:=rg.getregisterint(list,hp.paraloc[calleeside].size);
-                    rg.ungetregisterint(list,hregister);
-                    cg.a_load_param_reg(list,hp.paraloc[calleeside],hregister);
-                    rg.makeregvarint(getsupreg(hregister));
-                    { Update register }
-                    hp.paraloc[calleeside].register:=hregister;
-                    { Update localloc when there is no local copy }
-                    if not(vo_has_local_copy in tvarsym(hp.parasym).varoptions) then
-                      tvarsym(hp.parasym).localloc:=hp.paraloc[calleeside];
-                    gotregvarparas:=true;
-                  end;
-(*
                 case tvarsym(hp.parasym).localloc.loc of
                   LOC_REGISTER :
                     begin
@@ -1328,7 +1123,6 @@ implementation
                   else
                     internalerror(200309185);
                 end;
-*)
                 hp:=tparaitem(hp.next);
               end;
             if gotregvarparas then
@@ -1343,6 +1137,21 @@ implementation
                   end;
               end;
           end;
+
+        { generate copies of call by value parameters, must be done before
+          the initialization and body is parsed because the refcounts are
+          incremented using the local copies }
+        if not(po_assembler in current_procinfo.procdef.procoptions) then
+          current_procinfo.procdef.parast.foreach_static({$ifndef TP}@{$endif}copyvalueparas,list);
+      end;
+
+
+    procedure gen_initialize_code(list:TAAsmoutput;inlined:boolean);
+      var
+        href : treference;
+        paraloc1,
+        paraloc2 : tparalocation;
+      begin
         { the actual profile code can clobber some registers,
           therefore if the context must be saved, do it before
           the actual call to the profile code
@@ -1381,12 +1190,6 @@ implementation
         { initialisizes temp. ansi/wide string data }
         inittempvariables(list);
 
-        { generate copies of call by value parameters, must be done before
-          the initialization because the refcounts are incremented using
-          the local copies }
-        if not(po_assembler in current_procinfo.procdef.procoptions) then
-          current_procinfo.procdef.parast.foreach_static({$ifndef TP}@{$endif}copyvalueparas,list);
-
         { initialize ansi/widesstring para's }
         current_procinfo.procdef.parast.foreach_static({$ifndef TP}@{$endif}init_paras,list);
 
@@ -1516,140 +1319,20 @@ implementation
       end;
 
 
-    procedure gen_stackalloc_code(list:Taasmoutput);
-      var
-        stackframe : longint;
-      begin
-        { Calculate size of stackframe }
-        stackframe:=current_procinfo.calc_stackframe_size;
-
-{$ifndef powerpc}
-        { at least for the ppc this applies always, so this code isn't usable (FK) }
-        { omit stack frame ? }
-        if (current_procinfo.framepointer=NR_STACK_POINTER_REG) then
-          begin
-            CGmessage(cg_d_stackframe_omited);
-            if stackframe<>0 then
-              cg.g_stackpointer_alloc(list,stackframe);
-          end
-        else
-{$endif powerpc}
-          begin
-            if (po_interrupt in current_procinfo.procdef.procoptions) then
-              cg.g_interrupt_stackframe_entry(list);
-
-            cg.g_stackframe_entry(list,stackframe);
-
-            {Never call stack checking before the standard system unit
-             has been initialized.}
-             if (cs_check_stack in aktlocalswitches) and (current_procinfo.procdef.proctypeoption<>potype_proginit) then
-               cg.g_stackcheck(list,stackframe);
-          end;
-      end;
-
-
-    procedure gen_save_used_regs(list : TAAsmoutput);
-      begin
-        { Pure assembler routines need to save the registers themselves }
-        if (po_assembler in current_procinfo.procdef.procoptions) then
-          exit;
-
-        { for the save all registers we can simply use a pusha,popa which
-          push edi,esi,ebp,esp(ignored),ebx,edx,ecx,eax }
-        if (po_saveregisters in current_procinfo.procdef.procoptions) then
-          cg.g_save_all_registers(list)
-        else
-          if current_procinfo.procdef.proccalloption in savestdregs_pocalls then
-            cg.g_save_standard_registers(list,rg.used_in_proc_int);
-      end;
-
-
-    procedure gen_restore_used_regs(list : TAAsmoutput;usesacc,usesacchi,usesfpu:boolean);
-      begin
-        { Pure assembler routines need to save the registers themselves }
-        if (po_assembler in current_procinfo.procdef.procoptions) then
-          exit;
-
-        { for the save all registers we can simply use a pusha,popa which
-          push edi,esi,ebp,esp(ignored),ebx,edx,ecx,eax }
-        if (po_saveregisters in current_procinfo.procdef.procoptions) then
-          cg.g_restore_all_registers(list,usesacc,usesacchi)
-        else
-          if current_procinfo.procdef.proccalloption in savestdregs_pocalls then
-            cg.g_restore_standard_registers(list,rg.used_in_proc_int);
-      end;
-
-
-    procedure gen_entry_code(list:TAAsmoutput;inlined:boolean);
-      var
-        href : treference;
-        hp : tparaitem;
-        gotregvarparas: boolean;
-      begin
-      end;
-
-
-    procedure gen_exit_code(list:TAAsmoutput;inlined,usesacc,usesacchi:boolean);
-      var
+    procedure gen_proc_symbol_end(list:Taasmoutput);
 {$ifdef GDB}
+      var
         stabsendlabel : tasmlabel;
         mangled_length : longint;
         p : pchar;
 {$endif GDB}
-        stacksize,
-        retsize : longint;
       begin
-
+        list.concat(Tai_symbol_end.Createname(current_procinfo.procdef.mangledname));
 {$ifdef GDB}
-        if ((cs_debuginfo in aktmoduleswitches) and not inlined) then
+        if (cs_debuginfo in aktmoduleswitches) then
           begin
             objectlibrary.getlabel(stabsendlabel);
             cg.a_label(list,stabsendlabel);
-          end;
-{$endif GDB}
-
-{$ifndef powerpc}
-        { remove stackframe }
-        if not inlined then
-         begin
-           if (current_procinfo.framepointer=NR_STACK_POINTER_REG) then
-            begin
-              stacksize:=current_procinfo.calc_stackframe_size;
-              if (stacksize<>0) then
-                cg.a_op_const_reg(list,OP_ADD,OS_32,stacksize,current_procinfo.framepointer);
-            end
-           else
-            cg.g_restore_frame_pointer(list);
-         end;
-{$endif}
-
-        { at last, the return is generated }
-        if not inlined then
-         begin
-           if (po_interrupt in current_procinfo.procdef.procoptions) then
-            cg.g_interrupt_stackframe_exit(list,usesacc,usesacchi)
-           else
-            begin
-              if current_procinfo.procdef.proccalloption in clearstack_pocalls then
-                begin
-                  retsize:=0;
-                  if paramanager.ret_in_param(current_procinfo.procdef.rettype.def,current_procinfo.procdef.proccalloption) then
-                    inc(retsize,POINTER_SIZE);
-                end
-              else
-                begin
-                  retsize:=current_procinfo.para_stack_size;
-                end;
-              cg.g_return_from_proc(list,retsize);
-            end;
-         end;
-
-        if not inlined then
-          list.concat(Tai_symbol_end.Createname(current_procinfo.procdef.mangledname));
-
-{$ifdef GDB}
-        if (cs_debuginfo in aktmoduleswitches) and not inlined  then
-          begin
             { define calling EBP as pseudo local var PM }
             { this enables test if the function is a local one !! }
             {if  assigned(current_procinfo.parent) and
@@ -1714,6 +1397,104 @@ implementation
       end;
 
 
+    procedure gen_stackalloc_code(list:Taasmoutput);
+      var
+        stackframe : longint;
+      begin
+        { Calculate size of stackframe }
+        stackframe:=current_procinfo.calc_stackframe_size;
+
+{$ifndef powerpc}
+        { at least for the ppc this applies always, so this code isn't usable (FK) }
+        { omit stack frame ? }
+        if (current_procinfo.framepointer=NR_STACK_POINTER_REG) then
+          begin
+            CGmessage(cg_d_stackframe_omited);
+            if stackframe<>0 then
+              cg.g_stackpointer_alloc(list,stackframe);
+          end
+        else
+{$endif powerpc}
+          begin
+            if (po_interrupt in current_procinfo.procdef.procoptions) then
+              cg.g_interrupt_stackframe_entry(list);
+
+            cg.g_stackframe_entry(list,stackframe);
+
+            {Never call stack checking before the standard system unit
+             has been initialized.}
+             if (cs_check_stack in aktlocalswitches) and (current_procinfo.procdef.proctypeoption<>potype_proginit) then
+               cg.g_stackcheck(list,stackframe);
+          end;
+      end;
+
+
+    procedure gen_stackfree_code(list:Taasmoutput;usesacc,usesacchi:boolean);
+      var
+        stacksize,
+        retsize : longint;
+      begin
+{$ifndef powerpc}
+        { remove stackframe }
+        if (current_procinfo.framepointer=NR_STACK_POINTER_REG) then
+          begin
+            stacksize:=current_procinfo.calc_stackframe_size;
+            if (stacksize<>0) then
+              cg.a_op_const_reg(list,OP_ADD,OS_32,stacksize,current_procinfo.framepointer);
+          end
+        else
+          cg.g_restore_frame_pointer(list);
+{$endif}
+        { at last, the return is generated }
+        if (po_interrupt in current_procinfo.procdef.procoptions) then
+          cg.g_interrupt_stackframe_exit(list,usesacc,usesacchi)
+        else
+          begin
+            if current_procinfo.procdef.proccalloption in clearstack_pocalls then
+              begin
+                retsize:=0;
+                if paramanager.ret_in_param(current_procinfo.procdef.rettype.def,current_procinfo.procdef.proccalloption) then
+                  inc(retsize,POINTER_SIZE);
+              end
+            else
+              retsize:=current_procinfo.para_stack_size;
+            cg.g_return_from_proc(list,retsize);
+          end;
+      end;
+
+
+    procedure gen_save_used_regs(list : TAAsmoutput);
+      begin
+        { Pure assembler routines need to save the registers themselves }
+        if (po_assembler in current_procinfo.procdef.procoptions) then
+          exit;
+
+        { for the save all registers we can simply use a pusha,popa which
+          push edi,esi,ebp,esp(ignored),ebx,edx,ecx,eax }
+        if (po_saveregisters in current_procinfo.procdef.procoptions) then
+          cg.g_save_all_registers(list)
+        else
+          if current_procinfo.procdef.proccalloption in savestdregs_pocalls then
+            cg.g_save_standard_registers(list,rg.used_in_proc_int);
+      end;
+
+
+    procedure gen_restore_used_regs(list : TAAsmoutput;usesacc,usesacchi,usesfpu:boolean);
+      begin
+        { Pure assembler routines need to save the registers themselves }
+        if (po_assembler in current_procinfo.procdef.procoptions) then
+          exit;
+
+        { for the save all registers we can simply use a pusha,popa which
+          push edi,esi,ebp,esp(ignored),ebx,edx,ecx,eax }
+        if (po_saveregisters in current_procinfo.procdef.procoptions) then
+          cg.g_restore_all_registers(list,usesacc,usesacchi)
+        else
+          if current_procinfo.procdef.proccalloption in savestdregs_pocalls then
+            cg.g_restore_standard_registers(list,rg.used_in_proc_int);
+      end;
+
+
 {****************************************************************************
                                  Inlining
 ****************************************************************************}
@@ -1997,7 +1778,6 @@ implementation
     procedure gen_alloc_parast(list: taasmoutput;st:tparasymtable);
       var
         sym : tsym;
-        l   : longint;
       begin
         sym:=tsym(st.symindex.first);
         while assigned(sym) do
@@ -2006,41 +1786,29 @@ implementation
               begin
                 with tvarsym(sym) do
                   begin
-                    l:=getvaluesize;
-                    { Allocate local copy? }
-                    if (vo_has_local_copy in varoptions) and
-                       (l>0) then
-                      begin
-                        localloc.loc:=LOC_REFERENCE;
-                        localloc.size:=int_cgsize(l);
-                        tg.GetLocal(list,l,localloc.reference);
-                      end
-                    else
+                    { Allocate imaginary register for register parameters }
+                    if paraitem.paraloc[calleeside].loc=LOC_REGISTER then
                       begin
-                        { Allocate imaginary register for register parameters }
-                        if paraitem.paraloc[calleeside].loc=LOC_REGISTER then
-                          begin
-                            (*
+                        (*
 {$warning TODO Allocate register paras}
-                            localloc.loc:=LOC_REGISTER;
-                            localloc.size:=paraitem.paraloc[calleeside].size;
+                        localloc.loc:=LOC_REGISTER;
+                        localloc.size:=paraitem.paraloc[calleeside].size;
 {$ifndef cpu64bit}
-                            if localloc.size in [OS_64,OS_S64] then
-                              begin
-                                localloc.registerlow:=rg.getregisterint(list,OS_32);
-                                localloc.registerhigh:=rg.getregisterint(list,OS_32);
-                              end
-                            else
-{$endif cpu64bit}
-                              localloc.register:=rg.getregisterint(list,localloc.size);
-                              *)
-                            {localloc.loc:=LOC_REFERENCE;
-                            localloc.size:=paraitem.paraloc[calleeside].size;
-                            tg.GetLocal(list,tcgsize2size[localloc.size],localloc.reference);}
+                        if localloc.size in [OS_64,OS_S64] then
+                          begin
+                            localloc.registerlow:=rg.getregisterint(list,OS_32);
+                            localloc.registerhigh:=rg.getregisterint(list,OS_32);
                           end
                         else
-                          localloc:=paraitem.paraloc[calleeside];
-                      end;
+{$endif cpu64bit}
+                          localloc.register:=rg.getregisterint(list,localloc.size);
+                          *)
+                        localloc.loc:=LOC_REFERENCE;
+                        localloc.size:=paraitem.paraloc[calleeside].size;
+                        tg.GetLocal(list,tcgsize2size[localloc.size],localloc.reference);
+                      end
+                    else
+                      localloc:=paraitem.paraloc[calleeside];
                   end;
               end;
             sym:=tsym(sym.indexnext);
@@ -2088,7 +1856,10 @@ implementation
 end.
 {
   $Log$
-  Revision 1.151  2003-09-28 21:47:18  peter
+  Revision 1.152  2003-09-29 20:58:56  peter
+    * optimized releasing of registers
+
+  Revision 1.151  2003/09/28 21:47:18  peter
     * register paras and local copies updates
 
   Revision 1.150  2003/09/28 17:55:03  peter

+ 35 - 13
compiler/psub.pas

@@ -102,12 +102,6 @@ implementation
        {$endif}
        ;
 
-
-    const
-      { Maximum number of loops when spilling registers }
-      maxspillingcounter = 20;
-
-
 {****************************************************************************
                       PROCEDURE/FUNCTION BODY PARSING
 ****************************************************************************}
@@ -584,6 +578,9 @@ implementation
         usesacc,
         usesfpu,
         usesacchi      : boolean;
+{$ifdef ra_debug}
+        i,
+{$endif ra_debug}
         spillingcounter : integer;
         fastspill:boolean;
       begin
@@ -622,10 +619,17 @@ implementation
         paramanager.create_paraloc_info(current_procinfo.procdef,calleeside);
 
         { Allocate space in temp/registers for parast and localst }
+        aktfilepos:=entrypos;
         gen_alloc_parast(aktproccode,tparasymtable(current_procinfo.procdef.parast));
         if current_procinfo.procdef.localst.symtabletype=localsymtable then
           gen_alloc_localst(aktproccode,tlocalsymtable(current_procinfo.procdef.localst));
 
+        { Load register parameters in temps and insert local copies
+          for values parameters. This must be done before the body is parsed
+          because the localloc is updated }
+        aktfilepos:=entrypos;
+        gen_load_para_value(aktproccode);
+
 {$warning FIXME!!}
         { FIXME!! If a procedure contains assembler blocks (or is pure assembler), }
         { then rg.used_in_proc_int already contains info because of that. However, }
@@ -688,9 +692,7 @@ implementation
         aktfilepos:=entrypos;
         gen_proc_symbol(templist);
         headertai:=tai(templist.last);
-        { add entry code after header }
-        gen_entry_code(templist,false);
-        { insert symbol and entry code }
+        { insert symbol }
         aktproccode.insertlist(templist);
 
         { Free space in temp/registers for parast and localst, must be
@@ -708,7 +710,7 @@ implementation
             spillingcounter:=0;
             repeat
 {$ifdef ra_debug}
-              if aktfilepos.line=2502 then
+              if aktfilepos.line=1206 then
                 rg.writegraph(spillingcounter);
 {$endif ra_debug}
               rg.prepare_colouring;
@@ -718,8 +720,21 @@ implementation
               if rg.spillednodes<>'' then
                 begin
                   inc(spillingcounter);
-                  if spillingcounter>maxspillingcounter then
+                  if spillingcounter>20 then
+{$ifdef ra_debug}
+                    break;
+{$else ra_debug}
                     internalerror(200309041);
+{$endif ra_debug}
+
+{$ifdef ra_debug}
+                  if aktfilepos.line=1207 then
+                    begin
+                      writeln('Spilling registers:');
+                      for i:=1 to length(rg.spillednodes) do
+                        writeln(ord(rg.spillednodes[i]));
+                    end;
+{$endif ra_debug}
                   fastspill:=rg.spill_registers(aktproccode,headertai,rg.spillednodes);
                 end;
             until (rg.spillednodes='') or not fastspill;
@@ -747,7 +762,11 @@ implementation
         aktproccode.insertlistafter(headertai,templist);
         { Add exit code at the end }
         aktfilepos:=exitpos;
-        gen_exit_code(templist,false,usesacc,usesacchi);
+        gen_stackfree_code(templist,usesacc,usesacchi);
+        aktproccode.concatlist(templist);
+        { Add end symbol and debug info }
+        aktfilepos:=exitpos;
+        gen_proc_symbol_end(templist);
         aktproccode.concatlist(templist);
 
         { save local data (casetable) also in the same file }
@@ -1288,7 +1307,10 @@ begin
 end.
 {
   $Log$
-  Revision 1.153  2003-09-28 17:55:04  peter
+  Revision 1.154  2003-09-29 20:58:56  peter
+    * optimized releasing of registers
+
+  Revision 1.153  2003/09/28 17:55:04  peter
     * parent framepointer changed to hidden parameter
     * tloadparentfpnode added
 

+ 13 - 6
compiler/rgobj.pas

@@ -1663,9 +1663,8 @@ unit rgobj;
     end;
 
     procedure Trgobj.select_spill;
-
-    var n:char;
-
+    var
+      n : char;
     begin
       {This code is WAY too naive. We need not to select just a register, but
        the register that is used the least...}
@@ -1735,7 +1734,7 @@ unit rgobj;
             include(used_in_proc_int,colour[k]);
         end;
 {$ifdef ra_debug}
-      if aktfilepos.line=2502 then
+      if aktfilepos.line=-1 then
         begin
           writeln('colourlist ',length(freezeworklist));
           for i:=0 to maxintreg do
@@ -1926,16 +1925,17 @@ unit rgobj;
               adj:=igraph.adjlist[Tsuperregister(i)];
               if adj=nil then
                 begin
+                  p:=i;
                   min:=0;
                   break;  {We won't find smaller ones.}
                 end
               else
                 if length(adj^)<min then
                   begin
+                    p:=i;
                     min:=length(adj^);
                     if min=0 then
                       break;  {We won't find smaller ones.}
-                    p:=i;
                   end;
             end;
         end;
@@ -1950,6 +1950,10 @@ unit rgobj;
 {$endif}
          end;
 
+{$ifdef ra_debug}
+       writeln('Spilling temp: ',p,' min ',min);
+{$endif ra_debug}
+
        exclude(unusedregsint,p);
        include(used_in_proc_int,p);
        r:=newreg(R_INTREGISTER,p,subreg);
@@ -2257,7 +2261,10 @@ end.
 
 {
   $Log$
-  Revision 1.78  2003-09-28 13:41:12  peter
+  Revision 1.79  2003-09-29 20:58:56  peter
+    * optimized releasing of registers
+
+  Revision 1.78  2003/09/28 13:41:12  peter
     * return reg 255 when allowdupreg is defined
 
   Revision 1.77  2003/09/25 16:19:32  peter

+ 7 - 4
compiler/x86/aasmcpu.pas

@@ -2218,7 +2218,7 @@ implementation
                       oper[1].typ:=top_reg;
                       oper[1].reg:=helpreg;
                       list.insertafter(helpins,self);
-                      rgunget(list,helpins,helpreg);
+                      rgunget(list,self,helpreg);
                     end;
                 end;
               end;
@@ -2233,7 +2233,7 @@ implementation
                   A_BT,A_BTS,
                   A_BTC,A_BTR :
                     begin
-                      {Yikes! We just changed the destination register into
+                      {Yikes! We just changed the source register into
                        a memory location above here.
 
                        Situation example:
@@ -2255,7 +2255,7 @@ implementation
                       dispose(oper[0].ref);
                       oper[0].typ:=top_reg;
                       oper[0].reg:=helpreg;
-                      rgunget(list,self,helpreg);
+                      rgunget(list,helpins,helpreg);
                     end;
                 end;
               end;
@@ -2317,7 +2317,10 @@ implementation
 end.
 {
   $Log$
-  Revision 1.28  2003-09-28 21:49:30  peter
+  Revision 1.29  2003-09-29 20:58:56  peter
+    * optimized releasing of registers
+
+  Revision 1.28  2003/09/28 21:49:30  peter
     * fixed invalid opcode handling in spill registers
 
   Revision 1.27  2003/09/28 13:37:07  peter

+ 7 - 4
compiler/x86/cgx86.pas

@@ -1111,14 +1111,14 @@ unit cgx86;
                   cgsize:=OS_16;
                 end;
               dec(len,copysize);
-              r:=rg.getregisterint(list,cgsize);
-              a_load_ref_reg(list,cgsize,cgsize,srcref,r);
               if (len=0) and delsource then
                 reference_release(list,source);
+              r:=rg.getregisterint(list,cgsize);
+              a_load_ref_reg(list,cgsize,cgsize,srcref,r);
+              rg.ungetregisterint(list,r);
               a_load_reg_ref(list,cgsize,cgsize,r,dstref);
               inc(srcref.offset,copysize);
               inc(dstref.offset,copysize);
-              rg.ungetregisterint(list,r);
             end;
         end
       else
@@ -1598,7 +1598,10 @@ unit cgx86;
 end.
 {
   $Log$
-  Revision 1.67  2003-09-28 13:37:19  peter
+  Revision 1.68  2003-09-29 20:58:56  peter
+    * optimized releasing of registers
+
+  Revision 1.67  2003/09/28 13:37:19  peter
     * a_call_ref removed
 
   Revision 1.66  2003/09/25 21:29:16  peter