瀏覽代碼

* Reworked the optimization of unused $parentfp for nested routines.
- Do not remove the $parentfp parameter as was done in the previous optimization approach. Instead when $parentfp is unused to the following:
- On the caller side: Omit passing the value for $parentfp for targets where tcgcallparanode.push_zero_sized_value_para=false (classic CPU targets).
Pass 0/nil as $parentfp for targets where tcgcallparanode.push_zero_sized_value_para=true;
- On the callee side: Prevent allocation of registers/resources for $parentfp.
- When possible keep $parentfp in a register.
- Set the pio_nested_access flag in tprocinfo.set_needs_parentfp() to properly handle deep nesting levels;

git-svn-id: trunk@45436 -

yury 5 年之前
父節點
當前提交
2808873d1b
共有 12 個文件被更改,包括 150 次插入63 次删除
  1. 4 2
      compiler/dbgbase.pas
  2. 1 2
      compiler/defcmp.pas
  3. 55 2
      compiler/ncgcal.pas
  4. 1 3
      compiler/ncgmem.pas
  5. 1 1
      compiler/ncgnstmm.pas
  6. 10 0
      compiler/ncgutil.pas
  7. 0 2
      compiler/nld.pas
  8. 16 23
      compiler/nmem.pas
  9. 2 1
      compiler/pparautl.pas
  10. 11 6
      compiler/procinfo.pas
  11. 0 21
      compiler/psub.pas
  12. 49 0
      compiler/symdef.pas

+ 4 - 2
compiler/dbgbase.pas

@@ -103,7 +103,8 @@ implementation
 
 
     uses
     uses
       cutils,
       cutils,
-      verbose;
+      verbose,
+      cgbase;
 
 
 
 
     constructor TDebugInfo.Create;
     constructor TDebugInfo.Create;
@@ -430,7 +431,8 @@ implementation
           localvarsym :
           localvarsym :
             appendsym_localvar(list,tlocalvarsym(sym));
             appendsym_localvar(list,tlocalvarsym(sym));
           paravarsym :
           paravarsym :
-            appendsym_paravar(list,tparavarsym(sym));
+            if tparavarsym(sym).localloc.loc<>LOC_VOID then
+              appendsym_paravar(list,tparavarsym(sym));
           constsym :
           constsym :
             appendsym_const(list,tconstsym(sym));
             appendsym_const(list,tconstsym(sym));
           typesym :
           typesym :

+ 1 - 2
compiler/defcmp.pas

@@ -2366,8 +2366,7 @@ implementation
               not is_nested_pd(def2))) or
               not is_nested_pd(def2))) or
             ((def1.typ=procdef) and                                 { d) }
             ((def1.typ=procdef) and                                 { d) }
              is_nested_pd(def1) and
              is_nested_pd(def1) and
-             ((not(po_delphi_nested_cc in def1.procoptions) and
-              (pio_needs_parentfp in tprocdef(def1).implprocoptions)) or
+             (not(po_delphi_nested_cc in def1.procoptions) or
               not is_nested_pd(def2))) or
               not is_nested_pd(def2))) or
             ((def1.typ=procvardef) and                              { e) }
             ((def1.typ=procvardef) and                              { e) }
              (is_nested_pd(def1)<>is_nested_pd(def2))) then
              (is_nested_pd(def1)<>is_nested_pd(def2))) then

+ 55 - 2
compiler/ncgcal.pas

@@ -41,6 +41,7 @@ interface
           procedure push_value_para;virtual;
           procedure push_value_para;virtual;
           procedure push_formal_para;virtual;
           procedure push_formal_para;virtual;
           procedure push_copyout_para;virtual;abstract;
           procedure push_copyout_para;virtual;abstract;
+          function maybe_push_unused_para:boolean;virtual;
        public
        public
           tempcgpara : tcgpara;
           tempcgpara : tcgpara;
 
 
@@ -60,6 +61,7 @@ interface
           procedure release_para_temps;
           procedure release_para_temps;
           procedure reorder_parameters;
           procedure reorder_parameters;
           procedure freeparas;
           procedure freeparas;
+          function is_parentfp_pushed:boolean;
        protected
        protected
           retloc: tcgpara;
           retloc: tcgpara;
           paralocs: array of pcgpara;
           paralocs: array of pcgpara;
@@ -169,6 +171,8 @@ implementation
       begin
       begin
         if not(left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
         if not(left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
           internalerror(200304235);
           internalerror(200304235);
+        if maybe_push_unused_para then
+          exit;
         { see the call to keep_para_array_range in ncal: if that call returned
         { see the call to keep_para_array_range in ncal: if that call returned
           true, we overwrite the resultdef of left with its original resultdef
           true, we overwrite the resultdef of left with its original resultdef
           (to keep track of the range of the original array); we inserted a type
           (to keep track of the range of the original array); we inserted a type
@@ -262,6 +266,9 @@ implementation
            not push_zero_sized_value_para then
            not push_zero_sized_value_para then
           exit;
           exit;
 
 
+        if maybe_push_unused_para then
+          exit;
+
         { Move flags and jump in register to make it less complex }
         { Move flags and jump in register to make it less complex }
         if left.location.loc in [LOC_FLAGS,LOC_JUMP,LOC_SUBSETREG,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
         if left.location.loc in [LOC_FLAGS,LOC_JUMP,LOC_SUBSETREG,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
           hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
           hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
@@ -273,6 +280,8 @@ implementation
 
 
     procedure tcgcallparanode.push_formal_para;
     procedure tcgcallparanode.push_formal_para;
       begin
       begin
+        if maybe_push_unused_para then
+          exit;
         { allow passing of a constant to a const formaldef }
         { allow passing of a constant to a const formaldef }
         if (parasym.varspez=vs_const) and
         if (parasym.varspez=vs_const) and
            not(left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
            not(left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
@@ -281,6 +290,35 @@ implementation
       end;
       end;
 
 
 
 
+    function tcgcallparanode.maybe_push_unused_para: boolean;
+      begin
+        { Check if the parameter is unused.
+          Only the $parentfp parameter is supported for now. }
+        result:=(vo_is_parentfp in parasym.varoptions) and (parasym.varstate<=vs_initialised);
+        if not result then
+          exit;
+        { The parameter is unused.
+          We can skip loading of the parameter when:
+            - the target does not strictly require all parameters (push_zero_sized_value_para = false)
+            and
+            - fixed stack is used
+              or the parameter is in a register
+              or the parameter is $parentfp. }
+        if not push_zero_sized_value_para and
+          (paramanager.use_fixed_stack or
+           (vo_is_parentfp in parasym.varoptions) or
+           (parasym.paraloc[callerside].Location^.Loc in [LOC_REGISTER,LOC_CREGISTER])) then
+          begin
+            { Skip loading }
+            parasym.paraloc[callerside].Location^.Loc:=LOC_VOID;
+            tempcgpara.Location^.Loc:=LOC_VOID;
+          end
+        else
+          { Load the dummy nil/0 value }
+          hlcg.a_load_const_cgpara(current_asmdata.CurrAsmList,left.resultdef,0,tempcgpara);
+      end;
+
+
     procedure tcgcallparanode.secondcallparan;
     procedure tcgcallparanode.secondcallparan;
       var
       var
          pushaddr: boolean;
          pushaddr: boolean;
@@ -909,6 +947,20 @@ implementation
        end;
        end;
 
 
 
 
+    function tcgcallnode.is_parentfp_pushed: boolean;
+      var
+        i : longint;
+      begin
+        for i:=0 to procdefinition.paras.Count-1 do
+          with tparavarsym(procdefinition.paras[i]) do
+            if vo_is_parentfp in varoptions then
+              begin
+                result:=paraloc[callerside].Location^.Loc in [LOC_REFERENCE,LOC_CREFERENCE];
+                exit;
+              end;
+        result:=false;
+      end;
+
 
 
     procedure tcgcallnode.pass_generate_code;
     procedure tcgcallnode.pass_generate_code;
       var
       var
@@ -1258,9 +1310,10 @@ implementation
                pop_parasize(0);
                pop_parasize(0);
            end
            end
          { frame pointer parameter is popped by the caller when it's passed the
          { frame pointer parameter is popped by the caller when it's passed the
-           Delphi way }
+           Delphi way and $parentfp is used }
          else if (po_delphi_nested_cc in procdefinition.procoptions) and
          else if (po_delphi_nested_cc in procdefinition.procoptions) and
-           not paramanager.use_fixed_stack then
+            not paramanager.use_fixed_stack and
+            is_parentfp_pushed() then
            pop_parasize(sizeof(pint));
            pop_parasize(sizeof(pint));
 
 
          if procdefinition.generate_safecall_wrapper then
          if procdefinition.generate_safecall_wrapper then

+ 1 - 3
compiler/ncgmem.pas

@@ -170,9 +170,7 @@ implementation
             location_reset(location,LOC_REGISTER,def_cgsize(parentfpvoidpointertype));
             location_reset(location,LOC_REGISTER,def_cgsize(parentfpvoidpointertype));
             currpi:=current_procinfo;
             currpi:=current_procinfo;
             { load framepointer of current proc }
             { load framepointer of current proc }
-            hsym:=tparavarsym(currpi.procdef.parast.Find('parentfp'));
-            if not assigned(hsym) then
-              internalerror(200309281);
+            hsym:=parentfpsym;
             if (currpi.procdef.owner.symtablelevel=parentpd.parast.symtablelevel) and (hsym.localloc.loc in [LOC_REGISTER,LOC_CREGISTER]) then
             if (currpi.procdef.owner.symtablelevel=parentpd.parast.symtablelevel) and (hsym.localloc.loc in [LOC_REGISTER,LOC_CREGISTER]) then
               location.register:=hsym.localloc.register
               location.register:=hsym.localloc.register
             else
             else

+ 1 - 1
compiler/ncgnstmm.pas

@@ -105,7 +105,7 @@ implementation
           of the current routine (and hence it has not been moved into the
           of the current routine (and hence it has not been moved into the
           nestedfp struct), get the original nestedfp parameter }
           nestedfp struct), get the original nestedfp parameter }
         useparentfppara:=not assigned(current_procinfo.procdef.parentfpstruct);
         useparentfppara:=not assigned(current_procinfo.procdef.parentfpstruct);
-        hsym:=tparavarsym(current_procinfo.procdef.parast.Find('parentfp'));
+        hsym:=parentfpsym;
         if current_procinfo.procdef.parast.symtablelevel>parentpd.parast.symtablelevel then
         if current_procinfo.procdef.parast.symtablelevel>parentpd.parast.symtablelevel then
           useparentfppara:=
           useparentfppara:=
             useparentfppara or
             useparentfppara or

+ 10 - 0
compiler/ncgutil.pas

@@ -865,6 +865,13 @@ implementation
                       location_reset(vs.initialloc,LOC_REGISTER,OS_ADDR);
                       location_reset(vs.initialloc,LOC_REGISTER,OS_ADDR);
                       vs.initialloc.register:=NR_FRAME_POINTER_REG;
                       vs.initialloc.register:=NR_FRAME_POINTER_REG;
                     end
                     end
+                  { Unused parameters ($parentfp for now) need to be kept in the original location
+                    to prevent allocation of registers/resources for them. }
+                  else if (vs.varstate <= vs_initialised) and
+                    (vo_is_parentfp in vs.varoptions) then
+                    begin
+                      tparavarsym(vs).paraloc[calleeside].get_location(vs.initialloc);
+                    end
                   else
                   else
                     begin
                     begin
                       { if an open array is used, also its high parameter is used,
                       { if an open array is used, also its high parameter is used,
@@ -1055,6 +1062,9 @@ implementation
           loadn:
           loadn:
             if (tloadnode(n).symtableentry.typ in [staticvarsym,localvarsym,paravarsym]) then
             if (tloadnode(n).symtableentry.typ in [staticvarsym,localvarsym,paravarsym]) then
               add_regvars(rv^,tabstractnormalvarsym(tloadnode(n).symtableentry).localloc);
               add_regvars(rv^,tabstractnormalvarsym(tloadnode(n).symtableentry).localloc);
+          loadparentfpn:
+            if current_procinfo.procdef.parast.symtablelevel>tloadparentfpnode(n).parentpd.parast.symtablelevel then
+              add_regvars(rv^,tloadparentfpnode(n).parentfpsym.localloc);
           vecn:
           vecn:
             begin
             begin
               { range checks sometimes need the high parameter }
               { range checks sometimes need the high parameter }

+ 0 - 2
compiler/nld.pas

@@ -357,8 +357,6 @@ implementation
                    if assigned(left) then
                    if assigned(left) then
                      internalerror(200309289);
                      internalerror(200309289);
                    left:=cloadparentfpnode.create(tprocdef(symtable.defowner),lpf_forload);
                    left:=cloadparentfpnode.create(tprocdef(symtable.defowner),lpf_forload);
-                   { we can't inline the referenced parent procedure }
-                   include(tprocdef(symtable.defowner).implprocoptions,pio_nested_access);
                    { reference in nested procedures, variable needs to be in memory }
                    { reference in nested procedures, variable needs to be in memory }
                    { and behaves as if its address escapes its parent block         }
                    { and behaves as if its address escapes its parent block         }
                    make_not_regable(self,[ra_different_scope]);
                    make_not_regable(self,[ra_different_scope]);

+ 16 - 23
compiler/nmem.pas

@@ -53,6 +53,10 @@ interface
          lpf_forload
          lpf_forload
        );
        );
        tloadparentfpnode = class(tunarynode)
        tloadparentfpnode = class(tunarynode)
+       private
+          _parentfpsym: tparavarsym;
+          function getparentfpsym: tparavarsym;
+       public
           parentpd : tprocdef;
           parentpd : tprocdef;
           parentpdderef : tderef;
           parentpdderef : tderef;
           kind: tloadparentfpkind;
           kind: tloadparentfpkind;
@@ -65,6 +69,7 @@ interface
           function pass_typecheck:tnode;override;
           function pass_typecheck:tnode;override;
           function docompare(p: tnode): boolean; override;
           function docompare(p: tnode): boolean; override;
           function dogetcopy : tnode;override;
           function dogetcopy : tnode;override;
+          property parentfpsym: tparavarsym read getparentfpsym;
        end;
        end;
        tloadparentfpnodeclass = class of tloadparentfpnode;
        tloadparentfpnodeclass = class of tloadparentfpnode;
 
 
@@ -372,32 +377,9 @@ implementation
 
 
 
 
     function tloadparentfpnode.pass_typecheck:tnode;
     function tloadparentfpnode.pass_typecheck:tnode;
-{$ifdef dummy}
-      var
-        currpi : tprocinfo;
-        hsym   : tparavarsym;
-{$endif dummy}
       begin
       begin
         result:=nil;
         result:=nil;
         resultdef:=parentfpvoidpointertype;
         resultdef:=parentfpvoidpointertype;
-{$ifdef dummy}
-        { currently parentfps are never loaded in registers (FK) }
-        if (current_procinfo.procdef.parast.symtablelevel<>parentpd.parast.symtablelevel) then
-          begin
-            currpi:=current_procinfo;
-            { walk parents }
-            while (currpi.procdef.owner.symtablelevel>parentpd.parast.symtablelevel) do
-              begin
-                currpi:=currpi.parent;
-                if not assigned(currpi) then
-                  internalerror(2005040602);
-                hsym:=tparavarsym(currpi.procdef.parast.Find('parentfp'));
-                if not assigned(hsym) then
-                  internalerror(2005040601);
-                hsym.varregable:=vr_none;
-              end;
-          end;
-{$endif dummy}
       end;
       end;
 
 
 
 
@@ -408,6 +390,17 @@ implementation
       end;
       end;
 
 
 
 
+    function tloadparentfpnode.getparentfpsym: tparavarsym;
+      begin
+        if not assigned(_parentfpsym) then
+          begin
+            _parentfpsym:=tparavarsym(current_procinfo.procdef.parast.Find('parentfp'));
+            if not assigned(_parentfpsym) then
+              internalerror(200309281);
+          end;
+        result:=_parentfpsym;
+      end;
+
 {*****************************************************************************
 {*****************************************************************************
                              TADDRNODE
                              TADDRNODE
 *****************************************************************************}
 *****************************************************************************}

+ 2 - 1
compiler/pparautl.pas

@@ -156,7 +156,8 @@ implementation
               begin
               begin
                 vs:=cparavarsym.create('$parentfp',paranr,vs_value
                 vs:=cparavarsym.create('$parentfp',paranr,vs_value
                       ,parentfpvoidpointertype,[vo_is_parentfp,vo_is_hidden_para]);
                       ,parentfpvoidpointertype,[vo_is_parentfp,vo_is_hidden_para]);
-                vs.varregable:=vr_none;
+                { Mark $parentfp as used by default }
+                vs.varstate:=vs_read;
               end
               end
             else
             else
               begin
               begin

+ 11 - 6
compiler/procinfo.pas

@@ -202,8 +202,9 @@ unit procinfo;
           procedure start_eh(list : TAsmList); virtual;
           procedure start_eh(list : TAsmList); virtual;
           { called to insert needed eh info into the exit code }
           { called to insert needed eh info into the exit code }
           procedure end_eh(list : TAsmList); virtual;
           procedure end_eh(list : TAsmList); virtual;
-          { Sets the pio_needs_parentfp flag for the current nested procedure and
-            all its parent procedures until parent_level }
+          { Sets the pio_needs_parentfp flag for the current nested procedure.
+            Sets both pio_needs_parentfp and pio_nested_access for all parent
+            procedures until parent_level }
           procedure set_needs_parentfp(parent_level: byte);
           procedure set_needs_parentfp(parent_level: byte);
        end;
        end;
        tcprocinfo = class of tprocinfo;
        tcprocinfo = class of tprocinfo;
@@ -442,11 +443,15 @@ implementation
           Internalerror(2020050302);
           Internalerror(2020050302);
         if parent_level<normal_function_level then
         if parent_level<normal_function_level then
           parent_level:=normal_function_level;
           parent_level:=normal_function_level;
+        { Set pio_needs_parentfp for the current proc }
         pi:=Self;
         pi:=Self;
-        repeat
-          include(pi.procdef.implprocoptions, pio_needs_parentfp);
-          pi:=pi.parent;
-        until pi.procdef.parast.symtablelevel<=parent_level;
+        include(pi.procdef.implprocoptions, pio_needs_parentfp);
+        { Set both pio_needs_parentfp and pio_nested_access for all parent procs until parent_level }
+        while pi.procdef.parast.symtablelevel>parent_level do
+          begin
+            pi:=pi.parent;
+            pi.procdef.implprocoptions:=pi.procdef.implprocoptions+[pio_needs_parentfp,pio_nested_access];
+          end;
       end;
       end;
 
 
 end.
 end.

+ 0 - 21
compiler/psub.pas

@@ -2309,8 +2309,6 @@ implementation
          parentfpinitblock: tnode;
          parentfpinitblock: tnode;
          old_parse_generic: boolean;
          old_parse_generic: boolean;
          recordtokens : boolean;
          recordtokens : boolean;
-         parentfp_sym: TSymEntry;
-
       begin
       begin
          old_current_procinfo:=current_procinfo;
          old_current_procinfo:=current_procinfo;
          old_block_type:=block_type;
          old_block_type:=block_type;
@@ -2386,25 +2384,6 @@ implementation
          { parse the code ... }
          { parse the code ... }
          code:=block(current_module.islibrary);
          code:=block(current_module.islibrary);
 
 
-         { If this is a nested procedure which does not access its parent's frame
-           pointer, we can optimize it by removing the hidden $parentfp parameter.
-           Do not perform this for:
-             - targets which use a special struct to access parent's variables;
-             - pure assembler procedures (for compatibility with old code).
-         }
-         if not (target_info.system in systems_fpnestedstruct) and
-            is_nested_pd(procdef) and
-            not (pio_needs_parentfp in procdef.implprocoptions) and
-            not (po_assembler in procdef.procoptions) then
-           begin
-             exclude(procdef.procoptions, po_delphi_nested_cc);
-             parentfp_sym:=procdef.parast.Find('parentfp');
-             if parentfp_sym = nil then
-               Internalerror(2020050301);
-             procdef.parast.Delete(parentfp_sym);
-             procdef.calcparas;
-           end;
-
          if recordtokens then
          if recordtokens then
            begin
            begin
              { stop token recorder for generic template }
              { stop token recorder for generic template }

+ 49 - 0
compiler/symdef.pas

@@ -702,6 +702,8 @@ interface
        private
        private
           procedure count_para(p:TObject;arg:pointer);
           procedure count_para(p:TObject;arg:pointer);
           procedure insert_para(p:TObject;arg:pointer);
           procedure insert_para(p:TObject;arg:pointer);
+       protected
+          procedure handle_unused_paras(side: tcallercallee); virtual;
        end;
        end;
 
 
        tprocvardef = class(tabstractprocdef)
        tprocvardef = class(tabstractprocdef)
@@ -812,6 +814,7 @@ interface
          procedure SetIsEmpty(AValue: boolean);
          procedure SetIsEmpty(AValue: boolean);
          function GetHasInliningInfo: boolean;
          function GetHasInliningInfo: boolean;
          procedure SetHasInliningInfo(AValue: boolean);
          procedure SetHasInliningInfo(AValue: boolean);
+         procedure handle_unused_paras(side: tcallercallee); override;
        public
        public
           messageinf : tmessageinf;
           messageinf : tmessageinf;
           dispid : longint;
           dispid : longint;
@@ -5273,6 +5276,11 @@ implementation
       end;
       end;
 
 
 
 
+    procedure tabstractprocdef.handle_unused_paras(side: tcallercallee);
+    begin
+    end;
+
+
     procedure tabstractprocdef.calcparas;
     procedure tabstractprocdef.calcparas;
       var
       var
         paracount : longint;
         paracount : longint;
@@ -5706,6 +5714,7 @@ implementation
               has_paraloc_info:=callbothsides
               has_paraloc_info:=callbothsides
             else
             else
               has_paraloc_info:=callerside;
               has_paraloc_info:=callerside;
+            handle_unused_paras(callerside);
           end;
           end;
         if (side in [calleeside,callbothsides]) and
         if (side in [calleeside,callbothsides]) and
            not(has_paraloc_info in [calleeside,callbothsides]) then
            not(has_paraloc_info in [calleeside,callbothsides]) then
@@ -5718,6 +5727,7 @@ implementation
               has_paraloc_info:=callbothsides
               has_paraloc_info:=callbothsides
             else
             else
               has_paraloc_info:=calleeside;
               has_paraloc_info:=calleeside;
+            handle_unused_paras(calleeside);
           end;
           end;
       end;
       end;
 
 
@@ -6011,6 +6021,45 @@ implementation
       end;
       end;
 
 
 
 
+    procedure tprocdef.handle_unused_paras(side: tcallercallee);
+      var
+        i : longint;
+      begin
+        { Optimize unused parameters by preventing loading them on the callee side
+          and, if possible, preventing passing them on the caller side.
+          The caller side optimization is handled by tcgcallparanode.maybe_push_unused_para().
+        }
+        if (proctypeoption = potype_exceptfilter) or
+          (po_assembler in procoptions) then
+          exit;
+        { Only $parentfp is optmized for now. }
+        if not is_nested_pd(self) then
+          exit;
+        { Handle unused parameters }
+        for i:=0 to paras.Count-1 do
+          with tparavarsym(paras[i]) do
+            if vo_is_parentfp in varoptions then
+              begin
+                if pio_needs_parentfp in implprocoptions then
+                  begin
+                    { If this routine is accessed from other nested routine,
+                      $parentfp must be in a memory location. }
+                    if pio_nested_access in implprocoptions then
+                      varregable:=vr_none;
+                  end
+                else
+                  begin
+                    { Mark $parentfp as unused, since it has vs_read by default }
+                    varstate:=vs_initialised;
+                    if side=calleeside then
+                      { Set LOC_VOID as the parameter's location on the callee side }
+                      paraloc[side].location^.Loc:=LOC_VOID;
+                    break;
+                  end;
+              end;
+      end;
+
+
     procedure tprocdef.Setinterfacedef(AValue: boolean);
     procedure tprocdef.Setinterfacedef(AValue: boolean);
       begin
       begin
         if not assigned(implprocdefinfo) then
         if not assigned(implprocdefinfo) then