3 жил өмнө · 11d16be702
--- a/compiler/ncal.pas
+++ b/compiler/ncal.pas
@@ -88,6 +88,7 @@ interface
 
				           procedure add_done_statement(n:tnode);
			
 
				           procedure convert_carg_array_of_const;
			
 
				           procedure order_parameters;
			
 
				+          function heuristics_favors_inlining:boolean;
			
 
				           procedure check_inlining;
			
 
				           function  pass1_normal:tnode;
			
 
				           procedure register_created_object_types;
			
@@ -4753,6 +4754,30 @@ implementation
 
				       end;
			
 
				 
			
 
				 
			
 
				+    function tcallnode.heuristics_favors_inlining:boolean;
			
 
				+      var
			
 
				+        limExcluding: cardinal;
			
 
				+      begin
			
 
				+        {  Prevent too deep inlining recursion and code bloat by inlining
			
 
				+
			
 
				+           The actual formula is
			
 
				+                             inlinelevel/3+1    /-------
			
 
				+               node count <  -----------------\/  10000
			
 
				+
			
 
				+           This allows exponential growth of the code only up to a certain limit.
			
 
				+
			
 
				+           Remarks
			
 
				+            - The current approach calculates the inlining level top down, so outer call nodes (nodes closer to the leaf) might not be inlined
			
 
				+              if the max. complexity is reached. This is done because it makes the implementation easier and because
			
 
				+              there might be situations where it is more beneficial to inline inner nodes and do the calls to the outer nodes
			
 
				+              if the outer nodes are in a seldom used code path
			
 
				+            - The code avoids using functions from the math unit
			
 
				+        }
			
 
				+        limExcluding:=round(exp((1.0/(inlinelevel/3.0+1))*ln(10000)));
			
 
				+        result:=node_count(tprocdef(procdefinition).inlininginfo^.code,limExcluding)<limExcluding;
			
 
				+      end;
			
 
				+
			
 
				+
			
 
				     procedure tcallnode.check_inlining;
			
 
				       var
			
 
				         st   : tsymtable;
			
@@ -4762,22 +4787,7 @@ implementation
 
				         if (po_inline in procdefinition.procoptions) and
			
 
				            (procdefinition.typ=procdef) and
			
 
				            tprocdef(procdefinition).has_inlininginfo and
			
 
				-           {  Prevent too deep inlining recursion and code bloat by inlining
			
 
				-
			
 
				-              The actual formuala is
			
 
				-                                inlinelevel/3+1    /-------
			
 
				-                  node count <  -----------------\/  10000
			
 
				-
			
 
				-              This allows exponential grow of the code only to a certain limit.
			
 
				-
			
 
				-              Remarks
			
 
				-               - The current approach calculates the inlining level top down, so outer call nodes (nodes closer to the leaf) might not be inlined
			
 
				-                 if the max. complexity is reached. This is done because it makes the implementation easier and because
			
 
				-                 there might be situations were it is more beneficial to inline inner nodes and do the calls to the outer nodes
			
 
				-                 if the outer nodes are in a seldomly used code path
			
 
				-               - The code avoids to use functions from the math unit
			
 
				-           }
			
 
				-           (node_count(tprocdef(procdefinition).inlininginfo^.code)<round(exp((1.0/(inlinelevel/3.0+1))*ln(10000)))) then
			
 
				+           heuristics_favors_inlining then
			
 
				           begin
			
 
				             include(callnodeflags,cnf_do_inline);
			
 
				             { Check if we can inline the procedure when it references proc/var that
			
--- a/compiler/nutils.pas
+++ b/compiler/nutils.pas
@@ -134,10 +134,11 @@ interface
 
				     function has_conditional_nodes(n : tnode) : boolean;
			
 
				 
			
 
				     { count the number of nodes in the node tree,
			
 
				-      rough estimation how large the tree "node" is }
			
 
				-    function node_count(node : tnode) : dword;
			
 
				+      rough estimation how large the tree "node" is
			
 
				+      If the tree has more than max nodes, returns max, so node_count(n, 10 + 1) <= 10 answers whether the tree has ≤10 nodes but stops counting once the limit is reached (e.g. it avoids traversing the remaining ~990 nodes of a 1000-node tree). }
			
 
				+    function node_count(node : tnode; max : dword = High(dword)) : dword;
			
 
				 
			
 
				-    function node_count_weighted(node : tnode) : dword;
			
 
				+    function node_count_weighted(node : tnode; max : dword = High(dword)) : dword;
			
 
				 
			
 
				     { returns true, if the value described by node is constant/immutable, this approximation is safe
			
 
				       if no dirty tricks like buffer overflows or pointer magic are used }
			
@@ -1438,37 +1439,49 @@ implementation
 
				         result:=foreachnodestatic(n,@check_for_conditional_nodes,nil);
			
 
				       end;
			
 
				 
			
 
				-    var
			
 
				-      nodecount : dword;
			
 
				 
			
 
				     function donodecount(var n: tnode; arg: pointer): foreachnoderesult;
			
 
				       begin
			
 
				-        inc(nodecount);
			
 
				-        result:=fen_false;
			
 
				+        if PDWord(arg)^>0 then
			
 
				+          begin
			
 
				+            dec(PDWord(arg)^);
			
 
				+            result:=fen_false;
			
 
				+          end
			
 
				+        else
			
 
				+          result:=fen_norecurse_false;
			
 
				       end;
			
 
				 
			
 
				 
			
 
				-    function node_count(node : tnode) : dword;
			
 
				+    function node_count(node : tnode; max : dword = High(dword)) : dword;
			
 
				+      var
			
 
				+        left : dword;
			
 
				       begin
			
 
				-        nodecount:=0;
			
 
				-        foreachnodestatic(node,@donodecount,nil);
			
 
				-        result:=nodecount;
			
 
				+        left:=max;
			
 
				+        foreachnodestatic(node,@donodecount,@left);
			
 
				+        result:=max-left;
			
 
				       end;
			
 
				 
			
 
				 
			
 
				     function donodecount_weighted(var n: tnode; arg: pointer): foreachnoderesult;
			
 
				       begin
			
 
				-        if not(n.nodetype in [blockn,statementn,callparan,nothingn]) then
			
 
				-          inc(nodecount);
			
 
				-        result:=fen_false;
			
 
				+        if PDWord(arg)^>0 then
			
 
				+          begin
			
 
				+            if not(n.nodetype in [blockn,statementn,callparan,nothingn]) then
			
 
				+              dec(PDWord(arg)^);
			
 
				+            result:=fen_false;
			
 
				+          end
			
 
				+        else
			
 
				+          result:=fen_norecurse_false;
			
 
				       end;
			
 
				 
			
 
				 
			
 
				-    function node_count_weighted(node : tnode) : dword;
			
 
				+    function node_count_weighted(node : tnode; max : dword = High(dword)) : dword;
			
 
				+      var
			
 
				+        left : dword;
			
 
				       begin
			
 
				-        nodecount:=0;
			
 
				-        foreachnodestatic(node,@donodecount_weighted,nil);
			
 
				-        result:=nodecount;
			
 
				+        left:=max;
			
 
				+        foreachnodestatic(node,@donodecount_weighted,@left);
			
 
				+        result:=max-left;
			
 
				       end;
			
 
				 
			
 
				 
			
--- a/compiler/optloop.pas
+++ b/compiler/optloop.pas
@@ -52,6 +52,8 @@ unit optloop;
 
				       procinfo;
			
 
				 
			
 
				     function number_unrolls(node : tnode) : cardinal;
			
 
				+      var
			
 
				+        nodeCount : cardinal;
			
 
				       begin
			
 
				         { calculate how often a loop shall be unrolled.
			
 
				 
			
@@ -60,10 +62,22 @@ unit optloop;
 
				 {$ifdef i386}
			
 
				         { multiply by 2 for CPUs with a long pipeline }
			
 
				         if current_settings.optimizecputype in [cpu_Pentium4] then
			
 
				-          number_unrolls:=trunc(round((60+(60*ord(node_count_weighted(node)<15)))/max(node_count_weighted(node),1)))
			
 
				+          begin
			
 
				+            { See the common branch below for an explanation. }
			
 
				+            nodeCount:=node_count_weighted(node,41);
			
 
				+            number_unrolls:=round((60+(60*ord(nodeCount<15)))/max(nodeCount,1))
			
 
				+          end
			
 
				         else
			
 
				 {$endif i386}
			
 
				-          number_unrolls:=trunc(round((30+(60*ord(node_count_weighted(node)<15)))/max(node_count_weighted(node),1)));
			
 
				+          begin
			
 
				+            { If nodeCount >= 15, the numerator is 30, and the smallest denominator
			
 
				+              for which round(30/denominator) = 1 is 21 = trunc(30/1.5+1).
			
 
				+              Any larger count cannot give more than 1 unroll either,
			
 
				+              so there's no point in counting more than 21 nodes.
			
 
				+              The "long pipeline" variant above is the same with numerator = 60 and max denominator = 41. }
			
 
				+            nodeCount:=node_count_weighted(node,21);
			
 
				+            number_unrolls:=round((30+(60*ord(nodeCount<15)))/max(nodeCount,1));
			
 
				+          end;
			
 
				 
			
 
				         if number_unrolls=0 then
			
 
				           number_unrolls:=1;
			
--- a/compiler/psub.pas
+++ b/compiler/psub.pas
@@ -1829,6 +1829,23 @@ implementation
 
				              end;
			
 
				          end;
			
 
				 
			
 
				+       function heuristics_favors_autoinlining(code: tnode): boolean;
			
 
				+         var
			
 
				+           complexityAvail : integer;
			
 
				+         begin
			
 
				+           { rough approximation if we should auto inline:
			
 
				+             - if the tree is simple enough
			
 
				+             - if the tree is not too big
			
 
				+             A bigger tree which is simpler might be autoinlined otoh
			
 
				+             a smaller and more complex tree as well: so we use the sum of
			
 
				+             both measures here }
			
 
				+
			
 
				+           { This is a short-circuited version of
			
 
				+             "result:=node_count(code)+node_complexity(code)<=25". }
			
 
				+           complexityAvail:=25-node_complexity(code);
			
 
				+           result:=(complexityAvail>0) and (node_count(code,complexityAvail+1)<=dword(complexityAvail));
			
 
				+         end;
			
 
				+
			
 
				       var
			
 
				         old_current_procinfo : tprocinfo;
			
 
				         oldmaxfpuregisters : longint;
			
@@ -1911,13 +1928,7 @@ implementation
 
				                                            potype_destructor,potype_class_constructor,potype_class_destructor]) and
			
 
				             ((procdef.procoptions*[po_exports,po_external,po_interrupt,po_virtualmethod,po_iocheck])=[]) and
			
 
				             (not(procdef.proccalloption in [pocall_safecall])) and
			
 
				-            { rough approximation if we should auto inline:
			
 
				-              - if the tree is simple enough
			
 
				-              - if the tree is not too big
			
 
				-              A bigger tree which is simpler might be autoinlined otoh
			
 
				-              a smaller and complexer tree as well: so we use the sum of
			
 
				-              both measures here }
			
 
				-            (node_count(code)+node_complexity(code)<=25) then
			
 
				+            heuristics_favors_autoinlining(code) then
			
 
				           begin
			
 
				             { Can we inline this procedure? }
			
 
				             if checknodeinlining(procdef) then