optcse.pas 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. {
  2. Common subexpression elimination on base blocks
  3. Copyright (c) 2005 by Florian Klaempfl
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit optcse;
  18. {$i fpcdefs.inc}
  19. { $define csedebug}
  20. { $define csestats}
  21. interface
  22. uses
  23. node;
  24. {
  25. the function creates non optimal code so far:
  26. - cse's in chained expressions are not recognized: the common subexpression
  27. in (a1 and b and c) vs. (a2 and b and c) is not recognized because there is no common
  28. subtree b and c
  29. - the cse knows nothing about register pressure. In case of high register pressure, cse might
  30. have a negative impact
  31. - assignment nodes are currently cse borders: things like a[i,j]:=a[i,j]+1; are not improved
  32. - the list of cseinvariant node types and inline numbers is not complete yet
  33. Further, it could be done probably in a faster way though the complexity can't probably not reduced
  34. }
  35. function do_optcse(var rootnode : tnode) : tnode;
  36. implementation
  37. uses
  38. globtype,
  39. cclasses,
  40. verbose,
  41. nutils,
  42. nbas,nld,ninl,
  43. pass_1,
  44. symtype,symdef;
  45. const
  46. cseinvariant : set of tnodetype = [loadn,addn,muln,subn,divn,slashn,modn,andn,orn,xorn,notn,vecn,
  47. derefn,equaln,unequaln,ltn,gtn,lten,gten,typeconvn,subscriptn,
  48. inn,symdifn,shrn,shln,ordconstn,realconstn,unaryminusn,pointerconstn,stringconstn,setconstn,
  49. isn,asn,starstarn,nothingn,temprefn,callparan];
  50. function searchsubdomain(var n:tnode; arg: pointer) : foreachnoderesult;
  51. begin
  52. if (n.nodetype in cseinvariant) or
  53. ((n.nodetype=inlinen) and
  54. (tinlinenode(n).inlinenumber in [in_assigned_x])
  55. ) then
  56. result:=fen_true
  57. else
  58. begin
  59. pboolean(arg)^:=false;
  60. result:=fen_norecurse_true;
  61. end;
  62. end;
  63. type
  64. tlists = record
  65. nodelist : tfplist;
  66. locationlist : tfplist;
  67. equalto : tfplist;
  68. refs : tfplist;
  69. end;
  70. plists = ^tlists;
  71. function collectnodes(var n:tnode; arg: pointer) : foreachnoderesult;
  72. var
  73. i : longint;
  74. begin
  75. result:=fen_false;
  76. { node worth to add? }
  77. if (node_complexity(n)>1) and (tstoreddef(n.resultdef).is_intregable or tstoreddef(n.resultdef).is_fpuregable) and
  78. { adding tempref nodes is worthless but their complexity is probably <= 1 anyways }
  79. not(n.nodetype in [temprefn]) then
  80. begin
  81. plists(arg)^.nodelist.Add(n);
  82. plists(arg)^.locationlist.Add(@n);
  83. plists(arg)^.refs.Add(nil);
  84. plists(arg)^.equalto.Add(pointer(-1));
  85. for i:=0 to plists(arg)^.nodelist.count-2 do
  86. begin
  87. if tnode(plists(arg)^.nodelist[i]).isequal(n) then
  88. begin
  89. { use always the first occurence }
  90. if ptrint(plists(arg)^.equalto[i])<>-1 then
  91. plists(arg)^.equalto[plists(arg)^.nodelist.count-1]:=plists(arg)^.equalto[i]
  92. else
  93. plists(arg)^.equalto[plists(arg)^.nodelist.count-1]:=pointer(i);
  94. plists(arg)^.refs[i]:=pointer(plists(arg)^.refs[i])+1;
  95. exit;
  96. end;
  97. end;
  98. end;
  99. end;
  100. function searchcsedomain(var n: tnode; arg: pointer) : foreachnoderesult;
  101. var
  102. csedomain : boolean;
  103. lists : tlists;
  104. templist : tfplist;
  105. i : longint;
  106. def : tstoreddef;
  107. nodes : tblocknode;
  108. creates,
  109. statements : tstatementnode;
  110. hp : ttempcreatenode;
  111. begin
  112. result:=fen_false;
  113. if n.nodetype in cseinvariant then
  114. begin
  115. csedomain:=true;
  116. foreachnodestatic(pm_postprocess,n,@searchsubdomain,@csedomain);
  117. { found a cse domain }
  118. if csedomain then
  119. begin
  120. statements:=nil;
  121. result:=fen_norecurse_true;
  122. {$ifdef csedebug}
  123. writeln('============ cse domain ==================');
  124. printnode(output,n);
  125. writeln('Complexity: ',node_complexity(n));
  126. {$endif csedebug}
  127. lists.nodelist:=tfplist.create;
  128. lists.locationlist:=tfplist.create;
  129. lists.equalto:=tfplist.create;
  130. lists.refs:=tfplist.create;
  131. foreachnodestatic(pm_postprocess,n,@collectnodes,@lists);
  132. templist:=tfplist.create;
  133. templist.count:=lists.nodelist.count;
  134. for i:=0 to lists.nodelist.count-1 do
  135. begin
  136. { current node used more than once? }
  137. if ptrint(lists.refs[i])<>0 then
  138. begin
  139. if not(assigned(statements)) then
  140. begin
  141. nodes:=internalstatements(statements);
  142. addstatement(statements,internalstatements(creates));
  143. end;
  144. def:=tstoreddef(tnode(lists.nodelist[i]).resultdef);
  145. templist[i]:=ctempcreatenode.create_value(def,def.size,tt_persistent,
  146. def.is_intregable or def.is_fpuregable,tnode(lists.nodelist[i]));
  147. addstatement(creates,tnode(templist[i]));
  148. hp:=ttempcreatenode(templist[i]);
  149. do_firstpass(tnode(hp));
  150. templist[i]:=hp;
  151. pnode(lists.locationlist[i])^:=ctemprefnode.create(ttempcreatenode(templist[i]));
  152. do_firstpass(pnode(lists.locationlist[i])^);
  153. {$ifdef csedebug}
  154. printnode(output,statements);
  155. {$endif csedebug}
  156. end
  157. { current node reference to another node? }
  158. else if ptrint(lists.equalto[i])<>-1 then
  159. begin
  160. {$if defined(csedebug) or defined(csestats)}
  161. printnode(output,tnode(lists.nodelist[i]));
  162. writeln(i,' equals ',ptrint(lists.equalto[i]));
  163. printnode(output,tnode(lists.nodelist[ptrint(lists.equalto[i])]));
  164. {$endif defined(csedebug) or defined(csestats)}
  165. templist[i]:=templist[ptrint(lists.equalto[i])];
  166. pnode(lists.locationlist[i])^:=ctemprefnode.create(ttempcreatenode(templist[ptrint(lists.equalto[i])]));
  167. do_firstpass(pnode(lists.locationlist[i])^);
  168. end;
  169. end;
  170. { clean up unused trees }
  171. for i:=0 to lists.nodelist.count-1 do
  172. if ptrint(lists.equalto[i])<>-1 then
  173. tnode(lists.nodelist[i]).free;
  174. {$ifdef csedebug}
  175. writeln('nodes: ',lists.nodelist.count);
  176. writeln('==========================================');
  177. {$endif csedebug}
  178. lists.nodelist.free;
  179. lists.locationlist.free;
  180. lists.equalto.free;
  181. lists.refs.free;
  182. templist.free;
  183. if assigned(statements) then
  184. begin
  185. addstatement(statements,n);
  186. n:=nodes;
  187. do_firstpass(n);
  188. {$ifdef csedebug}
  189. printnode(output,nodes);
  190. {$endif csedebug}
  191. end;
  192. end
  193. end;
  194. end;
  195. function do_optcse(var rootnode : tnode) : tnode;
  196. begin
  197. foreachnodestatic(pm_postprocess,rootnode,@searchcsedomain,nil);
  198. result:=nil;
  199. (*
  200. { create a linear list of nodes }
  201. { create hash values }
  202. { sort by hash values, taking care of nf_csebarrier and keeping the
  203. original order of the nodes }
  204. { compare nodes with equal hash values }
  205. { search barrier }
  206. for i:=0 to nodelist.length-1 do
  207. begin
  208. { and then search backward so we get always the largest equal trees }
  209. j:=i+1;
  210. { collect equal nodes }
  211. while (j<=nodelist.length-1) and
  212. nodelist[i].isequal(nodelist[j]) do
  213. inc(j);
  214. dec(j);
  215. if j>i then
  216. begin
  217. { cse found }
  218. { create temp. location }
  219. { replace first node by
  220. - temp. creation
  221. - expression calculation
  222. - assignment of expression to temp. }
  223. tempnode:=ctempcreatenode.create(nodelist[i].resultdef,nodelist[i].resultdef.size,tt_persistent,
  224. nodelist[i].resultdef.is_intregable or nodelist[i].resultdef.is_fpuregable);
  225. addstatement(createstatement,tempnode);
  226. addstatement(createstatement,cassignmentnode.create(ctemprefnode.create(tempnode),
  227. caddrnode.create_internal(para.left)));
  228. para.left := ctypeconvnode.create_internal(cderefnode.create(ctemprefnode.create(tempnode)),para.left.resultdef);
  229. addstatement(deletestatement,ctempdeletenode.create(tempnode));
  230. { replace next nodes by loading the temp. reference }
  231. { replace last node by loading the temp. reference and
  232. delete the temp. }
  233. end;
  234. end;
  235. *)
  236. end;
  237. end.