optcse.pas 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. {
  2. Common subexpression elimination on base blocks
  3. Copyright (c) 2005 by Florian Klaempfl
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit optcse;
  18. {$i fpcdefs.inc}
  19. { $define csedebug}
  20. { $define csestats}
  21. interface
  22. uses
  23. node;
  {
    This optimization is not ready for general use yet:
    - when handling short-circuit boolean evaluation, the cse's are evaluated
      before the whole expression; this is wrong and can lead to crashes

    Furthermore, it creates non-optimal code:
    - cse's nested inside another cse are not recognized
    - cse's in chained expressions are not recognized: the common subexpression
      in (a1 and b and c) vs. (a2 and b and c) is not found because there is no
      common subtree "b and c"
    - the cse pass knows nothing about register pressure; in case of high
      register pressure, cse might have a negative impact
    - it could probably be done in a faster way; currently the complexity is
      approx. O(n^4)
  }
  37. function do_optcse(var rootnode : tnode) : tnode;
  38. implementation
  39. uses
  40. globtype,
  41. cclasses,
  42. verbose,
  43. nutils,
  44. nbas,nld,
  45. pass_1,
  46. symtype,symdef;
  const
    { Node types that are allowed inside an expression tree handled by this
      unit: searchsubdomain rejects a tree as soon as it meets a node whose
      type is not listed here, and searchcsedomain only starts at nodes whose
      type is listed here.
      The grouping below is purely cosmetic; the set contents are unchanged. }
    cseinvariant : set of tnodetype = [
      { loads, temps and constants }
      loadn,temprefn,nothingn,
      ordconstn,realconstn,pointerconstn,stringconstn,setconstn,
      { arithmetic }
      addn,subn,muln,divn,slashn,modn,starstarn,unaryminusn,
      { logical and shift operators }
      andn,orn,xorn,notn,shln,shrn,
      { comparisons and set operators }
      equaln,unequaln,ltn,gtn,lten,gten,inn,symdifn,
      { element access and type operators }
      vecn,derefn,subscriptn,typeconvn,isn,asn
    ];
  { Callback for foreachnodestatic, used to verify that a whole tree consists
    only of node types listed in cseinvariant.

    n   : node currently being visited
    arg : must point to a boolean; it is set to false as soon as a node with a
          type outside cseinvariant is met (it is never set to true here, so
          the caller has to initialise it)

    Returns fen_true for an accepted node and fen_norecurse_true for a
    rejected one. }
  function searchsubdomain(var n:tnode; arg: pointer) : foreachnoderesult;
    begin
      if n.nodetype in cseinvariant then
        result:=fen_true
      else
        begin
          { one offending node disqualifies the whole tree }
          pboolean(arg)^:=false;
          result:=fen_norecurse_true;
        end;
    end;
  type
    plists = ^tlists;

    { Pair of parallel lists filled by collectnodes: entry k of both lists
      describes the same candidate node. }
    tlists = record
      { the candidate nodes themselves (tnode) }
      nodelist : tfplist;
      { for every candidate, the address of the variable holding it (pnode),
        so that the node can be replaced in place }
      locationlist : tfplist;
    end;
  { Callback for foreachnodestatic which gathers the nodes that are candidates
    for common subexpression elimination.

    n   : node currently being visited
    arg : must point to a tlists record with both lists already created

    A node is recorded when its node_complexity is larger than 1, its result
    type is int- or fpu-regable and it is not a temprefn. The node goes into
    nodelist and the address of the variable holding it into locationlist.

    Always returns fen_false. }
  function collectnodes(var n:tnode; arg: pointer) : foreachnoderesult;
    var
      resdef : tstoreddef;
      target : plists;
    begin
      result:=fen_false;

      { node worth to add? }
      if node_complexity(n)<=1 then
        exit;

      resdef:=tstoreddef(n.resultdef);
      if not(resdef.is_intregable or resdef.is_fpuregable) then
        exit;

      { adding tempref nodes is worthless but their complexity is probably <= 1 anyways }
      if n.nodetype=temprefn then
        exit;

      target:=plists(arg);
      target^.nodelist.Add(n);
      target^.locationlist.Add(@n);

      { a rejected node deliberately does not stop the recursion; the
        alternative
          result:=fen_norecurse_false;
        is disabled }
    end;
  { Callback for foreachnodestatic which performs common subexpression
    elimination on one expression tree ("cse domain").

    n   : node currently being visited; may be replaced (see below)
    arg : unused

    A cse domain is a tree whose root and all nodes visited by searchsubdomain
    have a type in cseinvariant. For such a tree the candidate nodes are
    collected with collectnodes and every candidate is compared with all
    later candidates using isequal. For each group of equal candidates a
    persistent temp is created, the expression is assigned to it once and
    every occurrence is replaced by a reference to that temp. After each
    successful replacement the collection is rebuilt and the search restarted.

    If at least one replacement was done, n is replaced by a block node made
    of: a nested block with the temp creations, the assignments to the temps,
    and finally the rewritten original expression. The new block is run
    through do_firstpass.

    Returns fen_norecurse_true if n was a cse domain (whether or not anything
    was replaced), fen_false otherwise.

    Raises internalerror(2007091701) if a matching node has no resultdef. }
  function searchcsedomain(var n: tnode; arg: pointer) : foreachnoderesult;
    var
      restart : boolean;
      csedomain : boolean;
      firstmatch : boolean;
      lists : tlists;
      templist : tfplist;
      i,j : longint;
      def : tstoreddef;
      nodes : tblocknode;
      creates,
      statements : tstatementnode;
      hp : ttempcreatenode;
    begin
      result:=fen_false;
      if n.nodetype in cseinvariant then
        begin
          csedomain:=true;
          foreachnodestatic(pm_postprocess,n,@searchsubdomain,@csedomain);
          { found a cse domain }
          if csedomain then
            begin
              { statements doubles as the "something was replaced" flag }
              statements:=nil;
              result:=fen_norecurse_true;
{$ifdef csedebug}
              writeln('============ cse domain ==================');
              printnode(output,n);
              writeln('Complexity: ',node_complexity(n));
{$endif csedebug}
              repeat
                lists.nodelist:=tfplist.create;
                lists.locationlist:=tfplist.create;
                foreachnodestatic(pm_postprocess,n,@collectnodes,@lists);

                { templist[k] holds the ttempcreatenode assigned to candidate k,
                  or nil if the candidate has no temp (yet) }
                templist:=tfplist.create;
                templist.count:=lists.nodelist.count;

                restart:=false;
                { this is poorly coded, just comparing every node with all other nodes }
                { the nodes are sorted by size so we'll find the largest matching tree
                  first }
                for i:=0 to lists.nodelist.count-1 do
                  begin
                    for j:=lists.nodelist.count-1 downto i+1 do
                      begin
                        {
                        writeln(i);
                        writeln(j);
                        writeln(dword(tnode(lists.nodelist[i]).nodetype));
                        writeln(dword(tnode(lists.nodelist[j]).nodetype)); }
                        if not(tnode(lists.nodelist[i]).nodetype in [tempcreaten,temprefn]) and
                           not(tnode(lists.nodelist[j]).nodetype in [tempcreaten,temprefn]) and
                           tnode(lists.nodelist[i]).isequal(tnode(lists.nodelist[j])) then
                          begin
                            { first replacement in this domain: set up the result block }
                            if not(assigned(statements)) then
                              begin
                                nodes:=internalstatements(statements);
                                addstatement(statements,internalstatements(creates));
                              end;
{$if defined(csedebug) or defined(csestats)}
                            writeln('    ====     ');
                            printnode(output,tnode(lists.nodelist[i]));
                            writeln('    equals   ');
                            printnode(output,tnode(lists.nodelist[j]));
                            writeln('    ====     ');
{$endif defined(csedebug) or defined(csestats)}

                            def:=tstoreddef(tnode(lists.nodelist[i]).resultdef);
                            if assigned(def) then
                              begin
                                restart:=true;
                                firstmatch:=not(assigned(templist[i]));
                                if firstmatch then
                                  begin
                                    { first duplicate found for node i: create the
                                      temp and evaluate the expression into it }
                                    templist[i]:=ctempcreatenode.create(def,def.size,tt_persistent,
                                      def.is_intregable or def.is_fpuregable);
                                    addstatement(creates,tnode(templist[i]));

                                    { properties can't be passed by "var" }
                                    hp:=ttempcreatenode(templist[i]);
                                    do_firstpass(tnode(hp));

                                    { The original subtree is moved into the assignment
                                      instead of being copied: its slot in the
                                      expression is overwritten right below, so a
                                      copy would leave the original unreferenced and
                                      never freed. lists.nodelist[i] stays valid for
                                      the remaining comparisons because the node now
                                      lives on inside the assignment. }
                                    addstatement(statements,cassignmentnode.create(ctemprefnode.create(ttempcreatenode(templist[i])),
                                      tnode(lists.nodelist[i])));
                                    pnode(lists.locationlist[i])^:=ctemprefnode.create(ttempcreatenode(templist[i]));
                                    do_firstpass(pnode(lists.locationlist[i])^);
                                  end;

                                { replace occurrence j by a reference to the temp of i }
                                templist[j]:=templist[i];
                                pnode(lists.locationlist[j])^.free;
                                pnode(lists.locationlist[j])^:=ctemprefnode.create(ttempcreatenode(templist[j]));
                                do_firstpass(pnode(lists.locationlist[j])^);
                                { the list entry now refers to the temp reference, which
                                  the nodetype test above skips }
                                lists.nodelist[j]:=pnode(lists.locationlist[j])^;
{$ifdef csedebug}
                                if firstmatch then
                                  printnode(output,statements);
{$endif csedebug}
                              end
                            else
                              internalerror(2007091701);
                          end;
                      end;

                    { if a node in a cse domain has been replaced, we've to restart
                      searching else we could find nested trees of the replaced node
                    }
                    if restart then
                      break;
                  end;
{$ifdef csedebug}
                writeln('nodes: ',lists.nodelist.count);
                writeln('==========================================');
{$endif csedebug}
                { the lists only hold references, the nodes themselves stay alive }
                lists.nodelist.free;
                lists.locationlist.free;
                templist.free;
              until not(restart);

              if assigned(statements) then
                begin
                  { the rewritten expression becomes the last statement of the block }
                  addstatement(statements,n);
                  n:=nodes;
                  do_firstpass(n);
{$ifdef csedebug}
                  printnode(output,nodes);
{$endif csedebug}
                end;
            end
        end;
    end;
  { Entry point of the optimization: runs searchcsedomain over every node of
    the tree below rootnode. The tree is changed in place through the var
    parameter; the function result is always nil. }
  function do_optcse(var rootnode : tnode) : tnode;
    begin
      result:=nil;
      foreachnodestatic(pm_postprocess,rootnode,@searchcsedomain,nil);
      (*
        Sketch of an alternative implementation, currently disabled:

        { create a linear list of nodes }
        { create hash values }
        { sort by hash values, taking care of nf_csebarrier and keeping the
          original order of the nodes }
        { compare nodes with equal hash values }
        { search barrier }
        for i:=0 to nodelist.length-1 do
          begin
            { and then search backward so we get always the largest equal trees }
            j:=i+1;
            { collect equal nodes }
            while (j<=nodelist.length-1) and
              nodelist[i].isequal(nodelist[j]) do
              inc(j);
            dec(j);
            if j>i then
              begin
                { cse found }
                { create temp. location }
                { replace first node by
                  - temp. creation
                  - expression calculation
                  - assignment of expression to temp. }
                tempnode:=ctempcreatenode.create(nodelist[i].resultdef,nodelist[i].resultdef.size,tt_persistent,
                  nodelist[i].resultdef.is_intregable or nodelist[i].resultdef.is_fpuregable);
                addstatement(createstatement,tempnode);
                addstatement(createstatement,cassignmentnode.create(ctemprefnode.create(tempnode),
                  caddrnode.create_internal(para.left)));
                para.left := ctypeconvnode.create_internal(cderefnode.create(ctemprefnode.create(tempnode)),para.left.resultdef);
                addstatement(deletestatement,ctempdeletenode.create(tempnode));
                { replace next nodes by loading the temp. reference }
                { replace last node by loading the temp. reference and
                  delete the temp. }
              end;
          end;
      *)
    end;
  253. end.