optcse.pas 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. {
  2. Common subexpression elimination on base blocks
  3. Copyright (c) 2005 by Florian Klaempfl
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit optcse;
  18. {$i fpcdefs.inc}
  19. { $define csedebug}
  20. { $define csestats}
  21. interface
  22. uses
  23. node;
  24. {
  25. the function creates non optimal code so far:
  26. - call para nodes are cse barriers because they can be reordered and thus the
  27. temp. creation can be done too late
  28. - cse's in chained expressions are not recognized: the common subexpression
  29. in (a1 and b and c) vs. (a2 and b and c) is not recognized because there is no common
  30. subtree b and c
  31. - the cse knows nothing about register pressure. In case of high register pressure, cse might
  32. have a negative impact
  33. - assignment nodes are currently cse borders: things like a[i,j]:=a[i,j]+1; are not improved
  34. - the list of cseinvariant node types and inline numbers is not complete yet
  35. Further, it could be done probably in a faster way though the complexity can't probably not reduced
  36. }
  37. function do_optcse(var rootnode : tnode) : tnode;
  38. implementation
  39. uses
  40. globtype,
  41. cclasses,
  42. verbose,
  43. nutils,
  44. nbas,nld,ninl,ncal,
  45. pass_1,
  46. symconst,symtype,symdef;
  47. const
  48. cseinvariant : set of tnodetype = [loadn,addn,muln,subn,divn,slashn,modn,andn,orn,xorn,notn,vecn,
  49. derefn,equaln,unequaln,ltn,gtn,lten,gten,typeconvn,subscriptn,
  50. inn,symdifn,shrn,shln,ordconstn,realconstn,unaryminusn,pointerconstn,stringconstn,setconstn,
  51. isn,asn,starstarn,nothingn,temprefn {,callparan}];
  52. function searchsubdomain(var n:tnode; arg: pointer) : foreachnoderesult;
  53. begin
  54. if (n.nodetype in cseinvariant) or
  55. ((n.nodetype=inlinen) and
  56. (tinlinenode(n).inlinenumber in [in_assigned_x])
  57. ) then
  58. result:=fen_true
  59. else
  60. begin
  61. pboolean(arg)^:=false;
  62. result:=fen_norecurse_true;
  63. end;
  64. end;
  65. type
  66. tlists = record
  67. nodelist : tfplist;
  68. locationlist : tfplist;
  69. equalto : tfplist;
  70. refs : tfplist;
  71. end;
  72. plists = ^tlists;
  73. function collectnodes(var n:tnode; arg: pointer) : foreachnoderesult;
  74. var
  75. i : longint;
  76. begin
  77. result:=fen_false;
  78. { don't add the tree below an untyped const parameter: there is
  79. no information available that this kind of tree actually needs
  80. to be addresable, this could be improved }
  81. if ((n.nodetype=callparan) and
  82. (tcallparanode(n).left.resultdef.typ=formaldef) and
  83. (tcallparanode(n).parasym.varspez=vs_const)) then
  84. begin
  85. result:=fen_norecurse_false;
  86. exit;
  87. end;
  88. { so far, we can handle only nodes being read }
  89. if (n.flags*[nf_write,nf_modify]=[]) and
  90. { node possible to add? }
  91. assigned(n.resultdef) and (tstoreddef(n.resultdef).is_intregable or tstoreddef(n.resultdef).is_fpuregable) and
  92. { is_int/fpuregable allows arrays and records to be in registers, cse cannot handle this }
  93. not(n.resultdef.typ in [arraydef,recorddef]) and
  94. { adding tempref nodes is worthless but their complexity is probably <= 1 anyways }
  95. not(n.nodetype in [temprefn]) and
  96. { node worth to add? }
  97. (node_complexity(n)>1) then
  98. begin
  99. plists(arg)^.nodelist.Add(n);
  100. plists(arg)^.locationlist.Add(@n);
  101. plists(arg)^.refs.Add(nil);
  102. plists(arg)^.equalto.Add(pointer(-1));
  103. for i:=0 to plists(arg)^.nodelist.count-2 do
  104. begin
  105. if tnode(plists(arg)^.nodelist[i]).isequal(n) then
  106. begin
  107. { use always the first occurence }
  108. if ptrint(plists(arg)^.equalto[i])<>-1 then
  109. plists(arg)^.equalto[plists(arg)^.nodelist.count-1]:=plists(arg)^.equalto[i]
  110. else
  111. plists(arg)^.equalto[plists(arg)^.nodelist.count-1]:=pointer(i);
  112. plists(arg)^.refs[i]:=pointer(plists(arg)^.refs[i])+1;
  113. exit;
  114. end;
  115. end;
  116. end;
  117. end;
  118. function searchcsedomain(var n: tnode; arg: pointer) : foreachnoderesult;
  119. var
  120. csedomain : boolean;
  121. lists : tlists;
  122. templist : tfplist;
  123. i : longint;
  124. def : tstoreddef;
  125. nodes : tblocknode;
  126. creates,
  127. statements : tstatementnode;
  128. hp : ttempcreatenode;
  129. begin
  130. result:=fen_false;
  131. if n.nodetype in cseinvariant then
  132. begin
  133. csedomain:=true;
  134. foreachnodestatic(pm_postprocess,n,@searchsubdomain,@csedomain);
  135. { found a cse domain }
  136. if csedomain then
  137. begin
  138. statements:=nil;
  139. result:=fen_norecurse_true;
  140. {$ifdef csedebug}
  141. writeln('============ cse domain ==================');
  142. printnode(output,n);
  143. writeln('Complexity: ',node_complexity(n));
  144. {$endif csedebug}
  145. lists.nodelist:=tfplist.create;
  146. lists.locationlist:=tfplist.create;
  147. lists.equalto:=tfplist.create;
  148. lists.refs:=tfplist.create;
  149. foreachnodestatic(pm_postprocess,n,@collectnodes,@lists);
  150. templist:=tfplist.create;
  151. templist.count:=lists.nodelist.count;
  152. { check all nodes if one is used more than once }
  153. for i:=0 to lists.nodelist.count-1 do
  154. begin
  155. { current node used more than once? }
  156. if ptrint(lists.refs[i])<>0 then
  157. begin
  158. if not(assigned(statements)) then
  159. begin
  160. nodes:=internalstatements(statements);
  161. addstatement(statements,internalstatements(creates));
  162. end;
  163. def:=tstoreddef(tnode(lists.nodelist[i]).resultdef);
  164. templist[i]:=ctempcreatenode.create_value(def,def.size,tt_persistent,
  165. def.is_intregable or def.is_fpuregable,tnode(lists.nodelist[i]));
  166. addstatement(creates,tnode(templist[i]));
  167. hp:=ttempcreatenode(templist[i]);
  168. do_firstpass(tnode(hp));
  169. templist[i]:=hp;
  170. pnode(lists.locationlist[i])^:=ctemprefnode.create(ttempcreatenode(templist[i]));
  171. do_firstpass(pnode(lists.locationlist[i])^);
  172. {$ifdef csedebug}
  173. printnode(output,statements);
  174. {$endif csedebug}
  175. end
  176. { current node reference to another node? }
  177. else if ptrint(lists.equalto[i])<>-1 then
  178. begin
  179. {$if defined(csedebug) or defined(csestats)}
  180. printnode(output,tnode(lists.nodelist[i]));
  181. writeln(i,' equals ',ptrint(lists.equalto[i]));
  182. printnode(output,tnode(lists.nodelist[ptrint(lists.equalto[i])]));
  183. {$endif defined(csedebug) or defined(csestats)}
  184. templist[i]:=templist[ptrint(lists.equalto[i])];
  185. pnode(lists.locationlist[i])^:=ctemprefnode.create(ttempcreatenode(templist[ptrint(lists.equalto[i])]));
  186. do_firstpass(pnode(lists.locationlist[i])^);
  187. end;
  188. end;
  189. { clean up unused trees }
  190. for i:=0 to lists.nodelist.count-1 do
  191. if ptrint(lists.equalto[i])<>-1 then
  192. tnode(lists.nodelist[i]).free;
  193. {$ifdef csedebug}
  194. writeln('nodes: ',lists.nodelist.count);
  195. writeln('==========================================');
  196. {$endif csedebug}
  197. lists.nodelist.free;
  198. lists.locationlist.free;
  199. lists.equalto.free;
  200. lists.refs.free;
  201. templist.free;
  202. if assigned(statements) then
  203. begin
  204. { call para nodes need a special handling because
  205. they can be only children nodes of call nodes
  206. so the initialization code is inserted below the
  207. call para node
  208. }
  209. if n.nodetype=callparan then
  210. begin
  211. addstatement(statements,tcallparanode(n).left);
  212. tcallparanode(n).left:=nodes;
  213. do_firstpass(tcallparanode(n).left);
  214. end
  215. else
  216. begin
  217. addstatement(statements,n);
  218. n:=nodes;
  219. do_firstpass(n);
  220. end;
  221. {$ifdef csedebug}
  222. printnode(output,nodes);
  223. {$endif csedebug}
  224. end;
  225. end
  226. end;
  227. end;
  228. function do_optcse(var rootnode : tnode) : tnode;
  229. begin
  230. foreachnodestatic(pm_postprocess,rootnode,@searchcsedomain,nil);
  231. result:=nil;
  232. (*
  233. { create a linear list of nodes }
  234. { create hash values }
  235. { sort by hash values, taking care of nf_csebarrier and keeping the
  236. original order of the nodes }
  237. { compare nodes with equal hash values }
  238. { search barrier }
  239. for i:=0 to nodelist.length-1 do
  240. begin
  241. { and then search backward so we get always the largest equal trees }
  242. j:=i+1;
  243. { collect equal nodes }
  244. while (j<=nodelist.length-1) and
  245. nodelist[i].isequal(nodelist[j]) do
  246. inc(j);
  247. dec(j);
  248. if j>i then
  249. begin
  250. { cse found }
  251. { create temp. location }
  252. { replace first node by
  253. - temp. creation
  254. - expression calculation
  255. - assignment of expression to temp. }
  256. tempnode:=ctempcreatenode.create(nodelist[i].resultdef,nodelist[i].resultdef.size,tt_persistent,
  257. nodelist[i].resultdef.is_intregable or nodelist[i].resultdef.is_fpuregable);
  258. addstatement(createstatement,tempnode);
  259. addstatement(createstatement,cassignmentnode.create(ctemprefnode.create(tempnode),
  260. caddrnode.create_internal(para.left)));
  261. para.left := ctypeconvnode.create_internal(cderefnode.create(ctemprefnode.create(tempnode)),para.left.resultdef);
  262. addstatement(deletestatement,ctempdeletenode.create(tempnode));
  263. { replace next nodes by loading the temp. reference }
  264. { replace last node by loading the temp. reference and
  265. delete the temp. }
  266. end;
  267. end;
  268. *)
  269. end;
  270. end.