optcse.pas 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. {
  2. Common subexpression elimination on base blocks
  3. Copyright (c) 2005 by Florian Klaempfl
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit optcse;
  18. {$i fpcdefs.inc}
  19. { $define csedebug}
  20. { $define csestats}
  21. interface
  22. uses
  23. node;
  24. {
  25. the function creates non optimal code so far:
  26. - call para nodes are cse barriers because they can be reordered and thus the
  27. temp. creation can be done too late
  28. - cse's in chained expressions are not recognized: the common subexpression
  29. in (a1 and b and c) vs. (a2 and b and c) is not recognized because there is no common
  30. subtree b and c
  31. - the cse knows nothing about register pressure. In case of high register pressure, cse might
  32. have a negative impact
  33. - assignment nodes are currently cse borders: things like a[i,j]:=a[i,j]+1; are not improved
  34. - the list of cseinvariant node types and inline numbers is not complete yet
  35. Further, it could be done probably in a faster way though the complexity can't probably not reduced
  36. }
  37. function do_optcse(var rootnode : tnode) : tnode;
  38. implementation
  39. uses
  40. globtype,
  41. cclasses,
  42. verbose,
  43. nutils,
  44. nbas,nld,ninl,ncal,ncnv,nadd,
  45. pass_1,
  46. symconst,symtype,symdef,
  47. defutil,
  48. optbase;
  49. const
  50. cseinvariant : set of tnodetype = [loadn,addn,muln,subn,divn,slashn,modn,andn,orn,xorn,notn,vecn,
  51. derefn,equaln,unequaln,ltn,gtn,lten,gten,typeconvn,subscriptn,
  52. inn,symdifn,shrn,shln,ordconstn,realconstn,unaryminusn,pointerconstn,stringconstn,setconstn,
  53. isn,asn,starstarn,nothingn,temprefn {,callparan}];
  54. function searchsubdomain(var n:tnode; arg: pointer) : foreachnoderesult;
  55. begin
  56. if (n.nodetype in cseinvariant) or
  57. ((n.nodetype=inlinen) and
  58. (tinlinenode(n).inlinenumber in [in_assigned_x])
  59. ) then
  60. result:=fen_true
  61. else
  62. begin
  63. pboolean(arg)^:=false;
  64. result:=fen_norecurse_true;
  65. end;
  66. end;
  67. type
  68. tlists = record
  69. nodelist : tfplist;
  70. locationlist : tfplist;
  71. equalto : tfplist;
  72. refs : tfplist;
  73. avail : TDFASet;
  74. end;
  75. plists = ^tlists;
  76. { collectnodes needs the address of itself to call foreachnodestatic,
  77. so we need a wrapper because @<func> inside <func doesn't work }
  78. function collectnodes(var n:tnode; arg: pointer) : foreachnoderesult;forward;
  79. function collectnodes2(var n:tnode; arg: pointer) : foreachnoderesult;
  80. begin
  81. result:=collectnodes(n,arg);
  82. end;
  83. function collectnodes(var n:tnode; arg: pointer) : foreachnoderesult;
  84. var
  85. i,j : longint;
  86. begin
  87. result:=fen_false;
  88. { don't add the tree below an untyped const parameter: there is
  89. no information available that this kind of tree actually needs
  90. to be addresable, this could be improved }
  91. if ((n.nodetype=callparan) and
  92. (tcallparanode(n).left.resultdef.typ=formaldef) and
  93. (tcallparanode(n).parasym.varspez=vs_const)) then
  94. begin
  95. result:=fen_norecurse_false;
  96. exit;
  97. end;
  98. { so far, we can handle only nodes being read }
  99. if (n.flags*[nf_write,nf_modify]=[]) and
  100. { node possible to add? }
  101. assigned(n.resultdef) and (tstoreddef(n.resultdef).is_intregable or tstoreddef(n.resultdef).is_fpuregable) and
  102. { is_int/fpuregable allows arrays and records to be in registers, cse cannot handle this }
  103. not(n.resultdef.typ in [arraydef,recorddef]) and
  104. { adding tempref nodes is worthless but their complexity is probably <= 1 anyways }
  105. not(n.nodetype in [temprefn]) and
  106. { node worth to add?
  107. We consider every node because even loading a variables from
  108. a register instead of memory is more beneficial. This behaviour should
  109. not increase register pressure because if a variable is already
  110. in a register, the reg. allocator can merge the nodes. If a variable
  111. is loaded from memory, loading this variable and spilling another register
  112. should not add a speed penalty.
  113. Const nodes however are only considered if their complexity is >1
  114. This might be the case for the risc architectures if they need
  115. more than one instruction to load this particular value
  116. }
  117. (not(is_constnode(n)) or (node_complexity(n)>1)) then
  118. begin
  119. plists(arg)^.nodelist.Add(n);
  120. plists(arg)^.locationlist.Add(@n);
  121. plists(arg)^.refs.Add(nil);
  122. plists(arg)^.equalto.Add(pointer(-1));
  123. DFASetInclude(plists(arg)^.avail,plists(arg)^.nodelist.count-1);
  124. for i:=0 to plists(arg)^.nodelist.count-2 do
  125. begin
  126. if tnode(plists(arg)^.nodelist[i]).isequal(n) and DFASetIn(plists(arg)^.avail,i) then
  127. begin
  128. { use always the first occurence }
  129. if ptrint(plists(arg)^.equalto[i])<>-1 then
  130. plists(arg)^.equalto[plists(arg)^.nodelist.count-1]:=plists(arg)^.equalto[i]
  131. else
  132. plists(arg)^.equalto[plists(arg)^.nodelist.count-1]:=pointer(i);
  133. plists(arg)^.refs[i]:=pointer(plists(arg)^.refs[i])+1;
  134. break;
  135. end;
  136. end;
  137. { boolean and/or require a special handling: after evaluating the and/or node,
  138. the expressions of the right side might not be available due to short boolean
  139. evaluation, so after handling the right side, mark those expressions
  140. as unavailable }
  141. if (n.nodetype in [orn,andn]) and is_boolean(taddnode(n).left.resultdef) then
  142. begin
  143. foreachnodestatic(pm_postprocess,taddnode(n).left,@collectnodes2,arg);
  144. j:=plists(arg)^.nodelist.count;
  145. foreachnodestatic(pm_postprocess,taddnode(n).right,@collectnodes2,arg);
  146. for i:=j to plists(arg)^.nodelist.count-1 do
  147. DFASetExclude(plists(arg)^.avail,i);
  148. result:=fen_norecurse_false;
  149. end;
  150. end;
  151. end;
  152. function searchcsedomain(var n: tnode; arg: pointer) : foreachnoderesult;
  153. var
  154. csedomain : boolean;
  155. lists : tlists;
  156. templist : tfplist;
  157. i : longint;
  158. def : tstoreddef;
  159. nodes : tblocknode;
  160. creates,
  161. statements : tstatementnode;
  162. hp : ttempcreatenode;
  163. begin
  164. result:=fen_false;
  165. if n.nodetype in cseinvariant then
  166. begin
  167. csedomain:=true;
  168. foreachnodestatic(pm_postprocess,n,@searchsubdomain,@csedomain);
  169. { found a cse domain }
  170. if csedomain then
  171. begin
  172. statements:=nil;
  173. result:=fen_norecurse_true;
  174. {$ifdef csedebug}
  175. writeln('============ cse domain ==================');
  176. printnode(output,n);
  177. writeln('Complexity: ',node_complexity(n));
  178. {$endif csedebug}
  179. lists.nodelist:=tfplist.create;
  180. lists.locationlist:=tfplist.create;
  181. lists.equalto:=tfplist.create;
  182. lists.refs:=tfplist.create;
  183. foreachnodestatic(pm_postprocess,n,@collectnodes,@lists);
  184. templist:=tfplist.create;
  185. templist.count:=lists.nodelist.count;
  186. { check all nodes if one is used more than once }
  187. for i:=0 to lists.nodelist.count-1 do
  188. begin
  189. { current node used more than once? }
  190. if ptrint(lists.refs[i])<>0 then
  191. begin
  192. if not(assigned(statements)) then
  193. begin
  194. nodes:=internalstatements(statements);
  195. addstatement(statements,internalstatements(creates));
  196. end;
  197. def:=tstoreddef(tnode(lists.nodelist[i]).resultdef);
  198. templist[i]:=ctempcreatenode.create_value(def,def.size,tt_persistent,
  199. def.is_intregable or def.is_fpuregable,tnode(lists.nodelist[i]));
  200. addstatement(creates,tnode(templist[i]));
  201. hp:=ttempcreatenode(templist[i]);
  202. do_firstpass(tnode(hp));
  203. templist[i]:=hp;
  204. pnode(lists.locationlist[i])^:=ctemprefnode.create(ttempcreatenode(templist[i]));
  205. do_firstpass(pnode(lists.locationlist[i])^);
  206. {$ifdef csedebug}
  207. printnode(output,statements);
  208. {$endif csedebug}
  209. end
  210. { current node reference to another node? }
  211. else if ptrint(lists.equalto[i])<>-1 then
  212. begin
  213. {$if defined(csedebug) or defined(csestats)}
  214. printnode(output,tnode(lists.nodelist[i]));
  215. writeln(i,' equals ',ptrint(lists.equalto[i]));
  216. printnode(output,tnode(lists.nodelist[ptrint(lists.equalto[i])]));
  217. {$endif defined(csedebug) or defined(csestats)}
  218. templist[i]:=templist[ptrint(lists.equalto[i])];
  219. pnode(lists.locationlist[i])^:=ctemprefnode.create(ttempcreatenode(templist[ptrint(lists.equalto[i])]));
  220. do_firstpass(pnode(lists.locationlist[i])^);
  221. end;
  222. end;
  223. { clean up unused trees }
  224. for i:=0 to lists.nodelist.count-1 do
  225. if ptrint(lists.equalto[i])<>-1 then
  226. tnode(lists.nodelist[i]).free;
  227. {$ifdef csedebug}
  228. writeln('nodes: ',lists.nodelist.count);
  229. writeln('==========================================');
  230. {$endif csedebug}
  231. lists.nodelist.free;
  232. lists.locationlist.free;
  233. lists.equalto.free;
  234. lists.refs.free;
  235. templist.free;
  236. if assigned(statements) then
  237. begin
  238. { call para nodes need a special handling because
  239. they can be only children nodes of call nodes
  240. so the initialization code is inserted below the
  241. call para node
  242. }
  243. if n.nodetype=callparan then
  244. begin
  245. addstatement(statements,tcallparanode(n).left);
  246. tcallparanode(n).left:=nodes;
  247. do_firstpass(tcallparanode(n).left);
  248. end
  249. else
  250. begin
  251. addstatement(statements,n);
  252. n:=nodes;
  253. do_firstpass(n);
  254. end;
  255. {$ifdef csedebug}
  256. printnode(output,nodes);
  257. {$endif csedebug}
  258. end;
  259. end
  260. end;
  261. end;
  262. function do_optcse(var rootnode : tnode) : tnode;
  263. begin
  264. foreachnodestatic(pm_postprocess,rootnode,@searchcsedomain,nil);
  265. result:=nil;
  266. end;
  267. end.