optcse.pas 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449
  1. {
  2. Common subexpression elimination on base blocks
  3. Copyright (c) 2005 by Florian Klaempfl
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit optcse;
  18. {$i fpcdefs.inc}
  19. { $define csedebug}
  20. { $define csestats}
  21. interface
  22. uses
  23. node;
  {
    The function creates non-optimal code so far:
    - call para nodes are cse barriers because they can be reordered and thus the
      temp. creation could be done too late
    - cse's in chained expressions are not recognized: the common subexpression
      in (a1 and b and c) vs. (a2 and b and c) is not recognized because there is no common
      subtree b and c
    - the cse knows nothing about register pressure. In case of high register pressure, cse might
      have a negative impact
    - assignment nodes are currently cse borders: things like a[i,j]:=a[i,j]+1; are not improved
    - the list of cseinvariant node types and inline numbers is not complete yet

    Further, it could probably be done in a faster way, though the complexity probably cannot be reduced.
  }
  37. function do_optcse(var rootnode : tnode) : tnode;
  38. implementation
  39. uses
  40. globtype,globals,
  41. cclasses,
  42. verbose,
  43. nutils,
  44. procinfo,
  45. nbas,nld,ninl,ncal,ncnv,nadd,nmem,
  46. pass_1,
  47. symconst,symtype,symdef,symsym,
  48. defutil,
  49. optbase;
  const
    { Node types that, on their own, never prevent a tree from being treated
      as one cse domain (see searchsubdomain). Only trees rooted at one of
      these types are examined by searchcsedomain. }
    cseinvariant : set of tnodetype = [
      { arithmetic and shifts }
      addn,subn,muln,divn,slashn,modn,starstarn,unaryminusn,shrn,shln,
      { logical/bitwise operators }
      andn,orn,xorn,notn,
      { comparisons and set operators }
      equaln,unequaln,ltn,gtn,lten,gten,inn,symdifn,
      { memory access and type handling }
      vecn,derefn,subscriptn,typeconvn,isn,asn,
      { constants }
      ordconstn,realconstn,pointerconstn,stringconstn,setconstn,niln,
      { set/array constructors }
      setelementn,arrayconstructorn,arrayconstructorrangen,
      { miscellaneous }
      nothingn,temprefn,loadparentfpn {,callparan},assignn];
  { Callback for foreachnodestatic which checks whether a single node may be
    part of a cse domain.

    n   : node currently visited
    arg : points to a boolean which is set to false as soon as a node is
          found that cannot be part of a cse domain

    Returns fen_true for an acceptable node; otherwise the flag behind arg is
    cleared and fen_norecurse_true is returned. }
  function searchsubdomain(var n:tnode; arg: pointer) : foreachnoderesult;
    var
      acceptable : boolean;
    begin
      if n.nodetype in cseinvariant then
        acceptable:=true
      else
        case n.nodetype of
          inlinen:
            { only selected inline functions are accepted }
            acceptable:=tinlinenode(n).inlinenumber in [in_assigned_x];
          callparan:
            { accepted only if the right field is not assigned }
            acceptable:=not(assigned(tcallparanode(n).right));
          loadn:
            { loads are fine unless they read a volatile variable }
            acceptable:=not((tloadnode(n).symtableentry.typ in [staticvarsym,localvarsym,paravarsym]) and
              (vo_volatile in tabstractvarsym(tloadnode(n).symtableentry).varoptions));
          else
            acceptable:=false;
        end;

      if acceptable then
        result:=fen_true
      else
        begin
          pboolean(arg)^:=false;
          result:=fen_norecurse_true;
        end;
    end;
  type
    plists = ^tlists;

    { Bookkeeping filled by collectnodes while walking one cse domain.
      The four lists are parallel: entry i of each list belongs to the
      same collected node. }
    tlists = record
      { the collected nodes themselves }
      nodelist : tfplist;
      { address of the tree slot (pnode) holding the corresponding node }
      locationlist : tfplist;
      { pointer(-1) if the node is a first occurrence, otherwise the index
        of the first equal node, stored as pointer }
      equalto : tfplist;
      { nil if no later equal node was matched against this one, otherwise a
        counter of such matches, stored as pointer }
      refs : tfplist;
      { indices of the nodes which may still be matched by later nodes }
      avail : TDFASet;
    end;
  { collectnodes has to hand its own address to foreachnodestatic, but
    @<func> does not work inside <func> itself, so collectnodes2 is a plain
    forwarding wrapper whose address can be taken instead }
  function collectnodes(var n:tnode; arg: pointer) : foreachnoderesult;forward;

  function collectnodes2(var n:tnode; arg: pointer) : foreachnoderesult;
    begin
      collectnodes2:=collectnodes(n,arg);
    end;
  { Checks whether the value of n is worth being kept in a register temp.
    The caller has to make sure that n.resultdef is assigned. }
  function cse_value_candidate(n : tnode) : boolean;
    begin
      result:=false;
      { so far, we can handle only nodes being read }
      if n.actualtargetnode.flags*[nf_write,nf_modify]<>[] then
        exit;
      { regable expressions only }
      if not(tstoreddef(n.resultdef).is_intregable or tstoreddef(n.resultdef).is_fpuregable) then
        exit;
      { is_int/fpuregable allows arrays and records to be in registers, cse cannot handle this }
      if n.resultdef.typ in [arraydef,recorddef] then
        exit;
      { same for voiddef }
      if is_void(n.resultdef) then
        exit;
      { adding tempref and callpara nodes itself is worthless but
        their complexity is probably <= 1 anyways }
      if n.nodetype in [temprefn,callparan] then
        exit;
      { Node worth to add?
        Almost every node is considered because even loading a variable from
        a register instead of memory is beneficial. This should not increase
        register pressure: if a variable is already in a register, the reg.
        allocator can merge the nodes; if it is loaded from memory, loading it
        and spilling another register should not add a speed penalty.

        Load nodes are not considered if they load para or local symbols from
        the current stack frame, those are in registers anyways if possible }
      if (n.nodetype=loadn) and
        (tloadnode(n).symtableentry.typ in [paravarsym,localvarsym]) and
        not(node_complexity(n)>1) then
        exit;
      { Const nodes however are only considered if their complexity is >1.
        This might be the case for the risc architectures if they need
        more than one instruction to load this particular value }
      if is_constnode(n) and not(node_complexity(n)>1) then
        exit;
      result:=true;
    end;

{$ifndef x86}
  { Checks whether the address of n is worth being stored: loading the
    address of a global symbol takes typically more than one instruction on
    every platform except x86, so loads of static arrays, records and objects
    are considered as well.
    The caller has to make sure that n.resultdef is assigned. }
  function cse_address_candidate(n : tnode) : boolean;
    begin
      result:=
        ((n.resultdef.typ in [arraydef,recorddef]) or is_object(n.resultdef)) and
        (n.nodetype=loadn) and
        (tloadnode(n).symtableentry.typ=staticvarsym);
    end;
{$endif x86}

  { Callback for foreachnodestatic which records every node of a cse domain
    that is a candidate for cse and links it to an earlier equal node.

    n   : node currently visited
    arg : points to the tlists record receiving the collected data

    Returns fen_norecurse_false if the subtree of n must not be walked by the
    caller (untyped const parameters, boolean and/or nodes that were walked
    here already), fen_false otherwise. }
  function collectnodes(var n:tnode; arg: pointer) : foreachnoderesult;
    var
      found : plists;
      cur,
      prev,
      firstright : longint;
    begin
      result:=fen_false;
      { don't add the tree below an untyped const parameter: there is
        no information available that this kind of tree actually needs
        to be addressable, this could be improved }
      if (n.nodetype=callparan) and
        (tcallparanode(n).left.resultdef.typ=formaldef) and
        (tcallparanode(n).parasym.varspez=vs_const) then
        begin
          result:=fen_norecurse_false;
          exit;
        end;

      { node possible to add? }
      if assigned(n.resultdef) and
        (cse_value_candidate(n)
{$ifndef x86}
         { store reference of expression? }
         or cse_address_candidate(n)
{$endif x86}
        ) then
        begin
          found:=plists(arg);
          found^.nodelist.Add(n);
          found^.locationlist.Add(@n);
          found^.refs.Add(nil);
          found^.equalto.Add(pointer(-1));
          { index of the entry just appended }
          cur:=found^.nodelist.count-1;
          DFASetInclude(found^.avail,cur);

          { look for an earlier, still available node equal to the new one }
          for prev:=0 to cur-1 do
            if tnode(found^.nodelist[prev]).isequal(n) and DFASetIn(found^.avail,prev) then
              begin
                { use always the first occurrence }
                if found^.equalto[prev]<>pointer(-1) then
                  found^.equalto[cur]:=found^.equalto[prev]
                else
                  found^.equalto[cur]:=pointer(ptrint(prev));
                { refs holds a counter disguised as pointer }
                found^.refs[prev]:=pointer(found^.refs[prev])+1;
                break;
              end;

          { boolean and/or require a special handling: after evaluating the and/or node,
            the expressions of the right side might not be available due to short boolean
            evaluation, so after handling the right side, mark those expressions
            as unavailable }
          if (n.nodetype in [orn,andn]) and is_boolean(taddnode(n).left.resultdef) then
            begin
              foreachnodestatic(pm_postprocess,taddnode(n).left,@collectnodes2,arg);
              { everything collected from here on stems from the right side }
              firstright:=found^.nodelist.count;
              foreachnodestatic(pm_postprocess,taddnode(n).right,@collectnodes2,arg);
              for prev:=firstright to found^.nodelist.count-1 do
                DFASetExclude(found^.avail,prev);
              result:=fen_norecurse_false;
            end;
        end;
    end;
  { Walks the tree at n with searchsubdomain and returns true if no node
    cleared the flag, i.e. if the whole tree can be handled as one cse domain. }
  function is_csedomain(var n : tnode) : boolean;
    var
      ok : boolean;
    begin
      ok:=true;
      foreachnodestatic(pm_postprocess,n,@searchsubdomain,@ok);
      result:=ok;
    end;


  { Tries to transform the tree at n to get better cse domains, consider:

           +
          / \
         +   C
        / \
       A   B

    if A is not cse'able but B and C are, then the compiler cannot do cse so
    the tree is transformed into

           +
          / \
         A   +
            / \
           B   C

    Because A could be another tree of this kind, the whole process is done
    in a while loop. }
  procedure reassociate_for_cse(var n : tnode);
    var
      inner : tbinarynode;
      outerleft : tnode;
    begin
      if (n.nodetype in [andn,orn,addn,muln]) and
        (n.nodetype=tbinarynode(n).left.nodetype) and
        { do this optimization only for integers, reals (no currency!), vectors, sets or booleans }
        (is_integer(n.resultdef) or is_real(n.resultdef) or is_vector(n.resultdef) or is_set(n.resultdef) or
         is_boolean(n.resultdef)) and
        { either if fastmath is on }
        ((cs_opt_fastmath in current_settings.optimizerswitches) or
        { or for the logical operators, they cannot overflow }
         (n.nodetype in [andn,orn]) or
        { or for integers if range checking is off }
         ((is_integer(n.resultdef) and
          (n.localswitches*[cs_check_range,cs_check_overflow]=[]) and
          (tbinarynode(n).left.localswitches*[cs_check_range,cs_check_overflow]=[]))) or
        { for sets, we can do this always }
         (is_set(n.resultdef))
        ) then
        while n.nodetype=tbinarynode(n).left.nodetype do
          begin
            { both C and B have to be cse'able, otherwise give up }
            if not(is_csedomain(tbinarynode(n).right)) or
              not(is_csedomain(tbinarynode(tbinarynode(n).left).right)) then
              break;

            { rotate: (A op B) op C  ->  A op (B op C) }
            inner:=tbinarynode(tbinarynode(n).left);
            outerleft:=inner.left;
            inner.left:=inner.right;
            inner.right:=tbinarynode(n).right;
            tbinarynode(n).right:=inner;
            tbinarynode(n).left:=outerleft;

            { the transformed tree could result in new possibilities to fold constants
              so force a firstpass on the new right node }
            exclude(tbinarynode(n).right.flags,nf_pass1_done);
            do_firstpass(tbinarynode(n).right);
          end;
    end;


  { Replaces the tree slot of collected node idx by a reference to temp
    (dereferenced if the temp holds the address of the value) and runs the
    first pass on the replacement. }
  procedure replace_by_tempref(const lists : tlists; idx : longint; temp : ttempcreatenode; addrstored : boolean);
    var
      slot : pnode;
    begin
      slot:=pnode(lists.locationlist[idx]);
      if addrstored then
        slot^:=cderefnode.Create(ctemprefnode.create(temp))
      else
        slot^:=ctemprefnode.create(temp);
      { make debugging easier and set temp. location to the original location }
      slot^.fileinfo:=tnode(lists.nodelist[idx]).fileinfo;
      do_firstpass(slot^);
    end;


  { Performs cse on the domain rooted at n: collects the candidate nodes,
    creates a temp for every node that is used more than once, replaces all
    occurrences by references to that temp and finally wraps n into a block
    which creates the temps before n is evaluated. }
  procedure eliminate_in_csedomain(var n : tnode);
    var
      lists : tlists;
      temps : tfplist;
      i : longint;
      firstidx : ptrint;
      def : tstoreddef;
      nodes : tblocknode;
      creates,
      statements : tstatementnode;
      temp : ttempcreatenode;
      orignode : tnode;
      addrstored : boolean;
    begin
      statements:=nil;
{$ifdef csedebug}
      writeln('============ cse domain ==================');
      printnode(output,n);
      writeln('Complexity: ',node_complexity(n));
{$endif csedebug}
      lists.nodelist:=tfplist.create;
      lists.locationlist:=tfplist.create;
      lists.equalto:=tfplist.create;
      lists.refs:=tfplist.create;
      foreachnodestatic(pm_postprocess,n,@collectnodes,@lists);

      { temps[i] is the temp create node used for collected node i }
      temps:=tfplist.create;
      temps.count:=lists.nodelist.count;

      { check all nodes if one is used more than once }
      for i:=0 to lists.nodelist.count-1 do
        begin
          orignode:=tnode(lists.nodelist[i]);
          { current node used more than once? }
          if assigned(lists.refs[i]) then
            begin
              { the surrounding block is created on demand only }
              if not(assigned(statements)) then
                begin
                  nodes:=internalstatements(statements);
                  addstatement(statements,internalstatements(creates));
                end;

              def:=tstoreddef(orignode.resultdef);
              { we cannot handle register stored records or array in CSE yet
                but we can store their reference }
              addrstored:=(def.typ in [arraydef,recorddef]) or is_object(def);
              if addrstored then
                temp:=ctempcreatenode.create_value(getpointerdef(def),voidpointertype.size,tt_persistent,
                  true,caddrnode.create(orignode))
              else
                temp:=ctempcreatenode.create_value(def,def.size,tt_persistent,
                  def.is_intregable or def.is_fpuregable,orignode);

              { make debugging easier and set temp. location to the original location }
              temp.fileinfo:=orignode.fileinfo;
              addstatement(creates,temp);
              { make debugging easier and set temp. location to the original location }
              creates.fileinfo:=orignode.fileinfo;
              do_firstpass(tnode(temp));
              temps[i]:=temp;

              replace_by_tempref(lists,i,ttempcreatenode(temps[i]),addrstored);
{$ifdef csedebug}
              printnode(output,statements);
{$endif csedebug}
            end
          { current node reference to another node? }
          else if lists.equalto[i]<>pointer(-1) then
            begin
              def:=tstoreddef(orignode.resultdef);
              { we cannot handle register stored records or array in CSE yet
                but we can store their reference }
              addrstored:=(def.typ in [arraydef,recorddef]) or is_object(def);
{$if defined(csedebug) or defined(csestats)}
              writeln;
              writeln('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!');
              writeln('Complexity: ',node_complexity(orignode),' Node ',i,' equals Node ',ptrint(lists.equalto[i]));
              printnode(output,orignode);
              printnode(output,tnode(lists.nodelist[ptrint(lists.equalto[i])]));
              writeln('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!');
              writeln;
{$endif defined(csedebug) or defined(csestats)}
              { share the temp of the first occurrence }
              firstidx:=ptrint(lists.equalto[i]);
              temps[i]:=temps[firstidx];
              replace_by_tempref(lists,i,ttempcreatenode(temps[firstidx]),addrstored);
            end;
        end;

      { clean up unused trees }
      for i:=0 to lists.nodelist.count-1 do
        if lists.equalto[i]<>pointer(-1) then
          tnode(lists.nodelist[i]).free;
{$ifdef csedebug}
      writeln('nodes: ',lists.nodelist.count);
      writeln('==========================================');
{$endif csedebug}
      lists.nodelist.free;
      lists.locationlist.free;
      lists.equalto.free;
      lists.refs.free;
      temps.free;

      if assigned(statements) then
        begin
          { call para nodes need a special handling because
            they can be only children nodes of call nodes
            so the initialization code is inserted below the
            call para node }
          if n.nodetype=callparan then
            begin
              addstatement(statements,tcallparanode(n).left);
              tcallparanode(n).left:=nodes;
              do_firstpass(tcallparanode(n).left);
            end
          else
            begin
              addstatement(statements,n);
              n:=nodes;
              do_firstpass(n);
            end;
{$ifdef csedebug}
          printnode(output,nodes);
{$endif csedebug}
        end;
    end;


  { Callback for foreachnodestatic used by do_optcse.

    n   : node currently visited
    arg : unused

    If the tree rooted at n is a cse domain, cse is carried out on it and
    fen_norecurse_true is returned. Otherwise the tree is possibly
    re-associated to create better domains further down and fen_false is
    returned. Nodes whose type is not in cseinvariant are skipped with
    fen_false. }
  function searchcsedomain(var n: tnode; arg: pointer) : foreachnoderesult;
    begin
      result:=fen_false;
      if not(n.nodetype in cseinvariant) then
        exit;

      if is_csedomain(n) then
        begin
          result:=fen_norecurse_true;
          eliminate_in_csedomain(n);
        end
      else
        reassociate_for_cse(n);
    end;
  { Entry point of the cse pass: walks the tree at rootnode with
    foreachnodestatic and lets searchcsedomain handle every visited node.
    The tree is modified in place through rootnode; the function result is
    always nil. }
  function do_optcse(var rootnode : tnode) : tnode;
    begin
      result:=nil;
{$ifdef csedebug}
      writeln('====================================================================================');
      writeln('CSE optimization pass started');
      writeln('====================================================================================');
      printnode(rootnode);
      writeln('====================================================================================');
      writeln;
{$endif csedebug}
      foreachnodestatic(pm_postprocess,rootnode,@searchcsedomain,nil);
    end;
  401. end.