optcse.pas 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. {
  2. Common subexpression elimination on base blocks
  3. Copyright (c) 2005-2012 by Florian Klaempfl
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit optcse;
  18. {$i fpcdefs.inc}
  19. { $define csedebug}
  20. { $define csestats}
  21. interface
  22. uses
  23. node;
  24. {
  25. the function creates non optimal code so far:
  26. - call para nodes are cse barriers because they can be reordered and thus the
  27. temp. creation could be done too late
  28. - the cse knows nothing about register pressure. In case of high register pressure, cse might
  29. have a negative impact
  30. - the list of cseinvariant node types and inline numbers is not complete yet
  31. Further, it could probably be done in a faster way, though the complexity probably cannot be reduced
  32. }
  33. function do_optcse(var rootnode : tnode) : tnode;
  34. implementation
  35. uses
  36. globtype,globals,
  37. cutils,cclasses,
  38. verbose,
  39. nutils,
  40. procinfo,
  41. nbas,nld,ninl,ncal,ncnv,nadd,nmem,
  42. pass_1,
  43. symconst,symtype,symdef,symsym,
  44. defutil,
  45. optbase;
  46. const
{ Node types that do not end a cse domain by themselves.
  searchcsedomain only examines a tree whose root node type is in this set, and
  searchsubdomain accepts every node of one of these types while scanning a tree.
  inlinen, callparan and loadn are not listed here; searchsubdomain accepts them
  only under additional conditions. The array constructor types and callparan
  are commented out of the list. }
  47. cseinvariant : set of tnodetype = [addn,muln,subn,divn,slashn,modn,andn,orn,xorn,notn,vecn,
  48. derefn,equaln,unequaln,ltn,gtn,lten,gten,typeconvn,subscriptn,
  49. inn,symdifn,shrn,shln,ordconstn,realconstn,unaryminusn,pointerconstn,stringconstn,setconstn,niln,
  50. setelementn,{arrayconstructorn,arrayconstructorrangen,}
  51. isn,asn,starstarn,nothingn,temprefn,loadparentfpn {,callparan},assignn];
  function searchsubdomain(var n:tnode; arg: pointer) : foreachnoderesult;
    { Tree-walk callback that decides whether a tree is a cse domain.
      n   : the node currently visited
      arg : points to a boolean; it is set to false as soon as a node is met
            that is not allowed inside a cse domain, otherwise it is left alone
      Returns fen_true for an acceptable node and fen_norecurse_true for a
      node that is not acceptable. }
    var
      keepsdomain : boolean;
    begin
      if n.nodetype in cseinvariant then
        keepsdomain:=true
      else
        case n.nodetype of
          inlinen:
            { of the inline nodes only assigned(x) is accepted }
            keepsdomain:=tinlinenode(n).inlinenumber=in_assigned_x;
          callparan:
            { a call parameter is accepted only if no right node is attached }
            keepsdomain:=not assigned(tcallparanode(n).right);
          loadn:
            { loads are accepted unless they read a volatile variable }
            keepsdomain:=not(
              (tloadnode(n).symtableentry.typ in [staticvarsym,localvarsym,paravarsym]) and
              (vo_volatile in tabstractvarsym(tloadnode(n).symtableentry).varoptions)
            );
          else
            keepsdomain:=false;
        end;

      if not keepsdomain then
        begin
          pboolean(arg)^:=false;
          result:=fen_norecurse_true;
        end
      else
        result:=fen_true;
    end;
  70. type
{ Bookkeeping for one cse domain, shared by collectnodes and searchcsedomain.
  collectnodes appends to the four lists in parallel, so entry i of every list
  describes the i-th collected node. }
  71. tlists = record
  72. nodelist : tfplist; { the collected candidate nodes (tnode) }
  73. locationlist : tfplist; { address of the variable holding each node (pnode); written through to replace the node }
  74. equalto : tfplist; { pointer(-1), or the index of the earlier node this entry duplicates }
  75. refs : tfplist; { nil, or the number of later nodes that were matched against this entry, stored as a pointer }
  76. avail : TDFASet; { indices of the entries that may currently be matched by later nodes }
  77. end;
  78. plists = ^tlists;
  79. { collectnodes needs the address of itself to call foreachnodestatic,
  80. so we need a wrapper because @<func> inside <func> doesn't work }
  81. function collectnodes(var n:tnode; arg: pointer) : foreachnoderesult;forward;
  82. function collectnodes2(var n:tnode; arg: pointer) : foreachnoderesult;
{ Forwards n and arg to collectnodes unchanged and returns its result.
  It is the callback collectnodes hands to foreachnodestatic when it walks
  the operands of a boolean and/or chain itself. }
  83. begin
  84. result:=collectnodes(n,arg);
  85. end;
  function collectnodes(var n:tnode; arg: pointer) : foreachnoderesult;
    { Tree-walk callback that records every node of a cse domain that is a
      candidate for being kept in a temp, and links duplicates to their
      first occurrence.

      n   : the node currently visited
      arg : a plists pointer; nodelist, locationlist, refs, equalto and avail
            are extended/updated here

      Returns fen_false by default. Returns fen_norecurse_false for the tree
      below an untyped const parameter (nothing is collected there) and for
      boolean and/or nodes, whose operands are walked here explicitly.

      When compiling a tree like

                and
               /   \
             and    C
            /   \
           A     B

      all expressions of B are available during evaluation of C. However, considering the whole expression,
      values of B and C might not be available due to short boolean evaluation.

      So recurseintobooleanchain detects such chained and/or expressions and makes sub-expressions of B
      available during the evaluation of C.

      firstleftend is later used to remove all sub expressions of B and C by storing the expression count
      in the cse table after handling A.
    }
    var
      firstleftend : longint;

    { Walks a left-leaning chain of boolean nodes of type t. The innermost
      left operand is collected first; firstleftend remembers the smallest
      node count seen right after a left operand was handled, i.e. the index
      of the first entry that does not belong to the leftmost operand. }
    procedure recurseintobooleanchain(t : tnodetype;n : tnode);
      begin
        if (tbinarynode(n).left.nodetype=t) and is_boolean(tbinarynode(n).left.resultdef) then
          recurseintobooleanchain(t,tbinarynode(n).left)
        else
          foreachnodestatic(pm_postprocess,tbinarynode(n).left,@collectnodes2,arg);
        firstleftend:=min(plists(arg)^.nodelist.count,firstleftend);
        foreachnodestatic(pm_postprocess,tbinarynode(n).right,@collectnodes2,arg);
      end;

    var
      i : longint;
    begin
      result:=fen_false;
      { don't add the tree below an untyped const parameter: there is
        no information available that this kind of tree actually needs
        to be addressable, this could be improved }
      if ((n.nodetype=callparan) and
          (tcallparanode(n).left.resultdef.typ=formaldef) and
          (tcallparanode(n).parasym.varspez=vs_const)) then
        begin
          result:=fen_norecurse_false;
          exit;
        end;
      if
        { node possible to add? }
        assigned(n.resultdef) and
        (
          { regable expressions }
          (n.actualtargetnode.flags*[nf_write,nf_modify]=[]) and
          ((tstoreddef(n.resultdef).is_intregable or tstoreddef(n.resultdef).is_fpuregable) and
          { is_int/fpuregable allows arrays and records to be in registers, cse cannot handle this }
          (not(n.resultdef.typ in [arraydef,recorddef])) and
          { same for voiddef }
          not(is_void(n.resultdef)) and
          { adding tempref and callpara nodes itself is worthless but
            their complexity is probably <= 1 anyways }
          not(n.nodetype in [temprefn,callparan]) and
          { node worth to add?

            We consider almost every node because even loading a variables from
            a register instead of memory is more beneficial. This behaviour should
            not increase register pressure because if a variable is already
            in a register, the reg. allocator can merge the nodes. If a variable
            is loaded from memory, loading this variable and spilling another register
            should not add a speed penalty.
          }
          {
            load nodes are not considered if they load para or local symbols from the
            current stack frame, those are in registers anyways if possible
          }
          (not(n.nodetype=loadn) or
            not(tloadnode(n).symtableentry.typ in [paravarsym,localvarsym]) or
            (node_complexity(n)>1)
          ) and
          {
            Const nodes however are only considered if their complexity is >1
            This might be the case for the risc architectures if they need
            more than one instruction to load this particular value
          }
          (not(is_constnode(n)) or (node_complexity(n)>1)))
{$ifndef x86}
          or
          { store reference of expression? }

          { loading the address of a global symbol takes typically more than
            one instruction on every platform except x86
            so consider in this case loading the address of the data
          }
          (((n.resultdef.typ in [arraydef,recorddef]) or is_object(n.resultdef)) and
           (n.nodetype=loadn) and
           (tloadnode(n).symtableentry.typ=staticvarsym)
          )
{$endif x86}
        ) then
        begin
          { append the node to all parallel lists; it starts out as
            available, unreferenced and not equal to any other entry }
          plists(arg)^.nodelist.Add(n);
          plists(arg)^.locationlist.Add(@n);
          plists(arg)^.refs.Add(nil);
          plists(arg)^.equalto.Add(pointer(-1));

          DFASetInclude(plists(arg)^.avail,plists(arg)^.nodelist.count-1);

          { look for an earlier, still available entry that is equal to n }
          for i:=0 to plists(arg)^.nodelist.count-2 do
            begin
              if tnode(plists(arg)^.nodelist[i]).isequal(n) and DFASetIn(plists(arg)^.avail,i) then
                begin
                  { use always the first occurrence }
                  if plists(arg)^.equalto[i]<>pointer(-1) then
                    plists(arg)^.equalto[plists(arg)^.nodelist.count-1]:=plists(arg)^.equalto[i]
                  else
                    plists(arg)^.equalto[plists(arg)^.nodelist.count-1]:=pointer(ptrint(i));
                  { refs is used as a counter stored in a pointer }
                  plists(arg)^.refs[i]:=pointer(plists(arg)^.refs[i])+1;
                  break;
                end;
            end;
        end;

      { boolean and/or require a special handling: after evaluating the and/or node,
        the expressions of the right side might not be available due to short boolean
        evaluation, so after handling the right side, mark those expressions
        as unavailable }
      if (n.nodetype in [orn,andn]) and is_boolean(taddnode(n).left.resultdef) then
        begin
          firstleftend:=high(longint);
          recurseintobooleanchain(n.nodetype,n);
          for i:=firstleftend to plists(arg)^.nodelist.count-1 do
            DFASetExclude(plists(arg)^.avail,i);
          { the operands were walked above, the caller must not descend again }
          result:=fen_norecurse_false;
        end;
    end;
  207. function searchcsedomain(var n: tnode; arg: pointer) : foreachnoderesult;
{ Tree-walk callback used by do_optcse; arg is not used here.

  Only nodes whose type is in cseinvariant are examined. searchsubdomain is run
  over the tree below n to find out whether the whole tree is a cse domain.

  - If it is not, chains of and/or/add/mul nodes are re-associated (under the
    type and overflow/range-check conditions tested below) so that cse'able
    operands end up in a common subtree. The result stays fen_false.
  - If it is, collectnodes gathers the candidate nodes. For every node that
    later nodes were matched against, a temp holding its value (or its address
    for arrays, records and objects) is created and the node is replaced by a
    reference to that temp; every duplicate is replaced by a reference to the
    same temp and then freed. If at least one temp was created, n (or the left
    node of n when n is a call parameter) is wrapped in a block that first
    creates the temps. The result is fen_norecurse_true. }
  208. var
  209. csedomain : boolean;
  210. lists : tlists;
  211. templist : tfplist;
  212. i : longint;
  213. def : tstoreddef;
  214. nodes : tblocknode;
  215. creates,
  216. statements : tstatementnode;
  217. hp : ttempcreatenode;
  218. addrstored : boolean;
  219. hp2 : tnode;
  220. begin
  221. result:=fen_false;
  222. if n.nodetype in cseinvariant then
  223. begin
  224. csedomain:=true;
  225. foreachnodestatic(pm_postprocess,n,@searchsubdomain,@csedomain);
  226. if not(csedomain) then
  227. begin
  228. { try to transform the tree to get better cse domains, consider:
  229. +
  230. / \
  231. + C
  232. / \
  233. A B
  234. if A is not cse'able but B and C are, then the compiler cannot do cse so the tree is transformed into
  235. +
  236. / \
  237. A +
  238. / \
  239. B C
  240. Because A could be another tree of this kind, the whole process is done in a while loop
  241. }
  242. if (n.nodetype in [andn,orn,addn,muln]) and
  243. (n.nodetype=tbinarynode(n).left.nodetype) and
  244. { do this optimization only for integers, reals (no currency!), vectors, sets or booleans }
  245. (is_integer(n.resultdef) or is_real(n.resultdef) or is_vector(n.resultdef) or is_set(n.resultdef) or
  246. is_boolean(n.resultdef)) and
  247. { either if fastmath is on }
  248. ((cs_opt_fastmath in current_settings.optimizerswitches) or
  249. { or for the logical operators, they cannot overflow }
  250. (n.nodetype in [andn,orn]) or
  251. { or for integers if range checking is off }
  252. ((is_integer(n.resultdef) and
  253. (n.localswitches*[cs_check_range,cs_check_overflow]=[]) and
  254. (tbinarynode(n).left.localswitches*[cs_check_range,cs_check_overflow]=[]))) or
  255. { for sets, we can do this always }
  256. (is_set(n.resultdef))
  257. ) then
  258. while n.nodetype=tbinarynode(n).left.nodetype do
  259. begin
{ both C (right of n) and B (right of the left node) must be cse domains,
  otherwise the rotation is not done and the loop ends }
  260. csedomain:=true;
  261. foreachnodestatic(pm_postprocess,tbinarynode(n).right,@searchsubdomain,@csedomain);
  262. if csedomain then
  263. begin
  264. csedomain:=true;
  265. foreachnodestatic(pm_postprocess,tbinarynode(tbinarynode(n).left).right,@searchsubdomain,@csedomain);
  266. if csedomain then
  267. begin
{ rotate: the former left node is reused as the new right node holding B and C,
  A becomes the new left node of n }
  268. hp2:=tbinarynode(tbinarynode(n).left).left;
  269. tbinarynode(tbinarynode(n).left).left:=tbinarynode(tbinarynode(n).left).right;
  270. tbinarynode(tbinarynode(n).left).right:=tbinarynode(n).right;
  271. tbinarynode(n).right:=tbinarynode(n).left;
  272. tbinarynode(n).left:=hp2;
  273. { the transformed tree could result in new possibilities to fold constants
  274. so force a firstpass on the root node }
  275. exclude(tbinarynode(n).right.flags,nf_pass1_done);
  276. do_firstpass(tbinarynode(n).right);
  277. end
  278. else
  279. break;
  280. end
  281. else
  282. break;
  283. end;
  284. end
  285. else
  286. begin
  287. statements:=nil;
  288. result:=fen_norecurse_true;
  289. {$ifdef csedebug}
  290. writeln('============ cse domain ==================');
  291. printnode(output,n);
  292. writeln('Complexity: ',node_complexity(n));
  293. {$endif csedebug}
{ collect the candidate nodes of this domain together with their duplicate information }
  294. lists.nodelist:=tfplist.create;
  295. lists.locationlist:=tfplist.create;
  296. lists.equalto:=tfplist.create;
  297. lists.refs:=tfplist.create;
  298. foreachnodestatic(pm_postprocess,n,@collectnodes,@lists);
{ templist[i] receives the temp create node that holds the value of entry i }
  299. templist:=tfplist.create;
  300. templist.count:=lists.nodelist.count;
  301. { check all nodes if one is used more than once }
  302. for i:=0 to lists.nodelist.count-1 do
  303. begin
  304. { current node used more than once? }
  305. if assigned(lists.refs[i]) then
  306. begin
{ the wrapping block and the nested block for the temp creations are only
  built once the first temp is needed }
  307. if not(assigned(statements)) then
  308. begin
  309. nodes:=internalstatements(statements);
  310. addstatement(statements,internalstatements(creates));
  311. end;
  312. def:=tstoreddef(tnode(lists.nodelist[i]).resultdef);
  313. { we cannot handle register stored records or array in CSE yet
  314. but we can store their reference }
  315. addrstored:=(def.typ in [arraydef,recorddef]) or is_object(def);
  316. if addrstored then
  317. templist[i]:=ctempcreatenode.create_value(getpointerdef(def),voidpointertype.size,tt_persistent,
  318. true,caddrnode.create(tnode(lists.nodelist[i])))
  319. else
  320. templist[i]:=ctempcreatenode.create_value(def,def.size,tt_persistent,
  321. def.is_intregable or def.is_fpuregable,tnode(lists.nodelist[i]));
  322. { make debugging easier and set temp. location to the original location }
  323. tnode(templist[i]).fileinfo:=tnode(lists.nodelist[i]).fileinfo;
  324. addstatement(creates,tnode(templist[i]));
  325. { make debugging easier and set temp. location to the original location }
  326. creates.fileinfo:=tnode(lists.nodelist[i]).fileinfo;
{ do_firstpass takes a var parameter, so the possibly changed node is stored back into templist }
  327. hp:=ttempcreatenode(templist[i]);
  328. do_firstpass(tnode(hp));
  329. templist[i]:=hp;
{ replace the original occurrence in the tree by a reference to the temp }
  330. if addrstored then
  331. pnode(lists.locationlist[i])^:=cderefnode.Create(ctemprefnode.create(ttempcreatenode(templist[i])))
  332. else
  333. pnode(lists.locationlist[i])^:=ctemprefnode.create(ttempcreatenode(templist[i]));
  334. { make debugging easier and set temp. location to the original location }
  335. pnode(lists.locationlist[i])^.fileinfo:=tnode(lists.nodelist[i]).fileinfo;
  336. do_firstpass(pnode(lists.locationlist[i])^);
  337. {$ifdef csedebug}
  338. printnode(output,statements);
  339. {$endif csedebug}
  340. end
  341. { current node reference to another node? }
  342. else if lists.equalto[i]<>pointer(-1) then
  343. begin
  344. def:=tstoreddef(tnode(lists.nodelist[i]).resultdef);
  345. { we cannot handle register stored records or array in CSE yet
  346. but we can store their reference }
  347. addrstored:=(def.typ in [arraydef,recorddef]) or is_object(def);
  348. {$if defined(csedebug) or defined(csestats)}
  349. writeln;
  350. writeln('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!');
  351. writeln('Complexity: ',node_complexity(tnode(lists.nodelist[i])),' Node ',i,' equals Node ',ptrint(lists.equalto[i]));
  352. printnode(output,tnode(lists.nodelist[i]));
  353. printnode(output,tnode(lists.nodelist[ptrint(lists.equalto[i])]));
  354. writeln('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!');
  355. writeln;
  356. {$endif defined(csedebug) or defined(csestats)}
{ reuse the temp of the entry this node is equal to }
  357. templist[i]:=templist[ptrint(lists.equalto[i])];
  358. if addrstored then
  359. pnode(lists.locationlist[i])^:=cderefnode.Create(ctemprefnode.create(ttempcreatenode(templist[ptrint(lists.equalto[i])])))
  360. else
  361. pnode(lists.locationlist[i])^:=ctemprefnode.create(ttempcreatenode(templist[ptrint(lists.equalto[i])]));
  362. { make debugging easier and set temp. location to the original location }
  363. pnode(lists.locationlist[i])^.fileinfo:=tnode(lists.nodelist[i]).fileinfo;
  364. do_firstpass(pnode(lists.locationlist[i])^);
  365. end;
  366. end;
  367. { clean up unused trees }
{ only duplicates are freed; they were replaced by temp references above }
  368. for i:=0 to lists.nodelist.count-1 do
  369. if lists.equalto[i]<>pointer(-1) then
  370. tnode(lists.nodelist[i]).free;
  371. {$ifdef csedebug}
  372. writeln('nodes: ',lists.nodelist.count);
  373. writeln('==========================================');
  374. {$endif csedebug}
  375. lists.nodelist.free;
  376. lists.locationlist.free;
  377. lists.equalto.free;
  378. lists.refs.free;
  379. templist.free;
{ statements is only assigned if at least one temp was created }
  380. if assigned(statements) then
  381. begin
  382. { call para nodes need a special handling because
  383. they can be only children nodes of call nodes
  384. so the initialization code is inserted below the
  385. call para node
  386. }
  387. if n.nodetype=callparan then
  388. begin
  389. addstatement(statements,tcallparanode(n).left);
  390. tcallparanode(n).left:=nodes;
  391. do_firstpass(tcallparanode(n).left);
  392. end
  393. else
  394. begin
  395. addstatement(statements,n);
  396. n:=nodes;
  397. do_firstpass(n);
  398. end;
  399. {$ifdef csedebug}
  400. printnode(output,nodes);
  401. {$endif csedebug}
  402. end;
  403. end
  404. end;
  405. end;
  function do_optcse(var rootnode : tnode) : tnode;
    { Entry point of the cse pass.
      rootnode : root of the tree to optimize; searchcsedomain is applied to
                 it through foreachnodestatic and may replace nodes in place
      Always returns nil. }
    begin
      result:=nil;
{$ifdef csedebug}
      { dump the tree as it looks before the pass }
      writeln('====================================================================================');
      writeln('CSE optimization pass started');
      writeln('====================================================================================');
      printnode(rootnode);
      writeln('====================================================================================');
      writeln;
{$endif csedebug}
      foreachnodestatic(pm_postprocess,rootnode,@searchcsedomain,nil);
    end;
  419. end.