optcse.pas 37 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846
  1. {
  2. Common subexpression elimination on base blocks
  3. Copyright (c) 2005-2012 by Florian Klaempfl
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit optcse;
  18. {$i fpcdefs.inc}
  19. { $define csedebug}
  20. { $define csestats}
  21. interface
  22. uses
  23. node;
  24. {
  25. the function creates non optimal code so far:
  26. - call para nodes are cse barriers because they can be reordered and thus the
  27. temp. creation could be done too late
  28. - the cse knows nothing about register pressure. In case of high register pressure, cse might
  29. have a negative impact
  30. - the list of cseinvariant node types and inline numbers is not complete yet
  31. Further, it could be done probably in a faster way though the complexity can't probably not reduced
  32. }
  33. function do_optcse(var rootnode : tnode) : tnode;
  34. function do_consttovar(var rootnode : tnode) : tnode;
  35. implementation
  36. uses
  37. globtype,globals,
  38. cutils,cclasses,
  39. nutils,compinnr,
  40. nbas,nld,ninl,ncal,nadd,nmem,ncnv,
  41. pass_1,
  42. procinfo,
  43. paramgr,
  44. cpubase,
  45. symconst,symdef,symsym,symtable,symtype,
  46. defutil,
  47. optbase;
  48. const
  49. cseinvariant : set of tnodetype = [addn,muln,subn,divn,slashn,modn,andn,orn,xorn,notn,vecn,
  50. derefn,equaln,unequaln,ltn,gtn,lten,gten,typeconvn,subscriptn,
  51. inn,symdifn,shrn,shln,ordconstn,realconstn,unaryminusn,pointerconstn,stringconstn,setconstn,niln,
  52. setelementn,{arrayconstructorn,arrayconstructorrangen,}
  53. isn,asn,starstarn,nothingn,temprefn,loadparentfpn {,callparan},assignn,addrn];
  54. function searchsubdomain(var n:tnode; arg: pointer) : foreachnoderesult;
  55. begin
  56. if (n.nodetype in cseinvariant) or
  57. ((n.nodetype=inlinen) and
  58. (tinlinenode(n).inlinenumber in [in_length_x,in_assigned_x,in_sqr_real,in_sqrt_real,in_sin_real,in_cos_real,in_abs_long,
  59. in_abs_real,in_exp_real,in_ln_real,in_pi_real,in_popcnt_x,in_arctan_real,in_round_real,in_trunc_real,
  60. { cse on fma will still not work because it would require proper handling of call nodes
  61. with more than one parameter }
  62. in_fma_single,in_fma_double,in_fma_extended,in_fma_float128,
  63. in_min_single,in_min_double,in_max_single,in_max_double,
  64. in_max_longint,in_max_dword,in_min_longint,in_min_dword,
  65. in_max_int64,in_max_qword,in_min_int64,in_min_qword
  66. ])
  67. ) or
  68. ((n.nodetype=callparan) and not(assigned(tcallparanode(n).right))) or
  69. ((n.nodetype=loadn) and
  70. not((tloadnode(n).symtableentry.typ in [staticvarsym,localvarsym,paravarsym]) and
  71. (vo_volatile in tabstractvarsym(tloadnode(n).symtableentry).varoptions))
  72. ) then
  73. result:=fen_true
  74. else
  75. begin
  76. pboolean(arg)^:=false;
  77. result:=fen_norecurse_true;
  78. end;
  79. end;
  80. type
  81. tlists = record
  82. nodelist : tfplist;
  83. locationlist : tfplist;
  84. equalto : tfplist;
  85. refs : tfplist;
  86. avail : TDFASet;
  87. end;
  88. plists = ^tlists;
  89. { collectnodes needs the address of itself to call foreachnodestatic,
  90. so we need a wrapper because @<func> inside <func doesn't work }
  91. function collectnodes(var n:tnode; arg: pointer) : foreachnoderesult;forward;
  92. function collectnodes2(var n:tnode; arg: pointer) : foreachnoderesult;
  93. begin
  94. result:=collectnodes(n,arg);
  95. end;
  96. function collectnodes(var n:tnode; arg: pointer) : foreachnoderesult;
  97. { when compiling a tree like
  98. and
  99. / \
  100. and C
  101. / \
  102. A B
  103. all expressions of B are available during evaluation of C. However considerung the whole expression,
  104. values of B and C might not be available due to short boolean evaluation.
  105. So recurseintobooleanchain detectes such chained and/or expressions and makes sub-expressions of B
  106. available during the evaluation of C
  107. firstleftend is later used to remove all sub expressions of B and C by storing the expression count
  108. in the cse table after handling A
  109. }
  110. var
  111. firstleftend : longint;
  112. procedure recurseintobooleanchain(t : tnodetype;n : tnode);
  113. begin
  114. if (tbinarynode(n).left.nodetype=t) and is_boolean(tbinarynode(n).left.resultdef) then
  115. recurseintobooleanchain(t,tbinarynode(n).left)
  116. else
  117. foreachnodestatic(pm_postprocess,tbinarynode(n).left,@collectnodes2,arg);
  118. firstleftend:=min(plists(arg)^.nodelist.count,firstleftend);
  119. foreachnodestatic(pm_postprocess,tbinarynode(n).right,@collectnodes2,arg);
  120. end;
  121. var
  122. i : longint;
  123. tempdef : tdef;
  124. begin
  125. result:=fen_false;
  126. { don't add the tree below an untyped const parameter: there is
  127. no information available that this kind of tree actually needs
  128. to be addresable, this could be improved }
  129. { the nodes below a type conversion node created for an absolute
  130. reference cannot be handled separately, because the absolute reference
  131. may have special requirements (no regability, must be in memory, ...)
  132. }
  133. if (((n.nodetype=callparan) and
  134. (tcallparanode(n).left.resultdef.typ=formaldef) and
  135. (tcallparanode(n).parasym.varspez=vs_const)) or
  136. ((n.nodetype=typeconvn) and
  137. (nf_absolute in n.flags))
  138. ) then
  139. begin
  140. result:=fen_norecurse_false;
  141. exit;
  142. end;
  143. if
  144. { node possible to add? }
  145. assigned(n.resultdef) and
  146. ((
  147. { regable expressions }
  148. (actualtargetnode(@n)^.flags*[nf_write,nf_modify,nf_address_taken]=[]) and
  149. (
  150. tstoreddef(n.resultdef).is_intregable or
  151. tstoreddef(n.resultdef).is_fpuregable or
  152. tstoreddef(n.resultdef).is_const_intregable
  153. ) and
  154. { is_int/fpuregable allows arrays and records to be in registers, cse cannot handle this }
  155. (
  156. not(n.resultdef.typ in [arraydef,recorddef]) or
  157. (
  158. (
  159. (n.resultdef.typ = recorddef) and
  160. tabstractrecordsymtable(tabstractrecorddef(n.resultdef).symtable).has_single_field(tempdef)
  161. ) or
  162. is_dynamic_array(n.resultdef) or
  163. (
  164. not(is_special_array(tstoreddef(n.resultdef))) and
  165. not(tstoreddef(n.resultdef).is_intregable) and
  166. not(tstoreddef(n.resultdef).is_fpuregable)
  167. )
  168. )
  169. ) and
  170. { same for voiddef }
  171. not(is_void(n.resultdef)) and
  172. { adding tempref and callpara nodes itself is worthless but
  173. their complexity is probably <= 1 anyways
  174. neither add setelementn nodes because the compiler sometimes depends on the fact
  175. that a certain node stays a setelementn, this does not hurt either because
  176. setelementn nodes itself generate no real code (except moving data into register) }
  177. not(n.nodetype in [temprefn,callparan,setelementn]) and
  178. { node worth to add?
  179. We consider almost every node because even loading a variables from
  180. a register instead of memory is more beneficial. This behaviour should
  181. not increase register pressure because if a variable is already
  182. in a register, the reg. allocator can merge the nodes. If a variable
  183. is loaded from memory, loading this variable and spilling another register
  184. should not add a speed penalty.
  185. }
  186. {
  187. load nodes are not considered if they load para or local symbols from the
  188. current stack frame, those are in registers anyways if possible
  189. }
  190. (not(actualtargetnode(@n)^.nodetype=loadn) or
  191. not(tloadnode(actualtargetnode(@n)^).symtableentry.typ in [paravarsym,localvarsym,staticvarsym]) or
  192. { apply cse on non-regable variables }
  193. ((tloadnode(actualtargetnode(@n)^).symtableentry.typ in [paravarsym,localvarsym,staticvarsym]) and
  194. not(tabstractvarsym(tloadnode(actualtargetnode(@n)^).symtableentry).is_regvar(true)) and
  195. not(vo_volatile in tabstractvarsym(tloadnode(actualtargetnode(@n)^).symtableentry).varoptions)) or
  196. (node_complexity(n)>1)
  197. ) and
  198. {
  199. Const nodes however are only considered if their complexity is >1
  200. This might be the case for the risc architectures if they need
  201. more than one instruction to load this particular value
  202. }
  203. (not(is_constnode(n)) or (node_complexity(n)>1)))
  204. {$if not(defined(i386)) and not(defined(i8086))}
  205. or
  206. { store reference of expression? }
  207. { loading the address of a global symbol takes typically more than
  208. one instruction on every platform except i8086/i386
  209. so consider in this case loading the address of the data
  210. }
  211. (((n.resultdef.typ in [arraydef,recorddef]) or is_object(n.resultdef)) and not(is_dynamic_array(n.resultdef)) and
  212. (n.nodetype=loadn) and
  213. (tloadnode(n).symtableentry.typ=staticvarsym)
  214. )
  215. {$endif not(defined(i386)) and not(defined(i8086))}
  216. ) then
  217. begin
  218. plists(arg)^.nodelist.Add(n);
  219. plists(arg)^.locationlist.Add(@n);
  220. plists(arg)^.refs.Add(nil);
  221. plists(arg)^.equalto.Add(pointer(-1));
  222. DFASetInclude(plists(arg)^.avail,plists(arg)^.nodelist.count-1);
  223. for i:=0 to plists(arg)^.nodelist.count-2 do
  224. begin
  225. if tnode(plists(arg)^.nodelist[i]).isequal(n) and DFASetIn(plists(arg)^.avail,i) then
  226. begin
  227. { use always the first occurence }
  228. if plists(arg)^.equalto[i]<>pointer(-1) then
  229. plists(arg)^.equalto[plists(arg)^.nodelist.count-1]:=plists(arg)^.equalto[i]
  230. else
  231. plists(arg)^.equalto[plists(arg)^.nodelist.count-1]:=pointer(ptrint(i));
  232. plists(arg)^.refs[i]:=pointer(plists(arg)^.refs[i])+1;
  233. { tree has been found, no need to search further,
  234. sub-trees have been added by the first occurence of
  235. the tree already }
  236. result:=fen_norecurse_false;
  237. break;
  238. end;
  239. end;
  240. end;
  241. { boolean and/or require a special handling: after evaluating the and/or node,
  242. the expressions of the right side might not be available due to short boolean
  243. evaluation, so after handling the right side, mark those expressions
  244. as unavailable }
  245. if (n.nodetype in [orn,andn]) and is_boolean(taddnode(n).left.resultdef) then
  246. begin
  247. firstleftend:=high(longint);
  248. recurseintobooleanchain(n.nodetype,n);
  249. for i:=firstleftend to plists(arg)^.nodelist.count-1 do
  250. DFASetExclude(plists(arg)^.avail,i);
  251. result:=fen_norecurse_false;
  252. end;
  253. {$ifdef cpuhighleveltarget}
  254. { The high level targets use the functionality from ncgnstld for
  255. nested accesses, and that one stores the complete location of the
  256. nested variable in tloadnode.left rather than only the location of
  257. the parent context containing it. This causes problems with the
  258. CSE in case the nested variable is used as an lvalue, so disable
  259. CSE in that case
  260. }
  261. if (n.nodetype=loadn) and assigned(tloadnode(n).left) then
  262. result:=fen_norecurse_false;
  263. {$endif}
  264. end;
  265. function searchcsedomain(var n: tnode; arg: pointer) : foreachnoderesult;
  266. var
  267. csedomain : boolean;
  268. lists : tlists;
  269. templist : tfplist;
  270. i : longint;
  271. def : tstoreddef;
  272. nodes : tblocknode;
  273. creates,
  274. statements : tstatementnode;
  275. deletetemp : ttempdeletenode;
  276. hp : ttempcreatenode;
  277. addrstored : boolean;
  278. hp2 : tnode;
  279. begin
  280. result:=fen_false;
  281. nodes:=nil;
  282. if (n.nodetype in cseinvariant) and
  283. { a setelement node is cseinvariant, but it might not be replaced by a block so
  284. it cannot be the root of the cse search }
  285. (n.nodetype<>setelementn) then
  286. begin
  287. csedomain:=true;
  288. foreachnodestatic(pm_postprocess,n,@searchsubdomain,@csedomain);
  289. if not(csedomain) then
  290. begin
  291. { try to transform the tree to get better cse domains, consider:
  292. + (1)
  293. / \
  294. (2) + C
  295. / \
  296. A B
  297. if A is not cse'able but B and C are, then the compiler cannot do cse so the tree is transformed into
  298. +
  299. / \
  300. A +
  301. / \
  302. B C
  303. Because A could be another tree of this kind, the whole process is done in a while loop
  304. }
  305. if (n.nodetype in [andn,orn,addn,muln]) and
  306. (n.nodetype=tbinarynode(n).left.nodetype) and
  307. { do is optimizations only for integers, reals (no currency!), vectors, sets or booleans }
  308. (is_integer(n.resultdef) or is_real(n.resultdef) or is_vector(n.resultdef) or is_set(n.resultdef) or
  309. is_boolean(n.resultdef)) and
  310. { either if fastmath is on }
  311. ((cs_opt_fastmath in current_settings.optimizerswitches) or
  312. { or for the logical operators, they cannot overflow }
  313. (n.nodetype in [andn,orn]) or
  314. { or for integers if range checking is off }
  315. ((is_integer(n.resultdef) and
  316. (n.localswitches*[cs_check_range,cs_check_overflow]=[]) and
  317. (tbinarynode(n).left.localswitches*[cs_check_range,cs_check_overflow]=[]))) or
  318. { for sets, we can do this always }
  319. (is_set(n.resultdef))
  320. ) then
  321. while (n.nodetype=tbinarynode(n).left.nodetype) and
  322. { if node (1) is fully boolean evaluated and node (2) not, we cannot do the swap as this might result in B being evaluated always,
  323. the other way round is no problem, C is still evaluated only if needed }
  324. (not(is_boolean(n.resultdef)) or not(n.nodetype in [andn,orn]) or doshortbooleval(n) or not(doshortbooleval(tbinarynode(n).left))) and
  325. { the resulttypes of the operands we'll swap must be equal,
  326. required in case of a 32x32->64 multiplication, then we
  327. cannot swap out one of the 32 bit operands for a 64 bit one
  328. }
  329. (tbinarynode(tbinarynode(n).left).left.resultdef=tbinarynode(n).left.resultdef) and
  330. (tbinarynode(n).left.resultdef=tbinarynode(n).right.resultdef) do
  331. begin
  332. csedomain:=true;
  333. foreachnodestatic(pm_postprocess,tbinarynode(n).right,@searchsubdomain,@csedomain);
  334. if csedomain then
  335. begin
  336. csedomain:=true;
  337. foreachnodestatic(pm_postprocess,tbinarynode(tbinarynode(n).left).right,@searchsubdomain,@csedomain);
  338. if csedomain then
  339. begin
  340. { move the full boolean evaluation of (2) to (1), if it was there (so it again applies to A and
  341. what follows) }
  342. if not(doshortbooleval(tbinarynode(n).left)) and
  343. doshortbooleval(n) then
  344. begin
  345. n.localswitches:=n.localswitches+(tbinarynode(n).left.localswitches*[cs_full_boolean_eval]);
  346. exclude(tbinarynode(n).left.localswitches,cs_full_boolean_eval);
  347. if (n.nodetype in [orn,andn]) then
  348. begin
  349. if (tbinarynode(n).left.nodetype in [orn,andn]) then
  350. taddnode(tbinarynode(n).left).addnodeflags:=taddnode(tbinarynode(n).left).addnodeflags+
  351. (taddnode(n).addnodeflags*[anf_short_bool]);
  352. exclude(taddnode(n).addnodeflags,anf_short_bool);
  353. end;
  354. end;
  355. hp2:=tbinarynode(tbinarynode(n).left).left;
  356. tbinarynode(tbinarynode(n).left).left:=tbinarynode(tbinarynode(n).left).right;
  357. tbinarynode(tbinarynode(n).left).right:=tbinarynode(n).right;
  358. tbinarynode(n).right:=tbinarynode(n).left;
  359. tbinarynode(n).left:=hp2;
  360. { the transformed tree could result in new possibilities to fold constants
  361. so force a firstpass on the root node }
  362. exclude(tbinarynode(n).right.transientflags,tnf_pass1_done);
  363. do_firstpass(tbinarynode(n).right);
  364. end
  365. else
  366. break;
  367. end
  368. else
  369. break;
  370. end;
  371. end
  372. else
  373. begin
  374. statements:=nil;
  375. result:=fen_norecurse_true;
  376. {$ifdef csedebug}
  377. writeln('============ cse domain ==================');
  378. printnode(output,n);
  379. writeln('Complexity: ',node_complexity(n));
  380. {$endif csedebug}
  381. lists.nodelist:=tfplist.create;
  382. lists.locationlist:=tfplist.create;
  383. lists.equalto:=tfplist.create;
  384. lists.refs:=tfplist.create;
  385. foreachnodestatic(pm_postprocess,n,@collectnodes,@lists);
  386. templist:=tfplist.create;
  387. templist.count:=lists.nodelist.count;
  388. { check all nodes if one is used more than once }
  389. for i:=0 to lists.nodelist.count-1 do
  390. begin
  391. { current node used more than once? }
  392. if assigned(lists.refs[i]) then
  393. begin
  394. if not(assigned(statements)) then
  395. begin
  396. nodes:=internalstatements(statements);
  397. addstatement(statements,internalstatements(creates));
  398. end;
  399. def:=tstoreddef(tnode(lists.nodelist[i]).resultdef);
  400. { we cannot handle register stored records or array in CSE yet
  401. but we can store their reference }
  402. addrstored:=((def.typ in [arraydef,recorddef]) or is_object(def)) and not(is_dynamic_array(def));
  403. if addrstored then
  404. templist[i]:=ctempcreatenode.create_value(cpointerdef.getreusable(def),voidpointertype.size,tt_persistent,
  405. true,caddrnode.create_internal(tnode(lists.nodelist[i])))
  406. else
  407. templist[i]:=ctempcreatenode.create_value(def,def.size,tt_persistent,
  408. def.is_intregable or def.is_fpuregable or def.is_const_intregable,tnode(lists.nodelist[i]));
  409. { the value described by the temp. is immutable and the temp. can be always in register
  410. ttempcreatenode.create normally takes care of the register location but it does not
  411. know about immutability so it cannot take care of managed types }
  412. ttempcreatenode(templist[i]).includetempflag(ti_const);
  413. ttempcreatenode(templist[i]).includetempflag(ti_may_be_in_reg);
  414. { make debugging easier and set temp. location to the original location }
  415. tnode(templist[i]).fileinfo:=tnode(lists.nodelist[i]).fileinfo;
  416. addstatement(creates,tnode(templist[i]));
  417. { the delete node has no semantic use yet, it is just used to clean up memory }
  418. deletetemp:=ctempdeletenode.create(ttempcreatenode(templist[i]));
  419. deletetemp.includetempflag(ti_cleanup_only);
  420. addstatement(tstatementnode(arg^),deletetemp);
  421. { make debugging easier and set temp. location to the original location }
  422. creates.fileinfo:=tnode(lists.nodelist[i]).fileinfo;
  423. hp:=ttempcreatenode(templist[i]);
  424. do_firstpass(tnode(hp));
  425. templist[i]:=hp;
  426. if addrstored then
  427. pnode(lists.locationlist[i])^:=cderefnode.Create(ctemprefnode.create(ttempcreatenode(templist[i])))
  428. else
  429. pnode(lists.locationlist[i])^:=ctemprefnode.create(ttempcreatenode(templist[i]));
  430. { make debugging easier and set temp. location to the original location }
  431. pnode(lists.locationlist[i])^.fileinfo:=tnode(lists.nodelist[i]).fileinfo;
  432. do_firstpass(pnode(lists.locationlist[i])^);
  433. {$ifdef csedebug}
  434. printnode(output,statements);
  435. {$endif csedebug}
  436. end
  437. { current node reference to another node? }
  438. else if lists.equalto[i]<>pointer(-1) then
  439. begin
  440. def:=tstoreddef(tnode(lists.nodelist[i]).resultdef);
  441. { we cannot handle register stored records or array in CSE yet
  442. but we can store their reference }
  443. addrstored:=((def.typ in [arraydef,recorddef]) or is_object(def)) and not(is_dynamic_array(def));
  444. {$if defined(csedebug) or defined(csestats)}
  445. writeln;
  446. writeln('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!');
  447. writeln('Complexity: ',node_complexity(tnode(lists.nodelist[i])),' Node ',i,' equals Node ',ptrint(lists.equalto[i]));
  448. printnode(output,tnode(lists.nodelist[i]));
  449. printnode(output,tnode(lists.nodelist[ptrint(lists.equalto[i])]));
  450. writeln('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!');
  451. writeln;
  452. {$endif defined(csedebug) or defined(csestats)}
  453. templist[i]:=templist[ptrint(lists.equalto[i])];
  454. if addrstored then
  455. pnode(lists.locationlist[i])^:=cderefnode.Create(ctemprefnode.create(ttempcreatenode(templist[ptrint(lists.equalto[i])])))
  456. else
  457. pnode(lists.locationlist[i])^:=ctemprefnode.create(ttempcreatenode(templist[ptrint(lists.equalto[i])]));
  458. { make debugging easier and set temp. location to the original location }
  459. pnode(lists.locationlist[i])^.fileinfo:=tnode(lists.nodelist[i]).fileinfo;
  460. do_firstpass(pnode(lists.locationlist[i])^);
  461. end;
  462. end;
  463. { clean up unused trees }
  464. for i:=0 to lists.nodelist.count-1 do
  465. if lists.equalto[i]<>pointer(-1) then
  466. tnode(lists.nodelist[i]).free;
  467. {$ifdef csedebug}
  468. writeln('nodes: ',lists.nodelist.count);
  469. writeln('==========================================');
  470. {$endif csedebug}
  471. lists.nodelist.free;
  472. lists.locationlist.free;
  473. lists.equalto.free;
  474. lists.refs.free;
  475. templist.free;
  476. if assigned(statements) then
  477. begin
  478. { call para nodes need a special handling because
  479. they can be only children nodes of call nodes
  480. so the initialization code is inserted below the
  481. call para node
  482. }
  483. if n.nodetype=callparan then
  484. begin
  485. addstatement(statements,tcallparanode(n).left);
  486. tcallparanode(n).left:=nodes;
  487. do_firstpass(tcallparanode(n).left);
  488. end
  489. else
  490. begin
  491. addstatement(statements,n);
  492. n:=nodes;
  493. do_firstpass(n);
  494. end;
  495. {$ifdef csedebug}
  496. printnode(output,nodes);
  497. {$endif csedebug}
  498. end;
  499. end
  500. end;
  501. end;
  502. function do_optcse(var rootnode : tnode) : tnode;
  503. var
  504. deletes,
  505. statements : tstatementnode;
  506. deleteblock,
  507. rootblock : tblocknode;
  508. begin
  509. {$ifdef csedebug}
  510. writeln('====================================================================================');
  511. writeln('CSE optimization pass started');
  512. writeln('====================================================================================');
  513. printnode(rootnode);
  514. writeln('====================================================================================');
  515. writeln;
  516. {$endif csedebug}
  517. deleteblock:=internalstatements(deletes);
  518. foreachnodestatic(pm_postprocess,rootnode,@searchcsedomain,@deletes);
  519. rootblock:=internalstatements(statements);
  520. addstatement(statements,rootnode);
  521. addstatement(statements,deleteblock);
  522. rootnode:=rootblock;
  523. do_firstpass(rootnode);
  524. {$ifdef csedebug}
  525. writeln('====================================================================================');
  526. writeln('CSE optimization result');
  527. writeln('====================================================================================');
  528. printnode(rootnode);
  529. writeln('====================================================================================');
  530. writeln;
  531. {$endif csedebug}
  532. result:=nil;
  533. end;
  534. type
  535. tconstentry = record
  536. valuenode : tnode;
  537. weight : TCGInt;
  538. temp : ttempcreatenode;
  539. end;
  540. pconstentry = ^tconstentry;
  541. tconstentries = array of tconstentry;
  542. pconstentries = ^tconstentries;
  543. function CSEOnReference(n : tnode) : Boolean;
  544. begin
  545. Result:=(n.nodetype=loadn) and (tloadnode(n).symtableentry.typ=staticvarsym)
  546. and ((vo_is_thread_var in tstaticvarsym(tloadnode(n).symtableentry).varoptions) or
  547. (cs_create_pic in current_settings.moduleswitches)
  548. {$if defined(aarch64) or defined(sparc) or defined(sparc64)}
  549. or (not(tabstractvarsym(tloadnode(n).symtableentry).is_regvar(false)))
  550. {$endif defined(aarch64) or defined(sparc) or defined(sparc64)}
  551. );
  552. end;
  553. function collectconsts(var n:tnode; arg: pointer) : foreachnoderesult;
  554. var
  555. consts: pconstentries;
  556. found: Boolean;
  557. i: Integer;
  558. begin
  559. result:=fen_true;
  560. consts:=pconstentries(arg);
  561. if ((n.nodetype=realconstn)
  562. {$ifdef x86}
  563. { x87 consts would end up in memory, so loading them in temps. makes no sense }
  564. and use_vectorfpu(n.resultdef)
  565. {$endif x86}
  566. ) or
  567. CSEOnReference(n) then
  568. begin
  569. found:=false;
  570. i:=0;
  571. for i:=0 to High(consts^) do
  572. begin
  573. if tnode(consts^[i].valuenode).isequal(n) then
  574. begin
  575. found:=true;
  576. break;
  577. end;
  578. end;
  579. if found then
  580. inc(consts^[i].weight)
  581. else
  582. begin
  583. SetLength(consts^,length(consts^)+1);
  584. with consts^[high(consts^)] do
  585. begin
  586. valuenode:=n.getcopy;
  587. valuenode.fileinfo:=current_procinfo.entrypos;
  588. weight:=1;
  589. end;
  590. end;
  591. end;
  592. end;
  593. function replaceconsts(var n:tnode; arg: pointer) : foreachnoderesult;
  594. var
  595. hp: tnode;
  596. begin
  597. result:=fen_true;
  598. if tnode(pconstentry(arg)^.valuenode).isequal(n) then
  599. begin
  600. { shall we take the address? }
  601. if CSEOnReference(pconstentry(arg)^.valuenode) then
  602. begin
  603. hp:=ctypeconvnode.create_internal(cderefnode.create(ctemprefnode.create(pconstentry(arg)^.temp)),pconstentry(arg)^.valuenode.resultdef);
  604. ttypeconvnode(hp).left.fileinfo:=n.fileinfo;
  605. end
  606. else
  607. hp:=ctemprefnode.create(pconstentry(arg)^.temp);
  608. hp.fileinfo:=n.fileinfo;
  609. n.Free;
  610. n:=hp;
  611. do_firstpass(n);
  612. end;
  613. end;
  614. function do_consttovar(var rootnode : tnode) : tnode;
  615. var
  616. constentries : tconstentries;
  617. Procedure QuickSort(L, R : Longint);
  618. var
  619. I, J, P: Longint;
  620. Item, Q : tconstentry;
  621. begin
  622. repeat
  623. I := L;
  624. J := R;
  625. P := (L + R) div 2;
  626. repeat
  627. Item := constentries[P];
  628. while Item.weight>constentries[I].weight do
  629. I := I + 1;
  630. while Item.weight<constentries[J].weight do
  631. J := J - 1;
  632. If I <= J then
  633. begin
  634. Q := constentries[I];
  635. constentries[I] := constentries[J];
  636. constentries[J] := Q;
  637. if P = I then
  638. P := J
  639. else if P = J then
  640. P := I;
  641. I := I + 1;
  642. J := J - 1;
  643. end;
  644. until I > J;
  645. if L < J then
  646. QuickSort(L, J);
  647. L := I;
  648. until I >= R;
  649. end;
  650. var
  651. creates,
  652. deletes,
  653. statements : tstatementnode;
  654. createblock,
  655. deleteblock,
  656. rootblock : tblocknode;
  657. i, max_fpu_regs_assigned, fpu_regs_assigned,
  658. max_int_regs_assigned, int_regs_assigned: Integer;
  659. old_current_filepos: tfileposinfo;
  660. begin
  661. {$ifdef csedebug}
  662. writeln('====================================================================================');
  663. writeln('Const optimization pass started');
  664. writeln('====================================================================================');
  665. printnode(rootnode);
  666. writeln('====================================================================================');
  667. writeln;
  668. {$endif csedebug}
  669. foreachnodestatic(pm_postprocess,rootnode,@collectconsts,@constentries);
  670. createblock:=nil;
  671. deleteblock:=nil;
  672. rootblock:=nil;
  673. { estimate how many int registers can be used }
  674. if pi_do_call in current_procinfo.flags then
  675. max_int_regs_assigned:=length(paramanager.get_saved_registers_int(current_procinfo.procdef.proccalloption))
  676. { we store only addresses, so take care of the relation between address sizes and register sizes }
  677. div max(sizeof(PtrUInt) div sizeof(ALUUInt),1)
  678. { heuristics, just use a quarter of all registers at maximum }
  679. div 4
  680. else
  681. max_int_regs_assigned:=max(first_int_imreg div 4,1);
  682. {$if defined(x86) or defined(aarch64) or defined(arm)}
  683. { x86, aarch64 and arm (neglecting fpa) use mm registers for floats }
  684. if pi_do_call in current_procinfo.flags then
  685. { heuristics, just use a fifth of all registers at maximum }
  686. max_fpu_regs_assigned:=length(paramanager.get_saved_registers_mm(current_procinfo.procdef.proccalloption)) div 5
  687. else
  688. max_fpu_regs_assigned:=max(first_mm_imreg div 5,1);
  689. {$else defined(x86) or defined(aarch64) or defined(arm)}
  690. if pi_do_call in current_procinfo.flags then
  691. { heuristics, just use a fifth of all registers at maximum }
  692. max_fpu_regs_assigned:=length(paramanager.get_saved_registers_fpu(current_procinfo.procdef.proccalloption)) div 5
  693. else
  694. max_fpu_regs_assigned:=max(first_fpu_imreg div 5,1);
  695. {$endif defined(x86) or defined(aarch64) or defined(arm)}
  696. fpu_regs_assigned:=0;
  697. int_regs_assigned:=0;
  698. if Length(constentries)>0 then
  699. begin
  700. { sort entries by weight }
  701. QuickSort(0,High(constentries));
  702. { assign only the constants with the highest weight to a register }
  703. for i:=High(constentries) downto 0 do
  704. begin
  705. if (constentries[i].valuenode.nodetype=realconstn) and
  706. { if there is a call, we need most likely to save/restore a register }
  707. ((constentries[i].weight>3) or
  708. ((constentries[i].weight>1) and not(pi_do_call in current_procinfo.flags)))
  709. then
  710. begin
  711. if fpu_regs_assigned>=max_fpu_regs_assigned then
  712. break;
  713. old_current_filepos:=current_filepos;
  714. current_filepos:=current_procinfo.entrypos;
  715. if not(assigned(createblock)) then
  716. begin
  717. rootblock:=internalstatements(statements);
  718. createblock:=internalstatements(creates);
  719. deleteblock:=internalstatements(deletes);
  720. end;
  721. constentries[i].temp:=ctempcreatenode.create(constentries[i].valuenode.resultdef,
  722. constentries[i].valuenode.resultdef.size,tt_persistent,true);
  723. addstatement(creates,constentries[i].temp);
  724. addstatement(creates,cassignmentnode.create_internal(ctemprefnode.create(constentries[i].temp),constentries[i].valuenode));
  725. current_filepos:=old_current_filepos;
  726. foreachnodestatic(pm_postprocess,rootnode,@replaceconsts,@constentries[i]);
  727. inc(fpu_regs_assigned);
  728. end
  729. else if CSEOnReference(constentries[i].valuenode) and
  730. { if there is a call, we need most likely to save/restore a register }
  731. ((constentries[i].weight>2) or
  732. ((constentries[i].weight>1) and not(pi_do_call in current_procinfo.flags)))
  733. then
  734. begin
  735. if int_regs_assigned>=max_int_regs_assigned then
  736. break;
  737. old_current_filepos:=current_filepos;
  738. current_filepos:=current_procinfo.entrypos;
  739. if not(assigned(createblock)) then
  740. begin
  741. rootblock:=internalstatements(statements);
  742. createblock:=internalstatements(creates);
  743. deleteblock:=internalstatements(deletes);
  744. end;
  745. constentries[i].temp:=ctempcreatenode.create(cpointerdef.getreusable(constentries[i].valuenode.resultdef),
  746. voidpointertype.size,tt_persistent,true);
  747. addstatement(creates,constentries[i].temp);
  748. addstatement(creates,cassignmentnode.create_internal(ctemprefnode.create(constentries[i].temp),
  749. caddrnode.create_internal(constentries[i].valuenode)));
  750. current_filepos:=old_current_filepos;
  751. foreachnodestatic(pm_postprocess,rootnode,@replaceconsts,@constentries[i]);
  752. inc(int_regs_assigned);
  753. end;
  754. end;
  755. end;
  756. if assigned(createblock) then
  757. begin
  758. addstatement(statements,createblock);
  759. addstatement(statements,rootnode);
  760. addstatement(statements,deleteblock);
  761. rootnode:=rootblock;
  762. do_firstpass(rootnode);
  763. end;
  764. {$ifdef csedebug}
  765. writeln('====================================================================================');
  766. writeln('Const optimization result');
  767. writeln('====================================================================================');
  768. printnode(rootnode);
  769. writeln('====================================================================================');
  770. writeln;
  771. {$endif csedebug}
  772. result:=nil;
  773. end;
  774. end.