optloop.pas 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717
  1. {
  2. Loop optimization
  3. Copyright (c) 2005 by Florian Klaempfl
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit optloop;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_OPTSTRENGTH}
  20. { $define DEBUG_OPTFORLOOP}
  21. interface
  22. uses
  23. node;
  24. function unroll_loop(node : tnode) : tnode;
  25. function OptimizeInductionVariables(node : tnode) : boolean;
  26. function OptimizeForLoop(var node : tnode) : boolean;
  27. implementation
  28. uses
  29. cutils,cclasses,compinnr,
  30. globtype,globals,constexp,
  31. verbose,
  32. symdef,symsym,
  33. defutil,
  34. cpuinfo,
  35. nutils,
  36. nadd,nbas,nflw,ncon,ninl,ncal,nld,nmem,ncnv,
  37. ncgmem,
  38. pass_1,
  39. optbase,optutils,
  40. procinfo;
  41. function number_unrolls(node : tnode) : cardinal;
  42. var
  43. nodeCount : cardinal;
  44. begin
  45. { calculate how often a loop shall be unrolled.
  46. The term (60*ord(node_count_weighted(node)<15)) is used to get small loops unrolled more often as
  47. the counter management takes more time in this case. }
  48. {$ifdef i386}
  49. { multiply by 2 for CPUs with a long pipeline }
  50. if current_settings.optimizecputype in [cpu_Pentium4] then
  51. begin
  52. { See the common branch below for an explanation. }
  53. nodeCount:=node_count_weighted(node,41);
  54. number_unrolls:=round((60+(60*ord(nodeCount<15)))/max(nodeCount,1))
  55. end
  56. else
  57. {$endif i386}
  58. begin
  59. { If nodeCount >= 15, numerator will be 30,
  60. and the largest number (starting from 15) that makes sense as its denominator
  61. (the smallest number that gives number_unrolls = 1) is 21 = trunc(30/1.5+1),
  62. so there's no point in counting for more than 21 nodes.
  63. "Long pipeline" variant above is the same with numerator=60 and max denominator = 41. }
  64. nodeCount:=node_count_weighted(node,21);
  65. number_unrolls:=round((30+(60*ord(nodeCount<15)))/max(nodeCount,1));
  66. end;
  67. if number_unrolls=0 then
  68. number_unrolls:=1;
  69. end;
  70. type
  71. treplaceinfo = record
  72. node : tnode;
  73. value : Tconstexprint;
  74. end;
  75. preplaceinfo = ^treplaceinfo;
  76. function checkcontrollflowstatements(var n:tnode; arg: pointer): foreachnoderesult;
  77. begin
  78. if n.nodetype in [breakn,continuen,goton,labeln,exitn,raisen] then
  79. result:=fen_norecurse_true
  80. else
  81. result:=fen_false;
  82. end;
  83. function replaceloadnodes(var n: tnode; arg: pointer): foreachnoderesult;
  84. begin
  85. if n.isequal(preplaceinfo(arg)^.node) then
  86. begin
  87. if n.flags*[nf_modify,nf_write,nf_address_taken]<>[] then
  88. internalerror(2012090402);
  89. n.free;
  90. n:=cordconstnode.create(preplaceinfo(arg)^.value,preplaceinfo(arg)^.node.resultdef,false);
  91. do_firstpass(n);
  92. end;
  93. result:=fen_false;
  94. end;
  95. function unroll_loop(node : tnode) : tnode;
  96. var
  97. unrolls,i : cardinal;
  98. counts : qword;
  99. unrollstatement,newforstatement : tstatementnode;
  100. unrollblock : tblocknode;
  101. getridoffor : boolean;
  102. replaceinfo : treplaceinfo;
  103. hascontrollflowstatements : boolean;
  104. begin
  105. result:=nil;
  106. if (cs_opt_size in current_settings.optimizerswitches) then
  107. exit;
  108. if ErrorCount<>0 then
  109. exit;
  110. if not(node.nodetype in [forn]) then
  111. exit;
  112. unrolls:=number_unrolls(tfornode(node).t2);
  113. if (unrolls>1) and
  114. ((tfornode(node).left.nodetype<>loadn) or
  115. { the address of the counter variable might be taken if it is passed by constref to a
  116. subroutine, so really check if it is not taken }
  117. ((tfornode(node).left.nodetype=loadn) and (tloadnode(tfornode(node).left).symtableentry is tabstractvarsym) and
  118. not(tabstractvarsym(tloadnode(tfornode(node).left).symtableentry).addr_taken) and
  119. not(tabstractvarsym(tloadnode(tfornode(node).left).symtableentry).different_scope))
  120. ) then
  121. begin
  122. { number of executions known? }
  123. if (tfornode(node).right.nodetype=ordconstn) and (tfornode(node).t1.nodetype=ordconstn) then
  124. begin
  125. if lnf_backward in tfornode(node).loopflags then
  126. counts:=tordconstnode(tfornode(node).right).value-tordconstnode(tfornode(node).t1).value+1
  127. else
  128. counts:=tordconstnode(tfornode(node).t1).value-tordconstnode(tfornode(node).right).value+1;
  129. hascontrollflowstatements:=foreachnodestatic(tfornode(node).t2,@checkcontrollflowstatements,nil);
  130. { don't unroll more than we need,
  131. multiply unroll by two here because we can get rid
  132. of the counter variable completely and replace it by a constant
  133. if unrolls=counts }
  134. if unrolls*2>=counts then
  135. unrolls:=counts;
  136. { create block statement }
  137. unrollblock:=internalstatements(unrollstatement);
  138. { can we get rid completly of the for ? }
  139. getridoffor:=(unrolls=counts) and not(hascontrollflowstatements) and
  140. { TP/Macpas allows assignments to the for-variables, so we cannot get rid of the for }
  141. ([m_tp7,m_mac]*current_settings.modeswitches=[]);
  142. if getridoffor then
  143. begin
  144. replaceinfo.node:=tfornode(node).left;
  145. replaceinfo.value:=tordconstnode(tfornode(node).right).value;
  146. end
  147. else
  148. { we consider currently unrolling not beneficial, if we cannot get rid of the for completely, this
  149. might change if a more sophisticated heuristics is used (FK) }
  150. exit;
  151. { let's unroll (and rock of course) }
  152. for i:=1 to unrolls do
  153. begin
  154. { create and insert copy of the statement block }
  155. addstatement(unrollstatement,tfornode(node).t2.getcopy);
  156. { set and insert entry label? }
  157. if (counts mod unrolls<>0) and
  158. ((counts mod unrolls)=unrolls-i) then
  159. begin
  160. tfornode(node).entrylabel:=clabelnode.create(cnothingnode.create,clabelsym.create('$optunrol'));
  161. addstatement(unrollstatement,tfornode(node).entrylabel);
  162. end;
  163. if getridoffor then
  164. begin
  165. foreachnodestatic(tnode(unrollstatement),@replaceloadnodes,@replaceinfo);
  166. if lnf_backward in tfornode(node).loopflags then
  167. replaceinfo.value:=replaceinfo.value-1
  168. else
  169. replaceinfo.value:=replaceinfo.value+1;
  170. end
  171. else
  172. begin
  173. { for itself increases at the last iteration }
  174. if i<unrolls then
  175. begin
  176. { insert incr/decrementation of counter var }
  177. if lnf_backward in tfornode(node).loopflags then
  178. addstatement(unrollstatement,
  179. geninlinenode(in_dec_x,false,ccallparanode.create(tfornode(node).left.getcopy,nil)))
  180. else
  181. addstatement(unrollstatement,
  182. geninlinenode(in_inc_x,false,ccallparanode.create(tfornode(node).left.getcopy,nil)));
  183. end;
  184. end;
  185. end;
  186. { can we get rid of the for statement? }
  187. if getridoffor then
  188. begin
  189. { create block statement }
  190. result:=internalstatements(newforstatement);
  191. addstatement(newforstatement,unrollblock);
  192. doinlinesimplify(result);
  193. end;
  194. end
  195. else
  196. begin
  197. { unrolling is a little bit more tricky if we don't know the
  198. loop count at compile time, but the solution is to use a jump table
  199. which is indexed by "loop count mod unrolls" at run time and which
  200. jumps then at the appropriate place inside the loop. Because
  201. a module division is expensive, we can use only unroll counts dividable
  202. by 2 }
  203. case unrolls of
  204. 1..2:
  205. ;
  206. 3:
  207. unrolls:=2;
  208. 4..7:
  209. unrolls:=4;
  210. { unrolls>4 already make no sense imo, but who knows (FK) }
  211. 8..15:
  212. unrolls:=8;
  213. 16..31:
  214. unrolls:=16;
  215. 32..63:
  216. unrolls:=32;
  217. 64..$7fff:
  218. unrolls:=64;
  219. else
  220. exit;
  221. end;
  222. { we don't handle this yet }
  223. exit;
  224. end;
  225. if not(assigned(result)) then
  226. begin
  227. tfornode(node).t2.free;
  228. tfornode(node).t2:=unrollblock;
  229. end;
  230. end;
  231. end;
  232. var
  233. initcode,
  234. calccode,
  235. deletecode : tblocknode;
  236. initcodestatements,
  237. calccodestatements,
  238. deletecodestatements: tstatementnode;
  239. templist : tfplist;
  240. inductionexprs : tfplist;
  241. changedforloop,
  242. containsnestedforloop,
  243. docalcatend: boolean;
  244. function checkcontinue(var n:tnode; arg: pointer): foreachnoderesult;
  245. begin
  246. if n.nodetype=continuen then
  247. result:=fen_norecurse_true
  248. else
  249. result:=fen_false;
  250. end;
  251. function is_loop_invariant(loop : tnode;expr : tnode) : boolean;
  252. begin
  253. result:=is_constnode(expr);
  254. case expr.nodetype of
  255. loadn:
  256. begin
  257. if (pi_dfaavailable in current_procinfo.flags) and
  258. assigned(loop.optinfo) and
  259. assigned(expr.optinfo) and
  260. not(expr.isequal(tfornode(loop).left)) then
  261. { no aliasing? }
  262. result:=(([nf_write,nf_modify]*expr.flags)=[]) and not(tabstractvarsym(tloadnode(expr).symtableentry).addr_taken) and
  263. { no definition in the loop? }
  264. not(DFASetIn(tfornode(loop).t2.optinfo^.defsum,expr.optinfo^.index));
  265. end;
  266. vecn:
  267. begin
  268. result:=((tvecnode(expr).left.nodetype=loadn) or is_loop_invariant(loop,tvecnode(expr).left)) and
  269. is_loop_invariant(loop,tvecnode(expr).right);
  270. end;
  271. typeconvn:
  272. result:=is_loop_invariant(loop,ttypeconvnode(expr).left);
  273. addn,subn:
  274. result:=is_loop_invariant(loop,taddnode(expr).left) and is_loop_invariant(loop,taddnode(expr).right);
  275. else
  276. ;
  277. end;
  278. end;
  279. { checks if the strength of n can be recuded, arg is the tforloop being considered }
  280. function dostrengthreductiontest(var n: tnode; arg: pointer): foreachnoderesult;
  281. function findpreviousstrengthreduction : boolean;
  282. var
  283. i : longint;
  284. hp : tnode;
  285. begin
  286. result:=false;
  287. for i:=0 to inductionexprs.count-1 do
  288. begin
  289. { do we already maintain one expression? }
  290. if tnode(inductionexprs[i]).isequal(n) then
  291. begin
  292. case n.nodetype of
  293. muln:
  294. hp:=ctemprefnode.create(ttempcreatenode(templist[i]));
  295. vecn:
  296. hp:=ctypeconvnode.create_internal(cderefnode.create(ctemprefnode.create(
  297. ttempcreatenode(templist[i]))),n.resultdef);
  298. else
  299. internalerror(200809211);
  300. end;
  301. n.free;
  302. n:=hp;
  303. result:=true;
  304. exit;
  305. end;
  306. end;
  307. end;
  308. procedure CreateNodes;
  309. begin
  310. if not assigned(initcode) then
  311. begin
  312. initcode:=internalstatements(initcodestatements);
  313. calccode:=internalstatements(calccodestatements);
  314. deletecode:=internalstatements(deletecodestatements);
  315. end;
  316. end;
  317. procedure CheckCalcAtEnd;
  318. begin
  319. if not assigned(initcode) then
  320. docalcatend:=not(assigned(tfornode(arg).entrylabel)) and
  321. not(foreachnodestatic(tfornode(arg).t2,@checkcontinue,nil));
  322. end;
  323. var
  324. tempnode,startvaltemp : ttempcreatenode;
  325. dummy : longint;
  326. nn : tnode;
  327. nt : tnodetype;
  328. nflags : tnodeflags;
  329. begin
  330. result:=fen_false;
  331. nflags:=n.flags;
  332. case n.nodetype of
  333. forn:
  334. { inform for loop search routine, that it needs to search more deeply }
  335. containsnestedforloop:=true;
  336. muln:
  337. begin
  338. if (taddnode(n).right.nodetype=loadn) and
  339. taddnode(n).right.isequal(tfornode(arg).left) and
  340. { plain read of the loop variable? }
  341. not(nf_write in taddnode(n).right.flags) and
  342. not(nf_modify in taddnode(n).right.flags) and
  343. is_loop_invariant(tfornode(arg),taddnode(n).left) then
  344. taddnode(n).swapleftright;
  345. if (taddnode(n).left.nodetype=loadn) and
  346. taddnode(n).left.isequal(tfornode(arg).left) and
  347. { plain read of the loop variable? }
  348. not(nf_write in taddnode(n).left.flags) and
  349. not(nf_modify in taddnode(n).left.flags) and
  350. is_loop_invariant(tfornode(arg),taddnode(n).right) then
  351. begin
  352. changedforloop:=true;
  353. { did we use the same expression before already? }
  354. if not(findpreviousstrengthreduction) then
  355. begin
  356. {$ifdef DEBUG_OPTSTRENGTH}
  357. writeln('**********************************************************************************');
  358. writeln(parser_current_file, ': Found expression for strength reduction (MUL): ');
  359. printnode(n);
  360. writeln('**********************************************************************************');
  361. {$endif DEBUG_OPTSTRENGTH}
  362. CheckCalcAtEnd;
  363. tempnode:=ctempcreatenode.create(n.resultdef,n.resultdef.size,tt_persistent,
  364. tstoreddef(n.resultdef).is_intregable or tstoreddef(n.resultdef).is_fpuregable);
  365. templist.Add(tempnode);
  366. inductionexprs.Add(n);
  367. CreateNodes;
  368. if lnf_backward in tfornode(arg).loopflags then
  369. addstatement(calccodestatements,
  370. geninlinenode(in_dec_x,false,
  371. ccallparanode.create(ctemprefnode.create(tempnode),ccallparanode.create(taddnode(n).right.getcopy,nil))))
  372. else
  373. addstatement(calccodestatements,
  374. geninlinenode(in_inc_x,false,
  375. ccallparanode.create(ctemprefnode.create(tempnode),ccallparanode.create(taddnode(n).right.getcopy,nil))));
  376. addstatement(initcodestatements,tempnode);
  377. nn:=tfornode(arg).right.getcopy;
  378. { If the calculation is not performed at the end
  379. it is needed to adjust the starting value }
  380. if not docalcatend then
  381. begin
  382. if lnf_backward in tfornode(arg).loopflags then
  383. nt:=addn
  384. else
  385. nt:=subn;
  386. nn:=caddnode.create_internal(nt,nn,
  387. cordconstnode.create(1,nn.resultdef,false));
  388. end;
  389. addstatement(initcodestatements,cassignmentnode.create(ctemprefnode.create(tempnode),
  390. caddnode.create(muln,nn,
  391. taddnode(n).right.getcopy)
  392. )
  393. );
  394. { finally replace the node by a temp. ref }
  395. n:=ctemprefnode.create(tempnode);
  396. { ... and add a temp. release node }
  397. addstatement(deletecodestatements,ctempdeletenode.create(tempnode));
  398. end;
  399. { set types }
  400. do_firstpass(n);
  401. result:=fen_norecurse_false;
  402. end;
  403. end;
  404. vecn:
  405. begin
  406. { is the index the counter variable? }
  407. if not(is_special_array(tvecnode(n).left.resultdef)) and
  408. not(is_packed_array(tvecnode(n).left.resultdef)) and
  409. (tvecnode(n).right.isequal(tfornode(arg).left) or
  410. { fpc usually creates a type cast to access an array }
  411. ((tvecnode(n).right.nodetype=typeconvn) and
  412. ttypeconvnode(tvecnode(n).right).left.isequal(tfornode(arg).left)
  413. )
  414. ) and
  415. { plain read of the loop variable? }
  416. not(nf_write in tvecnode(n).right.flags) and
  417. not(nf_modify in tvecnode(n).right.flags) and
  418. { direct array access? }
  419. ((tvecnode(n).left.nodetype=loadn) or
  420. { ... or loop invariant expression? }
  421. is_loop_invariant(tfornode(arg),tvecnode(n).right))
  422. {$if not (defined(cpu16bitalu) or defined(cpu8bitalu))}
  423. { removing the multiplication is only worth the
  424. effort if it's not a simple shift }
  425. and not(ispowerof2(tcgvecnode(n).get_mul_size,dummy))
  426. {$endif}
  427. then
  428. begin
  429. changedforloop:=true;
  430. { did we use the same expression before already? }
  431. if not(findpreviousstrengthreduction) then
  432. begin
  433. {$ifdef DEBUG_OPTSTRENGTH}
  434. writeln('**********************************************************************************');
  435. writeln(parser_current_file,': Found expression for strength reduction (VEC): ');
  436. printnode(n);
  437. writeln('**********************************************************************************');
  438. {$endif DEBUG_OPTSTRENGTH}
  439. CheckCalcAtEnd;
  440. tempnode:=ctempcreatenode.create(voidpointertype,voidpointertype.size,tt_persistent,true);
  441. templist.Add(tempnode);
  442. inductionexprs.Add(n);
  443. CreateNodes;
  444. if lnf_backward in tfornode(arg).loopflags then
  445. addstatement(calccodestatements,
  446. cinlinenode.createintern(in_dec_x,false,
  447. ccallparanode.create(ctemprefnode.create(tempnode),ccallparanode.create(
  448. cordconstnode.create(tcgvecnode(n).get_mul_size,sizeuinttype,false),nil))))
  449. else
  450. addstatement(calccodestatements,
  451. cinlinenode.createintern(in_inc_x,false,
  452. ccallparanode.create(ctemprefnode.create(tempnode),ccallparanode.create(
  453. cordconstnode.create(tcgvecnode(n).get_mul_size,sizeuinttype,false),nil))));
  454. addstatement(initcodestatements,tempnode);
  455. startvaltemp:=maybereplacewithtemp(tfornode(arg).right,initcode,initcodestatements,tfornode(arg).right.resultdef.size,true);
  456. nn:=caddrnode.create(
  457. cvecnode.create(tvecnode(n).left.getcopy,ctypeconvnode.create_internal(tfornode(arg).right.getcopy,tvecnode(n).right.resultdef))
  458. );
  459. { If the calculation is not performed at the end
  460. it is needed to adjust the starting value }
  461. if not docalcatend then
  462. begin
  463. if lnf_backward in tfornode(arg).loopflags then
  464. nt:=addn
  465. else
  466. nt:=subn;
  467. nn:=caddnode.create_internal(nt,
  468. ctypeconvnode.create_internal(nn,voidpointertype),
  469. cordconstnode.create(tcgvecnode(n).get_mul_size,sizeuinttype,false));
  470. end;
  471. addstatement(initcodestatements,cassignmentnode.create(ctemprefnode.create(tempnode),nn));
  472. { finally replace the node by a temp. ref }
  473. n:=ctypeconvnode.create_internal(cderefnode.create(ctemprefnode.create(tempnode)),n.resultdef);
  474. { ... and add a temp. release node }
  475. if startvaltemp<>nil then
  476. addstatement(deletecodestatements,ctempdeletenode.create(startvaltemp));
  477. addstatement(deletecodestatements,ctempdeletenode.create(tempnode));
  478. end;
  479. { Copy the nf_write,nf_modify flags to the new deref node of the temp.
  480. Othewise assignments to vector elements will be removed. }
  481. if nflags*[nf_write,nf_modify]<>[] then
  482. begin
  483. if (n.nodetype<>typeconvn) or (ttypeconvnode(n).left.nodetype<>derefn) then
  484. internalerror(2021091501);
  485. ttypeconvnode(n).left.flags:=ttypeconvnode(n).left.flags+nflags*[nf_write,nf_modify];
  486. end;
  487. { set types }
  488. do_firstpass(n);
  489. result:=fen_norecurse_false;
  490. end;
  491. end;
  492. else
  493. ;
  494. end;
  495. end;
  496. function OptimizeInductionVariablesSingleForLoop(node : tnode) : tnode;
  497. var
  498. loopcode : tblocknode;
  499. loopcodestatements,
  500. newcodestatements : tstatementnode;
  501. fornode : tfornode;
  502. begin
  503. result:=nil;
  504. if node.nodetype<>forn then
  505. exit;
  506. templist:=TFPList.Create;
  507. inductionexprs:=TFPList.Create;
  508. initcode:=nil;
  509. calccode:=nil;
  510. deletecode:=nil;
  511. initcodestatements:=nil;
  512. calccodestatements:=nil;
  513. deletecodestatements:=nil;
  514. docalcatend:=false;
  515. { find all expressions being candidates for strength reduction
  516. and replace them }
  517. foreachnodestatic(pm_postprocess,node,@dostrengthreductiontest,node);
  518. { clue everything together }
  519. if assigned(initcode) then
  520. begin
  521. do_firstpass(tnode(initcode));
  522. do_firstpass(tnode(calccode));
  523. do_firstpass(tnode(deletecode));
  524. { create a new for node, the old one will be released by the compiler }
  525. with tfornode(node) do
  526. begin
  527. fornode:=cfornode.create(left,right,t1,t2,lnf_backward in loopflags);
  528. left:=nil;
  529. right:=nil;
  530. t1:=nil;
  531. t2:=nil;
  532. end;
  533. node:=fornode;
  534. loopcode:=internalstatements(loopcodestatements);
  535. if not docalcatend then
  536. addstatement(loopcodestatements,calccode);
  537. addstatement(loopcodestatements,tfornode(node).t2);
  538. if docalcatend then
  539. addstatement(loopcodestatements,calccode);
  540. tfornode(node).t2:=loopcode;
  541. do_firstpass(node);
  542. result:=internalstatements(newcodestatements);
  543. addstatement(newcodestatements,initcode);
  544. initcode:=nil;
  545. addstatement(newcodestatements,node);
  546. addstatement(newcodestatements,deletecode);
  547. end;
  548. templist.Free;
  549. inductionexprs.Free;
  550. end;
  551. function OptimizeInductionVariables_iterforloops(var n: tnode; arg: pointer): foreachnoderesult;
  552. var
  553. hp : tnode;
  554. begin
  555. Result:=fen_false;
  556. if n.nodetype=forn then
  557. begin
  558. { do we have DFA available? }
  559. if pi_dfaavailable in current_procinfo.flags then
  560. begin
  561. CalcDefSum(tfornode(n).t2);
  562. end;
  563. containsnestedforloop:=false;
  564. hp:=OptimizeInductionVariablesSingleForLoop(n);
  565. if assigned(hp) then
  566. begin
  567. n.Free;
  568. n:=hp;
  569. end;
  570. { can we avoid further searching? }
  571. if not(containsnestedforloop) then
  572. Result:=fen_norecurse_false;
  573. end;
  574. end;
  575. function OptimizeInductionVariables(node : tnode) : boolean;
  576. begin
  577. Result:=false;
  578. if not(pi_dfaavailable in current_procinfo.flags) then
  579. exit;
  580. changedforloop:=false;
  581. foreachnodestatic(pm_postprocess,node,@OptimizeInductionVariables_iterforloops,nil);
  582. Result:=changedforloop;
  583. end;
  584. function OptimizeForLoop_iterforloops(var n: tnode; arg: pointer): foreachnoderesult;
  585. begin
  586. Result:=fen_false;
  587. if (n.nodetype=forn) and
  588. not(lnf_backward in tfornode(n).loopflags) and
  589. (lnf_dont_mind_loopvar_on_exit in tfornode(n).loopflags) and
  590. is_constintnode(tfornode(n).right) and
  591. (([cs_check_overflow,cs_check_range]*n.localswitches)=[]) and
  592. (([cs_check_overflow,cs_check_range]*tfornode(n).left.localswitches)=[]) and
  593. ((tfornode(n).left.nodetype=loadn) and (tloadnode(tfornode(n).left).symtableentry is tabstractvarsym) and
  594. not(tabstractvarsym(tloadnode(tfornode(n).left).symtableentry).addr_taken) and
  595. not(tabstractvarsym(tloadnode(tfornode(n).left).symtableentry).different_scope)) then
  596. begin
  597. { do we have DFA available? }
  598. if pi_dfaavailable in current_procinfo.flags then
  599. begin
  600. CalcUseSum(tfornode(n).t2);
  601. CalcDefSum(tfornode(n).t2);
  602. end
  603. else
  604. Internalerror(2017122801);
  605. if not(assigned(tfornode(n).left.optinfo)) then
  606. exit;
  607. if not(DFASetIn(tfornode(n).t2.optinfo^.usesum,tfornode(n).left.optinfo^.index)) and
  608. not(DFASetIn(tfornode(n).t2.optinfo^.defsum,tfornode(n).left.optinfo^.index)) then
  609. begin
  610. { convert the loop from i:=a to b into i:=b-a+1 to 1 as this simplifies the
  611. abort condition }
  612. {$ifdef DEBUG_OPTFORLOOP}
  613. writeln('**********************************************************************************');
  614. writeln('Found loop for reverting: ');
  615. printnode(n);
  616. writeln('**********************************************************************************');
  617. {$endif DEBUG_OPTFORLOOP}
  618. include(tfornode(n).loopflags,lnf_backward);
  619. tfornode(n).right:=ctypeconvnode.create_internal(
  620. caddnode.create_internal(addn,caddnode.create_internal(subn,
  621. tfornode(n).t1,tfornode(n).right),
  622. cordconstnode.create(1,tfornode(n).left.resultdef,false)),
  623. tfornode(n).left.resultdef);
  624. tfornode(n).t1:=cordconstnode.create(1,tfornode(n).left.resultdef,false);
  625. include(tfornode(n).loopflags,lnf_counter_not_used);
  626. exclude(n.transientflags,tnf_pass1_done);
  627. do_firstpass(n);
  628. {$ifdef DEBUG_OPTFORLOOP}
  629. writeln('Loop reverted: ');
  630. printnode(n);
  631. writeln('**********************************************************************************');
  632. {$endif DEBUG_OPTFORLOOP}
  633. changedforloop:=true;
  634. end;
  635. end;
  636. end;
  637. function OptimizeForLoop(var node : tnode) : boolean;
  638. begin
  639. Result:=false;
  640. if not(pi_dfaavailable in current_procinfo.flags) then
  641. exit;
  642. changedforloop:=false;
  643. foreachnodestatic(pm_postprocess,node,@OptimizeForLoop_iterforloops,nil);
  644. Result:=changedforloop;
  645. end;
  646. end.