optloop.pas 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
  1. {
  2. Loop optimization
  3. Copyright (c) 2005 by Florian Klaempfl
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit optloop;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_OPTSTRENGTH}
  20. { $define DEBUG_OPTFORLOOP}
  21. interface
  22. uses
  23. node;
  24. function unroll_loop(node : tnode) : tnode;
  25. function OptimizeInductionVariables(node : tnode) : boolean;
  26. function OptimizeForLoop(var node : tnode) : boolean;
  27. implementation
  28. uses
  29. cutils,cclasses,compinnr,
  30. globtype,globals,constexp,
  31. verbose,
  32. symdef,symsym,
  33. defutil,
  34. cpuinfo,
  35. nutils,
  36. nadd,nbas,nflw,ncon,ninl,ncal,nld,nmem,ncnv,
  37. ncgmem,
  38. pass_1,
  39. optbase,optutils,
  40. procinfo;
  41. function number_unrolls(node : tnode) : cardinal;
  42. begin
  43. { calculate how often a loop shall be unrolled.
  44. The term (60*ord(node_count_weighted(node)<15)) is used to get small loops unrolled more often as
  45. the counter management takes more time in this case. }
  46. {$ifdef i386}
  47. { multiply by 2 for CPUs with a long pipeline }
  48. if current_settings.optimizecputype in [cpu_Pentium4] then
  49. number_unrolls:=trunc(round((60+(60*ord(node_count_weighted(node)<15)))/max(node_count_weighted(node),1)))
  50. else
  51. {$endif i386}
  52. number_unrolls:=trunc(round((30+(60*ord(node_count_weighted(node)<15)))/max(node_count_weighted(node),1)));
  53. if number_unrolls=0 then
  54. number_unrolls:=1;
  55. end;
  56. type
  57. treplaceinfo = record
  58. node : tnode;
  59. value : Tconstexprint;
  60. end;
  61. preplaceinfo = ^treplaceinfo;
  62. function checkcontrollflowstatements(var n:tnode; arg: pointer): foreachnoderesult;
  63. begin
  64. if n.nodetype in [breakn,continuen,goton,labeln,exitn,raisen] then
  65. result:=fen_norecurse_true
  66. else
  67. result:=fen_false;
  68. end;
  69. function replaceloadnodes(var n: tnode; arg: pointer): foreachnoderesult;
  70. begin
  71. if n.isequal(preplaceinfo(arg)^.node) then
  72. begin
  73. if n.flags*[nf_modify,nf_write,nf_address_taken]<>[] then
  74. internalerror(2012090402);
  75. n.free;
  76. n:=cordconstnode.create(preplaceinfo(arg)^.value,preplaceinfo(arg)^.node.resultdef,false);
  77. do_firstpass(n);
  78. end;
  79. result:=fen_false;
  80. end;
  81. function unroll_loop(node : tnode) : tnode;
  82. var
  83. unrolls,i : cardinal;
  84. counts : qword;
  85. unrollstatement,newforstatement : tstatementnode;
  86. unrollblock : tblocknode;
  87. getridoffor : boolean;
  88. replaceinfo : treplaceinfo;
  89. hascontrollflowstatements : boolean;
  90. begin
  91. result:=nil;
  92. if (cs_opt_size in current_settings.optimizerswitches) then
  93. exit;
  94. if ErrorCount<>0 then
  95. exit;
  96. if not(node.nodetype in [forn]) then
  97. exit;
  98. unrolls:=number_unrolls(tfornode(node).t2);
  99. if (unrolls>1) and
  100. ((tfornode(node).left.nodetype<>loadn) or
  101. { the address of the counter variable might be taken if it is passed by constref to a
  102. subroutine, so really check if it is not taken }
  103. ((tfornode(node).left.nodetype=loadn) and (tloadnode(tfornode(node).left).symtableentry is tabstractvarsym) and
  104. not(tabstractvarsym(tloadnode(tfornode(node).left).symtableentry).addr_taken) and
  105. not(tabstractvarsym(tloadnode(tfornode(node).left).symtableentry).different_scope))
  106. ) then
  107. begin
  108. { number of executions known? }
  109. if (tfornode(node).right.nodetype=ordconstn) and (tfornode(node).t1.nodetype=ordconstn) then
  110. begin
  111. if lnf_backward in tfornode(node).loopflags then
  112. counts:=tordconstnode(tfornode(node).right).value-tordconstnode(tfornode(node).t1).value+1
  113. else
  114. counts:=tordconstnode(tfornode(node).t1).value-tordconstnode(tfornode(node).right).value+1;
  115. hascontrollflowstatements:=foreachnodestatic(tfornode(node).t2,@checkcontrollflowstatements,nil);
  116. { don't unroll more than we need,
  117. multiply unroll by two here because we can get rid
  118. of the counter variable completely and replace it by a constant
  119. if unrolls=counts }
  120. if unrolls*2>=counts then
  121. unrolls:=counts;
  122. { create block statement }
  123. unrollblock:=internalstatements(unrollstatement);
  124. { can we get rid completly of the for ? }
  125. getridoffor:=(unrolls=counts) and not(hascontrollflowstatements) and
  126. { TP/Macpas allows assignments to the for-variables, so we cannot get rid of the for }
  127. ([m_tp7,m_mac]*current_settings.modeswitches=[]);
  128. if getridoffor then
  129. begin
  130. replaceinfo.node:=tfornode(node).left;
  131. replaceinfo.value:=tordconstnode(tfornode(node).right).value;
  132. end
  133. else
  134. { we consider currently unrolling not beneficial, if we cannot get rid of the for completely, this
  135. might change if a more sophisticated heuristics is used (FK) }
  136. exit;
  137. { let's unroll (and rock of course) }
  138. for i:=1 to unrolls do
  139. begin
  140. { create and insert copy of the statement block }
  141. addstatement(unrollstatement,tfornode(node).t2.getcopy);
  142. { set and insert entry label? }
  143. if (counts mod unrolls<>0) and
  144. ((counts mod unrolls)=unrolls-i) then
  145. begin
  146. tfornode(node).entrylabel:=clabelnode.create(cnothingnode.create,clabelsym.create('$optunrol'));
  147. addstatement(unrollstatement,tfornode(node).entrylabel);
  148. end;
  149. if getridoffor then
  150. begin
  151. foreachnodestatic(tnode(unrollstatement),@replaceloadnodes,@replaceinfo);
  152. if lnf_backward in tfornode(node).loopflags then
  153. replaceinfo.value:=replaceinfo.value-1
  154. else
  155. replaceinfo.value:=replaceinfo.value+1;
  156. end
  157. else
  158. begin
  159. { for itself increases at the last iteration }
  160. if i<unrolls then
  161. begin
  162. { insert incr/decrementation of counter var }
  163. if lnf_backward in tfornode(node).loopflags then
  164. addstatement(unrollstatement,
  165. geninlinenode(in_dec_x,false,ccallparanode.create(tfornode(node).left.getcopy,nil)))
  166. else
  167. addstatement(unrollstatement,
  168. geninlinenode(in_inc_x,false,ccallparanode.create(tfornode(node).left.getcopy,nil)));
  169. end;
  170. end;
  171. end;
  172. { can we get rid of the for statement? }
  173. if getridoffor then
  174. begin
  175. { create block statement }
  176. result:=internalstatements(newforstatement);
  177. addstatement(newforstatement,unrollblock);
  178. doinlinesimplify(result);
  179. end;
  180. end
  181. else
  182. begin
  183. { unrolling is a little bit more tricky if we don't know the
  184. loop count at compile time, but the solution is to use a jump table
  185. which is indexed by "loop count mod unrolls" at run time and which
  186. jumps then at the appropriate place inside the loop. Because
  187. a module division is expensive, we can use only unroll counts dividable
  188. by 2 }
  189. case unrolls of
  190. 1..2:
  191. ;
  192. 3:
  193. unrolls:=2;
  194. 4..7:
  195. unrolls:=4;
  196. { unrolls>4 already make no sense imo, but who knows (FK) }
  197. 8..15:
  198. unrolls:=8;
  199. 16..31:
  200. unrolls:=16;
  201. 32..63:
  202. unrolls:=32;
  203. 64..$7fff:
  204. unrolls:=64;
  205. else
  206. exit;
  207. end;
  208. { we don't handle this yet }
  209. exit;
  210. end;
  211. if not(assigned(result)) then
  212. begin
  213. tfornode(node).t2.free;
  214. tfornode(node).t2:=unrollblock;
  215. end;
  216. end;
  217. end;
  218. var
  219. initcode,
  220. calccode,
  221. deletecode : tblocknode;
  222. initcodestatements,
  223. calccodestatements,
  224. deletecodestatements: tstatementnode;
  225. templist : tfplist;
  226. inductionexprs : tfplist;
  227. changedforloop,
  228. containsnestedforloop,
  229. docalcatend: boolean;
  230. function checkcontinue(var n:tnode; arg: pointer): foreachnoderesult;
  231. begin
  232. if n.nodetype=continuen then
  233. result:=fen_norecurse_true
  234. else
  235. result:=fen_false;
  236. end;
  237. function is_loop_invariant(loop : tnode;expr : tnode) : boolean;
  238. begin
  239. result:=is_constnode(expr);
  240. case expr.nodetype of
  241. loadn:
  242. begin
  243. if (pi_dfaavailable in current_procinfo.flags) and
  244. assigned(loop.optinfo) and
  245. assigned(expr.optinfo) and
  246. not(expr.isequal(tfornode(loop).left)) then
  247. { no aliasing? }
  248. result:=(([nf_write,nf_modify]*expr.flags)=[]) and not(tabstractvarsym(tloadnode(expr).symtableentry).addr_taken) and
  249. { no definition in the loop? }
  250. not(DFASetIn(tfornode(loop).t2.optinfo^.defsum,expr.optinfo^.index));
  251. end;
  252. vecn:
  253. begin
  254. result:=((tvecnode(expr).left.nodetype=loadn) or is_loop_invariant(loop,tvecnode(expr).left)) and
  255. is_loop_invariant(loop,tvecnode(expr).right);
  256. end;
  257. typeconvn:
  258. result:=is_loop_invariant(loop,ttypeconvnode(expr).left);
  259. addn,subn:
  260. result:=is_loop_invariant(loop,taddnode(expr).left) and is_loop_invariant(loop,taddnode(expr).right);
  261. else
  262. ;
  263. end;
  264. end;
  265. { checks if the strength of n can be recuded, arg is the tforloop being considered }
  266. function dostrengthreductiontest(var n: tnode; arg: pointer): foreachnoderesult;
  267. function findpreviousstrengthreduction : boolean;
  268. var
  269. i : longint;
  270. hp : tnode;
  271. begin
  272. result:=false;
  273. for i:=0 to inductionexprs.count-1 do
  274. begin
  275. { do we already maintain one expression? }
  276. if tnode(inductionexprs[i]).isequal(n) then
  277. begin
  278. case n.nodetype of
  279. muln:
  280. hp:=ctemprefnode.create(ttempcreatenode(templist[i]));
  281. vecn:
  282. hp:=ctypeconvnode.create_internal(cderefnode.create(ctemprefnode.create(
  283. ttempcreatenode(templist[i]))),n.resultdef);
  284. else
  285. internalerror(200809211);
  286. end;
  287. n.free;
  288. n:=hp;
  289. result:=true;
  290. exit;
  291. end;
  292. end;
  293. end;
  294. procedure CreateNodes;
  295. begin
  296. if not assigned(initcode) then
  297. begin
  298. initcode:=internalstatements(initcodestatements);
  299. calccode:=internalstatements(calccodestatements);
  300. deletecode:=internalstatements(deletecodestatements);
  301. end;
  302. end;
  303. procedure CheckCalcAtEnd;
  304. begin
  305. if not assigned(initcode) then
  306. docalcatend:=not(assigned(tfornode(arg).entrylabel)) and
  307. not(foreachnodestatic(tfornode(arg).t2,@checkcontinue,nil));
  308. end;
  309. var
  310. tempnode,startvaltemp : ttempcreatenode;
  311. dummy : longint;
  312. nn : tnode;
  313. nt : tnodetype;
  314. nflags : tnodeflags;
  315. begin
  316. result:=fen_false;
  317. nflags:=n.flags;
  318. case n.nodetype of
  319. forn:
  320. { inform for loop search routine, that it needs to search more deeply }
  321. containsnestedforloop:=true;
  322. muln:
  323. begin
  324. if (taddnode(n).right.nodetype=loadn) and
  325. taddnode(n).right.isequal(tfornode(arg).left) and
  326. { plain read of the loop variable? }
  327. not(nf_write in taddnode(n).right.flags) and
  328. not(nf_modify in taddnode(n).right.flags) and
  329. is_loop_invariant(tfornode(arg),taddnode(n).left) then
  330. taddnode(n).swapleftright;
  331. if (taddnode(n).left.nodetype=loadn) and
  332. taddnode(n).left.isequal(tfornode(arg).left) and
  333. { plain read of the loop variable? }
  334. not(nf_write in taddnode(n).left.flags) and
  335. not(nf_modify in taddnode(n).left.flags) and
  336. is_loop_invariant(tfornode(arg),taddnode(n).right) then
  337. begin
  338. changedforloop:=true;
  339. { did we use the same expression before already? }
  340. if not(findpreviousstrengthreduction) then
  341. begin
  342. {$ifdef DEBUG_OPTSTRENGTH}
  343. writeln('**********************************************************************************');
  344. writeln(parser_current_file, ': Found expression for strength reduction (MUL): ');
  345. printnode(n);
  346. writeln('**********************************************************************************');
  347. {$endif DEBUG_OPTSTRENGTH}
  348. CheckCalcAtEnd;
  349. tempnode:=ctempcreatenode.create(n.resultdef,n.resultdef.size,tt_persistent,
  350. tstoreddef(n.resultdef).is_intregable or tstoreddef(n.resultdef).is_fpuregable);
  351. templist.Add(tempnode);
  352. inductionexprs.Add(n);
  353. CreateNodes;
  354. if lnf_backward in tfornode(arg).loopflags then
  355. addstatement(calccodestatements,
  356. geninlinenode(in_dec_x,false,
  357. ccallparanode.create(ctemprefnode.create(tempnode),ccallparanode.create(taddnode(n).right.getcopy,nil))))
  358. else
  359. addstatement(calccodestatements,
  360. geninlinenode(in_inc_x,false,
  361. ccallparanode.create(ctemprefnode.create(tempnode),ccallparanode.create(taddnode(n).right.getcopy,nil))));
  362. addstatement(initcodestatements,tempnode);
  363. nn:=tfornode(arg).right.getcopy;
  364. { If the calculation is not performed at the end
  365. it is needed to adjust the starting value }
  366. if not docalcatend then
  367. begin
  368. if lnf_backward in tfornode(arg).loopflags then
  369. nt:=addn
  370. else
  371. nt:=subn;
  372. nn:=caddnode.create_internal(nt,nn,
  373. cordconstnode.create(1,nn.resultdef,false));
  374. end;
  375. addstatement(initcodestatements,cassignmentnode.create(ctemprefnode.create(tempnode),
  376. caddnode.create(muln,nn,
  377. taddnode(n).right.getcopy)
  378. )
  379. );
  380. { finally replace the node by a temp. ref }
  381. n:=ctemprefnode.create(tempnode);
  382. { ... and add a temp. release node }
  383. addstatement(deletecodestatements,ctempdeletenode.create(tempnode));
  384. end;
  385. { set types }
  386. do_firstpass(n);
  387. result:=fen_norecurse_false;
  388. end;
  389. end;
  390. vecn:
  391. begin
  392. { is the index the counter variable? }
  393. if not(is_special_array(tvecnode(n).left.resultdef)) and
  394. not(is_packed_array(tvecnode(n).left.resultdef)) and
  395. (tvecnode(n).right.isequal(tfornode(arg).left) or
  396. { fpc usually creates a type cast to access an array }
  397. ((tvecnode(n).right.nodetype=typeconvn) and
  398. ttypeconvnode(tvecnode(n).right).left.isequal(tfornode(arg).left)
  399. )
  400. ) and
  401. { plain read of the loop variable? }
  402. not(nf_write in tvecnode(n).right.flags) and
  403. not(nf_modify in tvecnode(n).right.flags) and
  404. { direct array access? }
  405. ((tvecnode(n).left.nodetype=loadn) or
  406. { ... or loop invariant expression? }
  407. is_loop_invariant(tfornode(arg),tvecnode(n).right))
  408. {$if not (defined(cpu16bitalu) or defined(cpu8bitalu))}
  409. { removing the multiplication is only worth the
  410. effort if it's not a simple shift }
  411. and not(ispowerof2(tcgvecnode(n).get_mul_size,dummy))
  412. {$endif}
  413. then
  414. begin
  415. changedforloop:=true;
  416. { did we use the same expression before already? }
  417. if not(findpreviousstrengthreduction) then
  418. begin
  419. {$ifdef DEBUG_OPTSTRENGTH}
  420. writeln('**********************************************************************************');
  421. writeln(parser_current_file,': Found expression for strength reduction (VEC): ');
  422. printnode(n);
  423. writeln('**********************************************************************************');
  424. {$endif DEBUG_OPTSTRENGTH}
  425. CheckCalcAtEnd;
  426. tempnode:=ctempcreatenode.create(voidpointertype,voidpointertype.size,tt_persistent,true);
  427. templist.Add(tempnode);
  428. inductionexprs.Add(n);
  429. CreateNodes;
  430. if lnf_backward in tfornode(arg).loopflags then
  431. addstatement(calccodestatements,
  432. cinlinenode.createintern(in_dec_x,false,
  433. ccallparanode.create(ctemprefnode.create(tempnode),ccallparanode.create(
  434. cordconstnode.create(tcgvecnode(n).get_mul_size,sizeuinttype,false),nil))))
  435. else
  436. addstatement(calccodestatements,
  437. cinlinenode.createintern(in_inc_x,false,
  438. ccallparanode.create(ctemprefnode.create(tempnode),ccallparanode.create(
  439. cordconstnode.create(tcgvecnode(n).get_mul_size,sizeuinttype,false),nil))));
  440. addstatement(initcodestatements,tempnode);
  441. startvaltemp:=maybereplacewithtemp(tfornode(arg).right,initcode,initcodestatements,tfornode(arg).right.resultdef.size,true);
  442. nn:=caddrnode.create(
  443. cvecnode.create(tvecnode(n).left.getcopy,tfornode(arg).right.getcopy)
  444. );
  445. { If the calculation is not performed at the end
  446. it is needed to adjust the starting value }
  447. if not docalcatend then
  448. begin
  449. if lnf_backward in tfornode(arg).loopflags then
  450. nt:=addn
  451. else
  452. nt:=subn;
  453. nn:=caddnode.create_internal(nt,
  454. ctypeconvnode.create_internal(nn,voidpointertype),
  455. cordconstnode.create(tcgvecnode(n).get_mul_size,sizeuinttype,false));
  456. end;
  457. addstatement(initcodestatements,cassignmentnode.create(ctemprefnode.create(tempnode),nn));
  458. { finally replace the node by a temp. ref }
  459. n:=ctypeconvnode.create_internal(cderefnode.create(ctemprefnode.create(tempnode)),n.resultdef);
  460. { ... and add a temp. release node }
  461. if startvaltemp<>nil then
  462. addstatement(deletecodestatements,ctempdeletenode.create(startvaltemp));
  463. addstatement(deletecodestatements,ctempdeletenode.create(tempnode));
  464. end;
  465. { Copy the nf_write,nf_modify flags to the new deref node of the temp.
  466. Othewise assignments to vector elements will be removed. }
  467. if nflags*[nf_write,nf_modify]<>[] then
  468. begin
  469. if (n.nodetype<>typeconvn) or (ttypeconvnode(n).left.nodetype<>derefn) then
  470. internalerror(2021091501);
  471. ttypeconvnode(n).left.flags:=ttypeconvnode(n).left.flags+nflags*[nf_write,nf_modify];
  472. end;
  473. { set types }
  474. do_firstpass(n);
  475. result:=fen_norecurse_false;
  476. end;
  477. end;
  478. else
  479. ;
  480. end;
  481. end;
  482. function OptimizeInductionVariablesSingleForLoop(node : tnode) : tnode;
  483. var
  484. loopcode : tblocknode;
  485. loopcodestatements,
  486. newcodestatements : tstatementnode;
  487. fornode : tfornode;
  488. begin
  489. result:=nil;
  490. if node.nodetype<>forn then
  491. exit;
  492. templist:=TFPList.Create;
  493. inductionexprs:=TFPList.Create;
  494. initcode:=nil;
  495. calccode:=nil;
  496. deletecode:=nil;
  497. initcodestatements:=nil;
  498. calccodestatements:=nil;
  499. deletecodestatements:=nil;
  500. docalcatend:=false;
  501. { find all expressions being candidates for strength reduction
  502. and replace them }
  503. foreachnodestatic(pm_postprocess,node,@dostrengthreductiontest,node);
  504. { clue everything together }
  505. if assigned(initcode) then
  506. begin
  507. do_firstpass(tnode(initcode));
  508. do_firstpass(tnode(calccode));
  509. do_firstpass(tnode(deletecode));
  510. { create a new for node, the old one will be released by the compiler }
  511. with tfornode(node) do
  512. begin
  513. fornode:=cfornode.create(left,right,t1,t2,lnf_backward in loopflags);
  514. left:=nil;
  515. right:=nil;
  516. t1:=nil;
  517. t2:=nil;
  518. end;
  519. node:=fornode;
  520. loopcode:=internalstatements(loopcodestatements);
  521. if not docalcatend then
  522. addstatement(loopcodestatements,calccode);
  523. addstatement(loopcodestatements,tfornode(node).t2);
  524. if docalcatend then
  525. addstatement(loopcodestatements,calccode);
  526. tfornode(node).t2:=loopcode;
  527. do_firstpass(node);
  528. result:=internalstatements(newcodestatements);
  529. addstatement(newcodestatements,initcode);
  530. initcode:=nil;
  531. addstatement(newcodestatements,node);
  532. addstatement(newcodestatements,deletecode);
  533. end;
  534. templist.Free;
  535. inductionexprs.Free;
  536. end;
  537. function OptimizeInductionVariables_iterforloops(var n: tnode; arg: pointer): foreachnoderesult;
  538. var
  539. hp : tnode;
  540. begin
  541. Result:=fen_false;
  542. if n.nodetype=forn then
  543. begin
  544. { do we have DFA available? }
  545. if pi_dfaavailable in current_procinfo.flags then
  546. begin
  547. CalcDefSum(tfornode(n).t2);
  548. end;
  549. containsnestedforloop:=false;
  550. hp:=OptimizeInductionVariablesSingleForLoop(n);
  551. if assigned(hp) then
  552. begin
  553. n.Free;
  554. n:=hp;
  555. end;
  556. { can we avoid further searching? }
  557. if not(containsnestedforloop) then
  558. Result:=fen_norecurse_false;
  559. end;
  560. end;
  561. function OptimizeInductionVariables(node : tnode) : boolean;
  562. begin
  563. Result:=false;
  564. if not(pi_dfaavailable in current_procinfo.flags) then
  565. exit;
  566. changedforloop:=false;
  567. foreachnodestatic(pm_postprocess,node,@OptimizeInductionVariables_iterforloops,nil);
  568. Result:=changedforloop;
  569. end;
  570. function OptimizeForLoop_iterforloops(var n: tnode; arg: pointer): foreachnoderesult;
  571. begin
  572. Result:=fen_false;
  573. if (n.nodetype=forn) and
  574. not(lnf_backward in tfornode(n).loopflags) and
  575. (lnf_dont_mind_loopvar_on_exit in tfornode(n).loopflags) and
  576. is_constintnode(tfornode(n).right) and
  577. { this is not strictly necessary, but we do it for now }
  578. is_constnode(tfornode(n).t1) and
  579. (([cs_check_overflow,cs_check_range]*n.localswitches)=[]) and
  580. (([cs_check_overflow,cs_check_range]*tfornode(n).left.localswitches)=[]) and
  581. ((tfornode(n).left.nodetype=loadn) and (tloadnode(tfornode(n).left).symtableentry is tabstractvarsym) and
  582. not(tabstractvarsym(tloadnode(tfornode(n).left).symtableentry).addr_taken) and
  583. not(tabstractvarsym(tloadnode(tfornode(n).left).symtableentry).different_scope)) then
  584. begin
  585. { do we have DFA available? }
  586. if pi_dfaavailable in current_procinfo.flags then
  587. begin
  588. CalcUseSum(tfornode(n).t2);
  589. CalcDefSum(tfornode(n).t2);
  590. end
  591. else
  592. Internalerror(2017122801);
  593. if not(assigned(tfornode(n).left.optinfo)) then
  594. exit;
  595. if not(DFASetIn(tfornode(n).t2.optinfo^.usesum,tfornode(n).left.optinfo^.index)) and
  596. not(DFASetIn(tfornode(n).t2.optinfo^.defsum,tfornode(n).left.optinfo^.index)) then
  597. begin
  598. { convert the loop from i:=a to b into i:=b-a+1 to 1 as this simplifies the
  599. abort condition }
  600. {$ifdef DEBUG_OPTFORLOOP}
  601. writeln('**********************************************************************************');
  602. writeln('Found loop for reverting: ');
  603. printnode(n);
  604. writeln('**********************************************************************************');
  605. {$endif DEBUG_OPTFORLOOP}
  606. include(tfornode(n).loopflags,lnf_backward);
  607. tfornode(n).right:=caddnode.create_internal(addn,caddnode.create_internal(subn,tfornode(n).t1,tfornode(n).right),
  608. cordconstnode.create(1,tfornode(n).left.resultdef,false));
  609. tfornode(n).t1:=cordconstnode.create(1,tfornode(n).left.resultdef,false);
  610. include(tfornode(n).loopflags,lnf_counter_not_used);
  611. exclude(n.flags,nf_pass1_done);
  612. do_firstpass(n);
  613. {$ifdef DEBUG_OPTFORLOOP}
  614. writeln('Loop reverted: ');
  615. printnode(n);
  616. writeln('**********************************************************************************');
  617. {$endif DEBUG_OPTFORLOOP}
  618. changedforloop:=true;
  619. end;
  620. end;
  621. end;
  622. function OptimizeForLoop(var node : tnode) : boolean;
  623. begin
  624. Result:=false;
  625. if not(pi_dfaavailable in current_procinfo.flags) then
  626. exit;
  627. changedforloop:=false;
  628. foreachnodestatic(pm_postprocess,node,@OptimizeForLoop_iterforloops,nil);
  629. Result:=changedforloop;
  630. end;
  631. end.