2
0

optloop.pas 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715
  1. {
  2. Loop optimization
  3. Copyright (c) 2005 by Florian Klaempfl
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit optloop;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_OPTSTRENGTH}
  20. { $define DEBUG_OPTFORLOOP}
  21. interface
  22. uses
  23. node;
  24. function unroll_loop(node : tnode) : tnode;
  25. function OptimizeInductionVariables(node : tnode) : boolean;
  26. function OptimizeForLoop(var node : tnode) : boolean;
  27. implementation
  28. uses
  29. cutils,cclasses,compinnr,
  30. globtype,globals,constexp,
  31. verbose,
  32. symdef,symsym,
  33. defutil,
  34. cpuinfo,
  35. nutils,
  36. nadd,nbas,nflw,ncon,ninl,ncal,nld,nmem,ncnv,
  37. ncgmem,
  38. pass_1,
  39. optbase,optutils,
  40. procinfo;
  41. function number_unrolls(node : tnode) : cardinal;
  42. var
  43. nodeCount : cardinal;
  44. begin
  45. { calculate how often a loop shall be unrolled.
  46. The term (60*ord(node_count_weighted(node)<15)) is used to get small loops unrolled more often as
  47. the counter management takes more time in this case. }
  48. {$ifdef i386}
  49. { multiply by 2 for CPUs with a long pipeline }
  50. if current_settings.optimizecputype in [cpu_Pentium4] then
  51. begin
  52. { See the common branch below for an explanation. }
  53. nodeCount:=node_count_weighted(node,41);
  54. number_unrolls:=round((60+(60*ord(nodeCount<15)))/max(nodeCount,1))
  55. end
  56. else
  57. {$endif i386}
  58. begin
  59. { If nodeCount >= 15, numerator will be 30,
  60. and the largest number (starting from 15) that makes sense as its denominator
  61. (the smallest number that gives number_unrolls = 1) is 21 = trunc(30/1.5+1),
  62. so there's no point in counting for more than 21 nodes.
  63. "Long pipeline" variant above is the same with numerator=60 and max denominator = 41. }
  64. nodeCount:=node_count_weighted(node,21);
  65. number_unrolls:=round((30+(60*ord(nodeCount<15)))/max(nodeCount,1));
  66. end;
  67. if number_unrolls=0 then
  68. number_unrolls:=1;
  69. end;
  70. type
  71. treplaceinfo = record
  72. node : tnode;
  73. value : Tconstexprint;
  74. end;
  75. preplaceinfo = ^treplaceinfo;
  76. function checkcontrollflowstatements(var n:tnode; arg: pointer): foreachnoderesult;
  77. begin
  78. if n.nodetype in [breakn,continuen,goton,labeln,exitn,raisen] then
  79. result:=fen_norecurse_true
  80. else
  81. result:=fen_false;
  82. end;
  83. function replaceloadnodes(var n: tnode; arg: pointer): foreachnoderesult;
  84. begin
  85. if n.isequal(preplaceinfo(arg)^.node) then
  86. begin
  87. if n.flags*[nf_modify,nf_write,nf_address_taken]<>[] then
  88. internalerror(2012090402);
  89. n.free;
  90. n:=cordconstnode.create(preplaceinfo(arg)^.value,preplaceinfo(arg)^.node.resultdef,false);
  91. do_firstpass(n);
  92. end;
  93. result:=fen_false;
  94. end;
  95. function unroll_loop(node : tnode) : tnode;
  96. var
  97. unrolls,i : cardinal;
  98. counts : qword;
  99. unrollstatement,newforstatement : tstatementnode;
  100. unrollblock : tblocknode;
  101. getridoffor : boolean;
  102. replaceinfo : treplaceinfo;
  103. hascontrollflowstatements : boolean;
  104. begin
  105. result:=nil;
  106. if (cs_opt_size in current_settings.optimizerswitches) then
  107. exit;
  108. if ErrorCount<>0 then
  109. exit;
  110. if not(node.nodetype in [forn]) then
  111. exit;
  112. unrolls:=number_unrolls(tfornode(node).t2);
  113. if (unrolls>1) and
  114. ((tfornode(node).left.nodetype<>loadn) or
  115. { the address of the counter variable might be taken if it is passed by constref to a
  116. subroutine, so really check if it is not taken }
  117. ((tfornode(node).left.nodetype=loadn) and (tloadnode(tfornode(node).left).symtableentry is tabstractvarsym) and
  118. not(tabstractvarsym(tloadnode(tfornode(node).left).symtableentry).addr_taken) and
  119. not(tabstractvarsym(tloadnode(tfornode(node).left).symtableentry).different_scope))
  120. ) then
  121. begin
  122. { number of executions known? }
  123. if (tfornode(node).right.nodetype=ordconstn) and (tfornode(node).t1.nodetype=ordconstn) then
  124. begin
  125. if lnf_backward in tfornode(node).loopflags then
  126. counts:=tordconstnode(tfornode(node).right).value-tordconstnode(tfornode(node).t1).value+1
  127. else
  128. counts:=tordconstnode(tfornode(node).t1).value-tordconstnode(tfornode(node).right).value+1;
  129. hascontrollflowstatements:=foreachnodestatic(tfornode(node).t2,@checkcontrollflowstatements,nil);
  130. { don't unroll more than we need,
  131. multiply unroll by two here because we can get rid
  132. of the counter variable completely and replace it by a constant
  133. if unrolls=counts }
  134. if unrolls*2>=counts then
  135. unrolls:=counts;
  136. { create block statement }
  137. unrollblock:=internalstatements(unrollstatement);
  138. { can we get rid completly of the for ? }
  139. getridoffor:=(unrolls=counts) and not(hascontrollflowstatements) and
  140. { TP/Macpas allows assignments to the for-variables, so we cannot get rid of the for }
  141. ([m_tp7,m_mac]*current_settings.modeswitches=[]);
  142. if getridoffor then
  143. begin
  144. replaceinfo.node:=tfornode(node).left;
  145. replaceinfo.value:=tordconstnode(tfornode(node).right).value;
  146. end
  147. else
  148. { we consider currently unrolling not beneficial, if we cannot get rid of the for completely, this
  149. might change if a more sophisticated heuristics is used (FK) }
  150. exit;
  151. { let's unroll (and rock of course) }
  152. for i:=1 to unrolls do
  153. begin
  154. { create and insert copy of the statement block }
  155. addstatement(unrollstatement,tfornode(node).t2.getcopy);
  156. { set and insert entry label? }
  157. if (counts mod unrolls<>0) and
  158. ((counts mod unrolls)=unrolls-i) then
  159. begin
  160. tfornode(node).entrylabel:=clabelnode.create(cnothingnode.create,clabelsym.create('$optunrol'));
  161. addstatement(unrollstatement,tfornode(node).entrylabel);
  162. end;
  163. if getridoffor then
  164. begin
  165. foreachnodestatic(tnode(unrollstatement),@replaceloadnodes,@replaceinfo);
  166. if lnf_backward in tfornode(node).loopflags then
  167. replaceinfo.value:=replaceinfo.value-1
  168. else
  169. replaceinfo.value:=replaceinfo.value+1;
  170. end
  171. else
  172. begin
  173. { for itself increases at the last iteration }
  174. if i<unrolls then
  175. begin
  176. { insert incr/decrementation of counter var }
  177. if lnf_backward in tfornode(node).loopflags then
  178. addstatement(unrollstatement,
  179. geninlinenode(in_dec_x,false,ccallparanode.create(tfornode(node).left.getcopy,nil)))
  180. else
  181. addstatement(unrollstatement,
  182. geninlinenode(in_inc_x,false,ccallparanode.create(tfornode(node).left.getcopy,nil)));
  183. end;
  184. end;
  185. end;
  186. { can we get rid of the for statement? }
  187. if getridoffor then
  188. begin
  189. { create block statement }
  190. result:=internalstatements(newforstatement);
  191. addstatement(newforstatement,unrollblock);
  192. doinlinesimplify(result);
  193. end;
  194. end
  195. else
  196. begin
  197. { unrolling is a little bit more tricky if we don't know the
  198. loop count at compile time, but the solution is to use a jump table
  199. which is indexed by "loop count mod unrolls" at run time and which
  200. jumps then at the appropriate place inside the loop. Because
  201. a module division is expensive, we can use only unroll counts dividable
  202. by 2 }
  203. case unrolls of
  204. 1..2:
  205. ;
  206. 3:
  207. unrolls:=2;
  208. 4..7:
  209. unrolls:=4;
  210. { unrolls>4 already make no sense imo, but who knows (FK) }
  211. 8..15:
  212. unrolls:=8;
  213. 16..31:
  214. unrolls:=16;
  215. 32..63:
  216. unrolls:=32;
  217. 64..$7fff:
  218. unrolls:=64;
  219. else
  220. exit;
  221. end;
  222. { we don't handle this yet }
  223. exit;
  224. end;
  225. if not(assigned(result)) then
  226. begin
  227. tfornode(node).t2.free;
  228. tfornode(node).t2:=unrollblock;
  229. end;
  230. end;
  231. end;
  232. function checkcontinue(var n:tnode; arg: pointer): foreachnoderesult;
  233. begin
  234. if n.nodetype=continuen then
  235. result:=fen_norecurse_true
  236. else
  237. result:=fen_false;
  238. end;
  239. function is_loop_invariant(loop : tnode;expr : tnode) : boolean;
  240. begin
  241. result:=is_constnode(expr);
  242. case expr.nodetype of
  243. loadn:
  244. begin
  245. if (pi_dfaavailable in current_procinfo.flags) and
  246. assigned(loop.optinfo) and
  247. assigned(expr.optinfo) and
  248. not(expr.isequal(tfornode(loop).left)) then
  249. { no aliasing? }
  250. result:=(([nf_write,nf_modify]*expr.flags)=[]) and not(tabstractvarsym(tloadnode(expr).symtableentry).addr_taken) and
  251. { no definition in the loop? }
  252. not(DFASetIn(tfornode(loop).t2.optinfo^.defsum,expr.optinfo^.index));
  253. end;
  254. vecn:
  255. begin
  256. result:=((tvecnode(expr).left.nodetype=loadn) or is_loop_invariant(loop,tvecnode(expr).left)) and
  257. is_loop_invariant(loop,tvecnode(expr).right);
  258. end;
  259. typeconvn:
  260. result:=is_loop_invariant(loop,ttypeconvnode(expr).left);
  261. addn,subn:
  262. result:=is_loop_invariant(loop,taddnode(expr).left) and is_loop_invariant(loop,taddnode(expr).right);
  263. else
  264. ;
  265. end;
  266. end;
  267. type
  268. toptimizeinductionvariablescontext = object
  269. currforloop : tfornode;
  270. initcode,
  271. calccode,
  272. deletecode : tblocknode;
  273. initcodestatements,
  274. calccodestatements,
  275. deletecodestatements: tstatementnode;
  276. ninductions : sizeint;
  277. inductions : array of record
  278. temp : ttempcreatenode;
  279. expr : tnode;
  280. end;
  281. changedforloop,
  282. containsnestedforloop,
  283. docalcatend : boolean;
  284. function findpreviousstrengthreduction(var n: tnode): boolean;
  285. procedure addinduction(temp : ttempcreatenode; expr : tnode);
  286. function dostrengthreductiontest(var n: tnode): foreachnoderesult;
  287. procedure optimizeinductionvariablessingleforloop(var n: tnode);
  288. end;
  289. function toptimizeinductionvariablescontext.findpreviousstrengthreduction(var n: tnode): boolean;
  290. var
  291. i : longint;
  292. hp : tnode;
  293. begin
  294. result:=false;
  295. for i:=0 to ninductions-1 do
  296. begin
  297. { do we already maintain one expression? }
  298. if inductions[i].expr.isequal(n) then
  299. begin
  300. case n.nodetype of
  301. muln:
  302. hp:=ctemprefnode.create(inductions[i].temp);
  303. vecn:
  304. hp:=ctypeconvnode.create_internal(cderefnode.create(ctemprefnode.create(inductions[i].temp)),n.resultdef);
  305. else
  306. internalerror(200809211);
  307. end;
  308. n.free;
  309. n:=hp;
  310. exit(true);
  311. end;
  312. end;
  313. end;
  314. procedure toptimizeinductionvariablescontext.addinduction(temp : ttempcreatenode; expr : tnode);
  315. begin
  316. if not assigned(initcode) then
  317. begin
  318. initcode:=internalstatements(initcodestatements);
  319. calccode:=internalstatements(calccodestatements);
  320. deletecode:=internalstatements(deletecodestatements);
  321. docalcatend:=not(assigned(currforloop.entrylabel)) and
  322. not(foreachnodestatic(currforloop.t2,@checkcontinue,nil));
  323. end;
  324. if ninductions>=length(inductions) then
  325. SetLength(inductions,4+ninductions+ninductions shr 1);
  326. inductions[ninductions].temp:=temp;
  327. inductions[ninductions].expr:=expr;
  328. inc(ninductions);
  329. end;
  330. { checks if the strength of n can be reduced, currforloop is the tforloop being considered }
  331. function toptimizeinductionvariablescontext.dostrengthreductiontest(var n: tnode): foreachnoderesult;
  332. var
  333. tempnode,startvaltemp : ttempcreatenode;
  334. dummy : longint;
  335. nn : tnode;
  336. nt : tnodetype;
  337. nflags : tnodeflags;
  338. begin
  339. result:=fen_false;
  340. nflags:=n.flags;
  341. case n.nodetype of
  342. forn:
  343. { inform for loop search routine, that it needs to search more deeply }
  344. containsnestedforloop:=true;
  345. muln:
  346. begin
  347. if (taddnode(n).right.nodetype=loadn) and
  348. taddnode(n).right.isequal(currforloop.left) and
  349. { plain read of the loop variable? }
  350. not(nf_write in taddnode(n).right.flags) and
  351. not(nf_modify in taddnode(n).right.flags) and
  352. is_loop_invariant(currforloop,taddnode(n).left) then
  353. taddnode(n).swapleftright;
  354. if (taddnode(n).left.nodetype=loadn) and
  355. taddnode(n).left.isequal(currforloop.left) and
  356. { plain read of the loop variable? }
  357. not(nf_write in taddnode(n).left.flags) and
  358. not(nf_modify in taddnode(n).left.flags) and
  359. is_loop_invariant(currforloop,taddnode(n).right) then
  360. begin
  361. changedforloop:=true;
  362. { did we use the same expression before already? }
  363. if not(findpreviousstrengthreduction(n)) then
  364. begin
  365. {$ifdef DEBUG_OPTSTRENGTH}
  366. writeln('**********************************************************************************');
  367. writeln(parser_current_file, ': Found expression for strength reduction (MUL): ');
  368. printnode(n);
  369. writeln('**********************************************************************************');
  370. {$endif DEBUG_OPTSTRENGTH}
  371. tempnode:=ctempcreatenode.create(n.resultdef,n.resultdef.size,tt_persistent,
  372. tstoreddef(n.resultdef).is_intregable or tstoreddef(n.resultdef).is_fpuregable);
  373. addinduction(tempnode,n);
  374. if lnf_backward in currforloop.loopflags then
  375. addstatement(calccodestatements,
  376. geninlinenode(in_dec_x,false,
  377. ccallparanode.create(ctemprefnode.create(tempnode),ccallparanode.create(taddnode(n).right.getcopy,nil))))
  378. else
  379. addstatement(calccodestatements,
  380. geninlinenode(in_inc_x,false,
  381. ccallparanode.create(ctemprefnode.create(tempnode),ccallparanode.create(taddnode(n).right.getcopy,nil))));
  382. addstatement(initcodestatements,tempnode);
  383. nn:=currforloop.right.getcopy;
  384. { If the calculation is not performed at the end
  385. it is needed to adjust the starting value }
  386. if not docalcatend then
  387. begin
  388. if lnf_backward in currforloop.loopflags then
  389. nt:=addn
  390. else
  391. nt:=subn;
  392. nn:=caddnode.create_internal(nt,nn,
  393. cordconstnode.create(1,nn.resultdef,false));
  394. end;
  395. addstatement(initcodestatements,cassignmentnode.create(ctemprefnode.create(tempnode),
  396. caddnode.create(muln,nn,
  397. taddnode(n).right.getcopy)
  398. )
  399. );
  400. { finally replace the node by a temp. ref }
  401. n:=ctemprefnode.create(tempnode);
  402. { ... and add a temp. release node }
  403. addstatement(deletecodestatements,ctempdeletenode.create(tempnode));
  404. end;
  405. { set types }
  406. do_firstpass(n);
  407. result:=fen_norecurse_false;
  408. end;
  409. end;
  410. vecn:
  411. begin
  412. { is the index the counter variable? }
  413. if not(is_special_array(tvecnode(n).left.resultdef)) and
  414. not(is_packed_array(tvecnode(n).left.resultdef)) and
  415. (tvecnode(n).right.isequal(currforloop.left) or
  416. { fpc usually creates a type cast to access an array }
  417. ((tvecnode(n).right.nodetype=typeconvn) and
  418. ttypeconvnode(tvecnode(n).right).left.isequal(currforloop.left)
  419. )
  420. ) and
  421. { plain read of the loop variable? }
  422. not(nf_write in tvecnode(n).right.flags) and
  423. not(nf_modify in tvecnode(n).right.flags) and
  424. { direct array access? }
  425. ((tvecnode(n).left.nodetype=loadn) or
  426. { ... or loop invariant expression? }
  427. is_loop_invariant(currforloop,tvecnode(n).right))
  428. {$if not (defined(cpu16bitalu) or defined(cpu8bitalu))}
  429. { removing the multiplication is only worth the
  430. effort if it's not a simple shift }
  431. and not(ispowerof2(tcgvecnode(n).get_mul_size,dummy))
  432. {$endif}
  433. then
  434. begin
  435. changedforloop:=true;
  436. { did we use the same expression before already? }
  437. if not(findpreviousstrengthreduction(n)) then
  438. begin
  439. {$ifdef DEBUG_OPTSTRENGTH}
  440. writeln('**********************************************************************************');
  441. writeln(parser_current_file,': Found expression for strength reduction (VEC): ');
  442. printnode(n);
  443. writeln('**********************************************************************************');
  444. {$endif DEBUG_OPTSTRENGTH}
  445. tempnode:=ctempcreatenode.create(voidpointertype,voidpointertype.size,tt_persistent,true);
  446. addinduction(tempnode,n);
  447. if lnf_backward in currforloop.loopflags then
  448. addstatement(calccodestatements,
  449. cinlinenode.createintern(in_dec_x,false,
  450. ccallparanode.create(ctemprefnode.create(tempnode),ccallparanode.create(
  451. cordconstnode.create(tcgvecnode(n).get_mul_size,sizeuinttype,false),nil))))
  452. else
  453. addstatement(calccodestatements,
  454. cinlinenode.createintern(in_inc_x,false,
  455. ccallparanode.create(ctemprefnode.create(tempnode),ccallparanode.create(
  456. cordconstnode.create(tcgvecnode(n).get_mul_size,sizeuinttype,false),nil))));
  457. addstatement(initcodestatements,tempnode);
  458. startvaltemp:=maybereplacewithtemp(currforloop.right,initcode,initcodestatements,currforloop.right.resultdef.size,true);
  459. nn:=caddrnode.create(
  460. cvecnode.create(tvecnode(n).left.getcopy,ctypeconvnode.create_internal(currforloop.right.getcopy,tvecnode(n).right.resultdef))
  461. );
  462. { If the calculation is not performed at the end
  463. it is needed to adjust the starting value }
  464. if not docalcatend then
  465. begin
  466. if lnf_backward in currforloop.loopflags then
  467. nt:=addn
  468. else
  469. nt:=subn;
  470. nn:=caddnode.create_internal(nt,
  471. ctypeconvnode.create_internal(nn,voidpointertype),
  472. cordconstnode.create(tcgvecnode(n).get_mul_size,sizeuinttype,false));
  473. end;
  474. addstatement(initcodestatements,cassignmentnode.create(ctemprefnode.create(tempnode),nn));
  475. { finally replace the node by a temp. ref }
  476. n:=ctypeconvnode.create_internal(cderefnode.create(ctemprefnode.create(tempnode)),n.resultdef);
  477. { ... and add a temp. release node }
  478. if startvaltemp<>nil then
  479. addstatement(deletecodestatements,ctempdeletenode.create(startvaltemp));
  480. addstatement(deletecodestatements,ctempdeletenode.create(tempnode));
  481. end;
  482. { Copy the nf_write,nf_modify flags to the new deref node of the temp.
  483. Othewise assignments to vector elements will be removed. }
  484. if nflags*[nf_write,nf_modify]<>[] then
  485. begin
  486. if (n.nodetype<>typeconvn) or (ttypeconvnode(n).left.nodetype<>derefn) then
  487. internalerror(2021091501);
  488. ttypeconvnode(n).left.flags:=ttypeconvnode(n).left.flags+nflags*[nf_write,nf_modify];
  489. end;
  490. { set types }
  491. do_firstpass(n);
  492. result:=fen_norecurse_false;
  493. end;
  494. end;
  495. else
  496. ;
  497. end;
  498. end;
  499. function dostrengthreductiontest_callback(var n: tnode; arg: pointer): foreachnoderesult;
  500. begin
  501. result:=toptimizeinductionvariablescontext(arg^).dostrengthreductiontest(n);
  502. end;
  503. procedure toptimizeinductionvariablescontext.optimizeinductionvariablessingleforloop(var n: tnode);
  504. var
  505. loopcode : tblocknode;
  506. loopcodestatements,
  507. newcodestatements : tstatementnode;
  508. newfor,oldn : tnode;
  509. begin
  510. { do we have DFA available? }
  511. if pi_dfaavailable in current_procinfo.flags then
  512. begin
  513. CalcDefSum(tfornode(n).t2);
  514. end;
  515. currforloop:=tfornode(n);
  516. initcode:=nil;
  517. calccode:=nil;
  518. deletecode:=nil;
  519. initcodestatements:=nil;
  520. calccodestatements:=nil;
  521. deletecodestatements:=nil;
  522. ninductions:=0;
  523. docalcatend:=false;
  524. { find all expressions being candidates for strength reduction
  525. and replace them }
  526. foreachnodestatic(pm_postprocess,n,@dostrengthreductiontest_callback,@self);
  527. { clue everything together }
  528. if assigned(initcode) then
  529. begin
  530. do_firstpass(tnode(initcode));
  531. do_firstpass(tnode(calccode));
  532. do_firstpass(tnode(deletecode));
  533. { create a new for node, the old one will be released by the compiler }
  534. oldn:=n;
  535. newfor:=cfornode.create(tfornode(oldn).left,tfornode(oldn).right,tfornode(oldn).t1,tfornode(oldn).t2,lnf_backward in tfornode(oldn).loopflags);
  536. tfornode(oldn).left:=nil;
  537. tfornode(oldn).right:=nil;
  538. tfornode(oldn).t1:=nil;
  539. tfornode(oldn).t2:=nil;
  540. loopcode:=internalstatements(loopcodestatements);
  541. if not docalcatend then
  542. addstatement(loopcodestatements,calccode);
  543. addstatement(loopcodestatements,tfornode(newfor).t2);
  544. if docalcatend then
  545. addstatement(loopcodestatements,calccode);
  546. tfornode(newfor).t2:=loopcode;
  547. do_firstpass(newfor);
  548. n:=internalstatements(newcodestatements);
  549. oldn.Free;
  550. addstatement(newcodestatements,initcode);
  551. addstatement(newcodestatements,newfor);
  552. addstatement(newcodestatements,deletecode);
  553. end;
  554. end;
  555. function optimizeinductionvariablessingleforloop_static(var n: tnode; arg: pointer): foreachnoderesult;
  556. var
  557. ctx : ^toptimizeinductionvariablescontext absolute arg;
  558. begin
  559. Result:=fen_false;
  560. if n.nodetype<>forn then
  561. exit;
  562. ctx^.containsnestedforloop:=false;
  563. ctx^.optimizeinductionvariablessingleforloop(n);
  564. { can we avoid further searching? }
  565. if not(ctx^.containsnestedforloop) then
  566. Result:=fen_norecurse_false;
  567. end;
  568. function OptimizeInductionVariables(node : tnode) : boolean;
  569. var
  570. ctx : toptimizeinductionvariablescontext;
  571. begin
  572. Result:=false;
  573. if not(pi_dfaavailable in current_procinfo.flags) then
  574. exit;
  575. ctx.changedforloop:=false;
  576. foreachnodestatic(pm_postprocess,node,@optimizeinductionvariablessingleforloop_static,@ctx);
  577. Result:=ctx.changedforloop;
  578. end;
  579. type
  580. toptimizeforloopcontext = object
  581. changedforloop : boolean;
  582. end;
  583. function OptimizeForLoop_iterforloops(var n: tnode; arg: pointer): foreachnoderesult;
  584. begin
  585. Result:=fen_false;
  586. if (n.nodetype=forn) and
  587. not(lnf_backward in tfornode(n).loopflags) and
  588. (lnf_dont_mind_loopvar_on_exit in tfornode(n).loopflags) and
  589. is_constintnode(tfornode(n).right) and
  590. (([cs_check_overflow,cs_check_range]*n.localswitches)=[]) and
  591. (([cs_check_overflow,cs_check_range]*tfornode(n).left.localswitches)=[]) and
  592. ((tfornode(n).left.nodetype=loadn) and (tloadnode(tfornode(n).left).symtableentry is tabstractvarsym) and
  593. not(tabstractvarsym(tloadnode(tfornode(n).left).symtableentry).addr_taken) and
  594. not(tabstractvarsym(tloadnode(tfornode(n).left).symtableentry).different_scope)) then
  595. begin
  596. { do we have DFA available? }
  597. if pi_dfaavailable in current_procinfo.flags then
  598. begin
  599. CalcUseSum(tfornode(n).t2);
  600. CalcDefSum(tfornode(n).t2);
  601. end
  602. else
  603. Internalerror(2017122801);
  604. if not(assigned(tfornode(n).left.optinfo)) then
  605. exit;
  606. if not(DFASetIn(tfornode(n).t2.optinfo^.usesum,tfornode(n).left.optinfo^.index)) and
  607. not(DFASetIn(tfornode(n).t2.optinfo^.defsum,tfornode(n).left.optinfo^.index)) then
  608. begin
  609. { convert the loop from i:=a to b into i:=b-a+1 to 1 as this simplifies the
  610. abort condition }
  611. {$ifdef DEBUG_OPTFORLOOP}
  612. writeln('**********************************************************************************');
  613. writeln('Found loop for reverting: ');
  614. printnode(n);
  615. writeln('**********************************************************************************');
  616. {$endif DEBUG_OPTFORLOOP}
  617. include(tfornode(n).loopflags,lnf_backward);
  618. tfornode(n).right:=ctypeconvnode.create_internal(
  619. caddnode.create_internal(addn,caddnode.create_internal(subn,
  620. tfornode(n).t1,tfornode(n).right),
  621. cordconstnode.create(1,tfornode(n).left.resultdef,false)),
  622. tfornode(n).left.resultdef);
  623. tfornode(n).t1:=cordconstnode.create(1,tfornode(n).left.resultdef,false);
  624. include(tfornode(n).loopflags,lnf_counter_not_used);
  625. exclude(n.transientflags,tnf_pass1_done);
  626. do_firstpass(n);
  627. {$ifdef DEBUG_OPTFORLOOP}
  628. writeln('Loop reverted: ');
  629. printnode(n);
  630. writeln('**********************************************************************************');
  631. {$endif DEBUG_OPTFORLOOP}
  632. toptimizeforloopcontext(arg^).changedforloop:=true;
  633. end;
  634. end;
  635. end;
  636. function OptimizeForLoop(var node : tnode) : boolean;
  637. var
  638. ctx : toptimizeforloopcontext;
  639. begin
  640. ctx.changedforloop:=false;
  641. if pi_dfaavailable in current_procinfo.flags then
  642. foreachnodestatic(pm_postprocess,node,@OptimizeForLoop_iterforloops,@ctx);
  643. Result:=ctx.changedforloop;
  644. end;
  645. end.