rropt386.pas 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. {
  2. $Id$
  3. Copyright (c) 1998-2000 by Jonas Maebe, member of the Free Pascal
  4. development team
  5. This unit contains register renaming functionality
  6. This program is free software; you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation; either version 2 of the License, or
  9. (at your option) any later version.
  10. This program is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with this program; if not, write to the Free Software
  16. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. ****************************************************************************
  18. }
  19. Unit rrOpt386;
  20. {$i defines.inc}
  21. Interface
  22. Uses aasm;
  23. procedure doRenaming(asml: TAAsmoutput; first, last: Tai);
  24. Implementation
  25. Uses
  26. {$ifdef replaceregdebug}cutils,{$endif}
  27. verbose,globals,cpuinfo,cpubase,cpuasm,daopt386,csopt386,cginfo,rgobj;
  28. function canBeFirstSwitch(p: Taicpu; reg: tregister): boolean;
  29. { checks whether an operation on reg can be switched to another reg without an }
  30. { additional mov, e.g. "addl $4,%reg1" can be changed to "leal 4(%reg1),%reg2" }
  31. begin
  32. canBeFirstSwitch := false;
  33. case p.opcode of
  34. A_MOV,A_MOVZX,A_MOVSX,A_LEA:
  35. canBeFirstSwitch :=
  36. (p.oper[1].typ = top_reg) and
  37. (reg32(p.oper[1].reg) = reg);
  38. A_IMUL:
  39. canBeFirstSwitch :=
  40. (p.ops >= 2) and
  41. (reg32(p.oper[p.ops-1].reg) = reg) and
  42. (p.oper[0].typ <> top_ref) and
  43. (not pTaiprop(p.optinfo)^.FlagsUsed);
  44. A_INC,A_DEC,A_SUB,A_ADD:
  45. canBeFirstSwitch :=
  46. (p.oper[1].typ = top_reg) and
  47. (p.opsize = S_L) and
  48. (reg32(p.oper[1].reg) = reg) and
  49. (p.oper[0].typ <> top_ref) and
  50. ((p.opcode <> A_SUB) or
  51. (p.oper[0].typ = top_const)) and
  52. (not pTaiprop(p.optinfo)^.FlagsUsed);
  53. A_SHL:
  54. canBeFirstSwitch :=
  55. (p.opsize = S_L) and
  56. (p.oper[1].typ = top_reg) and
  57. (p.oper[1].reg = reg) and
  58. (p.oper[0].typ = top_const) and
  59. (p.oper[0].val in [1,2,3]) and
  60. (not pTaiprop(p.optinfo)^.FlagsUsed);
  61. end;
  62. end;
  63. procedure switchReg(var reg: tregister; reg1, reg2: tregister);
  64. begin
  65. if reg = reg1 then
  66. reg := reg2
  67. else if reg = reg2 then
  68. reg := reg1
  69. else if (reg in regset8bit) then
  70. begin
  71. if (reg = rg.makeregsize(reg1,OS_8)) then
  72. reg := rg.makeregsize(reg2,OS_8)
  73. else if reg = rg.makeregsize(reg2,OS_8) then
  74. reg := rg.makeregsize(reg1,OS_8);
  75. end
  76. else if (reg in regset16bit) then
  77. begin
  78. if reg = rg.makeregsize(reg1,OS_16) then
  79. reg := rg.makeregsize(reg2,OS_16)
  80. else if reg = rg.makeregsize(reg2,OS_16) then
  81. reg := rg.makeregsize(reg1,OS_16);
  82. end;
  83. end;
  84. procedure switchOp(var op: toper; reg1, reg2: tregister);
  85. begin
  86. case op.typ of
  87. top_reg:
  88. switchReg(op.reg,reg1,reg2);
  89. top_ref:
  90. begin
  91. switchReg(op.ref^.base,reg1,reg2);
  92. switchReg(op.ref^.index,reg1,reg2);
  93. end;
  94. end;
  95. end;
  96. procedure doSwitchReg(hp: Taicpu; reg1,reg2: tregister);
  97. var
  98. opCount: longint;
  99. begin
  100. for opCount := 0 to hp.ops-1 do
  101. switchOp(hp.oper[opCount],reg1,reg2);
  102. end;
  103. procedure doFirstSwitch(p: Taicpu; reg1, reg2: tregister);
  104. var
  105. tmpRef: treference;
  106. begin
  107. case p.opcode of
  108. A_MOV,A_MOVZX,A_MOVSX,A_LEA:
  109. begin
  110. changeOp(p.oper[1],reg1,reg2);
  111. changeOp(p.oper[0],reg2,reg1);
  112. end;
  113. A_IMUL:
  114. begin
  115. p.ops := 3;
  116. p.loadreg(2,p.oper[1].reg);
  117. changeOp(p.oper[2],reg1,reg2);
  118. end;
  119. A_INC,A_DEC:
  120. begin
  121. reference_reset(tmpref);
  122. tmpref.base := reg1;
  123. case p.opcode of
  124. A_INC:
  125. tmpref.offset := 1;
  126. A_DEC:
  127. tmpref.offset := -1;
  128. end;
  129. p.ops := 2;
  130. p.opcode := A_LEA;
  131. p.loadreg(1,reg2);
  132. p.loadref(0,tmpref);
  133. end;
  134. A_SUB,A_ADD:
  135. begin
  136. reference_reset(tmpref);
  137. tmpref.base := reg1;
  138. case p.oper[0].typ of
  139. top_const:
  140. begin
  141. tmpref.offset := p.oper[0].val;
  142. if p.opcode = A_SUB then
  143. tmpref.offset := - tmpRef.offset;
  144. end;
  145. top_symbol:
  146. tmpref.symbol := p.oper[0].sym;
  147. top_reg:
  148. begin
  149. tmpref.index := p.oper[0].reg;
  150. tmpref.scalefactor := 1;
  151. end;
  152. else internalerror(200010031);
  153. end;
  154. p.opcode := A_LEA;
  155. p.loadref(0,tmpref);
  156. p.loadreg(1,reg2);
  157. end;
  158. A_SHL:
  159. begin
  160. reference_reset(tmpref);
  161. tmpref.index := reg1;
  162. tmpref.scalefactor := 1 shl p.oper[0].val;
  163. p.opcode := A_LEA;
  164. p.loadref(0,tmpref);
  165. p.loadreg(1,reg2);
  166. end;
  167. else internalerror(200010032);
  168. end;
  169. end;
  170. function switchRegs(asml: TAAsmoutput; reg1, reg2: tregister; start: Tai): Boolean;
  171. { change movl %reg1,%reg2 ... bla ... to ... bla with reg1 and reg2 switched }
  172. var
  173. endP, hp, lastreg1,lastreg2: Tai;
  174. switchDone, switchLast, tmpResult, sequenceEnd, reg1Modified, reg2Modified: boolean;
  175. reg1StillUsed, reg2StillUsed, isInstruction: boolean;
  176. begin
  177. switchRegs := false;
  178. tmpResult := true;
  179. sequenceEnd := false;
  180. reg1Modified := false;
  181. reg2Modified := false;
  182. endP := start;
  183. while tmpResult and not sequenceEnd do
  184. begin
  185. tmpResult :=
  186. getNextInstruction(endP,endP);
  187. If tmpResult and
  188. not pTaiprop(endp.optinfo)^.canBeRemoved then
  189. begin
  190. { if the newReg gets stored back to the oldReg, we can change }
  191. { "mov %oldReg,%newReg; <operations on %newReg>; mov %newReg, }
  192. { %oldReg" to "<operations on %oldReg>" }
  193. switchLast := storeBack(endP,reg1,reg2);
  194. reg1StillUsed := reg1 in pTaiprop(endp.optinfo)^.usedregs;
  195. reg2StillUsed := reg2 in pTaiprop(endp.optinfo)^.usedregs;
  196. isInstruction := endp.typ = ait_instruction;
  197. sequenceEnd :=
  198. switchLast or
  199. { if both registers are released right before an instruction }
  200. { that contains hardcoded regs, it's ok too }
  201. (not reg1StillUsed and not reg2StillUsed) or
  202. { no support for (i)div, mul and imul with hardcoded operands }
  203. (((not isInstruction) or
  204. noHardCodedRegs(Taicpu(endP),reg1,reg2)) and
  205. (not reg1StillUsed or
  206. (isInstruction and findRegDealloc(reg1,endP) and
  207. regLoadedWithNewValue(reg1,false,Taicpu(endP)))) and
  208. (not reg2StillUsed or
  209. (isInstruction and findRegDealloc(reg2,endP) and
  210. regLoadedWithNewValue(reg2,false,Taicpu(endP)))));
  211. { we can't switch reg1 and reg2 in something like }
  212. { movl %reg1,%reg2 }
  213. { movl (%reg2),%reg2 }
  214. { movl 4(%reg1),%reg1 }
  215. if reg2Modified and not(reg1Modified) and
  216. regReadByInstruction(reg1,endP) then
  217. begin
  218. tmpResult := false;
  219. break
  220. end;
  221. if not reg1Modified then
  222. begin
  223. reg1Modified := regModifiedByInstruction(reg1,endP);
  224. if reg1Modified and not canBeFirstSwitch(Taicpu(endP),reg1) then
  225. begin
  226. tmpResult := false;
  227. break;
  228. end;
  229. end;
  230. if not reg2Modified then
  231. reg2Modified := regModifiedByInstruction(reg2,endP);
  232. if sequenceEnd then
  233. break;
  234. tmpResult :=
  235. (endp.typ <> ait_label) and
  236. ((not isInstruction) or
  237. (NoHardCodedRegs(Taicpu(endP),reg1,reg2) and
  238. RegSizesOk(reg1,reg2,Taicpu(endP)) and
  239. (Taicpu(endp).opcode <> A_JMP)));
  240. end;
  241. end;
  242. if tmpResult and sequenceEnd then
  243. begin
  244. switchRegs := true;
  245. reg1Modified := false;
  246. reg2Modified := false;
  247. lastreg1 := start;
  248. lastreg2 := start;
  249. getNextInstruction(start,hp);
  250. while hp <> endP do
  251. begin
  252. if (not pTaiprop(hp.optinfo)^.canberemoved) and
  253. (hp.typ = ait_instruction) then
  254. begin
  255. switchDone := false;
  256. if not reg1Modified then
  257. begin
  258. reg1Modified := regModifiedByInstruction(reg1,hp);
  259. if reg1Modified then
  260. begin
  261. doFirstSwitch(Taicpu(hp),reg1,reg2);
  262. switchDone := true;
  263. end;
  264. end;
  265. if not switchDone then
  266. if reg1Modified then
  267. doSwitchReg(Taicpu(hp),reg1,reg2)
  268. else
  269. doReplaceReg(Taicpu(hp),reg2,reg1);
  270. end;
  271. if regininstruction(reg1,hp) then
  272. lastreg1 := hp;
  273. if regininstruction(reg2,hp) then
  274. lastreg2 := hp;
  275. getNextInstruction(hp,hp);
  276. end;
  277. if switchLast then
  278. doSwitchReg(Taicpu(hp),reg1,reg2)
  279. else getLastInstruction(hp,hp);
  280. allocRegBetween(asmL,reg1,start,lastreg1);
  281. allocRegBetween(asmL,reg2,start,lastreg2);
  282. end;
  283. end;
  284. procedure doRenaming(asml: TAAsmoutput; first, last: Tai);
  285. var
  286. p: Tai;
  287. begin
  288. p := First;
  289. SkipHead(p);
  290. while p <> last do
  291. begin
  292. case p.typ of
  293. ait_instruction:
  294. begin
  295. case Taicpu(p).opcode of
  296. A_MOV:
  297. begin
  298. if not(pTaiprop(p.optinfo)^.canBeRemoved) and
  299. (Taicpu(p).oper[0].typ = top_reg) and
  300. (Taicpu(p).oper[1].typ = top_reg) and
  301. (Taicpu(p).opsize = S_L) and
  302. (Taicpu(p).oper[0].reg in (rg.usableregsint+[R_EDI])) and
  303. (Taicpu(p).oper[1].reg in (rg.usableregsint+[R_EDI])) then
  304. if switchRegs(asml,Taicpu(p).oper[0].reg,
  305. Taicpu(p).oper[1].reg,p) then
  306. begin
  307. { getnextinstruction(p,hp);
  308. asmL^.remove(p);
  309. dispose(p,done);
  310. p := hp;
  311. continue }
  312. pTaiprop(p.optinfo)^.canBeRemoved := true;
  313. end;
  314. end;
  315. end;
  316. end;
  317. end;
  318. getNextInstruction(p,p);
  319. end;
  320. end;
  321. End.
  322. {
  323. $Log$
  324. Revision 1.13 2002-04-21 15:42:17 carl
  325. * changeregsize -> rg.makeregsize
  326. Revision 1.12 2002/04/20 21:37:08 carl
  327. + generic FPC_CHECKPOINTER
  328. + first parameter offset in stack now portable
  329. * rename some constants
  330. + move some cpu stuff to other units
  331. - remove unused constents
  332. * fix stacksize for some targets
  333. * fix generic size problems which depend now on EXTEND_SIZE constant
  334. * removing frame pointer in routines is only available for : i386,m68k and vis targets
  335. Revision 1.11 2002/04/15 19:44:22 peter
  336. * fixed stackcheck that would be called recursively when a stack
  337. error was found
  338. * generic changeregsize(reg,size) for i386 register resizing
  339. * removed some more routines from cga unit
  340. * fixed returnvalue handling
  341. * fixed default stacksize of linux and go32v2, 8kb was a bit small :-)
  342. Revision 1.10 2002/04/02 17:11:39 peter
  343. * tlocation,treference update
  344. * LOC_CONSTANT added for better constant handling
  345. * secondadd splitted in multiple routines
  346. * location_force_reg added for loading a location to a register
  347. of a specified size
  348. * secondassignment parses now first the right and then the left node
  349. (this is compatible with Kylix). This saves a lot of push/pop especially
  350. with string operations
  351. * adapted some routines to use the new cg methods
  352. Revision 1.9 2002/03/31 20:26:41 jonas
  353. + a_loadfpu_* and a_loadmm_* methods in tcg
  354. * register allocation is now handled by a class and is mostly processor
  355. independent (+rgobj.pas and i386/rgcpu.pas)
  356. * temp allocation is now handled by a class (+tgobj.pas, -i386\tgcpu.pas)
  357. * some small improvements and fixes to the optimizer
  358. * some register allocation fixes
  359. * some fpuvaroffset fixes in the unary minus node
  360. * push/popusedregisters is now called rg.save/restoreusedregisters and
  361. (for i386) uses temps instead of push/pop's when using -Op3 (that code is
  362. also better optimizable)
  363. * fixed and optimized register saving/restoring for new/dispose nodes
  364. * LOC_FPU locations now also require their "register" field to be set to
  365. R_ST, not R_ST0 (the latter is used for LOC_CFPUREGISTER locations only)
  366. - list field removed of the tnode class because it's not used currently
  367. and can cause hard-to-find bugs
  368. Revision 1.8 2001/10/12 13:55:03 jonas
  369. * finer granularity for allocation of reused/replaced registers
  370. Revision 1.7 2001/08/29 14:07:43 jonas
  371. * the optimizer now keeps track of flags register usage. This fixes some
  372. optimizer bugs with int64 calculations (because of the carry flag usage)
  373. * fixed another bug which caused wrong optimizations with complex
  374. array expressions
  375. Revision 1.6 2001/01/06 23:35:06 jonas
  376. * fixed webbug 1323
  377. Revision 1.5 2000/12/25 00:07:34 peter
  378. + new tlinkedlist class (merge of old tstringqueue,tcontainer and
  379. tlinkedlist objects)
  380. Revision 1.4 2000/12/05 09:32:47 jonas
  381. * fixed bug where "shl $1,%reg" was changed to "leal (%reg),%reg2"
  382. instread of to "leal (,%reg,2),%reg2"
  383. Revision 1.3 2000/11/29 00:30:51 florian
  384. * unused units removed from uses clause
  385. * some changes for widestrings
  386. Revision 1.2 2000/11/22 16:30:04 jonas
  387. * fixed bug where "imul mem32,reg,reg" could be generated
  388. Revision 1.1 2000/10/24 10:40:54 jonas
  389. + register renaming ("fixes" bug1088)
  390. * changed command line options meanings for optimizer:
  391. O2 now means peepholopts, CSE and register renaming in 1 pass
  392. O3 is the same, but repeated until no further optimizations are
  393. possible or until 5 passes have been done (to avoid endless loops)
  394. * changed aopt386 so it does this looping
  395. * added some procedures from csopt386 to the interface because they're
  396. used by rropt386 as well
  397. * some changes to csopt386 and daopt386 so that newly added instructions
  398. by the CSE get optimizer info (they were simply skipped previously),
  399. this fixes some bugs
  400. }