rropt386.pas 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. {
  2. $Id$
  3. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  4. development team
  5. This unit contains register renaming functionality
  6. This program is free software; you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation; either version 2 of the License, or
  9. (at your option) any later version.
  10. This program is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with this program; if not, write to the Free Software
  16. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17. ****************************************************************************
  18. }
  19. unit rropt386;
  20. {$i fpcdefs.inc}
  21. interface
  22. uses aasmbase,aasmtai,aasmcpu;
  23. procedure doRenaming(asml: taasmoutput; first, last: tai);
  24. implementation
  25. uses
  26. {$ifdef replaceregdebug}cutils,{$endif}
  27. verbose,globals,cpubase,daopt386,csopt386,rgobj, cgbase, cgobj;
  28. function canBeFirstSwitch(p: taicpu; supreg: tsuperregister): boolean;
  29. { checks whether an operation on reg can be switched to another reg without an }
  30. { additional mov, e.g. "addl $4,%reg1" can be changed to "leal 4(%reg1),%reg2" }
  31. begin
  32. canBeFirstSwitch := false;
  33. case p.opcode of
  34. A_MOV,A_MOVZX,A_MOVSX,A_LEA:
  35. canBeFirstSwitch :=
  36. (p.oper[1]^.typ = top_reg) and
  37. (getsupreg(p.oper[1]^.reg) = supreg);
  38. A_IMUL:
  39. canBeFirstSwitch :=
  40. (p.ops >= 2) and
  41. (getsupreg(p.oper[p.ops-1]^.reg) = supreg) and
  42. (p.oper[0]^.typ <> top_ref) and
  43. (not pTaiprop(p.optinfo)^.FlagsUsed);
  44. A_INC,A_DEC:
  45. canBeFirstSwitch :=
  46. (p.oper[0]^.typ = top_reg) and
  47. (p.opsize = S_L) and
  48. (not pTaiprop(p.optinfo)^.FlagsUsed);
  49. A_SUB,A_ADD:
  50. canBeFirstSwitch :=
  51. (p.oper[1]^.typ = top_reg) and
  52. (p.opsize = S_L) and
  53. (getsupreg(p.oper[1]^.reg) = supreg) and
  54. (p.oper[0]^.typ <> top_ref) and
  55. ((p.opcode <> A_SUB) or
  56. (p.oper[0]^.typ = top_const)) and
  57. (not pTaiprop(p.optinfo)^.FlagsUsed);
  58. A_SHL:
  59. canBeFirstSwitch :=
  60. (p.opsize = S_L) and
  61. (p.oper[1]^.typ = top_reg) and
  62. (getsupreg(p.oper[1]^.reg) = supreg) and
  63. (p.oper[0]^.typ = top_const) and
  64. (p.oper[0]^.val in [1,2,3]) and
  65. (not pTaiprop(p.optinfo)^.FlagsUsed);
  66. end;
  67. end;
  68. procedure switchReg(var reg: tregister; reg1, reg2: tsuperregister);
  69. var
  70. supreg: tsuperregister;
  71. begin
  72. if (reg = NR_NO) or
  73. (getregtype(reg) <> R_INTREGISTER) then
  74. exit;
  75. supreg := getsupreg(reg);
  76. if (supreg = reg1) then
  77. setsupreg(reg,reg2)
  78. else if (supreg = reg2) then
  79. setsupreg(reg,reg1);
  80. end;
  81. procedure switchOp(var op: toper; reg1, reg2: tsuperregister);
  82. begin
  83. case op.typ of
  84. top_reg:
  85. switchReg(op.reg,reg1,reg2);
  86. top_ref:
  87. begin
  88. switchReg(op.ref^.base,reg1,reg2);
  89. switchReg(op.ref^.index,reg1,reg2);
  90. end;
  91. end;
  92. end;
  93. procedure doSwitchReg(hp: taicpu; reg1,reg2: tsuperregister);
  94. var
  95. opCount: longint;
  96. begin
  97. for opCount := 0 to hp.ops-1 do
  98. switchOp(hp.oper[opCount]^,reg1,reg2);
  99. end;
  100. procedure doFirstSwitch(p: taicpu; reg1, reg2: tsuperregister);
  101. var
  102. tmpRef: treference;
  103. begin
  104. case p.opcode of
  105. A_MOV,A_MOVZX,A_MOVSX,A_LEA:
  106. begin
  107. changeOp(p.oper[1]^,reg1,reg2);
  108. changeOp(p.oper[0]^,reg2,reg1);
  109. end;
  110. A_IMUL:
  111. begin
  112. p.ops := 3;
  113. p.loadreg(2,p.oper[1]^.reg);
  114. changeOp(p.oper[2]^,reg1,reg2);
  115. end;
  116. A_INC,A_DEC:
  117. begin
  118. reference_reset(tmpref);
  119. tmpref.base := newreg(R_INTREGISTER,reg1,R_SUBWHOLE);
  120. case p.opcode of
  121. A_INC:
  122. tmpref.offset := 1;
  123. A_DEC:
  124. tmpref.offset := -1;
  125. end;
  126. p.ops := 2;
  127. p.opcode := A_LEA;
  128. p.loadreg(1,newreg(R_INTREGISTER,reg2,R_SUBWHOLE));
  129. p.loadref(0,tmpref);
  130. end;
  131. A_SUB,A_ADD:
  132. begin
  133. reference_reset(tmpref);
  134. tmpref.base := newreg(R_INTREGISTER,reg1,R_SUBWHOLE);
  135. case p.oper[0]^.typ of
  136. top_const:
  137. begin
  138. tmpref.offset := longint(p.oper[0]^.val);
  139. if p.opcode = A_SUB then
  140. tmpref.offset := - tmpRef.offset;
  141. end;
  142. top_symbol:
  143. tmpref.symbol := p.oper[0]^.sym;
  144. top_reg:
  145. begin
  146. tmpref.index := p.oper[0]^.reg;
  147. tmpref.scalefactor := 1;
  148. end;
  149. else internalerror(200010031);
  150. end;
  151. p.opcode := A_LEA;
  152. p.loadref(0,tmpref);
  153. p.loadreg(1,newreg(R_INTREGISTER,reg2,R_SUBWHOLE));
  154. end;
  155. A_SHL:
  156. begin
  157. reference_reset(tmpref);
  158. tmpref.base := newreg(R_INTREGISTER,reg1,R_SUBWHOLE);
  159. tmpref.scalefactor := 1 shl p.oper[0]^.val;
  160. p.opcode := A_LEA;
  161. p.loadref(0,tmpref);
  162. p.loadreg(1,newreg(R_INTREGISTER,reg2,R_SUBWHOLE));
  163. end;
  164. else internalerror(200010032);
  165. end;
  166. end;
  167. function switchRegs(asml: taasmoutput; reg1, reg2: tsuperregister; start: tai): Boolean;
  168. { change movl %reg1,%reg2 ... bla ... to ... bla with reg1 and reg2 switched }
  169. var
  170. endP, hp, lastreg1,lastreg2: tai;
  171. switchDone, switchLast, tmpResult, sequenceEnd, reg1Modified, reg2Modified: boolean;
  172. reg1StillUsed, reg2StillUsed, isInstruction: boolean;
  173. begin
  174. switchRegs := false;
  175. tmpResult := true;
  176. sequenceEnd := false;
  177. reg1Modified := false;
  178. reg2Modified := false;
  179. endP := start;
  180. while tmpResult and not sequenceEnd do
  181. begin
  182. tmpResult :=
  183. getNextInstruction(endP,endP);
  184. If tmpResult and
  185. not pTaiprop(endp.optinfo)^.canBeRemoved then
  186. begin
  187. { if the newReg gets stored back to the oldReg, we can change }
  188. { "mov %oldReg,%newReg; <operations on %newReg>; mov %newReg, }
  189. { %oldReg" to "<operations on %oldReg>" }
  190. switchLast := storeBack(start,endP,reg1,reg2);
  191. reg1StillUsed := reg1 in pTaiprop(endp.optinfo)^.usedregs;
  192. reg2StillUsed := reg2 in pTaiprop(endp.optinfo)^.usedregs;
  193. isInstruction := endp.typ = ait_instruction;
  194. sequenceEnd :=
  195. switchLast or
  196. { if both registers are released right before an instruction }
  197. { that contains hardcoded regs, it's ok too }
  198. (not reg1StillUsed and not reg2StillUsed) or
  199. { no support for (i)div, mul and imul with hardcoded operands }
  200. (((not isInstruction) or
  201. noHardCodedRegs(taicpu(endP),reg1,reg2)) and
  202. (not reg1StillUsed or
  203. (isInstruction and findRegDealloc(reg1,endP) and
  204. regLoadedWithNewValue(reg1,false,taicpu(endP)))) and
  205. (not reg2StillUsed or
  206. (isInstruction and findRegDealloc(reg2,endP) and
  207. regLoadedWithNewValue(reg2,false,taicpu(endP)))));
  208. { we can't switch reg1 and reg2 in something like }
  209. { movl %reg1,%reg2 }
  210. { movl (%reg2),%reg2 }
  211. { movl 4(%reg1),%reg1 }
  212. if reg2Modified and not(reg1Modified) and
  213. regReadByInstruction(reg1,endP) then
  214. begin
  215. tmpResult := false;
  216. break
  217. end;
  218. if not reg1Modified then
  219. begin
  220. reg1Modified := regModifiedByInstruction(reg1,endP);
  221. if reg1Modified and not canBeFirstSwitch(taicpu(endP),reg1) then
  222. begin
  223. tmpResult := false;
  224. break;
  225. end;
  226. end;
  227. if not reg2Modified then
  228. reg2Modified := regModifiedByInstruction(reg2,endP);
  229. tmpResult :=
  230. ((not isInstruction) or
  231. (NoHardCodedRegs(taicpu(endP),reg1,reg2) and
  232. RegSizesOk(reg1,reg2,taicpu(endP))));
  233. if sequenceEnd then
  234. break;
  235. tmpResult :=
  236. tmpresult and
  237. (endp.typ <> ait_label) and
  238. ((not isInstruction) or
  239. (taicpu(endp).opcode <> A_JMP));
  240. end;
  241. end;
  242. if tmpResult and sequenceEnd then
  243. begin
  244. switchRegs := true;
  245. reg1Modified := false;
  246. reg2Modified := false;
  247. lastreg1 := start;
  248. lastreg2 := start;
  249. getNextInstruction(start,hp);
  250. while hp <> endP do
  251. begin
  252. if (not pTaiprop(hp.optinfo)^.canberemoved) and
  253. (hp.typ = ait_instruction) then
  254. begin
  255. switchDone := false;
  256. if not reg1Modified then
  257. begin
  258. reg1Modified := regModifiedByInstruction(reg1,hp);
  259. if reg1Modified then
  260. begin
  261. doFirstSwitch(taicpu(hp),reg1,reg2);
  262. switchDone := true;
  263. end;
  264. end;
  265. if not switchDone then
  266. if reg1Modified then
  267. doSwitchReg(taicpu(hp),reg1,reg2)
  268. else
  269. doReplaceReg(taicpu(hp),reg2,reg1);
  270. end;
  271. if regininstruction(reg1,hp) then
  272. lastreg1 := hp;
  273. if regininstruction(reg2,hp) then
  274. lastreg2 := hp;
  275. getNextInstruction(hp,hp);
  276. end;
  277. if switchLast then
  278. begin
  279. { this is in case of a storeback, make sure the same size of register }
  280. { contents as the initial move is transfered }
  281. doSwitchReg(taicpu(hp),reg1,reg2);
  282. if taicpu(hp).opsize <> taicpu(start).opsize then
  283. begin
  284. taicpu(hp).opsize := taicpu(start).opsize;
  285. taicpu(hp).oper[0]^.reg := taicpu(start).oper[0]^.reg;
  286. taicpu(hp).oper[1]^.reg := taicpu(start).oper[1]^.reg;
  287. end;
  288. end
  289. else
  290. getLastInstruction(hp,hp);
  291. allocRegBetween(asmL,newreg(R_INTREGISTER,reg1,R_SUBWHOLE),start,lastreg1);
  292. allocRegBetween(asmL,newreg(R_INTREGISTER,reg2,R_SUBWHOLE),start,lastreg2);
  293. end;
  294. end;
  295. procedure doRenaming(asml: taasmoutput; first, last: tai);
  296. var
  297. p: tai;
  298. begin
  299. p := First;
  300. SkipHead(p);
  301. while p <> last do
  302. begin
  303. case p.typ of
  304. ait_instruction:
  305. begin
  306. case taicpu(p).opcode of
  307. A_MOV:
  308. begin
  309. if not(pTaiprop(p.optinfo)^.canBeRemoved) and
  310. (taicpu(p).oper[0]^.typ = top_reg) and
  311. (taicpu(p).oper[1]^.typ = top_reg) and
  312. (taicpu(p).opsize = S_L) and
  313. (getsupreg(taicpu(p).oper[0]^.reg) in ([RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI])) and
  314. (getsupreg(taicpu(p).oper[1]^.reg) in ([RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI])) then
  315. if switchRegs(asml,getsupreg(taicpu(p).oper[0]^.reg),
  316. getsupreg(taicpu(p).oper[1]^.reg),p) then
  317. begin
  318. pTaiprop(p.optinfo)^.canBeRemoved := true;
  319. end;
  320. end;
  321. end;
  322. end;
  323. end;
  324. getNextInstruction(p,p);
  325. end;
  326. end;
  327. End.
  328. {
  329. $Log$
  330. Revision 1.25 2003-12-15 16:08:16 jonas
  331. - disable removal of dead loads before a call, because register
  332. parameters are released before a call
  333. * fix storeback of registers in case of different sizes (e.g., first
  334. a "movl %eax,%edx" and later a "movb %dl,%al")
  335. Revision 1.24 2003/12/07 19:19:56 jonas
  336. * fixed some more bugs which only showed up in a ppc cross compiler
  337. Revision 1.23 2003/11/22 00:40:19 jonas
  338. * fixed optimiser so it compiles again
  339. * fixed several bugs which were in there already for a long time, but
  340. which only popped up now :) -O2/-O3 will now optimise less than in
  341. the past (and correctly so), but -O2u/-O3u will optimise a bit more
  342. * some more small improvements for -O3 are still possible
  343. Revision 1.22 2003/06/03 21:09:05 peter
  344. * internal changeregsize for optimizer
  345. * fix with a hack to not remove the first instruction of a block
  346. which will leave blockstart pointing to invalid memory
  347. Revision 1.21 2003/03/28 19:16:57 peter
  348. * generic constructor working for i386
  349. * remove fixed self register
  350. * esi added as address register for i386
  351. Revision 1.20 2003/02/19 22:00:16 daniel
  352. * Code generator converted to new register notation
  353. - Horribly outdated todo.txt removed
  354. Revision 1.19 2003/01/08 18:43:57 daniel
  355. * Tregister changed into a record
  356. Revision 1.18 2002/07/01 18:46:34 peter
  357. * internal linker
  358. * reorganized aasm layer
  359. Revision 1.17 2002/05/18 13:34:26 peter
  360. * readded missing revisions
  361. Revision 1.16 2002/05/16 19:46:52 carl
  362. + defines.inc -> fpcdefs.inc to avoid conflicts if compiling by hand
  363. + try to fix temp allocation (still in ifdef)
  364. + generic constructor calls
  365. + start of tassembler / tmodulebase class cleanup
  366. Revision 1.14 2002/05/12 16:53:18 peter
  367. * moved entry and exitcode to ncgutil and cgobj
  368. * foreach gets extra argument for passing local data to the
  369. iterator function
  370. * -CR checks also class typecasts at runtime by changing them
  371. into as
  372. * fixed compiler to cycle with the -CR option
  373. * fixed stabs with elf writer, finally the global variables can
  374. be watched
  375. * removed a lot of routines from cga unit and replaced them by
  376. calls to cgobj
  377. * u32bit-s32bit updates for and,or,xor nodes. When one element is
  378. u32bit then the other is typecasted also to u32bit without giving
  379. a rangecheck warning/error.
  380. * fixed pascal calling method with reversing also the high tree in
  381. the parast, detected by tcalcst3 test
  382. Revision 1.13 2002/04/21 15:42:17 carl
  383. * changeregsize -> changeregsize
  384. Revision 1.12 2002/04/20 21:37:08 carl
  385. + generic FPC_CHECKPOINTER
  386. + first parameter offset in stack now portable
  387. * rename some constants
  388. + move some cpu stuff to other units
  389. - remove unused constants
  390. * fix stacksize for some targets
  391. * fix generic size problems which depend now on EXTEND_SIZE constant
  392. * removing frame pointer in routines is only available for : i386,m68k and vis targets
  393. Revision 1.11 2002/04/15 19:44:22 peter
  394. * fixed stackcheck that would be called recursively when a stack
  395. error was found
  396. * generic changeregsize(reg,size) for i386 register resizing
  397. * removed some more routines from cga unit
  398. * fixed returnvalue handling
  399. * fixed default stacksize of linux and go32v2, 8kb was a bit small :-)
  400. Revision 1.10 2002/04/02 17:11:39 peter
  401. * tlocation,treference update
  402. * LOC_CONSTANT added for better constant handling
  403. * secondadd splitted in multiple routines
  404. * location_force_reg added for loading a location to a register
  405. of a specified size
  406. * secondassignment parses now first the right and then the left node
  407. (this is compatible with Kylix). This saves a lot of push/pop especially
  408. with string operations
  409. * adapted some routines to use the new cg methods
  410. Revision 1.9 2002/03/31 20:26:41 jonas
  411. + a_loadfpu_* and a_loadmm_* methods in tcg
  412. * register allocation is now handled by a class and is mostly processor
  413. independent (+rgobj.pas and i386/rgcpu.pas)
  414. * temp allocation is now handled by a class (+tgobj.pas, -i386\tgcpu.pas)
  415. * some small improvements and fixes to the optimizer
  416. * some register allocation fixes
  417. * some fpuvaroffset fixes in the unary minus node
  418. * push/popusedregisters is now called rg.save/restoreusedregisters and
  419. (for i386) uses temps instead of push/pop's when using -Op3 (that code is
  420. also better optimizable)
  421. * fixed and optimized register saving/restoring for new/dispose nodes
  422. * LOC_FPU locations now also require their "register" field to be set to
  423. R_ST, not R_ST0 (the latter is used for LOC_CFPUREGISTER locations only)
  424. - list field removed of the tnode class because it's not used currently
  425. and can cause hard-to-find bugs
  426. }