popt386.pas 106 KB


  1. {
  2. $Id$
  3. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  4. This unit contains the peephole optimizer.
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit popt386;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses Aasmbase,aasmtai,aasmcpu,verbose;
  22. procedure PrePeepHoleOpts(asml: taasmoutput; BlockStart, BlockEnd: tai);
  23. procedure PeepHoleOptPass1(asml: taasmoutput; BlockStart, BlockEnd: tai);
  24. procedure PeepHoleOptPass2(asml: taasmoutput; BlockStart, BlockEnd: tai);
  25. procedure PostPeepHoleOpts(asml: taasmoutput; BlockStart, BlockEnd: tai);
  26. implementation
  27. uses
  28. globtype,systems,
  29. globals,cgbase,procinfo,
  30. symsym,symdef,
  31. {$ifdef finaldestdebug}
  32. cobjects,
  33. {$endif finaldestdebug}
  34. cpuinfo,cpubase,cgobj,daopt386,rgobj;
  { Reports whether register "reg" is still in use after instruction "p".

    UsedRegs is first advanced over the item that follows p (via
    UpdateUsedRegs); because it is a var parameter the caller's set is
    modified. The register counts as used when its super register is in the
    updated set and either
      - there is no further instruction after p, or
      - regLoadedWithNewValue returns false for that next instruction. }
  function RegUsedAfterInstruction(reg: Tregister; p: tai; var UsedRegs: TRegSet): Boolean;
  var
    sr: tsuperregister;
  begin
    UpdateUsedRegs(UsedRegs, tai(p.Next));
    sr := getsupreg(reg);
    if not(sr in UsedRegs) then
      { not live at all after p }
      RegUsedAfterInstruction := false
    else if not(getNextInstruction(p,p)) then
      { live and nothing follows that could overwrite it }
      RegUsedAfterInstruction := true
    else
      { live unless the next instruction reloads it }
      RegUsedAfterInstruction := not(regLoadedWithNewValue(sr,false,p));
  end;
  { Optimizes an x87 store that is directly followed by a reload of the very
    same memory operand:

        fstp mem  (p)            fistp mem  (p)
        fld  mem  (hp1)    or    fild  mem  (hp1)

    Both instructions must have a memory operand of equal size that compares
    equal with refsEqual.

    * If the pair is followed by leave/ret and the memory operand is addressed
      off the frame pointer (no index) at an offset not below the function
      result's offset, both instructions are removed, p is set to the
      leave/ret instruction and the function returns true.
    * Otherwise, unless the operand size is S_FX or S_IQ, the popping store
      is turned into its non-popping form (fstp -> fst, fistp -> fist) and the
      reload is removed; the function returns false.

    asmL : assembler list containing p.
    p    : the fstp/fistp instruction; updated when it gets removed.
    Returns true if a "continue" should be done by the caller after this
    optimization. }
  function doFpuLoadStoreOpt(asmL: TAAsmoutput; var p: tai): boolean;
  var hp1, hp2: tai;
  begin
    doFpuLoadStoreOpt := false;
    if (taicpu(p).oper[0]^.typ = top_ref) and
       getNextInstruction(p, hp1) and
       (hp1.typ = ait_instruction) and
       (((taicpu(hp1).opcode = A_FLD) and
         (taicpu(p).opcode = A_FSTP)) or
        ((taicpu(p).opcode = A_FISTP) and
         (taicpu(hp1).opcode = A_FILD))) and
       (taicpu(hp1).oper[0]^.typ = top_ref) and
       (taicpu(hp1).opsize = taicpu(p).opsize) and
       refsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
      begin
        if getNextInstruction(hp1, hp2) and
           (hp2.typ = ait_instruction) and
           ((taicpu(hp2).opcode = A_LEAVE) or
            (taicpu(hp2).opcode = A_RET)) and
           (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
           { funcretsym is dereferenced below; guard against it being nil
             (presumably the case for routines without a function result -
             TODO confirm). Without a result symbol we conservatively fall
             through to the fst/fist rewrite, which is always valid. }
           assigned(current_procinfo.procdef.funcretsym) and
           (taicpu(p).oper[0]^.ref^.offset >= tvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) and
           (taicpu(p).oper[0]^.ref^.index = NR_NO) then
          begin
            { the stored value can never be read again: drop store and reload }
            asml.remove(p);
            asml.remove(hp1);
            p.free;
            hp1.free;
            p := hp2;
            removeLastDeallocForFuncRes(asmL, p);
            doFPULoadStoreOpt := true;
          end
        else
          { fst can't store an extended value! }
          if (taicpu(p).opsize <> S_FX) and
             (taicpu(p).opsize <> S_IQ) then
            begin
              { keep the value on the FPU stack instead of popping and reloading it }
              if (taicpu(p).opcode = A_FSTP) then
                taicpu(p).opcode := A_FST
              else taicpu(p).opcode := A_FIST;
              asml.remove(hp1);
              hp1.free;
            end
      end;
  end;
  { Pre-pass of the peephole optimizer. Walks the instruction list from
    BlockStart up to (not including) BlockEnd and applies three rewrites:

      * imul with a small constant (32 bit, constant source, register operand)
        is removed (factor 1, two operands), turned into a mov (factor 1,
        three operands) or replaced by lea/add sequences (factors 3, 5, 6, 9,
        10 and 12). The lea/add replacement is only done when optimizing for
        a processor below ClassPentium2, not for size, and when the imul is
        not followed by a jo/jno (lea does not set the overflow flag).
      * "shr/sar const1, x ; shl const2, x" is turned into shift/and
        combinations.
      * "xor reg, reg" is temporarily turned into "mov 0, reg".

    asml       : assembler list that is modified in place.
    BlockStart : first item to examine.
    BlockEnd   : item at which the walk stops. }
  procedure PrePeepHoleOpts(asml: taasmoutput; BlockStart, BlockEnd: tai);
  var
    p,hp1: tai;
    l: Aword;
    tmpRef: treference;
  begin
    p := BlockStart;
    while (p <> BlockEnd) Do
      begin
        case p.Typ Of
          Ait_Instruction:
            begin
              case taicpu(p).opcode Of
                A_IMUL:
                  {changes certain "imul const, %reg"'s to lea sequences}
                  begin
                    if (taicpu(p).oper[0]^.typ = Top_Const) and
                       (taicpu(p).oper[1]^.typ = Top_Reg) and
                       (taicpu(p).opsize = S_L) then
                      if (taicpu(p).oper[0]^.val = 1) then
                        if (taicpu(p).ops = 2) then
                          {remove "imul $1, reg"}
                          begin
                            hp1 := tai(p.Next);
                            asml.remove(p);
                            p.free;
                            p := hp1;
                            continue;
                          end
                        else
                          {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                          begin
                            hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                            InsertLLItem(asml, p.previous, p.next, hp1);
                            p.free;
                            p := hp1;
                          end
                      else if
                        ((taicpu(p).ops <= 2) or
                         (taicpu(p).oper[2]^.typ = Top_Reg)) and
                        (aktoptprocessor < ClassPentium2) and
                        (taicpu(p).oper[0]^.val <= 12) and
                        not(CS_LittleSize in aktglobalswitches) and
                        (not(GetNextInstruction(p, hp1)) or
                         {GetNextInstruction(p, hp1) and}
                         not((tai(hp1).typ = ait_instruction) and
                             ((taicpu(hp1).opcode=A_Jcc) and
                              (taicpu(hp1).condition in [C_O,C_NO])))) then
                        begin
                          reference_reset(tmpref);
                          { NOTE(review): the two-instruction sequences for
                            the 3-operand forms of factors 6 and 12 read reg1
                            after writing reg2, so they look wrong when both
                            registers are the same - confirm such an imul is
                            never generated. }
                          case taicpu(p).oper[0]^.val Of
                            3: begin
                                 {imul 3, reg1, reg2 to
                                    lea (reg1,reg1,2), reg2
                                  imul 3, reg1 to
                                    lea (reg1,reg1,2), reg1}
                                 TmpRef.base := taicpu(p).oper[1]^.reg;
                                 TmpRef.index := taicpu(p).oper[1]^.reg;
                                 TmpRef.ScaleFactor := 2;
                                 if (taicpu(p).ops = 2) then
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                 else
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                 InsertLLItem(asml,p.previous, p.next, hp1);
                                 p.free;
                                 p := hp1;
                               end;
                            5: begin
                                 {imul 5, reg1, reg2 to
                                    lea (reg1,reg1,4), reg2
                                  imul 5, reg1 to
                                    lea (reg1,reg1,4), reg1}
                                 TmpRef.base := taicpu(p).oper[1]^.reg;
                                 TmpRef.index := taicpu(p).oper[1]^.reg;
                                 TmpRef.ScaleFactor := 4;
                                 if (taicpu(p).ops = 2) then
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                 else
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                 InsertLLItem(asml,p.previous, p.next, hp1);
                                 p.free;
                                 p := hp1;
                               end;
                            6: begin
                                 {imul 6, reg1, reg2 to
                                    lea (,reg1,2), reg2
                                    lea (reg2,reg1,4), reg2
                                  imul 6, reg1 to
                                    lea (reg1,reg1,2), reg1
                                    add reg1, reg1}
                                 if (aktoptprocessor <= Class386) then
                                   begin
                                     { build the SECOND instruction of the
                                       sequence first and insert it after p }
                                     TmpRef.index := taicpu(p).oper[1]^.reg;
                                     if (taicpu(p).ops = 3) then
                                       begin
                                         TmpRef.base := taicpu(p).oper[2]^.reg;
                                         TmpRef.ScaleFactor := 4;
                                         { the destination must be reg2 (oper[2]):
                                           writing to reg1 would clobber the
                                           source and leave only 2*reg1 in reg2 }
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                       end
                                     else
                                       begin
                                         hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                           taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                       end;
                                     InsertLLItem(asml,p, p.next, hp1);
                                     { now build the FIRST instruction, which
                                       takes the place of the imul itself }
                                     reference_reset(tmpref);
                                     TmpRef.index := taicpu(p).oper[1]^.reg;
                                     TmpRef.ScaleFactor := 2;
                                     if (taicpu(p).ops = 3) then
                                       begin
                                         TmpRef.base := NR_NO;
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                                           taicpu(p).oper[2]^.reg);
                                       end
                                     else
                                       begin
                                         TmpRef.base := taicpu(p).oper[1]^.reg;
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                       end;
                                     InsertLLItem(asml,p.previous, p.next, hp1);
                                     p.free;
                                     { continue behind the two new instructions }
                                     p := tai(hp1.next);
                                   end
                               end;
                            9: begin
                                 {imul 9, reg1, reg2 to
                                    lea (reg1,reg1,8), reg2
                                  imul 9, reg1 to
                                    lea (reg1,reg1,8), reg1}
                                 TmpRef.base := taicpu(p).oper[1]^.reg;
                                 TmpRef.index := taicpu(p).oper[1]^.reg;
                                 TmpRef.ScaleFactor := 8;
                                 if (taicpu(p).ops = 2) then
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                 else
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                 InsertLLItem(asml,p.previous, p.next, hp1);
                                 p.free;
                                 p := hp1;
                               end;
                            10: begin
                                  {imul 10, reg1, reg2 to
                                     lea (reg1,reg1,4), reg2
                                     add reg2, reg2
                                   imul 10, reg1 to
                                     lea (reg1,reg1,4), reg1
                                     add reg1, reg1}
                                  if (aktoptprocessor <= Class386) then
                                    begin
                                      { second instruction (add) goes after p }
                                      if (taicpu(p).ops = 3) then
                                        hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                          taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                      else
                                        hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                          taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                      InsertLLItem(asml,p, p.next, hp1);
                                      { first instruction (lea) replaces the imul }
                                      TmpRef.base := taicpu(p).oper[1]^.reg;
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      TmpRef.ScaleFactor := 4;
                                      if (taicpu(p).ops = 3) then
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
                                      else
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                      InsertLLItem(asml,p.previous, p.next, hp1);
                                      p.free;
                                      p := tai(hp1.next);
                                    end
                                end;
                            12: begin
                                  {imul 12, reg1, reg2 to
                                     lea (,reg1,4), reg2
                                     lea (reg2,reg1,8), reg2
                                   imul 12, reg1 to
                                     lea (reg1,reg1,2), reg1
                                     lea (,reg1,4), reg1}
                                  if (aktoptprocessor <= Class386)
                                    then
                                    begin
                                      { second instruction goes after p }
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      if (taicpu(p).ops = 3) then
                                        begin
                                          TmpRef.base := taicpu(p).oper[2]^.reg;
                                          TmpRef.ScaleFactor := 8;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                        end
                                      else
                                        begin
                                          TmpRef.base := NR_NO;
                                          TmpRef.ScaleFactor := 4;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        end;
                                      InsertLLItem(asml,p, p.next, hp1);
                                      { first instruction replaces the imul }
                                      reference_reset(tmpref);
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      if (taicpu(p).ops = 3) then
                                        begin
                                          TmpRef.base := NR_NO;
                                          TmpRef.ScaleFactor := 4;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                        end
                                      else
                                        begin
                                          TmpRef.base := taicpu(p).oper[1]^.reg;
                                          TmpRef.ScaleFactor := 2;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        end;
                                      InsertLLItem(asml,p.previous, p.next, hp1);
                                      p.free;
                                      p := tai(hp1.next);
                                    end
                                end
                          end;
                        end;
                  end;
                A_SAR, A_SHR:
                  {changes the code sequence
                     shr/sar const1, x
                     shl const2, x
                   to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
                  begin
                    if GetNextInstruction(p, hp1) and
                       (tai(hp1).typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_SHL) and
                       (taicpu(p).oper[0]^.typ = top_const) and
                       (taicpu(hp1).oper[0]^.typ = top_const) and
                       (taicpu(hp1).opsize = taicpu(p).opsize) and
                       (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                       OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                      if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                         not(CS_LittleSize in aktglobalswitches) then
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 > const2 }
                        begin
                          { shift right by the difference, then clear the low
                            const2 bits with an and-mask }
                          taicpu(p).LoadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                          taicpu(hp1).opcode := A_AND;
                          l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
                          case taicpu(p).opsize Of
                            S_L: taicpu(hp1).LoadConst(0,l Xor aword($ffffffff));
                            S_B: taicpu(hp1).LoadConst(0,l Xor $ff);
                            S_W: taicpu(hp1).LoadConst(0,l Xor $ffff);
                          end;
                        end
                      else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                              not(CS_LittleSize in aktglobalswitches) then
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 < const2 }
                        begin
                          { clear the low const1 bits first, then shift left
                            by the difference }
                          taicpu(hp1).LoadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                          taicpu(p).opcode := A_AND;
                          l := (1 shl (taicpu(p).oper[0]^.val))-1;
                          case taicpu(p).opsize Of
                            S_L: taicpu(p).LoadConst(0,l Xor aword($ffffffff));
                            S_B: taicpu(p).LoadConst(0,l Xor $ff);
                            S_W: taicpu(p).LoadConst(0,l Xor $ffff);
                          end;
                        end
                      else
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 = const2 }
                        if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                          begin
                            { both shifts cancel out except for the cleared
                              low bits: a single and suffices }
                            taicpu(p).opcode := A_AND;
                            l := (1 shl (taicpu(p).oper[0]^.val))-1;
                            case taicpu(p).opsize Of
                              S_B: taicpu(p).LoadConst(0,l Xor $ff);
                              S_W: taicpu(p).LoadConst(0,l Xor $ffff);
                              S_L: taicpu(p).LoadConst(0,l Xor aword($ffffffff));
                            end;
                            asml.remove(hp1);
                            hp1.free;
                          end;
                  end;
                A_XOR:
                  if (taicpu(p).oper[0]^.typ = top_reg) and
                     (taicpu(p).oper[1]^.typ = top_reg) and
                     (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                    { temporarily change this to 'mov reg,0' to make it easier }
                    { for the CSE. Will be changed back in pass 2 }
                    begin
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).loadconst(0,0);
                    end;
              end;
            end;
        end;
        p := tai(p.next)
      end;
  end;
  382. procedure PeepHoleOptPass1(Asml: taasmoutput; BlockStart, BlockEnd: tai);
  383. {First pass of peepholeoptimizations}
  384. var
  385. l,l1 : longint;
  386. p,hp1,hp2 : tai;
  387. hp3,hp4: tai;
  388. TmpRef: TReference;
  389. UsedRegs, TmpUsedRegs: TRegSet;
  390. TmpBool1, TmpBool2: Boolean;
  { Moves forward from hp past every directly following item whose type is a
    label, an alignment or a member of SkipInstr.

    hp  : item to start from (the item itself is never inspected, only its
          successors).
    hp2 : receives the first following item that was not skipped; if the
          list ends first, it receives the last item that was reached.
    Returns true when such a following item exists, false when the end of
    the list was hit. }
  function SkipLabels(hp: tai; var hp2: tai): boolean;
  var
    following: tai;
  begin
    following := tai(hp.next);
    while assigned(following) and
          (following.typ in SkipInstr + [ait_label,ait_align]) Do
      begin
        hp := following;
        following := tai(hp.next);
      end;
    SkipLabels := assigned(following);
    if assigned(following) then
      hp2 := following
    else
      hp2 := hp;
  end;
  function GetFinalDestination(asml: taasmoutput; hp: taicpu; level: longint): boolean;
  {traces successive jumps to their final destination and sets it, e.g.
     je l1                je l3
     <code>               <code>
     l1:       becomes    l1:
     je l2                je l3
     <code>               <code>
     l2:                  l2:
     jmp l3               jmp l3

   the level parameter denotes how deep we have already followed the jump,
   to avoid endless loops with constructs such as "l5: ; jmp l5"

   asml  : assembler list containing hp (new labels may be inserted into it)
   hp    : the jump instruction whose target symbol (oper[0]) is updated
   level : current recursion depth, callers start with 0

   Returns false when tracing was aborted (recursion deeper than 20 levels,
   a jump to itself, or a failed recursive call), true otherwise. Label
   reference counts are adjusted whenever the target of hp changes. }
  var p1, p2: tai;
      l: tasmlabel;

    { Looks for a label that directly follows hp, skipping only alignment
      items and items in SkipInstr. Returns true and stores the label in l
      when one is found. }
    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
    begin
      FindAnyLabel := false;
      while assigned(hp.next) and
            (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
        hp := tai(hp.next);
      if assigned(hp.next) and
         (tai(hp.next).typ = ait_label) then
        begin
          FindAnyLabel := true;
          l := tai_label(hp.next).l;
        end
    end;

  begin
    { set the result before the depth check, otherwise the function result
      is undefined when the recursion limit is hit }
    GetfinalDestination := false;
    if level > 20 then
      exit;
    p1 := dfa.getlabelwithsym(tasmlabel(hp.oper[0]^.sym));
    if assigned(p1) then
      begin
        SkipLabels(p1,p1);
        if (tai(p1).typ = ait_instruction) and
           (taicpu(p1).is_jmp) then
          if { the next instruction after the label where the jump hp arrives}
             { is unconditional or of the same type as hp, so continue       }
             (taicpu(p1).condition in [C_None,hp.condition]) or
             { the next instruction after the label where the jump hp arrives}
             { is the opposite of hp (so this one is never taken), but after }
             { that one there is a branch that will be taken, so perform a   }
             { little hack: set p1 equal to this instruction (that's what the}
             { last SkipLabels is for, only works with short bool evaluation)}
             ((taicpu(p1).condition = inverse_cond[hp.condition]) and
              SkipLabels(p1,p2) and
              (p2.typ = ait_instruction) and
              (taicpu(p2).is_jmp) and
              (taicpu(p2).condition in [C_None,hp.condition]) and
              SkipLabels(p1,p1)) then
            begin
              { quick check for loops of the form "l5: ; jmp l5 }
              if (tasmlabel(taicpu(p1).oper[0]^.sym).labelnr =
                  tasmlabel(hp.oper[0]^.sym).labelnr) then
                exit;
              { first resolve the jump we landed on, then take over its target }
              if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                exit;
              tasmlabel(hp.oper[0]^.sym).decrefs;
              hp.oper[0]^.sym:=taicpu(p1).oper[0]^.sym;
              tasmlabel(hp.oper[0]^.sym).increfs;
            end
          else
            if (taicpu(p1).condition = inverse_cond[hp.condition]) then
              { the jump at the destination is never taken when coming from
                hp, so hp can jump right behind it }
              if not FindAnyLabel(p1,l) then
                begin
{$ifdef finaldestdebug}
                  insertllitem(asml,p1,p1.next,tai_comment.Create(
                    strpnew('previous label inserted')));
{$endif finaldestdebug}
                  objectlibrary.getlabel(l);
                  insertllitem(asml,p1,p1.next,tai_label.Create(l));
                  tasmlabel(taicpu(hp).oper[0]^.sym).decrefs;
                  hp.oper[0]^.sym := l;
                  l.increfs;
                  { this won't work, since the new label isn't in the labeltable }
                  { so it will fail the rangecheck. Labeltable should become a   }
                  { hashtable to support this:                                   }
                  { GetFinalDestination(asml, hp);                               }
                end
              else
                begin
{$ifdef finaldestdebug}
                  insertllitem(asml,p1,p1.next,tai_comment.Create(
                    strpnew('next label reused')));
{$endif finaldestdebug}
                  l.increfs;
                  { hp no longer refers to its old target: release that
                    reference, like the branch that inserts a new label does }
                  tasmlabel(hp.oper[0]^.sym).decrefs;
                  hp.oper[0]^.sym := l;
                  if not GetFinalDestination(asml, hp,succ(level)) then
                    exit;
                end;
      end;
    GetFinalDestination := true;
  end;
  { Folds the instruction preceding p into p when both operate on the same
    register and have the same operand size. p is expected to have a constant
    as operand 0 and a register as operand 1 (presumably "sub const, reg" -
    TODO confirm against the caller).

      dec reg          : p's constant is increased by 1, the dec is removed
      sub const2, reg  : const2 is added to p's constant, the sub is removed
      add const2, reg  : const2 is subtracted from p's constant, the add is
                         removed; if p's constant becomes 0, p itself is
                         removed as well and p is set to the instruction
                         before it (or the item after it if there is none)

    Returns true only when p itself was removed. Uses the hp1 variable of
    the enclosing procedure as scratch. }
  function DoSubAddOpt(var p: tai): Boolean;
  begin
    DoSubAddOpt := False;
    { nothing to do without a preceding instruction of the same size }
    if not GetLastInstruction(p, hp1) then
      exit;
    if hp1.typ <> ait_instruction then
      exit;
    if taicpu(hp1).opsize <> taicpu(p).opsize then
      exit;
    case taicpu(hp1).opcode Of
      A_DEC:
        begin
          if (taicpu(hp1).oper[0]^.typ = top_reg) and
             (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).LoadConst(0,taicpu(p).oper[0]^.val+1);
              asml.remove(hp1);
              hp1.free;
            end;
        end;
      A_SUB:
        begin
          if (taicpu(hp1).oper[0]^.typ = top_const) and
             (taicpu(hp1).oper[1]^.typ = top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).LoadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
              asml.remove(hp1);
              hp1.free;
            end;
        end;
      A_ADD:
        begin
          if (taicpu(hp1).oper[0]^.typ = top_const) and
             (taicpu(hp1).oper[1]^.typ = top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).LoadConst(0,AWord(int64(taicpu(p).oper[0]^.val)-int64(taicpu(hp1).oper[0]^.val)));
              asml.remove(hp1);
              hp1.free;
              if (taicpu(p).oper[0]^.val = 0) then
                begin
                  { add and sub cancel each other out: drop p too }
                  hp1 := tai(p.next);
                  asml.remove(p);
                  p.free;
                  if not GetLastInstruction(hp1, p) then
                    p := hp1;
                  DoSubAddOpt := True;
                end;
            end;
        end;
    end;
  end;
  545. begin
  546. p := BlockStart;
  547. UsedRegs := [];
  548. while (p <> BlockEnd) Do
  549. begin
  550. UpDateUsedRegs(UsedRegs, tai(p.next));
  551. case p.Typ Of
  552. ait_instruction:
  553. begin
  554. { Handle Jmp Optimizations }
  555. if taicpu(p).is_jmp then
  556. begin
  557. {the following if-block removes all code between a jmp and the next label,
  558. because it can never be executed}
  559. if (taicpu(p).opcode = A_JMP) then
  560. begin
  561. while GetNextInstruction(p, hp1) and
  562. (hp1.typ <> ait_label) do
  563. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  564. begin
  565. asml.remove(hp1);
  566. hp1.free;
  567. end
  568. else break;
  569. end;
  570. { remove jumps to a label coming right after them }
  571. if GetNextInstruction(p, hp1) then
  572. begin
  573. if FindLabel(tasmlabel(taicpu(p).oper[0]^.sym), hp1) and
  574. {$warning FIXME removing the first instruction fails}
  575. (p<>blockstart) then
  576. begin
  577. hp2:=tai(hp1.next);
  578. asml.remove(p);
  579. p.free;
  580. p:=hp2;
  581. continue;
  582. end
  583. else
  584. begin
  585. if hp1.typ = ait_label then
  586. SkipLabels(hp1,hp1);
  587. if (tai(hp1).typ=ait_instruction) and
  588. (taicpu(hp1).opcode=A_JMP) and
  589. GetNextInstruction(hp1, hp2) and
  590. FindLabel(tasmlabel(taicpu(p).oper[0]^.sym), hp2) then
  591. begin
  592. if taicpu(p).opcode=A_Jcc then
  593. begin
  594. taicpu(p).condition:=inverse_cond[taicpu(p).condition];
  595. tai_label(hp2).l.decrefs;
  596. taicpu(p).oper[0]^.sym:=taicpu(hp1).oper[0]^.sym;
  597. taicpu(p).oper[0]^.sym.increfs;
  598. asml.remove(hp1);
  599. hp1.free;
  600. GetFinalDestination(asml, taicpu(p),0);
  601. end
  602. else
  603. begin
  604. GetFinalDestination(asml, taicpu(p),0);
  605. p:=tai(p.next);
  606. continue;
  607. end;
  608. end
  609. else
  610. GetFinalDestination(asml, taicpu(p),0);
  611. end;
  612. end;
  613. end
  614. else
  615. { All other optimizes }
  616. begin
  617. for l := 0 to taicpu(p).ops-1 Do
  618. if (taicpu(p).oper[l]^.typ = top_ref) then
  619. With taicpu(p).oper[l]^.ref^ Do
  620. begin
  621. if (base = NR_NO) and
  622. (index <> NR_NO) and
  623. (scalefactor in [0,1]) then
  624. begin
  625. base := index;
  626. index := NR_NO
  627. end
  628. end;
  629. case taicpu(p).opcode Of
  630. A_AND:
  631. begin
  632. if (taicpu(p).oper[0]^.typ = top_const) and
  633. (taicpu(p).oper[1]^.typ = top_reg) and
  634. GetNextInstruction(p, hp1) and
  635. (tai(hp1).typ = ait_instruction) and
  636. (taicpu(hp1).opcode = A_AND) and
  637. (taicpu(hp1).oper[0]^.typ = top_const) and
  638. (taicpu(hp1).oper[1]^.typ = top_reg) and
  639. (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) then
  640. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  641. begin
  642. taicpu(p).LoadConst(0,taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  643. asml.remove(hp1);
  644. hp1.free;
  645. end
  646. else
  647. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  648. jump, but only if it's a conditional jump (PFV) }
  649. if (taicpu(p).oper[1]^.typ = top_reg) and
  650. GetNextInstruction(p, hp1) and
  651. (hp1.typ = ait_instruction) and
  652. (taicpu(hp1).is_jmp) and
  653. (taicpu(hp1).opcode<>A_JMP) and
  654. not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then
  655. taicpu(p).opcode := A_TEST;
  656. end;
  657. A_CMP:
  658. begin
  659. if (taicpu(p).oper[0]^.typ = top_const) and
  660. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  661. (taicpu(p).oper[0]^.val = 0) and
  662. GetNextInstruction(p, hp1) and
  663. (hp1.typ = ait_instruction) and
  664. (taicpu(hp1).is_jmp) and
  665. (taicpu(hp1).opcode=A_Jcc) and
  666. (taicpu(hp1).condition in [C_LE,C_BE]) and
  667. GetNextInstruction(hp1,hp2) and
  668. (hp2.typ = ait_instruction) and
  669. (taicpu(hp2).opcode = A_DEC) and
  670. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  671. GetNextInstruction(hp2, hp3) and
  672. (hp3.typ = ait_instruction) and
  673. (taicpu(hp3).is_jmp) and
  674. (taicpu(hp3).opcode = A_JMP) and
  675. GetNextInstruction(hp3, hp4) and
  676. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.sym),hp4) then
  677. begin
  678. taicpu(hp2).Opcode := A_SUB;
  679. taicpu(hp2).Loadoper(1,taicpu(hp2).oper[0]^);
  680. taicpu(hp2).LoadConst(0,1);
  681. taicpu(hp2).ops:=2;
  682. taicpu(hp3).Opcode := A_Jcc;
  683. case taicpu(hp1).condition of
  684. C_LE: taicpu(hp3).condition := C_GE;
  685. C_BE: taicpu(hp3).condition := C_AE;
  686. end;
  687. asml.remove(p);
  688. asml.remove(hp1);
  689. p.free;
  690. hp1.free;
  691. p := hp2;
  692. continue;
  693. end
  694. end;
  695. A_FLD:
  696. begin
  697. if (taicpu(p).oper[0]^.typ = top_reg) and
  698. GetNextInstruction(p, hp1) and
  699. (hp1.typ = Ait_Instruction) and
  700. (taicpu(hp1).oper[0]^.typ = top_reg) and
  701. (taicpu(hp1).oper[1]^.typ = top_reg) and
  702. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  703. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  704. { change to
  705. fld reg fxxx reg,st
  706. fxxxp st, st1 (hp1)
  707. Remark: non commutative operations must be reversed!
  708. }
  709. begin
  710. case taicpu(hp1).opcode Of
  711. A_FMULP,A_FADDP,
  712. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  713. begin
  714. case taicpu(hp1).opcode Of
  715. A_FADDP: taicpu(hp1).opcode := A_FADD;
  716. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  717. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  718. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  719. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  720. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  721. end;
  722. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  723. taicpu(hp1).oper[1]^.reg := NR_ST;
  724. asml.remove(p);
  725. p.free;
  726. p := hp1;
  727. continue;
  728. end;
  729. end;
  730. end
  731. else
  732. if (taicpu(p).oper[0]^.typ = top_ref) and
  733. GetNextInstruction(p, hp2) and
  734. (hp2.typ = Ait_Instruction) and
  735. (taicpu(hp2).ops = 2) and
  736. (taicpu(hp2).oper[0]^.typ = top_reg) and
  737. (taicpu(hp2).oper[1]^.typ = top_reg) and
  738. (taicpu(p).opsize in [S_FS, S_FL]) and
  739. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  740. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  741. if GetLastInstruction(p, hp1) and
  742. (hp1.typ = Ait_Instruction) and
  743. ((taicpu(hp1).opcode = A_FLD) or
  744. (taicpu(hp1).opcode = A_FST)) and
  745. (taicpu(hp1).opsize = taicpu(p).opsize) and
  746. (taicpu(hp1).oper[0]^.typ = top_ref) and
  747. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  748. if ((taicpu(hp2).opcode = A_FMULP) or
  749. (taicpu(hp2).opcode = A_FADDP)) then
  750. { change to
  751. fld/fst mem1 (hp1) fld/fst mem1
  752. fld mem1 (p) fadd/
  753. faddp/ fmul st, st
  754. fmulp st, st1 (hp2) }
  755. begin
  756. asml.remove(p);
  757. p.free;
  758. p := hp1;
  759. if (taicpu(hp2).opcode = A_FADDP) then
  760. taicpu(hp2).opcode := A_FADD
  761. else
  762. taicpu(hp2).opcode := A_FMUL;
  763. taicpu(hp2).oper[1]^.reg := NR_ST;
  764. end
  765. else
  766. { change to
  767. fld/fst mem1 (hp1) fld/fst mem1
  768. fld mem1 (p) fld st}
  769. begin
  770. taicpu(p).changeopsize(S_FL);
  771. taicpu(p).loadreg(0,NR_ST);
  772. end
  773. else
  774. begin
  775. case taicpu(hp2).opcode Of
  776. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  777. { change to
  778. fld/fst mem1 (hp1) fld/fst mem1
  779. fld mem2 (p) fxxx mem2
  780. fxxxp st, st1 (hp2) }
  781. begin
  782. case taicpu(hp2).opcode Of
  783. A_FADDP: taicpu(p).opcode := A_FADD;
  784. A_FMULP: taicpu(p).opcode := A_FMUL;
  785. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  786. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  787. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  788. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  789. end;
  790. asml.remove(hp2);
  791. hp2.free;
  792. end
  793. end
  794. end
  795. end;
  796. A_FSTP,A_FISTP:
  797. if doFpuLoadStoreOpt(asmL,p) then
  798. continue;
  799. A_LEA:
  800. begin
  801. {removes seg register prefixes from LEA operations, as they
  802. don't do anything}
  803. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  804. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  805. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  806. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  807. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  808. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  809. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  810. (taicpu(p).oper[0]^.ref^.offset = 0) then
  811. begin
  812. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  813. taicpu(p).oper[1]^.reg);
  814. InsertLLItem(asml,p.previous,p.next, hp1);
  815. p.free;
  816. p := hp1;
  817. continue;
  818. end
  819. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  820. begin
  821. hp1 := tai(p.Next);
  822. asml.remove(p);
  823. p.free;
  824. p := hp1;
  825. continue;
  826. end
  827. else
  828. with taicpu(p).oper[0]^.ref^ do
  829. if (base = taicpu(p).oper[1]^.reg) then
  830. begin
  831. l := offset;
  832. if (l=1) then
  833. begin
  834. taicpu(p).opcode := A_INC;
  835. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  836. taicpu(p).ops := 1
  837. end
  838. else if (l=-1) then
  839. begin
  840. taicpu(p).opcode := A_DEC;
  841. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  842. taicpu(p).ops := 1;
  843. end
  844. else
  845. begin
  846. taicpu(p).opcode := A_ADD;
  847. taicpu(p).loadconst(0,aword(l));
  848. end;
  849. end;
  850. end;
  851. A_MOV:
  852. begin
  853. TmpUsedRegs := UsedRegs;
  854. if (taicpu(p).oper[1]^.typ = top_reg) and
  855. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  856. GetNextInstruction(p, hp1) and
  857. (tai(hp1).typ = ait_instruction) and
  858. (taicpu(hp1).opcode = A_MOV) and
  859. (taicpu(hp1).oper[0]^.typ = top_reg) and
  860. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  861. begin
  862. {we have "mov x, %treg; mov %treg, y}
  863. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  864. {we've got "mov x, %treg; mov %treg, y; with %treg is not used after }
  865. case taicpu(p).oper[0]^.typ Of
  866. top_reg:
  867. begin
  868. { change "mov %reg, %treg; mov %treg, y"
  869. to "mov %reg, y" }
  870. taicpu(p).LoadOper(1,taicpu(hp1).oper[1]^);
  871. asml.remove(hp1);
  872. hp1.free;
  873. continue;
  874. end;
  875. top_ref:
  876. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  877. begin
  878. { change "mov mem, %treg; mov %treg, %reg"
  879. to "mov mem, %reg" }
  880. taicpu(p).Loadoper(1,taicpu(hp1).oper[1]^);
  881. asml.remove(hp1);
  882. hp1.free;
  883. continue;
  884. end;
  885. end
  886. end
  887. else
  888. {Change "mov %reg1, %reg2; xxx %reg2, ???" to
  889. "mov %reg1, %reg2; xxx %reg1, ???" to avoid a write/read
  890. penalty}
  891. if (taicpu(p).oper[0]^.typ = top_reg) and
  892. (taicpu(p).oper[1]^.typ = top_reg) and
  893. GetNextInstruction(p,hp1) and
  894. (tai(hp1).typ = ait_instruction) and
  895. (taicpu(hp1).ops >= 1) and
  896. (taicpu(hp1).oper[0]^.typ = top_reg) and
  897. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  898. {we have "mov %reg1, %reg2; XXX %reg2, ???"}
  899. begin
  900. if ((taicpu(hp1).opcode = A_OR) or
  901. (taicpu(hp1).opcode = A_TEST)) and
  902. (taicpu(hp1).oper[1]^.typ = top_reg) and
  903. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  904. {we have "mov %reg1, %reg2; test/or %reg2, %reg2"}
  905. begin
  906. TmpUsedRegs := UsedRegs;
  907. { reg1 will be used after the first instruction, }
  908. { so update the allocation info }
  909. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1);
  910. if GetNextInstruction(hp1, hp2) and
  911. (hp2.typ = ait_instruction) and
  912. taicpu(hp2).is_jmp and
  913. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  914. { change "mov %reg1, %reg2; test/or %reg2, %reg2; jxx" to
  915. "test %reg1, %reg1; jxx" }
  916. begin
  917. taicpu(hp1).Loadoper(0,taicpu(p).oper[0]^);
  918. taicpu(hp1).Loadoper(1,taicpu(p).oper[0]^);
  919. asml.remove(p);
  920. p.free;
  921. p := hp1;
  922. continue
  923. end
  924. else
  925. {change "mov %reg1, %reg2; test/or %reg2, %reg2" to
  926. "mov %reg1, %reg2; test/or %reg1, %reg1"}
  927. begin
  928. taicpu(hp1).Loadoper(0,taicpu(p).oper[0]^);
  929. taicpu(hp1).Loadoper(1,taicpu(p).oper[0]^);
  930. end;
  931. end
  932. { else
  933. if (taicpu(p.next)^.opcode
  934. in [A_PUSH, A_OR, A_XOR, A_AND, A_TEST])}
  935. {change "mov %reg1, %reg2; push/or/xor/... %reg2, ???" to
  936. "mov %reg1, %reg2; push/or/xor/... %reg1, ???"}
  937. end
  938. else
  939. {leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  940. x >= RetOffset) as it doesn't do anything (it writes either to a
  941. parameter or to the temporary storage room for the function
  942. result)}
  943. if GetNextInstruction(p, hp1) and
  944. (tai(hp1).typ = ait_instruction) then
  945. if ((taicpu(hp1).opcode = A_LEAVE) or
  946. (taicpu(hp1).opcode = A_RET)) and
  947. (taicpu(p).oper[1]^.typ = top_ref) and
  948. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  949. (taicpu(p).oper[1]^.ref^.offset >= tvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) and
  950. (taicpu(p).oper[1]^.ref^.index = NR_NO) and
  951. (taicpu(p).oper[0]^.typ = top_reg) then
  952. begin
  953. asml.remove(p);
  954. p.free;
  955. p := hp1;
  956. RemoveLastDeallocForFuncRes(asmL,p);
  957. end
  958. else
  959. if (taicpu(p).oper[0]^.typ = top_reg) and
  960. (taicpu(p).oper[1]^.typ = top_ref) and
  961. (taicpu(p).opsize = taicpu(hp1).opsize) and
  962. (taicpu(hp1).opcode = A_CMP) and
  963. (taicpu(hp1).oper[1]^.typ = top_ref) and
  964. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  965. {change "mov reg1, mem1; cmp x, mem1" to "mov reg1, mem1; cmp x, reg1"}
  966. begin
  967. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  968. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1);
  969. end;
  970. { Next instruction is also a MOV ? }
  971. if GetNextInstruction(p, hp1) and
  972. (tai(hp1).typ = ait_instruction) and
  973. (taicpu(hp1).opcode = A_MOV) and
  974. (taicpu(hp1).opsize = taicpu(p).opsize) then
  975. begin
  976. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  977. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  978. {mov reg1, mem1 or mov mem1, reg1
  979. mov mem2, reg2 mov reg2, mem2}
  980. begin
  981. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  982. {mov reg1, mem1 or mov mem1, reg1
  983. mov mem2, reg1 mov reg2, mem1}
  984. begin
  985. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  986. { Removes the second statement from
  987. mov reg1, mem1/reg2
  988. mov mem1/reg2, reg1 }
  989. begin
  990. if (taicpu(p).oper[0]^.typ = top_reg) then
  991. AllocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1);
  992. asml.remove(hp1);
  993. hp1.free;
  994. end
  995. else
  996. begin
  997. TmpUsedRegs := UsedRegs;
  998. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  999. if (taicpu(p).oper[1]^.typ = top_ref) and
  1000. { mov reg1, mem1
  1001. mov mem2, reg1 }
  1002. GetNextInstruction(hp1, hp2) and
  1003. (hp2.typ = ait_instruction) and
  1004. (taicpu(hp2).opcode = A_CMP) and
  1005. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1006. (taicpu(hp2).oper[0]^.typ = TOp_Ref) and
  1007. (taicpu(hp2).oper[1]^.typ = TOp_Reg) and
  1008. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(p).oper[1]^.ref^) and
  1009. (taicpu(hp2).oper[1]^.reg= taicpu(p).oper[0]^.reg) and
  1010. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1011. { change to
  1012. mov reg1, mem1 mov reg1, mem1
  1013. mov mem2, reg1 cmp reg1, mem2
  1014. cmp mem1, reg1 }
  1015. begin
  1016. asml.remove(hp2);
  1017. hp2.free;
  1018. taicpu(hp1).opcode := A_CMP;
  1019. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1020. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1021. end;
  1022. end;
  1023. end
  1024. else
  1025. begin
  1026. tmpUsedRegs := UsedRegs;
  1027. if GetNextInstruction(hp1, hp2) and
  1028. (taicpu(p).oper[0]^.typ = top_ref) and
  1029. (taicpu(p).oper[1]^.typ = top_reg) and
  1030. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1031. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  1032. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1033. (tai(hp2).typ = ait_instruction) and
  1034. (taicpu(hp2).opcode = A_MOV) and
  1035. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1036. (taicpu(hp2).oper[1]^.typ = top_reg) and
  1037. (taicpu(hp2).oper[0]^.typ = top_ref) and
  1038. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1039. if not regInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^) and
  1040. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1041. { mov mem1, %reg1
  1042. mov %reg1, mem2
  1043. mov mem2, reg2
  1044. to:
  1045. mov mem1, reg2
  1046. mov reg2, mem2}
  1047. begin
  1048. AllocRegBetween(asmL,taicpu(hp2).oper[1]^.reg,p,hp2);
  1049. taicpu(p).Loadoper(1,taicpu(hp2).oper[1]^);
  1050. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1051. asml.remove(hp2);
  1052. hp2.free;
  1053. end
  1054. else
  1055. if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1056. not(RegInRef(getsupreg(taicpu(p).oper[1]^.reg),taicpu(p).oper[0]^.ref^)) and
  1057. not(RegInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^)) then
  1058. { mov mem1, reg1 mov mem1, reg1
  1059. mov reg1, mem2 mov reg1, mem2
  1060. mov mem2, reg2 mov mem2, reg1
  1061. to: to:
  1062. mov mem1, reg1 mov mem1, reg1
  1063. mov mem1, reg2 mov reg1, mem2
  1064. mov reg1, mem2
  1065. or (if mem1 depends on reg1
  1066. and/or if mem2 depends on reg2)
  1067. to:
  1068. mov mem1, reg1
  1069. mov reg1, mem2
  1070. mov reg1, reg2
  1071. }
  1072. begin
  1073. taicpu(hp1).LoadRef(0,taicpu(p).oper[0]^.ref^);
  1074. taicpu(hp1).LoadReg(1,taicpu(hp2).oper[1]^.reg);
  1075. taicpu(hp2).LoadRef(1,taicpu(hp2).oper[0]^.ref^);
  1076. taicpu(hp2).LoadReg(0,taicpu(p).oper[1]^.reg);
  1077. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2);
  1078. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1079. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1080. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.base,p,hp2);
  1081. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1082. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1083. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.index,p,hp2);
  1084. end
  1085. else
  1086. if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1087. begin
  1088. taicpu(hp2).LoadReg(0,taicpu(hp1).Oper[0]^.reg);
  1089. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2);
  1090. end
  1091. else
  1092. begin
  1093. asml.remove(hp2);
  1094. hp2.free;
  1095. end
  1096. end
  1097. end
  1098. else
  1099. (* {movl [mem1],reg1
  1100. movl [mem1],reg2
  1101. to:
  1102. movl [mem1],reg1
  1103. movl reg1,reg2 }
  1104. if (taicpu(p).oper[0]^.typ = top_ref) and
  1105. (taicpu(p).oper[1]^.typ = top_reg) and
  1106. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1107. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1108. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1109. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1110. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1111. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1112. taicpu(hp1).LoadReg(0,taicpu(p).oper[1]^.reg)
  1113. else*)
  1114. { movl const1,[mem1]
  1115. movl [mem1],reg1
  1116. to:
  1117. movl const1,reg1
  1118. movl reg1,[mem1] }
  1119. if (taicpu(p).oper[0]^.typ = top_const) and
  1120. (taicpu(p).oper[1]^.typ = top_ref) and
  1121. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1122. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1123. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1124. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) then
  1125. begin
  1126. allocregbetween(asml,taicpu(hp1).oper[1]^.reg,p,hp1);
  1127. { allocregbetween doesn't insert this because at }
  1128. { this time, no regalloc info is available in }
  1129. { the optinfo field, so do it manually (JM) }
  1130. hp2 := tai_regalloc.Alloc(taicpu(hp1).oper[1]^.reg);
  1131. insertllitem(asml,p.previous,p,hp2);
  1132. taicpu(hp1).LoadReg(0,taicpu(hp1).oper[1]^.reg);
  1133. taicpu(hp1).LoadRef(1,taicpu(p).oper[1]^.ref^);
  1134. taicpu(p).LoadReg(1,taicpu(hp1).oper[0]^.reg);
  1135. end
  1136. end;
  1137. end;
  1138. A_MOVZX:
  1139. begin
  1140. {removes superfluous And's after movzx's}
  1141. if (taicpu(p).oper[1]^.typ = top_reg) and
  1142. GetNextInstruction(p, hp1) and
  1143. (tai(hp1).typ = ait_instruction) and
  1144. (taicpu(hp1).opcode = A_AND) and
  1145. (taicpu(hp1).oper[0]^.typ = top_const) and
  1146. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1147. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1148. case taicpu(p).opsize Of
  1149. S_BL, S_BW:
  1150. if (taicpu(hp1).oper[0]^.val = $ff) then
  1151. begin
  1152. asml.remove(hp1);
  1153. hp1.free;
  1154. end;
  1155. S_WL:
  1156. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1157. begin
  1158. asml.remove(hp1);
  1159. hp1.free;
  1160. end;
  1161. end;
  1162. {changes some movzx constructs to faster synonyms (all examples
  1163. are given with eax/ax, but are also valid for other registers)}
  1164. if (taicpu(p).oper[1]^.typ = top_reg) then
  1165. if (taicpu(p).oper[0]^.typ = top_reg) then
  1166. case taicpu(p).opsize of
  1167. S_BW:
  1168. begin
  1169. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1170. not(CS_LittleSize in aktglobalswitches) then
  1171. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1172. begin
  1173. taicpu(p).opcode := A_AND;
  1174. taicpu(p).changeopsize(S_W);
  1175. taicpu(p).LoadConst(0,$ff);
  1176. end
  1177. else if GetNextInstruction(p, hp1) and
  1178. (tai(hp1).typ = ait_instruction) and
  1179. (taicpu(hp1).opcode = A_AND) and
  1180. (taicpu(hp1).oper[0]^.typ = top_const) and
  1181. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1182. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1183. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1184. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1185. begin
  1186. taicpu(p).opcode := A_MOV;
  1187. taicpu(p).changeopsize(S_W);
  1188. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1189. taicpu(hp1).LoadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1190. end;
  1191. end;
  1192. S_BL:
  1193. begin
  1194. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1195. not(CS_LittleSize in aktglobalswitches) then
  1196. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1197. begin
  1198. taicpu(p).opcode := A_AND;
  1199. taicpu(p).changeopsize(S_L);
  1200. taicpu(p).loadconst(0,$ff)
  1201. end
  1202. else if GetNextInstruction(p, hp1) and
  1203. (tai(hp1).typ = ait_instruction) and
  1204. (taicpu(hp1).opcode = A_AND) and
  1205. (taicpu(hp1).oper[0]^.typ = top_const) and
  1206. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1207. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1208. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1209. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1210. begin
  1211. taicpu(p).opcode := A_MOV;
  1212. taicpu(p).changeopsize(S_L);
  1213. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1214. taicpu(hp1).LoadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1215. end
  1216. end;
  1217. S_WL:
  1218. begin
  1219. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1220. not(CS_LittleSize in aktglobalswitches) then
  1221. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1222. begin
  1223. taicpu(p).opcode := A_AND;
  1224. taicpu(p).changeopsize(S_L);
  1225. taicpu(p).LoadConst(0,$ffff);
  1226. end
  1227. else if GetNextInstruction(p, hp1) and
  1228. (tai(hp1).typ = ait_instruction) and
  1229. (taicpu(hp1).opcode = A_AND) and
  1230. (taicpu(hp1).oper[0]^.typ = top_const) and
  1231. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1232. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1233. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1234. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1235. begin
  1236. taicpu(p).opcode := A_MOV;
  1237. taicpu(p).changeopsize(S_L);
  1238. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1239. taicpu(hp1).LoadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1240. end;
  1241. end;
  1242. end
  1243. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1244. begin
  1245. if GetNextInstruction(p, hp1) and
  1246. (tai(hp1).typ = ait_instruction) and
  1247. (taicpu(hp1).opcode = A_AND) and
  1248. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1249. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1250. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1251. begin
  1252. taicpu(p).opcode := A_MOV;
  1253. case taicpu(p).opsize Of
  1254. S_BL:
  1255. begin
  1256. taicpu(p).changeopsize(S_L);
  1257. taicpu(hp1).LoadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1258. end;
  1259. S_WL:
  1260. begin
  1261. taicpu(p).changeopsize(S_L);
  1262. taicpu(hp1).LoadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1263. end;
  1264. S_BW:
  1265. begin
  1266. taicpu(p).changeopsize(S_W);
  1267. taicpu(hp1).LoadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1268. end;
  1269. end;
  1270. end;
  1271. end;
  1272. end;
  1273. (* should not be generated anymore by the current code generator
  1274. A_POP:
  1275. begin
  1276. if target_info.system=system_i386_go32v2 then
  1277. begin
  1278. { Transform a series of pop/pop/pop/push/push/push to }
  1279. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1280. { because I'm not sure whether they can cope with }
  1281. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1282. { such a problem when using esp as frame pointer (JM) }
  1283. if (taicpu(p).oper[0]^.typ = top_reg) then
  1284. begin
  1285. hp1 := p;
  1286. hp2 := p;
  1287. l := 0;
  1288. while getNextInstruction(hp1,hp1) and
  1289. (hp1.typ = ait_instruction) and
  1290. (taicpu(hp1).opcode = A_POP) and
  1291. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1292. begin
  1293. hp2 := hp1;
  1294. inc(l,4);
  1295. end;
  1296. getLastInstruction(p,hp3);
  1297. l1 := 0;
  1298. while (hp2 <> hp3) and
  1299. assigned(hp1) and
  1300. (hp1.typ = ait_instruction) and
  1301. (taicpu(hp1).opcode = A_PUSH) and
  1302. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1303. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1304. begin
  1305. { change it to a two op operation }
  1306. taicpu(hp2).oper[1]^.typ:=top_none;
  1307. taicpu(hp2).ops:=2;
  1308. taicpu(hp2).opcode := A_MOV;
  1309. taicpu(hp2).Loadoper(1,taicpu(hp1).oper[0]^);
  1310. reference_reset(tmpref);
  1311. tmpRef.base.enum:=R_INTREGISTER;
  1312. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1313. convert_register_to_enum(tmpref.base);
  1314. tmpRef.offset := l;
  1315. taicpu(hp2).loadRef(0,tmpRef);
  1316. hp4 := hp1;
  1317. getNextInstruction(hp1,hp1);
  1318. asml.remove(hp4);
  1319. hp4.free;
  1320. getLastInstruction(hp2,hp2);
  1321. dec(l,4);
  1322. inc(l1);
  1323. end;
  1324. if l <> -4 then
  1325. begin
  1326. inc(l,4);
  1327. for l1 := l1 downto 1 do
  1328. begin
  1329. getNextInstruction(hp2,hp2);
  1330. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1331. end
  1332. end
  1333. end
  1334. end
  1335. else
  1336. begin
  1337. if (taicpu(p).oper[0]^.typ = top_reg) and
  1338. GetNextInstruction(p, hp1) and
  1339. (tai(hp1).typ=ait_instruction) and
  1340. (taicpu(hp1).opcode=A_PUSH) and
  1341. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1342. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1343. begin
  1344. { change it to a two op operation }
  1345. taicpu(p).oper[1]^.typ:=top_none;
  1346. taicpu(p).ops:=2;
  1347. taicpu(p).opcode := A_MOV;
  1348. taicpu(p).Loadoper(1,taicpu(p).oper[0]^);
  1349. reference_reset(tmpref);
  1350. TmpRef.base.enum := R_ESP;
  1351. taicpu(p).LoadRef(0,TmpRef);
  1352. asml.remove(hp1);
  1353. hp1.free;
  1354. end;
  1355. end;
  1356. end;
  1357. *)
  1358. A_PUSH:
  1359. begin
  1360. if (taicpu(p).opsize = S_W) and
  1361. (taicpu(p).oper[0]^.typ = Top_Const) and
  1362. GetNextInstruction(p, hp1) and
  1363. (tai(hp1).typ = ait_instruction) and
  1364. (taicpu(hp1).opcode = A_PUSH) and
  1365. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1366. (taicpu(hp1).opsize = S_W) then
  1367. begin
  1368. taicpu(p).changeopsize(S_L);
  1369. taicpu(p).LoadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1370. asml.remove(hp1);
  1371. hp1.free;
  1372. end;
  1373. end;
  1374. A_SHL, A_SAL:
  1375. begin
  1376. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1377. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1378. (taicpu(p).opsize = S_L) and
  1379. (taicpu(p).oper[0]^.val <= 3) then
  1380. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1381. begin
  1382. TmpBool1 := True; {should we check the next instruction?}
  1383. TmpBool2 := False; {have we found an add/sub which could be
  1384. integrated in the lea?}
  1385. reference_reset(tmpref);
  1386. TmpRef.index := taicpu(p).oper[1]^.reg;
  1387. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1388. while TmpBool1 and
  1389. GetNextInstruction(p, hp1) and
  1390. (tai(hp1).typ = ait_instruction) and
  1391. ((((taicpu(hp1).opcode = A_ADD) or
  1392. (taicpu(hp1).opcode = A_SUB)) and
  1393. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1394. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1395. (((taicpu(hp1).opcode = A_INC) or
  1396. (taicpu(hp1).opcode = A_DEC)) and
  1397. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1398. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) Do
  1399. begin
  1400. TmpBool1 := False;
  1401. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1402. begin
  1403. TmpBool1 := True;
  1404. TmpBool2 := True;
  1405. case taicpu(hp1).opcode of
  1406. A_ADD:
  1407. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1408. A_SUB:
  1409. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1410. end;
  1411. asml.remove(hp1);
  1412. hp1.free;
  1413. end
  1414. else
  1415. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1416. (((taicpu(hp1).opcode = A_ADD) and
  1417. (TmpRef.base = NR_NO)) or
  1418. (taicpu(hp1).opcode = A_INC) or
  1419. (taicpu(hp1).opcode = A_DEC)) then
  1420. begin
  1421. TmpBool1 := True;
  1422. TmpBool2 := True;
  1423. case taicpu(hp1).opcode of
  1424. A_ADD:
  1425. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1426. A_INC:
  1427. inc(TmpRef.offset);
  1428. A_DEC:
  1429. dec(TmpRef.offset);
  1430. end;
  1431. asml.remove(hp1);
  1432. hp1.free;
  1433. end;
  1434. end;
  1435. if TmpBool2 or
  1436. ((aktoptprocessor < ClassPentium2) and
  1437. (taicpu(p).oper[0]^.val <= 3) and
  1438. not(CS_LittleSize in aktglobalswitches)) then
  1439. begin
  1440. if not(TmpBool2) and
  1441. (taicpu(p).oper[0]^.val = 1) then
  1442. begin
  1443. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1444. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1445. end
  1446. else
  1447. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1448. taicpu(p).oper[1]^.reg);
  1449. InsertLLItem(asml,p.previous, p.next, hp1);
  1450. p.free;
  1451. p := hp1;
  1452. end;
  1453. end
  1454. else
  1455. if (aktoptprocessor < ClassPentium2) and
  1456. (taicpu(p).oper[0]^.typ = top_const) and
  1457. (taicpu(p).oper[1]^.typ = top_reg) then
  1458. if (taicpu(p).oper[0]^.val = 1) then
  1459. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1460. but faster on a 486, and pairable in both U and V pipes on the Pentium
  1461. (unlike shl, which is only pairable in the U pipe)}
  1462. begin
  1463. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1464. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1465. InsertLLItem(asml,p.previous, p.next, hp1);
  1466. p.free;
  1467. p := hp1;
  1468. end
  1469. else if (taicpu(p).opsize = S_L) and
  1470. (taicpu(p).oper[0]^.val<= 3) then
  1471. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1472. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1473. begin
  1474. reference_reset(tmpref);
  1475. TmpRef.index := taicpu(p).oper[1]^.reg;
  1476. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1477. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1478. InsertLLItem(asml,p.previous, p.next, hp1);
  1479. p.free;
  1480. p := hp1;
  1481. end
  1482. end;
  1483. A_SETcc :
  1484. { changes
  1485. setcc (funcres) setcc reg
  1486. movb (funcres), reg to leave/ret
  1487. leave/ret }
  1488. begin
  1489. if (taicpu(p).oper[0]^.typ = top_ref) and
  1490. GetNextInstruction(p, hp1) and
  1491. GetNextInstruction(hp1, hp2) and
  1492. (hp2.typ = ait_instruction) and
  1493. ((taicpu(hp2).opcode = A_LEAVE) or
  1494. (taicpu(hp2).opcode = A_RET)) and
  1495. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1496. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1497. (taicpu(p).oper[0]^.ref^.offset >= tvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) and
  1498. (hp1.typ = ait_instruction) and
  1499. (taicpu(hp1).opcode = A_MOV) and
  1500. (taicpu(hp1).opsize = S_B) and
  1501. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1502. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1503. begin
  1504. taicpu(p).LoadReg(0,taicpu(hp1).oper[1]^.reg);
  1505. asml.remove(hp1);
  1506. hp1.free;
  1507. end
  1508. end;
  1509. A_SUB:
  1510. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1511. { * change "sub/add const1, reg" or "dec reg" followed by
  1512. "sub const2, reg" to one "sub ..., reg" }
  1513. begin
  1514. if (taicpu(p).oper[0]^.typ = top_const) and
  1515. (taicpu(p).oper[1]^.typ = top_reg) then
  1516. if (taicpu(p).oper[0]^.val = 2) and
  1517. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1518. { Don't do the sub/push optimization if the sub }
  1519. { comes from setting up the stack frame (JM) }
  1520. (not getLastInstruction(p,hp1) or
  1521. (hp1.typ <> ait_instruction) or
  1522. (taicpu(hp1).opcode <> A_MOV) or
  1523. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1524. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1525. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1526. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1527. begin
  1528. hp1 := tai(p.next);
  1529. while Assigned(hp1) and
  1530. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1531. not regReadByInstruction(RS_ESP,hp1) and
  1532. not regModifiedByInstruction(RS_ESP,hp1) do
  1533. hp1 := tai(hp1.next);
  1534. if Assigned(hp1) and
  1535. (tai(hp1).typ = ait_instruction) and
  1536. (taicpu(hp1).opcode = A_PUSH) and
  1537. (taicpu(hp1).opsize = S_W) then
  1538. begin
  1539. taicpu(hp1).changeopsize(S_L);
  1540. if taicpu(hp1).oper[0]^.typ=top_reg then
  1541. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1542. hp1 := tai(p.next);
  1543. asml.remove(p);
  1544. p.free;
  1545. p := hp1;
  1546. continue
  1547. end;
  1548. if DoSubAddOpt(p) then
  1549. continue;
  1550. end
  1551. else if DoSubAddOpt(p) then
  1552. continue
  1553. end;
  1554. end;
  1555. end; { if is_jmp }
  1556. end;
  1557. end;
  1558. updateUsedRegs(UsedRegs,p);
  1559. p:=tai(p.next);
  1560. end;
  1561. end;
  { Tells whether hp1 is an arithmetic/logic instruction that updates "reg"
    in place, in a form whose destination can be swapped for a memory
    reference (see the "mov (ref),reg; op ...,reg; mov reg,(ref)" folding in
    PeepHoleOptPass2).

    Accepted forms:
      add/sub/or/xor/and/shl/shr/sar  $const, %reg
      add/sub/or/xor/and/shl/shr/sar  %src,   %reg   (%src must not alias %reg)
      inc/dec                         %reg

    hp1 : the candidate instruction
    reg : the register that has to be the destination operand of hp1

    Returns true if hp1 matches one of the forms above, false otherwise
    (including for every opcode not listed). }
  function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  begin
    isFoldableArithOp := False;
    case hp1.opcode of
      A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
        isFoldableArithOp :=
          ((hp1.oper[0]^.typ = top_const) or
           ((hp1.oper[0]^.typ = top_reg) and
            { compare the super registers, not the full register numbers: }
            { in e.g. "shl %cl, %ecx" the source is a sub register of the }
            { destination, so its value comes from the load that the      }
            { folding removes; such an instruction must not be folded     }
            (getsupreg(hp1.oper[0]^.reg) <> getsupreg(reg)))) and
          (hp1.oper[1]^.typ = top_reg) and
          (hp1.oper[1]^.reg = reg);
      A_INC,A_DEC:
        { single operand forms: the only operand must be reg itself }
        isFoldableArithOp :=
          (hp1.oper[0]^.typ = top_reg) and
          (hp1.oper[0]^.reg = reg);
    end;
  end;
  { Second peephole pass: walks the list entries from BlockStart up to (but
    not including) BlockEnd and rewrites a few instruction patterns in asml.

    asml       : the assembler list the instructions belong to; instructions
                 are removed from it in place
    BlockStart : first list entry to examine
    BlockEnd   : entry at which the walk stops (not examined itself)

    Patterns handled:
      * (only with USECMOV) "jcc xxx; <movs>; xxx:" -> conditional moves
      * fstp/fistp: delegated to doFpuLoadStoreOpt
      * "mov reg1,reg2; imul y,reg2"        -> "imul y,reg1,reg2"
      * "mov reg1,reg2; mov/zx/sx (reg2,..),reg2"
                                            -> "mov/zx/sx (reg1,..),reg2"
      * "mov (ref),reg; op x,reg; mov reg,(ref)" with reg unused afterwards
                                            -> "op x,(ref)" }
  procedure PeepHoleOptPass2(asml: taasmoutput; BlockStart, BlockEnd: tai);

    { Returns true if p is assigned and is a 16 or 32 bit "mov" whose source
      is a register or a reference and whose destination is a register. }
    function CanBeCMOV(p : tai) : boolean;
      begin
        CanBeCMOV:=assigned(p) and (p.typ=ait_instruction) and
          (taicpu(p).opcode=A_MOV) and
          (taicpu(p).opsize in [S_L,S_W]) and
          (taicpu(p).oper[0]^.typ in [top_reg,top_ref]) and
          (taicpu(p).oper[1]^.typ in [top_reg]);
      end;

  var
    p,hp1,hp2: tai;
  {$ifdef USECMOV}
    l : longint;
    condition : tasmcond;
    hp3: tai;
  {$endif USECMOV}
    UsedRegs, TmpUsedRegs: TRegSet;
  begin
    p := BlockStart;
    UsedRegs := [];
    while (p <> BlockEnd) Do
      begin
        UpdateUsedRegs(UsedRegs, tai(p.next));
        case p.Typ Of
          Ait_Instruction:
            begin
              case taicpu(p).opcode Of
  {$ifdef USECMOV}
                A_Jcc:
                  if (aktspecificoptprocessor>=ClassPentium2) then
                    begin
                      { check for
                            jCC   xxx
                            <several movs>
                         xxx:
                      }
                      { l counts the movs following the jump that could be
                        turned into cmovs }
                      l:=0;
                      GetNextInstruction(p, hp1);
                      while assigned(hp1) and
                        CanBeCMOV(hp1) do
                        begin
                          inc(l);
                          GetNextInstruction(hp1,hp1);
                        end;
                      if assigned(hp1) then
                        begin
                          if FindLabel(tasmlabel(taicpu(p).oper[0]^.sym),hp1) then
                            begin
                              if (l<=4) and (l>0) then
                                begin
                                  { the movs must execute when the jump would
                                    NOT have been taken, hence the inverse
                                    condition }
                                  condition:=inverse_cond[taicpu(p).condition];
                                  GetNextInstruction(p,hp1);
                                  asml.remove(p);
                                  p.free;
                                  p:=hp1;
                                  repeat
                                    taicpu(hp1).opcode:=A_CMOVcc;
                                    taicpu(hp1).condition:=condition;
                                    GetNextInstruction(hp1,hp1);
                                  until not(assigned(hp1)) or
                                    not(CanBeCMOV(hp1));
                                  { NOTE(review): the loop above can also end
                                    with hp1 = nil, which is not checked
                                    before the removal below -- confirm }
                                  asml.remove(hp1);
                                  hp1.free;
                                  continue;
                                end;
                            end
                          else
                            begin
                              { check further for
                                     jCC   xxx
                                     <several movs>
                                     jmp   yyy
                             xxx:
                                     <several movs>
                             yyy:
                              }
                              { hp2 points to jmp yyy }
                              hp2:=hp1;
                              { skip hp1 to xxx }
                              GetNextInstruction(hp1, hp1);
                              if assigned(hp2) and
                                assigned(hp1) and
                                (l<=3) and
                                (hp2.typ=ait_instruction) and
                                (taicpu(hp2).is_jmp) and
                                (taicpu(hp2).condition=C_None) and
                                FindLabel(tasmlabel(taicpu(p).oper[0]^.sym),hp1) then
                                begin
                                  { count the movs of the second sequence;
                                    the rewrite that would use this count is
                                    disabled (see the commented code below) }
                                  l:=0;
                                  while assigned(hp1) and
                                    CanBeCMOV(hp1) do
                                    begin
                                      inc(l);
                                      GetNextInstruction(hp1, hp1);
                                    end;
                                end;
                              {
                              if assigned(hp1) and
                                FindLabel(tasmlabel(taicpu(hp2).oper[0]^.sym),hp1) then
                                begin
                                  condition:=inverse_cond[taicpu(p).condition];
                                  GetNextInstruction(p,hp1);
                                  asml.remove(p);
                                  p.free;
                                  p:=hp1;
                                  repeat
                                    taicpu(hp1).opcode:=A_CMOVcc;
                                    taicpu(hp1).condition:=condition;
                                    GetNextInstruction(hp1,hp1);
                                  until not(assigned(hp1)) or
                                    not(CanBeCMOV(hp1));
                                  hp2:=hp1.next;
                                  condition:=inverse_cond[condition];
                                  asml.remove(hp1.next)
                                  hp1.next.free;
                                  asml.remove(hp1);
                                  hp1.free;
                                  continue;
                                end;
                              }
                            end;
                        end;
                    end;
  {$endif USECMOV}
                A_FSTP,A_FISTP:
                  { a true result means the helper already dealt with p, so }
                  { restart the loop without advancing p here               }
                  if doFpuLoadStoreOpt(asmL,p) then
                    continue;
                A_IMUL:
                  begin
                    { p must be "imul const/symbol, reg" in its two operand  }
                    { form, or the three operand form with both register     }
                    { operands equal, directly preceded by "mov reg1, reg"   }
                    if (taicpu(p).ops >= 2) and
                       ((taicpu(p).oper[0]^.typ = top_const) or
                        (taicpu(p).oper[0]^.typ = top_symbol)) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       ((taicpu(p).ops = 2) or
                        ((taicpu(p).oper[2]^.typ = top_reg) and
                         (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
                       getLastInstruction(p,hp1) and
                       (hp1.typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_MOV) and
                       (taicpu(hp1).oper[0]^.typ = top_reg) and
                       (taicpu(hp1).oper[1]^.typ = top_reg) and
                       (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                      { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
                      begin
                        taicpu(p).ops := 3;
                        taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
                        taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
                        asml.remove(hp1);
                        hp1.free;
                      end;
                  end;
                A_MOV:
                  begin
                    if (taicpu(p).oper[0]^.typ = top_reg) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       GetNextInstruction(p, hp1) and
                       (hp1.typ = ait_Instruction) and
                       ((taicpu(hp1).opcode = A_MOV) or
                        (taicpu(hp1).opcode = A_MOVZX) or
                        (taicpu(hp1).opcode = A_MOVSX)) and
                       (taicpu(hp1).oper[0]^.typ = top_ref) and
                       (taicpu(hp1).oper[1]^.typ = top_reg) and
                       ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
                        (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
                       (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
                      { mov reg1, reg2
                        mov/zx/sx (reg2, ..), reg2      to    mov/zx/sx (reg1, ..), reg2 }
                      begin
                        { reg2 is overwritten by the load anyway, so address }
                        { through reg1 and drop the register copy            }
                        if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
                          taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
                        if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
                          taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
                        asml.remove(p);
                        p.free;
                        p := hp1;
                        continue;
                      end
                    else if (taicpu(p).oper[0]^.typ = top_ref) and
                      GetNextInstruction(p,hp1) and
                      (hp1.typ = ait_instruction) and
                      IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
                      GetNextInstruction(hp1,hp2) and
                      (hp2.typ = ait_instruction) and
                      (taicpu(hp2).opcode = A_MOV) and
                      (taicpu(hp2).oper[0]^.typ = top_reg) and
                      (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
                      (taicpu(hp2).oper[1]^.typ = top_ref) then
                      begin
                        TmpUsedRegs := UsedRegs;
                        UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
                        { RegUsedAfterInstruction takes the full register and }
                        { derives the super register itself, so pass the      }
                        { register as the other call sites do                 }
                        if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
                            not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,
                                  hp2, TmpUsedRegs))) then
                          { change   mov            (ref), reg            }
                          {          add/sub/or/... reg2/$const, reg      }
                          {          mov            reg, (ref)            }
                          {          # release reg                        }
                          { to       add/sub/or/... reg2/$const, (ref)    }
                          begin
                            case taicpu(hp1).opcode of
                              A_INC,A_DEC:
                                { single operand: the destination is operand 0 }
                                taicpu(hp1).LoadRef(0,taicpu(p).oper[0]^.ref^)
                              else
                                taicpu(hp1).LoadRef(1,taicpu(p).oper[0]^.ref^);
                            end;
                            asml.remove(p);
                            asml.remove(hp2);
                            p.free;
                            hp2.free;
                            p := hp1
                          end;
                      end
                  end;
              end;
            end;
        end;
        p := tai(p.next)
      end;
  end;
  { Final peephole pass. Walks the list items from BlockStart up to (but not
    including) BlockEnd and, for every instruction, applies these rewrites:

      A_CALL   "call X; jmp Y"       -> "push Y; jmp X"
               (only when AktOptProcessor < ClassPentium2 and the jmp operand
                is a symbol)
      A_CMP    "cmp $0, %reg"        -> "test %reg, %reg"
      A_MOVZX  "movzbl src, %reg"    -> "xorl %reg, %reg; movb src, %reg8"
               (only for ClassPentium, when not optimizing for size and when
                register variables are off)
      A_TEST,  "test/or %y, %y" is removed when the instruction returned by
      A_OR     GetLastInstruction is an add/sub/or/xor/and/inc/dec/neg that
               operates on the same %y

    asml is the assembler list that owns the items; it is used to insert and
    remove instructions. }
  procedure PostPeepHoleOpts(asml: taasmoutput; BlockStart, BlockEnd: tai);
  var
    p,hp1,hp2: tai;
  begin
    p := BlockStart;
    while (p <> BlockEnd) Do
      begin
        case p.Typ Of
          Ait_Instruction:
            begin
              case taicpu(p).opcode Of
                A_CALL:
                  if (AktOptProcessor < ClassPentium2) and
                     GetNextInstruction(p, hp1) and
                     (hp1.typ = ait_instruction) and
                     (taicpu(hp1).opcode = A_JMP) and
                     (taicpu(hp1).oper[0]^.typ = top_symbol) then
                    {change "call X; jmp Y" to "push Y; jmp X"}
                    begin
                      { push the jump target in front of the call ... }
                      hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.sym);
                      InsertLLItem(asml, p.previous, p, hp2);
                      { ... turn the call itself into a jump ... }
                      taicpu(p).opcode := A_JMP;
                      taicpu(p).is_jmp := true;
                      { ... and drop the now redundant jmp }
                      asml.remove(hp1);
                      hp1.free;
                    end;
                A_CMP:
                  begin
                    if (taicpu(p).oper[0]^.typ = top_const) and
                       (taicpu(p).oper[0]^.val = 0) and
                       (taicpu(p).oper[1]^.typ = top_reg) then
                      {change "cmp $0, %reg" to "test %reg, %reg"}
                      begin
                        taicpu(p).opcode := A_TEST;
                        taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                        { re-examine the same item, which is now an A_TEST,
                          so the A_TEST/A_OR case below gets a chance to
                          remove it }
                        continue;
                      end;
                  end;
(*
Optimization is not safe; xor clears the carry flag.
See test/tgadint64 in the test suite.
                A_MOV:
                  if (taicpu(p).oper[0]^.typ = Top_Const) and
                     (taicpu(p).oper[0]^.val = 0) and
                     (taicpu(p).oper[1]^.typ = Top_Reg) then
                    { change "mov $0, %reg" into "xor %reg, %reg" }
                    begin
                      taicpu(p).opcode := A_XOR;
                      taicpu(p).LoadReg(0,taicpu(p).oper[1]^.reg);
                    end;
*)
                A_MOVZX:
                  { if register vars are on, it's possible there is code like }
                  { "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"             }
                  { so we can't safely replace the movzx then with xor/mov,   }
                  { since that would change the flags (JM)                    }
                  if not(cs_regvars in aktglobalswitches) then
                    begin
                      if (taicpu(p).oper[1]^.typ = top_reg) then
                        if (taicpu(p).oper[0]^.typ = top_reg)
                          then
                            case taicpu(p).opsize of
                              S_BL:
                                begin
                                  { The source must not be part of the
                                    destination register: the inserted xor
                                    would clear it before the movb reads it
                                    (e.g. "movzbl %al, %eax"). This mirrors
                                    the base/index check of the memory
                                    variant below. }
                                  if IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                     (getsupreg(taicpu(p).oper[0]^.reg) <>
                                      getsupreg(taicpu(p).oper[1]^.reg)) and
                                     not(CS_LittleSize in aktglobalswitches) and
                                     (aktoptprocessor = ClassPentium) then
                                    {Change "movzbl %reg1, %reg2" to
                                     "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                     PentiumMMX}
                                    begin
                                      hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                        taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                      InsertLLItem(asml,p.previous, p, hp1);
                                      taicpu(p).opcode := A_MOV;
                                      taicpu(p).changeopsize(S_B);
                                      setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                    end;
                                end;
                            end
                        else if (taicpu(p).oper[0]^.typ = top_ref) and
                                (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                                (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                                not(CS_LittleSize in aktglobalswitches) and
                                IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                (aktoptprocessor = ClassPentium) and
                                (taicpu(p).opsize = S_BL) then
                          {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                           Pentium and PentiumMMX}
                          begin
                            { hp1 is built from the still 32 bit destination
                              register before that operand is narrowed }
                            hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                              taicpu(p).oper[1]^.reg);
                            taicpu(p).opcode := A_MOV;
                            taicpu(p).changeopsize(S_B);
                            setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                            InsertLLItem(asml,p.previous, p, hp1);
                          end;
                    end;
                A_TEST, A_OR:
                  {removes the line marked with (x) from the sequence
                   and/or/xor/add/sub/... $x, %y
                   test/or %y, %y (x)
                   j(n)z _Label
                   as the first instruction already adjusts the ZF}
                  { NOTE(review): the code below does not look at the
                    instruction that follows the test/or, so it is not
                    verified here that only ZF is consumed afterwards -
                    confirm that this is guaranteed elsewhere }
                  begin
                    if OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
                      if GetLastInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) then
                        case taicpu(hp1).opcode Of
                          A_ADD, A_SUB, A_OR, A_XOR, A_AND{, A_SHL, A_SHR}:
                            begin
                              { two operand form: the destination is oper[1] }
                              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
                                begin
                                  { unlink p and continue with its successor
                                    without the p.next step at the loop end }
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end;
                          A_DEC, A_INC, A_NEG:
                            begin
                              { one operand form: the destination is oper[0] }
                              if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) then
                                begin
                                  case taicpu(hp1).opcode Of
                                    A_DEC, A_INC:
                                      {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
                                      begin
                                        case taicpu(hp1).opcode Of
                                          A_DEC: taicpu(hp1).opcode := A_SUB;
                                          A_INC: taicpu(hp1).opcode := A_ADD;
                                        end;
                                        { move the old operand to slot 1 and
                                          put the constant 1 in slot 0 }
                                        taicpu(hp1).Loadoper(1,taicpu(hp1).oper[0]^);
                                        taicpu(hp1).LoadConst(0,1);
                                        taicpu(hp1).ops:=2;
                                      end
                                  end;
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end
                        end
                  end;
              end;
            end;
        end;
        p := tai(p.next)
      end;
  end;
  1949. end.
  1950. {
  1951. $Log$
  1952. Revision 1.50 2003-11-22 00:40:19 jonas
  1953. * fixed optimiser so it compiles again
  1954. * fixed several bugs which were in there already for a long time, but
  1955. which only popped up now :) -O2/-O3 will now optimise less than in
  1956. the past (and correctly so), but -O2u/-O3u will optimise a bit more
  1957. * some more small improvements for -O3 are still possible
  1958. Revision 1.49 2003/11/07 15:58:32 florian
  1959. * Florian's cumulative nr. 1; contains:
  1960. - invalid calling conventions for a certain cpu are rejected
  1961. - arm softfloat calling conventions
  1962. - -Sp for cpu dependent code generation
  1963. - several arm fixes
  1964. - remaining code for value open array paras on heap
  1965. Revision 1.48 2003/08/09 18:56:54 daniel
  1966. * cs_regalloc renamed to cs_regvars to avoid confusion with register
  1967. allocator
  1968. * Some preventive changes to i386 spilling code
  1969. Revision 1.47 2003/06/08 18:48:03 jonas
  1970. * first small steps towards an oop optimizer
  1971. Revision 1.46 2003/06/03 21:09:05 peter
  1972. * internal changeregsize for optimizer
  1973. * fix with a hack to not remove the first instruction of a block
  1974. which will leave blockstart pointing to invalid memory
  1975. Revision 1.45 2003/06/02 21:42:05 jonas
  1976. * function results can now also be regvars
  1977. - removed tprocinfo.return_offset, never use it again since it's invalid
  1978. if the result is a regvar
  1979. Revision 1.44 2003/05/30 23:57:08 peter
  1980. * more sparc cleanup
  1981. * accumulator removed, splitted in function_return_reg (called) and
  1982. function_result_reg (caller)
  1983. Revision 1.43 2003/04/27 11:21:35 peter
  1984. * aktprocdef renamed to current_procdef
  1985. * procinfo renamed to current_procinfo
  1986. * procinfo will now be stored in current_module so it can be
  1987. cleaned up properly
  1988. * gen_main_procsym changed to create_main_proc and release_main_proc
  1989. to also generate a tprocinfo structure
  1990. * fixed unit implicit initfinal
  1991. Revision 1.42 2003/03/28 19:16:57 peter
  1992. * generic constructor working for i386
  1993. * remove fixed self register
  1994. * esi added as address register for i386
  1995. Revision 1.41 2003/02/26 13:24:59 daniel
  1996. * Disabled mov reg,0 -> xor reg,reg optimization
  1997. Revision 1.40 2003/02/25 07:41:54 daniel
  1998. * Properly fixed reversed operands bug
  1999. Revision 1.39 2003/02/24 21:27:01 daniel
  2000. * Reversed operand order in an optimization in postpeepholeopt
  2001. Revision 1.38 2003/02/19 22:39:56 daniel
  2002. * Fixed a few issues
  2003. Revision 1.37 2003/02/19 22:00:16 daniel
  2004. * Code generator converted to new register notation
  2005. - Horribly outdated todo.txt removed
  2006. Revision 1.36 2003/01/08 18:43:57 daniel
  2007. * Tregister changed into a record
  2008. Revision 1.35 2002/11/15 16:30:54 peter
  2009. * made tasmsymbol.refs private (merged)
  2010. Revision 1.34 2002/08/18 20:06:30 peter
  2011. * inlining is now also allowed in interface
  2012. * renamed write/load to ppuwrite/ppuload
  2013. * tnode storing in ppu
  2014. * nld,ncon,nbas are already updated for storing in ppu
  2015. Revision 1.33 2002/08/17 09:23:46 florian
  2016. * first part of procinfo rewrite
  2017. Revision 1.32 2002/08/11 14:32:30 peter
  2018. * renamed current_library to objectlibrary
  2019. Revision 1.31 2002/08/11 13:24:17 peter
  2020. * saving of asmsymbols in ppu supported
  2021. * asmsymbollist global is removed and moved into a new class
  2022. tasmlibrarydata that will hold the info of a .a file which
  2023. corresponds with a single module. Added librarydata to tmodule
  2024. to keep the library info stored for the module. in the future the
  2025. objectfiles will also be stored to the tasmlibrarydata class
  2026. * all getlabel/newasmsymbol and friends are moved to the new class
  2027. Revision 1.30 2002/07/26 21:15:43 florian
  2028. * rewrote the system handling
  2029. Revision 1.29 2002/07/01 18:46:34 peter
  2030. * internal linker
  2031. * reorganized aasm layer
  2032. Revision 1.28 2002/06/09 12:55:23 jonas
  2033. * fixed detection of register usage
  2034. Revision 1.27 2002/05/18 13:34:25 peter
  2035. * readded missing revisions
  2036. Revision 1.26 2002/05/16 19:46:52 carl
  2037. + defines.inc -> fpcdefs.inc to avoid conflicts if compiling by hand
  2038. + try to fix temp allocation (still in ifdef)
  2039. + generic constructor calls
  2040. + start of tassembler / tmodulebase class cleanup
  2041. Revision 1.24 2002/05/12 16:53:18 peter
  2042. * moved entry and exitcode to ncgutil and cgobj
  2043. * foreach gets extra argument for passing local data to the
  2044. iterator function
  2045. * -CR checks also class typecasts at runtime by changing them
  2046. into as
  2047. * fixed compiler to cycle with the -CR option
  2048. * fixed stabs with elf writer, finally the global variables can
  2049. be watched
  2050. * removed a lot of routines from cga unit and replaced them by
  2051. calls to cgobj
  2052. * u32bit-s32bit updates for and,or,xor nodes. When one element is
  2053. u32bit then the other is typecasted also to u32bit without giving
  2054. a rangecheck warning/error.
  2055. * fixed pascal calling method with reversing also the high tree in
  2056. the parast, detected by tcalcst3 test
  2057. Revision 1.23 2002/04/21 15:40:49 carl
  2058. * changeregsize -> changeregsize
  2059. Revision 1.22 2002/04/20 21:37:07 carl
  2060. + generic FPC_CHECKPOINTER
  2061. + first parameter offset in stack now portable
  2062. * rename some constants
  2063. + move some cpu stuff to other units
  2064. - remove unused constants
  2065. * fix stacksize for some targets
  2066. * fix generic size problems which depend now on EXTEND_SIZE constant
  2067. * removing frame pointer in routines is only available for : i386,m68k and vis targets
  2068. Revision 1.21 2002/04/15 19:44:21 peter
  2069. * fixed stackcheck that would be called recursively when a stack
  2070. error was found
  2071. * generic changeregsize(reg,size) for i386 register resizing
  2072. * removed some more routines from cga unit
  2073. * fixed returnvalue handling
  2074. * fixed default stacksize of linux and go32v2, 8kb was a bit small :-)
  2075. Revision 1.20 2002/04/02 20:30:16 jonas
  2076. + support for folding inc/dec in shl/add/sub sequences to a single lea
  2077. instruction
  2078. Revision 1.19 2002/04/02 13:01:58 jonas
  2079. * fixed nasty bug in "and" peepholeoptimization that caused wrong
  2080. optimizations after Peter's big location patch
  2081. Revision 1.18 2002/03/31 20:26:40 jonas
  2082. + a_loadfpu_* and a_loadmm_* methods in tcg
  2083. * register allocation is now handled by a class and is mostly processor
  2084. independent (+rgobj.pas and i386/rgcpu.pas)
  2085. * temp allocation is now handled by a class (+tgobj.pas, -i386\tgcpu.pas)
  2086. * some small improvements and fixes to the optimizer
  2087. * some register allocation fixes
  2088. * some fpuvaroffset fixes in the unary minus node
  2089. * push/popusedregisters is now called rg.save/restoreusedregisters and
  2090. (for i386) uses temps instead of push/pop's when using -Op3 (that code is
  2091. also better optimizable)
  2092. * fixed and optimized register saving/restoring for new/dispose nodes
  2093. * LOC_FPU locations now also require their "register" field to be set to
  2094. R_ST, not R_ST0 (the latter is used for LOC_CFPUREGISTER locations only)
  2095. - list field removed of the tnode class because it's not used currently
  2096. and can cause hard-to-find bugs
  2097. }