popt386.pas 100 KB


  1. {
  2. $Id$
  3. Copyright (c) 1998-2000 by Florian Klaempfl and Jonas Maebe
  4. This unit contains the peephole optimizer.
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit POpt386;
  19. {$i defines.inc}
  20. Interface
  21. Uses Aasm;
  22. Procedure PrePeepHoleOpts(AsmL: TAAsmOutput; BlockStart, BlockEnd: Tai);
  23. Procedure PeepHoleOptPass1(AsmL: TAAsmOutput; BlockStart, BlockEnd: Tai);
  24. Procedure PeepHoleOptPass2(AsmL: TAAsmOutput; BlockStart, BlockEnd: Tai);
  25. Procedure PostPeepHoleOpts(AsmL: TAAsmOutput; BlockStart, BlockEnd: Tai);
  26. Implementation
  27. Uses
  28. globtype,systems,
  29. globals,hcodegen,
  30. {$ifdef finaldestdebug}
  31. cobjects,
  32. {$endif finaldestdebug}
  33. cpubase,cpuasm,DAOpt386,tgcpu;
  Function RegUsedAfterInstruction(Reg: TRegister; p: Tai; Var UsedRegs: TRegSet): Boolean;
  { Reports whether Reg is still needed after instruction p.

    Reg is first normalised with reg32 and UsedRegs is updated with the
    items following p (UsedRegs is a var parameter, so the caller sees the
    update). The result is true when Reg is in the updated set and either
    there is no next instruction, or the next instruction does not load Reg
    with a new value. }
  Var
    following: Tai;
  Begin
    Reg := reg32(Reg);
    UpdateUsedRegs(UsedRegs, Tai(p.Next));
    If Not (Reg In UsedRegs) Then
      RegUsedAfterInstruction := False
    Else If Not getNextInstruction(p, following) Then
      { marked as used and nothing follows that could overwrite it }
      RegUsedAfterInstruction := True
    Else
      RegUsedAfterInstruction := Not regLoadedWithNewValue(Reg, false, following);
  End;
  function doFpuLoadStoreOpt(asmL: TAAsmoutput; var p: Tai): boolean;
  { Optimises an FPU store (p: fstp/fistp to memory) that is immediately
    followed by a reload (fld/fild) of the same memory operand with the
    same operand size.

    - If the instruction after the reload is leave/ret and the memory
      operand is addressed off the frame pointer (no index) with an offset
      >= procinfo^.Return_Offset, both the store and the reload are removed
      and p is moved to that leave/ret instruction.
    - Otherwise, unless the operand size is S_FX or S_IQ, the popping store
      is turned into its non-popping form (fst/fist) and the reload is
      removed.

    Returns true if a "continue" should be done after this optimization
    (i.e. p was replaced by the instruction after the removed pair). }
  var
    reload, afterReload: Tai;
    isStoreReloadPair, slotIsDeadAtExit: boolean;
  begin
    doFpuLoadStoreOpt := false;

    if Taicpu(p).oper[0].typ <> top_ref then
      exit;
    if not getNextInstruction(p, reload) then
      exit;
    if reload.typ <> ait_instruction then
      exit;

    isStoreReloadPair :=
      ((Taicpu(reload).opcode = A_FLD) and (Taicpu(p).opcode = A_FSTP)) or
      ((Taicpu(p).opcode = A_FISTP) and (Taicpu(reload).opcode = A_FILD));
    if not isStoreReloadPair then
      exit;

    if Taicpu(reload).oper[0].typ <> top_ref then
      exit;
    if Taicpu(reload).opsize <> Taicpu(p).opsize then
      exit;
    if not refsEqual(Taicpu(p).oper[0].ref^, Taicpu(reload).oper[0].ref^) then
      exit;

    slotIsDeadAtExit :=
      getNextInstruction(reload, afterReload) and
      (afterReload.typ = ait_instruction) and
      ((Taicpu(afterReload).opcode = A_LEAVE) or
       (Taicpu(afterReload).opcode = A_RET)) and
      (Taicpu(p).oper[0].ref^.Base = procinfo^.FramePointer) and
      (Taicpu(p).oper[0].ref^.Offset >= procinfo^.Return_Offset) and
      (Taicpu(p).oper[0].ref^.Index = R_NO);

    if slotIsDeadAtExit then
      begin
        { drop both the store and the reload, continue at leave/ret }
        asml.remove(p);
        asml.remove(reload);
        p.free;
        reload.free;
        p := afterReload;
        removeLastDeallocForFuncRes(asmL, p);
        doFpuLoadStoreOpt := true;
      end
    else
      { fst can't store an extended value! }
      if (Taicpu(p).opsize <> S_FX) and
         (Taicpu(p).opsize <> S_IQ) then
        begin
          { keep the value on the FPU stack instead of popping and reloading }
          if Taicpu(p).opcode = A_FSTP then
            Taicpu(p).opcode := A_FST
          else
            Taicpu(p).opcode := A_FIST;
          asml.remove(reload);
          reload.free;
        end;
  end;
  Procedure PrePeepHoleOpts(AsmL: TAAsmOutput; BlockStart, BlockEnd: Tai);
  { Walks the items from BlockStart up to (not including) BlockEnd and
    applies three rewrites to instructions:

      * "imul const, reg[, reg]" (32 bit) with certain small constants is
        removed, turned into a mov, or expanded into lea/add sequences;
      * "shr/sar const1, x" directly followed by "shl const2, x" is turned
        into a shift plus an "and" mask, or into a single "and";
      * "xor reg, reg" is temporarily rewritten as "mov 0, reg" to make it
        easier for the CSE (changed back in pass 2).

    AsmL is the list the instructions live in; instructions are replaced
    and freed in place. }
  var
    p,hp1: Tai;
    srcReg, dstReg: TRegister;

    Function NewLea(baseReg, indexReg: TRegister; scale: byte; destReg: TRegister): Tai;
    { Creates "lea (baseReg,indexReg,scale), destReg" (32 bit). }
    var
      leaRef: treference;
    begin
      Reset_reference(leaRef);
      leaRef.base := baseReg;
      leaRef.index := indexReg;
      leaRef.ScaleFactor := scale;
      NewLea := Taicpu.op_ref_reg(A_LEA, S_L, newReference(leaRef), destReg);
    end;

    Procedure ReplaceByOne(replacement: Tai);
    { Puts replacement in the place of p, frees p and continues at replacement. }
    begin
      InsertLLItem(AsmL, p.previous, p.next, replacement);
      p.free;
      p := replacement;
    end;

    Procedure ReplaceByTwo(first, second: Tai);
    { Puts "first; second" in the place of p, frees p and continues at second. }
    begin
      { link second behind p first, so that p.next is second when first }
      { takes over p's position                                         }
      InsertLLItem(AsmL, p, p.next, second);
      InsertLLItem(AsmL, p.previous, p.next, first);
      p.free;
      p := Tai(first.next);
    end;

    Procedure LoadClearLowBitsMask(instr: Taicpu; bits: longint);
    { Loads into operand 0 of instr a mask (sized after instr.opsize) that }
    { clears the lowest "bits" bits. Other operand sizes are left alone.   }
    var
      lowBits: longint;
    begin
      lowBits := (1 shl bits) - 1;
      Case instr.opsize Of
        S_L: instr.LoadConst(0, lowBits Xor longint(-1));
        S_B: instr.LoadConst(0, lowBits Xor $ff);
        S_W: instr.LoadConst(0, lowBits Xor $ffff);
      End;
    end;

  Begin
    p := BlockStart;
    While (p <> BlockEnd) Do
      Begin
        Case p.Typ Of
          Ait_Instruction:
            Begin
              Case Taicpu(p).opcode Of
                A_IMUL:
                  { changes certain "imul const, %reg"'s to lea sequences }
                  Begin
                    If (Taicpu(p).oper[0].typ = Top_Const) And
                       (Taicpu(p).oper[1].typ = Top_Reg) And
                       (Taicpu(p).opsize = S_L) Then
                      Begin
                        If (Taicpu(p).oper[0].val = 1) Then
                          Begin
                            If (Taicpu(p).oper[2].typ = Top_None) Then
                              { remove "imul $1, reg" }
                              Begin
                                hp1 := Tai(p.Next);
                                asml.Remove(p);
                                p.free;
                                p := hp1;
                                Continue;
                              End
                            Else
                              { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
                              ReplaceByOne(Taicpu.Op_Reg_Reg(A_MOV, S_L,
                                Taicpu(p).oper[1].reg, Taicpu(p).oper[2].reg));
                          End
                        Else If
                          ((Taicpu(p).oper[2].typ = Top_Reg) or
                           (Taicpu(p).oper[2].typ = Top_None)) And
                          (aktoptprocessor < ClassP6) And
                          (Taicpu(p).oper[0].val <= 12) And
                          Not(CS_LittleSize in aktglobalswitches) And
                          { lea/add do not set the overflow flag like imul, }
                          { so leave the imul alone if a jo/jno follows     }
                          (Not(GetNextInstruction(p, hp1)) Or
                           Not((Tai(hp1).typ = ait_instruction) And
                               ((Taicpu(hp1).opcode=A_Jcc) and
                                (Taicpu(hp1).condition in [C_O,C_NO]))))
                        Then
                          Begin
                            { reg1 is the multiplicand; the result goes to reg2 }
                            { in the three operand form and to reg1 otherwise   }
                            srcReg := Taicpu(p).oper[1].reg;
                            If (Taicpu(p).oper[2].typ = Top_Reg) Then
                              dstReg := Taicpu(p).oper[2].reg
                            Else
                              dstReg := srcReg;
                            Case Taicpu(p).oper[0].val Of
                              3:
                                { imul 3, reg1[, reg2] to
                                    lea (reg1,reg1,2), dest }
                                ReplaceByOne(NewLea(srcReg, srcReg, 2, dstReg));
                              5:
                                { imul 5, reg1[, reg2] to
                                    lea (reg1,reg1,4), dest }
                                ReplaceByOne(NewLea(srcReg, srcReg, 4, dstReg));
                              6:
                                { imul 6, reg1, reg2 (reg1 <> reg2) to
                                    lea (,reg1,2), reg2
                                    lea (reg2,reg1,4), reg2
                                  imul 6, reg1 (or reg1 = reg2) to
                                    lea (reg1,reg1,2), reg1
                                    add reg1, reg1 }
                                If (aktoptprocessor <= Class386) Then
                                  Begin
                                    If (dstReg = srcReg) Then
                                      ReplaceByTwo(
                                        NewLea(srcReg, srcReg, 2, srcReg),
                                        Taicpu.op_reg_reg(A_ADD, S_L, srcReg, srcReg))
                                    Else
                                      { the second lea must write reg2: the result }
                                      { belongs there and reg1 has to stay intact  }
                                      ReplaceByTwo(
                                        NewLea(R_NO, srcReg, 2, dstReg),
                                        NewLea(dstReg, srcReg, 4, dstReg));
                                  End;
                              9:
                                { imul 9, reg1[, reg2] to
                                    lea (reg1,reg1,8), dest }
                                ReplaceByOne(NewLea(srcReg, srcReg, 8, dstReg));
                              10:
                                { imul 10, reg1[, reg2] to
                                    lea (reg1,reg1,4), dest
                                    add dest, dest }
                                If (aktoptprocessor <= Class386) Then
                                  ReplaceByTwo(
                                    NewLea(srcReg, srcReg, 4, dstReg),
                                    Taicpu.op_reg_reg(A_ADD, S_L, dstReg, dstReg));
                              12:
                                { imul 12, reg1, reg2 (reg1 <> reg2) to
                                    lea (,reg1,4), reg2
                                    lea (reg2,reg1,8), reg2
                                  imul 12, reg1 (or reg1 = reg2) to
                                    lea (reg1,reg1,2), reg1
                                    lea (,reg1,4), reg1 }
                                If (aktoptprocessor <= Class386) Then
                                  Begin
                                    If (dstReg = srcReg) Then
                                      ReplaceByTwo(
                                        NewLea(srcReg, srcReg, 2, srcReg),
                                        NewLea(R_NO, srcReg, 4, srcReg))
                                    Else
                                      ReplaceByTwo(
                                        NewLea(R_NO, srcReg, 4, dstReg),
                                        NewLea(dstReg, srcReg, 8, dstReg));
                                  End;
                            End;
                          End;
                      End;
                  End;
                A_SAR, A_SHR:
                  { changes the code sequence
                      shr/sar const1, x
                      shl const2, x
                    to either "sar/and", "shl/and" or just "and" depending on
                    const1 and const2 }
                  Begin
                    If GetNextInstruction(p, hp1) And
                       (Tai(hp1).typ = ait_instruction) and
                       (Taicpu(hp1).opcode = A_SHL) and
                       (Taicpu(p).oper[0].typ = top_const) and
                       (Taicpu(hp1).oper[0].typ = top_const) and
                       (Taicpu(hp1).opsize = Taicpu(p).opsize) And
                       (Taicpu(hp1).oper[1].typ = Taicpu(p).oper[1].typ) And
                       OpsEqual(Taicpu(hp1).oper[1], Taicpu(p).oper[1])
                    Then
                      Begin
                        If (Taicpu(p).oper[0].val > Taicpu(hp1).oper[0].val) And
                           Not(CS_LittleSize In aktglobalswitches)
                        Then
                          { const1 > const2:
                              shr/sar (const1-const2), x
                              and     mask(const2), x }
                          Begin
                            Taicpu(p).LoadConst(0,
                              Taicpu(p).oper[0].val-Taicpu(hp1).oper[0].val);
                            Taicpu(hp1).opcode := A_AND;
                            LoadClearLowBitsMask(Taicpu(hp1), Taicpu(hp1).oper[0].val);
                          End
                        Else If (Taicpu(p).oper[0].val < Taicpu(hp1).oper[0].val) And
                                Not(CS_LittleSize In aktglobalswitches)
                        Then
                          { const1 < const2:
                              and mask(const1), x
                              shl (const2-const1), x }
                          Begin
                            Taicpu(hp1).LoadConst(0,
                              Taicpu(hp1).oper[0].val-Taicpu(p).oper[0].val);
                            Taicpu(p).opcode := A_AND;
                            LoadClearLowBitsMask(Taicpu(p), Taicpu(p).oper[0].val);
                          End
                        Else If (Taicpu(p).oper[0].val = Taicpu(hp1).oper[0].val) Then
                          { const1 = const2:
                              and mask(const1), x }
                          Begin
                            Taicpu(p).opcode := A_AND;
                            LoadClearLowBitsMask(Taicpu(p), Taicpu(p).oper[0].val);
                            asml.remove(hp1);
                            hp1.free;
                          End;
                      End;
                  End;
                A_XOR:
                  If (Taicpu(p).oper[0].typ = top_reg) And
                     (Taicpu(p).oper[1].typ = top_reg) And
                     (Taicpu(p).oper[0].reg = Taicpu(p).oper[1].reg) then
                    { temporarily change this to 'mov reg,0' to make it easier }
                    { for the CSE. Will be changed back in pass 2              }
                    begin
                      Taicpu(p).opcode := A_MOV;
                      Taicpu(p).loadconst(0,0);
                    end;
              End;
            End;
        End;
        p := Tai(p.next)
      End;
  End;
  388. Procedure PeepHoleOptPass1(Asml: TAAsmOutput; BlockStart, BlockEnd: Tai);
  389. {First pass of peepholeoptimizations}
  390. Var
  391. l,l1 : longint;
  392. p,hp1,hp2 : Tai;
  393. hp3,hp4: Tai;
  394. TmpRef: TReference;
  395. UsedRegs, TmpUsedRegs: TRegSet;
  396. TmpBool1, TmpBool2: Boolean;
  Function SkipLabels(hp: Tai; var hp2: Tai): boolean;
  { Starting behind hp, steps over every item whose type is in SkipInstr or
    is a label/align item.

    Returns true and sets hp2 to the first item that is not skipped. If the
    list ends first, returns false and sets hp2 to the last item reached. }
  Var
    candidate: Tai;
  Begin
    candidate := Tai(hp.next);
    While assigned(candidate) and
          (candidate.typ In SkipInstr + [ait_label,ait_align]) Do
      Begin
        hp := candidate;
        candidate := Tai(hp.next);
      End;
    SkipLabels := assigned(candidate);
    If assigned(candidate) Then
      hp2 := candidate
    Else
      hp2 := hp;
  End;
  Procedure GetFinalDestination(AsmL: TAAsmOutput; hp: Taicpu);
  { Traces successive jumps to their final destination and sets it, e.g.
      je l1                je l3
      <code>               <code>
    l1:       becomes    l1:
      je l2                je l3
      <code>               <code>
    l2:                  l2:
      jmp l3               jmp l3

    hp is the jump to retarget; the reference counts of the old and the new
    target label are kept in step. Only targets whose label number lies in
    LoLab..HiLab and that have an entry in LTable are followed. The chain
    is followed for at most MaxJumpChainDepth steps, so that a cycle of
    jumps (e.g. "l1: jmp l1") cannot cause endless recursion. }
  Const
    MaxJumpChainDepth = 20;

    Function FindAnyLabel(hp: Tai; var l: pasmlabel): Boolean;
    { Returns true and the label in l if, skipping only SkipInstr and align }
    { items, the next item after hp is a label.                             }
    Begin
      FindAnyLabel := false;
      While assigned(hp.next) and
            (Tai(hp.next).typ In (SkipInstr+[ait_align])) Do
        hp := Tai(hp.next);
      If assigned(hp.next) and
         (Tai(hp.next).typ = ait_label) Then
        Begin
          FindAnyLabel := true;
          l := Tai_label(hp.next).l;
        End
    End;

    Function FollowJumpChain(hp: Taicpu; level: longint): Boolean;
    { Does the actual work for jump hp at recursion depth level. Returns }
    { false only when the depth limit was hit somewhere down the chain.  }
    Var
      p1, p2: Tai;
      l: pasmlabel;
    Begin
      FollowJumpChain := False;
      If level > MaxJumpChainDepth Then
        Exit;
      If (pasmlabel(hp.oper[0].sym)^.labelnr >= LoLab) and
         (pasmlabel(hp.oper[0].sym)^.labelnr <= HiLab) and {range check, a jump can go past an assembler block!}
         Assigned(LTable^[pasmlabel(hp.oper[0].sym)^.labelnr-LoLab].TaiObj) Then
        Begin
          p1 := LTable^[pasmlabel(hp.oper[0].sym)^.labelnr-LoLab].TaiObj; {the jump's destination}
          SkipLabels(p1,p1);
          If (Tai(p1).typ = ait_instruction) and
             (Taicpu(p1).is_jmp) Then
            Begin
              If { the next instruction after the label where the jump hp arrives}
                 { is unconditional or of the same type as hp, so continue       }
                 (Taicpu(p1).condition in [C_None,hp.condition]) or
                 { the next instruction after the label where the jump hp arrives}
                 { is the opposite of hp (so this one is never taken), but after }
                 { that one there is a branch that will be taken, so perform a   }
                 { little hack: set p1 equal to this instruction (that's what the}
                 { last SkipLabels is for, only works with short bool evaluation)}
                 ((Taicpu(p1).condition = inverse_cond[hp.condition]) and
                  SkipLabels(p1,p2) and
                  (p2.typ = ait_instruction) and
                  (Taicpu(p2).is_jmp) and
                  (Taicpu(p2).condition in [C_None,hp.condition]) and
                  SkipLabels(p1,p1)) Then
                Begin
                  { resolve p1 first; leave hp untouched if the chain is }
                  { too deep (most likely a cycle)                       }
                  If Not FollowJumpChain(Taicpu(p1), Succ(level)) Then
                    Exit;
                  Dec(pasmlabel(hp.oper[0].sym)^.refs);
                  hp.oper[0].sym:=Taicpu(p1).oper[0].sym;
                  inc(pasmlabel(hp.oper[0].sym)^.refs);
                End
              Else
                If (Taicpu(p1).condition = inverse_cond[hp.condition]) then
                  Begin
                    { p1 is never taken when arriving from hp, so hp can jump }
                    { straight to whatever follows p1                         }
                    if not FindAnyLabel(p1,l) then
                      begin
  {$ifdef finaldestdebug}
                        insertllitem(asml,p1,p1.next,Tai_asm_comment.Create(
                          strpnew('previous label inserted')));
  {$endif finaldestdebug}
                        getlabel(l);
                        insertllitem(asml,p1,p1.next,Tai_label.Create(l));
                        dec(pasmlabel(Taicpu(hp).oper[0].sym)^.refs);
                        hp.oper[0].sym := l;
                        inc(l^.refs);
                        { this won't work, since the new label isn't in the labeltable }
                        { so it will fail the rangecheck. Labeltable should become a   }
                        { hashtable to support this:                                   }
                        { GetFinalDestination(asml, hp);                               }
                      end
                    else
                      begin
  {$ifdef finaldestdebug}
                        insertllitem(asml,p1,p1.next,Tai_asm_comment.Create(
                          strpnew('next label reused')));
  {$endif finaldestdebug}
                        { hp no longer refers to its old target: release that }
                        { reference, like the other retargeting branches do   }
                        dec(pasmlabel(hp.oper[0].sym)^.refs);
                        inc(l^.refs);
                        hp.oper[0].sym := l;
                        If Not FollowJumpChain(hp, Succ(level)) Then
                          Exit;
                      end;
                  End;
            End;
        End;
      FollowJumpChain := True;
    End;

  Begin
    FollowJumpChain(hp, 0);
  End;
  Function DoSubAddOpt(var p: Tai): Boolean;
  { Folds the instruction before p into p. p is used as an instruction with
    a constant in operand 0 that is subtracted from the register in
    operand 1. The preceding instruction (same operand size) is merged when
    it is

      dec reg          -> constant of p is increased by one
      sub const, reg   -> constants are added
      add const, reg   -> constant of the add is subtracted from p's

    and is then removed. If merging an add leaves p with constant 0, p is
    removed as well, p is moved to the instruction before it (or to the
    item after it if there is none) and the result is true. In all other
    cases the result is false.

    Note: the enclosing routine's hp1 is used as scratch variable. }

    Function PrevIsConstOpOnTargetReg: Boolean;
    { true if hp1 is "xxx const, reg" with reg being p's register operand }
    Begin
      PrevIsConstOpOnTargetReg :=
        (Taicpu(hp1).oper[0].typ = top_const) And
        (Taicpu(hp1).oper[1].typ = top_reg) And
        (Taicpu(hp1).oper[1].reg = Taicpu(p).oper[1].reg);
    End;

  Begin
    DoSubAddOpt := False;
    If Not GetLastInstruction(p, hp1) Then
      Exit;
    If hp1.typ <> ait_instruction Then
      Exit;
    If Taicpu(hp1).opsize <> Taicpu(p).opsize Then
      Exit;

    Case Taicpu(hp1).opcode Of
      A_DEC:
        If (Taicpu(hp1).oper[0].typ = top_reg) And
           (Taicpu(hp1).oper[0].reg = Taicpu(p).oper[1].reg) Then
          Begin
            Taicpu(p).LoadConst(0, Taicpu(p).oper[0].val + 1);
            asml.Remove(hp1);
            hp1.free;
          End;
      A_SUB:
        If PrevIsConstOpOnTargetReg Then
          Begin
            Taicpu(p).LoadConst(0,
              Taicpu(p).oper[0].val + Taicpu(hp1).oper[0].val);
            asml.Remove(hp1);
            hp1.free;
          End;
      A_ADD:
        If PrevIsConstOpOnTargetReg Then
          Begin
            Taicpu(p).LoadConst(0,
              Taicpu(p).oper[0].val - Taicpu(hp1).oper[0].val);
            asml.Remove(hp1);
            hp1.free;
            If (Taicpu(p).oper[0].val = 0) Then
              Begin
                { add and sub cancel each other out: drop p as well }
                hp1 := Tai(p.next);
                asml.Remove(p);
                p.free;
                If Not GetLastInstruction(hp1, p) Then
                  p := hp1;
                DoSubAddOpt := True;
              End
          End;
    End;
  End;
  541. Begin
  542. P := BlockStart;
  543. UsedRegs := [];
  544. While (P <> BlockEnd) Do
  545. Begin
  546. UpDateUsedRegs(UsedRegs, Tai(p.next));
  547. Case p.Typ Of
  548. ait_instruction:
  549. Begin
  550. { Handle Jmp Optimizations }
  551. if Taicpu(p).is_jmp then
  552. begin
  553. {the following if-block removes all code between a jmp and the next label,
  554. because it can never be executed}
  555. If (Taicpu(p).opcode = A_JMP) Then
  556. Begin
  557. While GetNextInstruction(p, hp1) and
  558. (hp1.typ <> ait_label) do
  559. If not(hp1.typ in ([ait_label,ait_align]+skipinstr)) Then
  560. Begin
  561. asml.Remove(hp1);
  562. hp1.free;
  563. End
  564. else break;
  565. End;
  566. { remove jumps to a label coming right after them }
  567. If GetNextInstruction(p, hp1) then
  568. Begin
  569. if FindLabel(pasmlabel(Taicpu(p).oper[0].sym), hp1) then
  570. Begin
  571. hp2:=Tai(hp1.next);
  572. asml.remove(p);
  573. p.free;
  574. p:=hp2;
  575. continue;
  576. end
  577. Else
  578. Begin
  579. if hp1.typ = ait_label then
  580. SkipLabels(hp1,hp1);
  581. If (Tai(hp1).typ=ait_instruction) and
  582. (Taicpu(hp1).opcode=A_JMP) and
  583. GetNextInstruction(hp1, hp2) And
  584. FindLabel(PAsmLabel(Taicpu(p).oper[0].sym), hp2)
  585. Then
  586. Begin
  587. if Taicpu(p).opcode=A_Jcc then
  588. Taicpu(p).condition:=inverse_cond[Taicpu(p).condition]
  589. else
  590. begin
  591. If (LabDif <> 0) Then
  592. GetFinalDestination(asml, Taicpu(p));
  593. p:=Tai(p.next);
  594. continue;
  595. end;
  596. Dec(Tai_label(hp2).l^.refs);
  597. Taicpu(p).oper[0].sym:=Taicpu(hp1).oper[0].sym;
  598. Inc(Taicpu(p).oper[0].sym^.refs);
  599. asml.remove(hp1);
  600. hp1.free;
  601. If (LabDif <> 0) Then
  602. GetFinalDestination(asml, Taicpu(p));
  603. end
  604. else
  605. If (LabDif <> 0) Then
  606. GetFinalDestination(asml, Taicpu(p));
  607. end;
  608. end;
  609. end
  610. else
  611. { All other optimizes }
  612. begin
  613. For l := 0 to 2 Do
  614. If (Taicpu(p).oper[l].typ = top_ref) Then
  615. With Taicpu(p).oper[l].ref^ Do
  616. Begin
  617. If (base = R_NO) And
  618. (index <> R_NO) And
  619. (scalefactor in [0,1])
  620. Then
  621. Begin
  622. base := index;
  623. index := R_NO
  624. End
  625. End;
  626. Case Taicpu(p).opcode Of
  627. A_AND:
  628. Begin
  629. If (Taicpu(p).oper[0].typ = top_const) And
  630. (Taicpu(p).oper[1].typ = top_reg) And
  631. GetNextInstruction(p, hp1) And
  632. (Tai(hp1).typ = ait_instruction) And
  633. (Taicpu(hp1).opcode = A_AND) And
  634. (Taicpu(hp1).oper[0].typ = top_const) And
  635. (Taicpu(hp1).oper[1].typ = top_reg) And
  636. (Taicpu(hp1).oper[1].reg = Taicpu(hp1).oper[1].reg)
  637. Then
  638. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  639. Begin
  640. Taicpu(p).LoadConst(0,Taicpu(p).oper[0].val And Taicpu(hp1).oper[0].val);
  641. asml.Remove(hp1);
  642. hp1.free;
  643. End
  644. Else
  645. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  646. jump, but only if it's a conditional jump (PFV) }
  647. If (Taicpu(p).oper[1].typ = top_reg) And
  648. GetNextInstruction(p, hp1) And
  649. (hp1.typ = ait_instruction) And
  650. (Taicpu(hp1).is_jmp) and
  651. (Taicpu(hp1).opcode<>A_JMP) and
  652. Not(Taicpu(p).oper[1].reg in UsedRegs) Then
  653. Taicpu(p).opcode := A_TEST;
  654. End;
  655. A_CMP:
  656. Begin
  657. If (Taicpu(p).oper[0].typ = top_const) And
  658. (Taicpu(p).oper[1].typ in [top_reg,top_ref]) And
  659. (Taicpu(p).oper[0].val = 0) Then
  660. If GetNextInstruction(p, hp1) And
  661. (hp1.typ = ait_instruction) And
  662. (Taicpu(hp1).is_jmp) and
  663. (Taicpu(hp1).opcode=A_Jcc) and
  664. (Taicpu(hp1).condition in [C_LE,C_BE]) and
  665. GetNextInstruction(hp1,hp2) and
  666. (hp2.typ = ait_instruction) and
  667. (Taicpu(hp2).opcode = A_DEC) And
  668. OpsEqual(Taicpu(hp2).oper[0],Taicpu(p).oper[1]) And
  669. GetNextInstruction(hp2, hp3) And
  670. (hp3.typ = ait_instruction) and
  671. (Taicpu(hp3).is_jmp) and
  672. (Taicpu(hp3).opcode = A_JMP) And
  673. GetNextInstruction(hp3, hp4) And
  674. FindLabel(PAsmLabel(Taicpu(hp1).oper[0].sym),hp4)
  675. Then
  676. Begin
  677. Taicpu(hp2).Opcode := A_SUB;
  678. Taicpu(hp2).Loadoper(1,Taicpu(hp2).oper[0]);
  679. Taicpu(hp2).LoadConst(0,1);
  680. Taicpu(hp2).ops:=2;
  681. Taicpu(hp3).Opcode := A_Jcc;
  682. Case Taicpu(hp1).condition of
  683. C_LE: Taicpu(hp3).condition := C_GE;
  684. C_BE: Taicpu(hp3).condition := C_AE;
  685. End;
  686. asml.Remove(p);
  687. asml.Remove(hp1);
  688. p.free;
  689. hp1.free;
  690. p := hp2;
  691. continue;
  692. End
  693. Else
  694. {change "cmp $0, %reg" to "test %reg, %reg"}
  695. If (Taicpu(p).oper[1].typ = top_reg) Then
  696. Begin
  697. Taicpu(p).opcode := A_TEST;
  698. Taicpu(p).loadreg(0,Taicpu(p).oper[1].reg);
  699. End;
  700. End;
  701. A_FLD:
  702. Begin
  703. If (Taicpu(p).oper[0].typ = top_reg) And
  704. GetNextInstruction(p, hp1) And
  705. (hp1.typ = Ait_Instruction) And
  706. (Taicpu(hp1).oper[0].typ = top_reg) And
  707. (Taicpu(hp1).oper[1].typ = top_reg) And
  708. (Taicpu(hp1).oper[0].reg = R_ST) And
  709. (Taicpu(hp1).oper[1].reg = R_ST1) Then
  710. { change to
  711. fld reg fxxx reg,st
  712. fxxxp st, st1 (hp1)
  713. Remark: non commutative operations must be reversed!
  714. }
  715. begin
  716. Case Taicpu(hp1).opcode Of
  717. A_FMULP,A_FADDP,
  718. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  719. begin
  720. Case Taicpu(hp1).opcode Of
  721. A_FADDP: Taicpu(hp1).opcode := A_FADD;
  722. A_FMULP: Taicpu(hp1).opcode := A_FMUL;
  723. A_FSUBP: Taicpu(hp1).opcode := A_FSUBR;
  724. A_FSUBRP: Taicpu(hp1).opcode := A_FSUB;
  725. A_FDIVP: Taicpu(hp1).opcode := A_FDIVR;
  726. A_FDIVRP: Taicpu(hp1).opcode := A_FDIV;
  727. End;
  728. Taicpu(hp1).oper[0].reg := Taicpu(p).oper[0].reg;
  729. Taicpu(hp1).oper[1].reg := R_ST;
  730. asml.Remove(p);
  731. p.free;
  732. p := hp1;
  733. Continue;
  734. end;
  735. end;
  736. end
  737. else
  738. If (Taicpu(p).oper[0].typ = top_ref) And
  739. GetNextInstruction(p, hp2) And
  740. (hp2.typ = Ait_Instruction) And
  741. (Taicpu(hp2).oper[0].typ = top_reg) And
  742. (Taicpu(hp2).oper[1].typ = top_reg) And
  743. (Taicpu(p).opsize in [S_FS, S_FL]) And
  744. (Taicpu(hp2).oper[0].reg = R_ST) And
  745. (Taicpu(hp2).oper[1].reg = R_ST1) Then
  746. If GetLastInstruction(p, hp1) And
  747. (hp1.typ = Ait_Instruction) And
  748. ((Taicpu(hp1).opcode = A_FLD) Or
  749. (Taicpu(hp1).opcode = A_FST)) And
  750. (Taicpu(hp1).opsize = Taicpu(p).opsize) And
  751. (Taicpu(hp1).oper[0].typ = top_ref) And
  752. RefsEqual(Taicpu(p).oper[0].ref^, Taicpu(hp1).oper[0].ref^) Then
  753. If ((Taicpu(hp2).opcode = A_FMULP) Or
  754. (Taicpu(hp2).opcode = A_FADDP)) Then
  755. { change to
  756. fld/fst mem1 (hp1) fld/fst mem1
  757. fld mem1 (p) fadd/
  758. faddp/ fmul st, st
  759. fmulp st, st1 (hp2) }
  760. Begin
  761. asml.Remove(p);
  762. p.free;
  763. p := hp1;
  764. If (Taicpu(hp2).opcode = A_FADDP) Then
  765. Taicpu(hp2).opcode := A_FADD
  766. Else
  767. Taicpu(hp2).opcode := A_FMUL;
  768. Taicpu(hp2).oper[1].reg := R_ST;
  769. End
  770. Else
  771. { change to
  772. fld/fst mem1 (hp1) fld/fst mem1
  773. fld mem1 (p) fld st}
  774. Begin
  775. Taicpu(p).changeopsize(S_FL);
  776. Taicpu(p).loadreg(0,R_ST);
  777. End
  778. Else
  779. Begin
  780. Case Taicpu(hp2).opcode Of
  781. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  782. { change to
  783. fld/fst mem1 (hp1) fld/fst mem1
  784. fld mem2 (p) fxxx mem2
  785. fxxxp st, st1 (hp2) }
  786. Begin
  787. Case Taicpu(hp2).opcode Of
  788. A_FADDP: Taicpu(p).opcode := A_FADD;
  789. A_FMULP: Taicpu(p).opcode := A_FMUL;
  790. A_FSUBP: Taicpu(p).opcode := A_FSUBR;
  791. A_FSUBRP: Taicpu(p).opcode := A_FSUB;
  792. A_FDIVP: Taicpu(p).opcode := A_FDIVR;
  793. A_FDIVRP: Taicpu(p).opcode := A_FDIV;
  794. End;
  795. asml.Remove(hp2);
  796. hp2.free;
  797. End
  798. End
  799. End
  800. End;
  801. A_FSTP,A_FISTP:
  802. if doFpuLoadStoreOpt(asmL,p) then
  803. continue;
  804. A_LEA:
  805. Begin
  806. {removes seg register prefixes from LEA operations, as they
  807. don't do anything}
  808. Taicpu(p).oper[0].ref^.Segment := R_NO;
  809. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  810. If (Taicpu(p).oper[0].ref^.Base In [R_EAX..R_EDI]) And
  811. (Taicpu(p).oper[0].ref^.Index = R_NO) And
  812. (Not(Assigned(Taicpu(p).oper[0].ref^.Symbol))) Then
  813. If (Taicpu(p).oper[0].ref^.Base <> Taicpu(p).oper[1].reg)
  814. and (Taicpu(p).oper[0].ref^.Offset = 0)
  815. Then
  816. Begin
  817. hp1 := Taicpu.op_reg_reg(A_MOV, S_L,Taicpu(p).oper[0].ref^.Base,
  818. Taicpu(p).oper[1].reg);
  819. InsertLLItem(AsmL,p.previous,p.next, hp1);
  820. p.free;
  821. p := hp1;
  822. Continue;
  823. End
  824. Else
  825. if (Taicpu(p).oper[0].ref^.Offset = 0) then
  826. Begin
  827. hp1 := Tai(p.Next);
  828. asml.Remove(p);
  829. p.free;
  830. p := hp1;
  831. Continue;
  832. End
  833. else
  834. with Taicpu(p).oper[0].ref^ do
  835. if (Base = Taicpu(p).oper[1].reg) then
  836. begin
  837. l := offset+offsetfixup;
  838. case l of
  839. 1,-1:
  840. begin
  841. if l = 1 then
  842. Taicpu(p).opcode := A_INC
  843. else Taicpu(p).opcode := A_DEC;
  844. Taicpu(p).loadreg(0,Taicpu(p).oper[1].reg);
  845. Taicpu(p).ops := 1;
  846. end;
  847. else
  848. begin
  849. Taicpu(p).opcode := A_ADD;
  850. Taicpu(p).loadconst(0,offset+offsetfixup);
  851. end;
  852. end;
  853. end;
  854. End;
  855. A_MOV:
  856. Begin
  857. TmpUsedRegs := UsedRegs;
  858. If (Taicpu(p).oper[1].typ = top_reg) And
  859. (Taicpu(p).oper[1].reg In [R_EAX, R_EBX, R_EDX, R_EDI]) And
  860. GetNextInstruction(p, hp1) And
  861. (Tai(hp1).typ = ait_instruction) And
  862. (Taicpu(hp1).opcode = A_MOV) And
  863. (Taicpu(hp1).oper[0].typ = top_reg) And
  864. (Taicpu(hp1).oper[0].reg = Taicpu(p).oper[1].reg)
  865. Then
  866. {we have "mov x, %treg; mov %treg, y}
  867. If not(RegUsedAfterInstruction(Taicpu(p).oper[1].reg, hp1, TmpUsedRegs)) then
  868. {we've got "mov x, %treg; mov %treg, y; with %treg is not used after }
  869. Case Taicpu(p).oper[0].typ Of
  870. top_reg:
  871. Begin
  872. { change "mov %reg, %treg; mov %treg, y"
  873. to "mov %reg, y" }
  874. Taicpu(p).LoadOper(1,Taicpu(hp1).oper[1]);
  875. asml.Remove(hp1);
  876. hp1.free;
  877. continue;
  878. End;
  879. top_ref:
  880. If (Taicpu(hp1).oper[1].typ = top_reg) Then
  881. Begin
  882. { change "mov mem, %treg; mov %treg, %reg"
  883. to "mov mem, %reg" }
  884. Taicpu(p).Loadoper(1,Taicpu(hp1).oper[1]);
  885. asml.Remove(hp1);
  886. hp1.free;
  887. continue;
  888. End;
  889. End
  890. Else
  891. Else
  892. {Change "mov %reg1, %reg2; xxx %reg2, ???" to
  893. "mov %reg1, %reg2; xxx %reg1, ???" to avoid a write/read
  894. penalty}
  895. If (Taicpu(p).oper[0].typ = top_reg) And
  896. (Taicpu(p).oper[1].typ = top_reg) And
  897. GetNextInstruction(p,hp1) And
  898. (Tai(hp1).typ = ait_instruction) And
  899. (Taicpu(hp1).oper[0].typ = top_reg) And
  900. (Taicpu(hp1).oper[0].reg = Taicpu(p).oper[1].reg)
  901. Then
  902. {we have "mov %reg1, %reg2; XXX %reg2, ???"}
  903. Begin
  904. If ((Taicpu(hp1).opcode = A_OR) Or
  905. (Taicpu(hp1).opcode = A_TEST)) And
  906. (Taicpu(hp1).oper[1].typ = top_reg) And
  907. (Taicpu(hp1).oper[0].reg = Taicpu(hp1).oper[1].reg)
  908. Then
  909. {we have "mov %reg1, %reg2; test/or %reg2, %reg2"}
  910. Begin
  911. TmpUsedRegs := UsedRegs;
  912. { reg1 will be used after the first instruction, }
  913. { so update the allocation info }
  914. allocRegBetween(asmL,Taicpu(p).oper[0].reg,p,hp1);
  915. If GetNextInstruction(hp1, hp2) And
  916. (hp2.typ = ait_instruction) And
  917. Taicpu(hp2).is_jmp and
  918. Not(RegUsedAfterInstruction(Taicpu(hp1).oper[0].reg, hp1, TmpUsedRegs))
  919. Then
  920. {change "mov %reg1, %reg2; test/or %reg2, %reg2; jxx" to
  921. "test %reg1, %reg1; jxx"}
  922. Begin
  923. Taicpu(hp1).Loadoper(0,Taicpu(p).oper[0]);
  924. Taicpu(hp1).Loadoper(1,Taicpu(p).oper[0]);
  925. asml.Remove(p);
  926. p.free;
  927. p := hp1;
  928. continue
  929. End
  930. Else
  931. {change "mov %reg1, %reg2; test/or %reg2, %reg2" to
  932. "mov %reg1, %reg2; test/or %reg1, %reg1"}
  933. Begin
  934. Taicpu(hp1).Loadoper(0,Taicpu(p).oper[0]);
  935. Taicpu(hp1).Loadoper(1,Taicpu(p).oper[0]);
  936. End;
  937. End
  938. { Else
  939. If (Taicpu(p.next)^.opcode
  940. In [A_PUSH, A_OR, A_XOR, A_AND, A_TEST])}
  941. {change "mov %reg1, %reg2; push/or/xor/... %reg2, ???" to
  942. "mov %reg1, %reg2; push/or/xor/... %reg1, ???"}
  943. End
  944. Else
  945. {leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  946. x >= RetOffset) as it doesn't do anything (it writes either to a
  947. parameter or to the temporary storage room for the function
  948. result)}
  949. If GetNextInstruction(p, hp1) And
  950. (Tai(hp1).typ = ait_instruction)
  951. Then
  952. If ((Taicpu(hp1).opcode = A_LEAVE) Or
  953. (Taicpu(hp1).opcode = A_RET)) And
  954. (Taicpu(p).oper[1].typ = top_ref) And
  955. (Taicpu(p).oper[1].ref^.base = procinfo^.FramePointer) And
  956. (Taicpu(p).oper[1].ref^.offset >= procinfo^.Return_Offset) And
  957. (Taicpu(p).oper[1].ref^.index = R_NO) And
  958. (Taicpu(p).oper[0].typ = top_reg)
  959. Then
  960. Begin
  961. asml.Remove(p);
  962. p.free;
  963. p := hp1;
  964. RemoveLastDeallocForFuncRes(asmL,p);
  965. End
  966. Else
  967. If (Taicpu(p).oper[0].typ = top_reg) And
  968. (Taicpu(p).oper[1].typ = top_ref) And
  969. (Taicpu(p).opsize = Taicpu(hp1).opsize) And
  970. (Taicpu(hp1).opcode = A_CMP) And
  971. (Taicpu(hp1).oper[1].typ = top_ref) And
  972. RefsEqual(Taicpu(p).oper[1].ref^, Taicpu(hp1).oper[1].ref^) Then
  973. {change "mov reg1, mem1; cmp x, mem1" to "mov reg, mem1; cmp x, reg1"}
  974. begin
  975. Taicpu(hp1).loadreg(1,Taicpu(p).oper[0].reg);
  976. allocRegBetween(asmL,Taicpu(p).oper[0].reg,p,hp1);
  977. end;
  978. { Next instruction is also a MOV ? }
  979. If GetNextInstruction(p, hp1) And
  980. (Tai(hp1).typ = ait_instruction) and
  981. (Taicpu(hp1).opcode = A_MOV) and
  982. (Taicpu(hp1).opsize = Taicpu(p).opsize)
  983. Then
  984. Begin
  985. If (Taicpu(hp1).oper[0].typ = Taicpu(p).oper[1].typ) and
  986. (Taicpu(hp1).oper[1].typ = Taicpu(p).oper[0].typ)
  987. Then
  988. {mov reg1, mem1 or mov mem1, reg1
  989. mov mem2, reg2 mov reg2, mem2}
  990. Begin
  991. If OpsEqual(Taicpu(hp1).oper[1],Taicpu(p).oper[0]) Then
  992. {mov reg1, mem1 or mov mem1, reg1
  993. mov mem2, reg1 mov reg2, mem1}
  994. Begin
  995. If OpsEqual(Taicpu(hp1).oper[0],Taicpu(p).oper[1]) Then
  996. { Removes the second statement from
  997. mov reg1, mem1/reg2
  998. mov mem1/reg2, reg1 }
  999. Begin
  1000. if (Taicpu(p).oper[0].typ = top_reg) then
  1001. AllocRegBetween(asmL,Taicpu(p).oper[0].reg,p,hp1);
  1002. asml.remove(hp1);
  1003. hp1.free;
  1004. End
  1005. Else
  1006. Begin
  1007. TmpUsedRegs := UsedRegs;
  1008. UpdateUsedRegs(TmpUsedRegs, Tai(hp1.next));
  1009. If (Taicpu(p).oper[0].typ = top_reg) And
  1010. { mov reg1, mem1
  1011. mov mem2, reg1 }
  1012. GetNextInstruction(hp1, hp2) And
  1013. (hp2.typ = ait_instruction) And
  1014. (Taicpu(hp2).opcode = A_CMP) And
  1015. (Taicpu(hp2).opsize = Taicpu(p).opsize) and
  1016. (Taicpu(hp2).oper[0].typ = TOp_Ref) And
  1017. (Taicpu(hp2).oper[1].typ = TOp_Reg) And
  1018. RefsEqual(Taicpu(hp2).oper[0].ref^, Taicpu(p).oper[1].ref^) And
  1019. (Taicpu(hp2).oper[1].reg = Taicpu(p).oper[0].reg) And
  1020. Not(RegUsedAfterInstruction(Taicpu(p).oper[0].reg, hp2, TmpUsedRegs)) Then
  1021. { change to
  1022. mov reg1, mem1 mov reg1, mem1
  1023. mov mem2, reg1 cmp reg1, mem2
  1024. cmp mem1, reg1 }
  1025. Begin
  1026. asml.Remove(hp2);
  1027. hp2.free;
  1028. Taicpu(hp1).opcode := A_CMP;
  1029. Taicpu(hp1).loadref(1,newreference(Taicpu(hp1).oper[0].ref^));
  1030. Taicpu(hp1).loadreg(0,Taicpu(p).oper[0].reg);
  1031. End;
  1032. End;
  1033. End
  1034. Else
  1035. Begin
  1036. tmpUsedRegs := UsedRegs;
  1037. If GetNextInstruction(hp1, hp2) And
  1038. (Taicpu(p).oper[0].typ = top_ref) And
  1039. (Taicpu(p).oper[1].typ = top_reg) And
  1040. (Taicpu(hp1).oper[0].typ = top_reg) And
  1041. (Taicpu(hp1).oper[0].reg = Taicpu(p).oper[1].reg) And
  1042. (Taicpu(hp1).oper[1].typ = top_ref) And
  1043. (Tai(hp2).typ = ait_instruction) And
  1044. (Taicpu(hp2).opcode = A_MOV) And
  1045. (Taicpu(hp2).opsize = Taicpu(p).opsize) and
  1046. (Taicpu(hp2).oper[1].typ = top_reg) And
  1047. (Taicpu(hp2).oper[0].typ = top_ref) And
  1048. RefsEqual(Taicpu(hp2).oper[0].ref^, Taicpu(hp1).oper[1].ref^) Then
  1049. If not regInRef(Taicpu(hp2).oper[1].reg,Taicpu(hp2).oper[0].ref^) and
  1050. (Taicpu(p).oper[1].reg in [R_DI,R_EDI]) and
  1051. not(RegUsedAfterInstruction(R_EDI,hp1,tmpUsedRegs)) Then
  1052. { mov mem1, %edi
  1053. mov %edi, mem2
  1054. mov mem2, reg2
  1055. to:
  1056. mov mem1, reg2
  1057. mov reg2, mem2}
  1058. Begin
  1059. AllocRegBetween(asmL,reg32(Taicpu(hp2).oper[1].reg),p,hp2);
  1060. Taicpu(p).Loadoper(1,Taicpu(hp2).oper[1]);
  1061. Taicpu(hp1).loadoper(0,Taicpu(hp2).oper[1]);
  1062. asml.Remove(hp2);
  1063. hp2.free;
  1064. End
  1065. Else
  1066. If (Taicpu(p).oper[1].reg <> Taicpu(hp2).oper[1].reg) And
  1067. not(RegInRef(Taicpu(p).oper[1].reg,Taicpu(p).oper[0].ref^)) And
  1068. not(RegInRef(Taicpu(hp2).oper[1].reg,Taicpu(hp2).oper[0].ref^))
  1069. Then
  1070. { mov mem1, reg1 mov mem1, reg1
  1071. mov reg1, mem2 mov reg1, mem2
  1072. mov mem2, reg2 mov mem2, reg1
  1073. to: to:
  1074. mov mem1, reg1 mov mem1, reg1
  1075. mov mem1, reg2 mov reg1, mem2
  1076. mov reg1, mem2
  1077. or (if mem1 depends on reg1
  1078. and/or if mem2 depends on reg2)
  1079. to:
  1080. mov mem1, reg1
  1081. mov reg1, mem2
  1082. mov reg1, reg2
  1083. }
  1084. Begin
  1085. Taicpu(hp1).LoadRef(0,newreference(Taicpu(p).oper[0].ref^));
  1086. Taicpu(hp1).LoadReg(1,Taicpu(hp2).oper[1].reg);
  1087. Taicpu(hp2).LoadRef(1,newreference(Taicpu(hp2).oper[0].ref^));
  1088. Taicpu(hp2).LoadReg(0,Taicpu(p).oper[1].reg);
  1089. allocRegBetween(asmL,Taicpu(p).oper[1].reg,p,hp2);
  1090. if (Taicpu(p).oper[0].ref^.base in (usableregs+[R_EDI])) then
  1091. allocRegBetween(asmL,Taicpu(p).oper[0].ref^.base,p,hp2);
  1092. if (Taicpu(p).oper[0].ref^.index in (usableregs+[R_EDI])) then
  1093. allocRegBetween(asmL,Taicpu(p).oper[0].ref^.index,p,hp2);
  1094. End
  1095. Else
  1096. If (Taicpu(hp1).Oper[0].reg <> Taicpu(hp2).Oper[1].reg) Then
  1097. begin
  1098. Taicpu(hp2).LoadReg(0,Taicpu(hp1).Oper[0].reg);
  1099. allocRegBetween(asmL,Taicpu(p).oper[1].reg,p,hp2);
  1100. end
  1101. else
  1102. begin
  1103. asml.Remove(hp2);
  1104. hp2.free;
  1105. end
  1106. End;
  1107. End
  1108. Else
  1109. (* {movl [mem1],reg1
  1110. movl [mem1],reg2
  1111. to:
  1112. movl [mem1],reg1
  1113. movl reg1,reg2 }
  1114. If (Taicpu(p).oper[0].typ = top_ref) and
  1115. (Taicpu(p).oper[1].typ = top_reg) and
  1116. (Taicpu(hp1).oper[0].typ = top_ref) and
  1117. (Taicpu(hp1).oper[1].typ = top_reg) and
  1118. (Taicpu(p).opsize = Taicpu(hp1).opsize) and
  1119. RefsEqual(TReference(Taicpu(p).oper[0]^),Taicpu(hp1).oper[0]^.ref^) and
  1120. (Taicpu(p).oper[1].reg<>Taicpu(hp1).oper[0]^.ref^.base) and
  1121. (Taicpu(p).oper[1].reg<>Taicpu(hp1).oper[0]^.ref^.index) then
  1122. Taicpu(hp1).LoadReg(0,Taicpu(p).oper[1].reg)
  1123. Else*)
  1124. { movl const1,[mem1]
  1125. movl [mem1],reg1
  1126. to:
  1127. movl const1,reg1
  1128. movl reg1,[mem1] }
  1129. If (Taicpu(p).oper[0].typ = top_const) and
  1130. (Taicpu(p).oper[1].typ = top_ref) and
  1131. (Taicpu(hp1).oper[0].typ = top_ref) and
  1132. (Taicpu(hp1).oper[1].typ = top_reg) and
  1133. (Taicpu(p).opsize = Taicpu(hp1).opsize) and
  1134. RefsEqual(Taicpu(hp1).oper[0].ref^,Taicpu(p).oper[1].ref^) then
  1135. Begin
  1136. allocregbetween(asml,Taicpu(hp1).oper[1].reg,p,hp1);
  1137. { allocregbetween doesn't insert this because at }
  1138. { this time, no regalloc info is available in }
  1139. { the optinfo field, so do it manually (JM) }
  1140. hp2 := TaiRegalloc.Alloc(Taicpu(hp1).oper[1].reg);
  1141. insertllitem(asml,p.previous,p,hp2);
  1142. Taicpu(hp1).LoadReg(0,Taicpu(hp1).oper[1].reg);
  1143. Taicpu(hp1).LoadRef(1,newreference(Taicpu(p).oper[1].ref^));
  1144. Taicpu(p).LoadReg(1,Taicpu(hp1).oper[0].reg);
  1145. End
  1146. End;
  1147. End;
  1148. A_MOVZX:
  1149. Begin
  1150. {removes superfluous And's after movzx's}
  1151. If (Taicpu(p).oper[1].typ = top_reg) And
  1152. GetNextInstruction(p, hp1) And
  1153. (Tai(hp1).typ = ait_instruction) And
  1154. (Taicpu(hp1).opcode = A_AND) And
  1155. (Taicpu(hp1).oper[0].typ = top_const) And
  1156. (Taicpu(hp1).oper[1].typ = top_reg) And
  1157. (Taicpu(hp1).oper[1].reg = Taicpu(p).oper[1].reg)
  1158. Then
  1159. Case Taicpu(p).opsize Of
  1160. S_BL, S_BW:
  1161. If (Taicpu(hp1).oper[0].val = $ff) Then
  1162. Begin
  1163. asml.Remove(hp1);
  1164. hp1.free;
  1165. End;
  1166. S_WL:
  1167. If (Taicpu(hp1).oper[0].val = $ffff) Then
  1168. Begin
  1169. asml.Remove(hp1);
  1170. hp1.free;
  1171. End;
  1172. End;
  1173. {changes some movzx constructs to faster synonims (all examples
  1174. are given with eax/ax, but are also valid for other registers)}
  1175. If (Taicpu(p).oper[1].typ = top_reg) Then
  1176. If (Taicpu(p).oper[0].typ = top_reg) Then
  1177. Case Taicpu(p).opsize of
  1178. S_BW:
  1179. Begin
  1180. If (Taicpu(p).oper[0].reg = Reg16ToReg8(Taicpu(p).oper[1].reg)) And
  1181. Not(CS_LittleSize In aktglobalswitches)
  1182. Then
  1183. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1184. Begin
  1185. Taicpu(p).opcode := A_AND;
  1186. Taicpu(p).changeopsize(S_W);
  1187. Taicpu(p).LoadConst(0,$ff);
  1188. End
  1189. Else
  1190. If GetNextInstruction(p, hp1) And
  1191. (Tai(hp1).typ = ait_instruction) And
  1192. (Taicpu(hp1).opcode = A_AND) And
  1193. (Taicpu(hp1).oper[0].typ = top_const) And
  1194. (Taicpu(hp1).oper[1].typ = top_reg) And
  1195. (Taicpu(hp1).oper[1].reg = Taicpu(p).oper[1].reg)
  1196. Then
  1197. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1198. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1199. Begin
  1200. Taicpu(p).opcode := A_MOV;
  1201. Taicpu(p).changeopsize(S_W);
  1202. Taicpu(p).LoadReg(0,Reg8ToReg16(Taicpu(p).oper[0].reg));
  1203. Taicpu(hp1).LoadConst(0,Taicpu(hp1).oper[0].val And $ff);
  1204. End;
  1205. End;
  1206. S_BL:
  1207. Begin
  1208. If (Taicpu(p).oper[0].reg = Reg32ToReg8(Taicpu(p).oper[1].reg)) And
  1209. Not(CS_LittleSize in aktglobalswitches)
  1210. Then
  1211. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1212. Begin
  1213. Taicpu(p).opcode := A_AND;
  1214. Taicpu(p).changeopsize(S_L);
  1215. Taicpu(p).loadconst(0,$ff)
  1216. End
  1217. Else
  1218. If GetNextInstruction(p, hp1) And
  1219. (Tai(hp1).typ = ait_instruction) And
  1220. (Taicpu(hp1).opcode = A_AND) And
  1221. (Taicpu(hp1).oper[0].typ = top_const) And
  1222. (Taicpu(hp1).oper[1].typ = top_reg) And
  1223. (Taicpu(hp1).oper[1].reg = Taicpu(p).oper[1].reg)
  1224. Then
  1225. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1226. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1227. Begin
  1228. Taicpu(p).opcode := A_MOV;
  1229. Taicpu(p).changeopsize(S_L);
  1230. Taicpu(p).LoadReg(0,Reg8ToReg32(Taicpu(p).oper[0].reg));
  1231. Taicpu(hp1).LoadConst(0,Taicpu(hp1).oper[0].val And $ff);
  1232. End
  1233. End;
  1234. S_WL:
  1235. Begin
  1236. If (Taicpu(p).oper[0].reg = Reg32ToReg16(Taicpu(p).oper[1].reg)) And
  1237. Not(CS_LittleSize In aktglobalswitches)
  1238. Then
  1239. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1240. Begin
  1241. Taicpu(p).opcode := A_AND;
  1242. Taicpu(p).changeopsize(S_L);
  1243. Taicpu(p).LoadConst(0,$ffff);
  1244. End
  1245. Else
  1246. If GetNextInstruction(p, hp1) And
  1247. (Tai(hp1).typ = ait_instruction) And
  1248. (Taicpu(hp1).opcode = A_AND) And
  1249. (Taicpu(hp1).oper[0].typ = top_const) And
  1250. (Taicpu(hp1).oper[1].typ = top_reg) And
  1251. (Taicpu(hp1).oper[1].reg = Taicpu(p).oper[1].reg)
  1252. Then
  1253. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1254. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1255. Begin
  1256. Taicpu(p).opcode := A_MOV;
  1257. Taicpu(p).changeopsize(S_L);
  1258. Taicpu(p).LoadReg(0,Reg16ToReg32(Taicpu(p).oper[0].reg));
  1259. Taicpu(hp1).LoadConst(0,Taicpu(hp1).oper[0].val And $ffff);
  1260. End;
  1261. End;
  1262. End
  1263. Else
  1264. If (Taicpu(p).oper[0].typ = top_ref) Then
  1265. Begin
  1266. If GetNextInstruction(p, hp1) And
  1267. (Tai(hp1).typ = ait_instruction) And
  1268. (Taicpu(hp1).opcode = A_AND) And
  1269. (Taicpu(hp1).oper[0].typ = Top_Const) And
  1270. (Taicpu(hp1).oper[1].typ = Top_Reg) And
  1271. (Taicpu(hp1).oper[1].reg = Taicpu(p).oper[1].reg) Then
  1272. Begin
  1273. Taicpu(p).opcode := A_MOV;
  1274. Case Taicpu(p).opsize Of
  1275. S_BL:
  1276. Begin
  1277. Taicpu(p).changeopsize(S_L);
  1278. Taicpu(hp1).LoadConst(0,Taicpu(hp1).oper[0].val And $ff);
  1279. End;
  1280. S_WL:
  1281. Begin
  1282. Taicpu(p).changeopsize(S_L);
  1283. Taicpu(hp1).LoadConst(0,Taicpu(hp1).oper[0].val And $ffff);
  1284. End;
  1285. S_BW:
  1286. Begin
  1287. Taicpu(p).changeopsize(S_W);
  1288. Taicpu(hp1).LoadConst(0,Taicpu(hp1).oper[0].val And $ff);
  1289. End;
  1290. End;
  1291. End;
  1292. End;
  1293. End;
  1294. A_POP:
  1295. Begin
  1296. if target_info.target=target_i386_go32v2 then
  1297. begin
  1298. { Transform a series of pop/pop/pop/push/push/push to }
  1299. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1300. { because I'm not sure whether they can cope with }
  1301. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1302. { such a problem when using esp as frame pointer (JM) }
  1303. if (Taicpu(p).oper[0].typ = top_reg) then
  1304. begin
  1305. hp1 := p;
  1306. hp2 := p;
  1307. l := 0;
  1308. while getNextInstruction(hp1,hp1) and
  1309. (hp1.typ = ait_instruction) and
  1310. (Taicpu(hp1).opcode = A_POP) and
  1311. (Taicpu(hp1).oper[0].typ = top_reg) do
  1312. begin
  1313. hp2 := hp1;
  1314. inc(l,4);
  1315. end;
  1316. getLastInstruction(p,hp3);
  1317. l1 := 0;
  1318. while (hp2 <> hp3) and
  1319. assigned(hp1) and
  1320. (hp1.typ = ait_instruction) and
  1321. (Taicpu(hp1).opcode = A_PUSH) and
  1322. (Taicpu(hp1).oper[0].typ = top_reg) and
  1323. (Taicpu(hp1).oper[0].reg = Taicpu(hp2).oper[0].reg) do
  1324. begin
  1325. { change it to a two op operation }
  1326. Taicpu(hp2).oper[1].typ:=top_none;
  1327. Taicpu(hp2).ops:=2;
  1328. Taicpu(hp2).opcode := A_MOV;
  1329. Taicpu(hp2).Loadoper(1,Taicpu(hp1).oper[0]);
  1330. reset_reference(tmpref);
  1331. tmpRef.base := stack_pointer;
  1332. tmpRef.offset := l;
  1333. Taicpu(hp2).loadRef(0,newReference(tmpRef));
  1334. hp4 := hp1;
  1335. getNextInstruction(hp1,hp1);
  1336. asml.remove(hp4);
  1337. hp4.free;
  1338. getLastInstruction(hp2,hp2);
  1339. dec(l,4);
  1340. inc(l1);
  1341. end;
  1342. if l <> -4 then
  1343. begin
  1344. inc(l,4);
  1345. for l1 := l1 downto 1 do
  1346. begin
  1347. getNextInstruction(hp2,hp2);
  1348. dec(Taicpu(hp2).oper[0].ref^.offset,l);
  1349. end
  1350. end
  1351. end
  1352. end
  1353. else
  1354. begin
  1355. if (Taicpu(p).oper[0].typ = top_reg) And
  1356. GetNextInstruction(p, hp1) And
  1357. (Tai(hp1).typ=ait_instruction) and
  1358. (Taicpu(hp1).opcode=A_PUSH) and
  1359. (Taicpu(hp1).oper[0].typ = top_reg) And
  1360. (Taicpu(hp1).oper[0].reg=Taicpu(p).oper[0].reg) then
  1361. Begin
  1362. { change it to a two op operation }
  1363. Taicpu(p).oper[1].typ:=top_none;
  1364. Taicpu(p).ops:=2;
  1365. Taicpu(p).opcode := A_MOV;
  1366. Taicpu(p).Loadoper(1,Taicpu(p).oper[0]);
  1367. Reset_reference(tmpref);
  1368. TmpRef.base := R_ESP;
  1369. Taicpu(p).LoadRef(0,newReference(TmpRef));
  1370. asml.Remove(hp1);
  1371. hp1.free;
  1372. End;
  1373. end;
  1374. end;
  1375. A_PUSH:
  1376. Begin
  1377. If (Taicpu(p).opsize = S_W) And
  1378. (Taicpu(p).oper[0].typ = Top_Const) And
  1379. GetNextInstruction(p, hp1) And
  1380. (Tai(hp1).typ = ait_instruction) And
  1381. (Taicpu(hp1).opcode = A_PUSH) And
  1382. (Taicpu(hp1).oper[0].typ = Top_Const) And
  1383. (Taicpu(hp1).opsize = S_W) Then
  1384. Begin
  1385. Taicpu(p).changeopsize(S_L);
  1386. Taicpu(p).LoadConst(0,Taicpu(p).oper[0].val shl 16 + word(Taicpu(hp1).oper[0].val));
  1387. asml.Remove(hp1);
  1388. hp1.free;
  1389. End;
  1390. End;
  1391. A_SHL, A_SAL:
  1392. Begin
  1393. If (Taicpu(p).oper[0].typ = Top_Const) And
  1394. (Taicpu(p).oper[1].typ = Top_Reg) And
  1395. (Taicpu(p).opsize = S_L) And
  1396. (Taicpu(p).oper[0].val <= 3)
  1397. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1398. Then
  1399. Begin
  1400. TmpBool1 := True; {should we check the next instruction?}
  1401. TmpBool2 := False; {have we found an add/sub which could be
  1402. integrated in the lea?}
  1403. Reset_reference(tmpref);
  1404. TmpRef.index := Taicpu(p).oper[1].reg;
  1405. TmpRef.scalefactor := 1 shl Taicpu(p).oper[0].val;
  1406. While TmpBool1 And
  1407. GetNextInstruction(p, hp1) And
  1408. (Tai(hp1).typ = ait_instruction) And
  1409. ((Taicpu(hp1).opcode = A_ADD) Or
  1410. (Taicpu(hp1).opcode = A_SUB)) And
  1411. (Taicpu(hp1).oper[1].typ = Top_Reg) And
  1412. (Taicpu(hp1).oper[1].reg = Taicpu(p).oper[1].reg) Do
  1413. Begin
  1414. TmpBool1 := False;
  1415. If (Taicpu(hp1).oper[0].typ = Top_Const)
  1416. Then
  1417. Begin
  1418. TmpBool1 := True;
  1419. TmpBool2 := True;
  1420. If Taicpu(hp1).opcode = A_ADD Then
  1421. Inc(TmpRef.offset, Taicpu(hp1).oper[0].val)
  1422. Else
  1423. Dec(TmpRef.offset, Taicpu(hp1).oper[0].val);
  1424. asml.Remove(hp1);
  1425. hp1.free;
  1426. End
  1427. Else
  1428. If (Taicpu(hp1).oper[0].typ = Top_Reg) And
  1429. (Taicpu(hp1).opcode = A_ADD) And
  1430. (TmpRef.base = R_NO) Then
  1431. Begin
  1432. TmpBool1 := True;
  1433. TmpBool2 := True;
  1434. TmpRef.base := Taicpu(hp1).oper[0].reg;
  1435. asml.Remove(hp1);
  1436. hp1.free;
  1437. End;
  1438. End;
  1439. If TmpBool2 Or
  1440. ((aktoptprocessor < ClassP6) And
  1441. (Taicpu(p).oper[0].val <= 3) And
  1442. Not(CS_LittleSize in aktglobalswitches))
  1443. Then
  1444. Begin
  1445. If Not(TmpBool2) And
  1446. (Taicpu(p).oper[0].val = 1)
  1447. Then
  1448. Begin
  1449. hp1 := Taicpu.Op_reg_reg(A_ADD,Taicpu(p).opsize,
  1450. Taicpu(p).oper[1].reg, Taicpu(p).oper[1].reg)
  1451. End
  1452. Else hp1 := Taicpu.op_ref_reg(A_LEA, S_L, newReference(TmpRef),
  1453. Taicpu(p).oper[1].reg);
  1454. InsertLLItem(AsmL,p.previous, p.next, hp1);
  1455. p.free;
  1456. p := hp1;
  1457. End;
  1458. End
  1459. Else
  1460. If (aktoptprocessor < ClassP6) And
  1461. (Taicpu(p).oper[0].typ = top_const) And
  1462. (Taicpu(p).oper[1].typ = top_reg) Then
  1463. If (Taicpu(p).oper[0].val = 1)
  1464. Then
  1465. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1466. but faster on a 486, and Tairable in both U and V pipes on the Pentium
  1467. (unlike shl, which is only Tairable in the U pipe)}
  1468. Begin
  1469. hp1 := Taicpu.Op_reg_reg(A_ADD,Taicpu(p).opsize,
  1470. Taicpu(p).oper[1].reg, Taicpu(p).oper[1].reg);
  1471. InsertLLItem(AsmL,p.previous, p.next, hp1);
  1472. p.free;
  1473. p := hp1;
  1474. End
  1475. Else If (Taicpu(p).opsize = S_L) and
  1476. (Taicpu(p).oper[0].val<= 3) Then
  1477. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1478. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1479. Begin
  1480. Reset_reference(tmpref);
  1481. TmpRef.index := Taicpu(p).oper[1].reg;
  1482. TmpRef.scalefactor := 1 shl Taicpu(p).oper[0].val;
  1483. hp1 := Taicpu.Op_ref_reg(A_LEA,S_L,newReference(TmpRef), Taicpu(p).oper[1].reg);
  1484. InsertLLItem(AsmL,p.previous, p.next, hp1);
  1485. p.free;
  1486. p := hp1;
  1487. End
  1488. End;
  1489. A_SETcc :
  1490. { changes
  1491. setcc (funcres) setcc reg
  1492. movb (funcres), reg to leave/ret
  1493. leave/ret }
  1494. Begin
  1495. If (Taicpu(p).oper[0].typ = top_ref) And
  1496. GetNextInstruction(p, hp1) And
  1497. GetNextInstruction(hp1, hp2) And
  1498. (hp2.typ = ait_instruction) And
  1499. ((Taicpu(hp2).opcode = A_LEAVE) or
  1500. (Taicpu(hp2).opcode = A_RET)) And
  1501. (Taicpu(p).oper[0].ref^.Base = procinfo^.FramePointer) And
  1502. (Taicpu(p).oper[0].ref^.Index = R_NO) And
  1503. (Taicpu(p).oper[0].ref^.Offset >= procinfo^.Return_Offset) And
  1504. (hp1.typ = ait_instruction) And
  1505. (Taicpu(hp1).opcode = A_MOV) And
  1506. (Taicpu(hp1).opsize = S_B) And
  1507. (Taicpu(hp1).oper[0].typ = top_ref) And
  1508. RefsEqual(Taicpu(hp1).oper[0].ref^, Taicpu(p).oper[0].ref^) Then
  1509. Begin
  1510. Taicpu(p).LoadReg(0,Taicpu(hp1).oper[1].reg);
  1511. asml.Remove(hp1);
  1512. hp1.free;
  1513. End
  1514. End;
  1515. A_SUB:
  1516. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1517. { * change "sub/add const1, reg" or "dec reg" followed by
  1518. "sub const2, reg" to one "sub ..., reg" }
  1519. Begin
  1520. If (Taicpu(p).oper[0].typ = top_const) And
  1521. (Taicpu(p).oper[1].typ = top_reg) Then
  1522. If (Taicpu(p).oper[0].val = 2) And
  1523. (Taicpu(p).oper[1].reg = R_ESP) and
  1524. { Don't do the sub/push optimization if the sub }
  1525. { comes from setting up the stack frame (JM) }
  1526. (not getLastInstruction(p,hp1) or
  1527. (hp1.typ <> ait_instruction) or
  1528. (Taicpu(hp1).opcode <> A_MOV) or
  1529. (Taicpu(hp1).oper[0].typ <> top_reg) or
  1530. (Taicpu(hp1).oper[0].reg <> R_ESP) or
  1531. (Taicpu(hp1).oper[1].typ <> top_reg) or
  1532. (Taicpu(hp1).oper[1].reg <> R_EBP)) then
  1533. Begin
  1534. hp1 := Tai(p.next);
  1535. While Assigned(hp1) And
  1536. (Tai(hp1).typ In [ait_instruction]+SkipInstr) And
  1537. not regReadByInstruction(R_ESP,hp1) and
  1538. not regModifiedByInstruction(R_ESP,hp1) do
  1539. hp1 := Tai(hp1.next);
  1540. If Assigned(hp1) And
  1541. (Tai(hp1).typ = ait_instruction) And
  1542. (Taicpu(hp1).opcode = A_PUSH) And
  1543. (Taicpu(hp1).opsize = S_W)
  1544. Then
  1545. Begin
  1546. Taicpu(hp1).changeopsize(S_L);
  1547. if Taicpu(hp1).oper[0].typ=top_reg then
  1548. Taicpu(hp1).LoadReg(0,Reg16ToReg32(Taicpu(hp1).oper[0].reg));
  1549. hp1 := Tai(p.next);
  1550. asml.Remove(p);
  1551. p.free;
  1552. p := hp1;
  1553. Continue
  1554. End;
  1555. If DoSubAddOpt(p) Then continue;
  1556. End
  1557. Else If DoSubAddOpt(p) Then Continue
  1558. End;
  1559. A_XOR:
  1560. If (Taicpu(p).oper[0].typ = top_reg) And
  1561. (Taicpu(p).oper[1].typ = top_reg) And
  1562. (Taicpu(p).oper[0].reg = Taicpu(p).oper[1].reg) then
  1563. { temporarily change this to 'mov reg,0' to make it easier }
  1564. { for the CSE. Will be changed back in pass 2 }
  1565. begin
  1566. Taicpu(p).opcode := A_MOV;
  1567. Taicpu(p).loadconst(0,0);
  1568. end;
  1569. End;
  1570. end; { if is_jmp }
  1571. End;
  1572. { ait_label:
  1573. Begin
  1574. If labelCanBeSkipped(Tai_label(p))
  1575. Then
  1576. Begin
  1577. hp1 := Tai(p.next);
  1578. asml.Remove(p);
  1579. p.free;
  1580. p := hp1;
  1581. Continue
  1582. End;
  1583. End;}
  1584. End;
  1585. p:=Tai(p.next);
  1586. end;
  1587. end;
  1588. function isFoldableArithOp(hp1: Taicpu; reg: tregister): boolean;
  1589. begin
  1590. IsFoldableArithOp := False;
  1591. case hp1.opcode of
  1592. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  1593. isFoldableArithOp :=
  1594. (Taicpu(hp1).oper[0].typ in [top_reg,top_const]) and
  1595. (Taicpu(hp1).oper[1].typ = top_reg) and
  1596. (Taicpu(hp1).oper[1].reg = reg);
  1597. A_INC,A_DEC:
  1598. isFoldableArithOp :=
  1599. (Taicpu(hp1).oper[0].typ = top_reg) and
  1600. (Taicpu(hp1).oper[0].reg = reg);
  1601. end;
  1602. end;
  1603. Procedure PeepHoleOptPass2(AsmL: TAAsmOutput; BlockStart, BlockEnd: Tai);
  1604. {$ifdef USECMOV}
  1605. function CanBeCMOV(p : Tai) : boolean;
  1606. begin
  1607. CanBeCMOV:=assigned(p) and (p.typ=ait_instruction) and
  1608. (Taicpu(p).opcode=A_MOV) and
  1609. (Taicpu(p).opsize in [S_L,S_W]) and
  1610. (Taicpu(p).oper[0].typ in [top_reg,top_ref]) and
  1611. (Taicpu(p).oper[1].typ in [top_reg,top_ref]);
  1612. end;
  1613. {$endif USECMOV}
  1614. var
  1615. p,hp1,hp2: Tai;
  1616. {$ifdef USECMOV}
  1617. l : longint;
  1618. condition : tasmcond;
  1619. hp3: Tai;
  1620. {$endif USECMOV}
  1621. UsedRegs, TmpUsedRegs: TRegSet;
  1622. Begin
  1623. P := BlockStart;
  1624. UsedRegs := [];
  1625. While (P <> BlockEnd) Do
  1626. Begin
  1627. UpdateUsedRegs(UsedRegs, Tai(p.next));
  1628. Case p.Typ Of
  1629. Ait_Instruction:
  1630. Begin
  1631. Case Taicpu(p).opcode Of
  1632. {$ifdef USECMOV}
  1633. A_Jcc:
  1634. if (aktspecificoptprocessor=ClassP6) then
  1635. begin
  1636. { check for
  1637. jCC xxx
  1638. <several movs>
  1639. xxx:
  1640. }
  1641. l:=0;
  1642. GetNextInstruction(p, hp1);
  1643. while assigned(hp1) And
  1644. CanBeCMOV(hp1) do
  1645. begin
  1646. inc(l);
  1647. GetNextInstruction(hp1,hp1);
  1648. end;
  1649. if assigned(hp1) then
  1650. begin
  1651. if FindLabel(PAsmLabel(Taicpu(p).oper[0].sym),hp1) then
  1652. begin
  1653. if (l<=4) and (l>0) then
  1654. begin
  1655. condition:=inverse_cond[Taicpu(p).condition];
  1656. GetNextInstruction(p,hp1);
  1657. asml.remove(p);
  1658. p.free;
  1659. p:=hp1;
  1660. repeat
  1661. Taicpu(hp1).opcode:=A_CMOVcc;
  1662. Taicpu(hp1).condition:=condition;
  1663. GetNextInstruction(hp1,hp1);
  1664. until not(assigned(hp1)) or
  1665. not(CanBeCMOV(hp1));
  1666. asml.remove(hp1);
  1667. hp1.free;
  1668. continue;
  1669. end;
  1670. end
  1671. else
  1672. begin
  1673. { check further for
  1674. jCC xxx
  1675. <several movs>
  1676. jmp yyy
  1677. xxx:
  1678. <several movs>
  1679. yyy:
  1680. }
  1681. { hp2 points to jmp xxx }
  1682. hp2:=hp1;
  1683. { skip hp1 to xxx }
  1684. GetNextInstruction(hp1, hp1);
  1685. if assigned(hp2) and
  1686. assigned(hp1) and
  1687. (l<=3) and
  1688. (hp2.typ=ait_instruction) and
  1689. (Taicpu(hp2).is_jmp) and
  1690. (Taicpu(hp2).condition=C_None) and
  1691. FindLabel(PAsmLabel(Taicpu(p).oper[0].sym),hp1) then
  1692. begin
  1693. l:=0;
  1694. while assigned(hp1) And
  1695. CanBeCMOV(hp1) do
  1696. begin
  1697. inc(l);
  1698. GetNextInstruction(hp1, hp1);
  1699. end;
  1700. end;
  1701. {
  1702. if assigned(hp1) and
  1703. FindLabel(PAsmLabel(Taicpu(hp2).oper[0].sym),hp1) then
  1704. begin
  1705. condition:=inverse_cond[Taicpu(p).condition];
  1706. GetNextInstruction(p,hp1);
  1707. asml.remove(p);
  1708. p.free;
  1709. p:=hp1;
  1710. repeat
  1711. Taicpu(hp1).opcode:=A_CMOVcc;
  1712. Taicpu(hp1).condition:=condition;
  1713. GetNextInstruction(hp1,hp1);
  1714. until not(assigned(hp1)) or
  1715. not(CanBeCMOV(hp1));
  1716. hp2:=hp1.next;
  1717. condition:=inverse_cond[condition];
  1718. asml.remove(hp1.next)
  1719. hp1.next.free;
  1720. asml.remove(hp1);
  1721. hp1.free;
  1722. continue;
  1723. end;
  1724. }
  1725. end;
  1726. end;
  1727. end;
  1728. {$endif USECMOV}
  1729. A_FSTP,A_FISTP:
  1730. if doFpuLoadStoreOpt(asmL,p) then
  1731. continue;
  1732. A_IMUL:
  1733. begin
  1734. if ((Taicpu(p).oper[0].typ = top_const) or
  1735. (Taicpu(p).oper[0].typ = top_symbol)) and
  1736. (Taicpu(p).oper[1].typ = top_reg) and
  1737. ((Taicpu(p).oper[2].typ = top_none) or
  1738. ((Taicpu(p).oper[2].typ = top_reg) and
  1739. (Taicpu(p).oper[2].reg = Taicpu(p).oper[1].reg))) and
  1740. getLastInstruction(p,hp1) and
  1741. (hp1.typ = ait_instruction) and
  1742. (Taicpu(hp1).opcode = A_MOV) and
  1743. (Taicpu(hp1).oper[0].typ = top_reg) and
  1744. (Taicpu(hp1).oper[1].typ = top_reg) and
  1745. (Taicpu(hp1).oper[1].reg = Taicpu(p).oper[1].reg) then
  1746. { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
  1747. begin
  1748. Taicpu(p).ops := 3;
  1749. Taicpu(p).loadreg(1,Taicpu(hp1).oper[0].reg);
  1750. Taicpu(p).loadreg(2,Taicpu(hp1).oper[1].reg);
  1751. asml.remove(hp1);
  1752. hp1.free;
  1753. end;
  1754. end;
  1755. A_MOV:
  1756. Begin
  1757. If (Taicpu(p).oper[0].typ = top_reg) And
  1758. (Taicpu(p).oper[1].typ = top_reg) And
  1759. GetNextInstruction(p, hp1) And
  1760. (hp1.typ = ait_Instruction) And
  1761. ((Taicpu(hp1).opcode = A_MOV) or
  1762. (Taicpu(hp1).opcode = A_MOVZX) or
  1763. (Taicpu(hp1).opcode = A_MOVSX)) And
  1764. (Taicpu(hp1).oper[0].typ = top_ref) And
  1765. (Taicpu(hp1).oper[1].typ = top_reg) And
  1766. ((Taicpu(hp1).oper[0].ref^.Base = Taicpu(p).oper[1].reg) Or
  1767. (Taicpu(hp1).oper[0].ref^.Index = Taicpu(p).oper[1].reg)) And
  1768. (Reg32(Taicpu(hp1).oper[1].reg) = Taicpu(p).oper[1].reg) Then
  1769. {mov reg1, reg2
  1770. mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
  1771. Begin
  1772. If (Taicpu(hp1).oper[0].ref^.Base = Taicpu(p).oper[1].reg) Then
  1773. Taicpu(hp1).oper[0].ref^.Base := Taicpu(p).oper[0].reg;
  1774. If (Taicpu(hp1).oper[0].ref^.Index = Taicpu(p).oper[1].reg) Then
  1775. Taicpu(hp1).oper[0].ref^.Index := Taicpu(p).oper[0].reg;
  1776. asml.Remove(p);
  1777. p.free;
  1778. p := hp1;
  1779. Continue;
  1780. End
  1781. Else If (Taicpu(p).oper[0].typ = top_ref) And
  1782. GetNextInstruction(p,hp1) And
  1783. (hp1.typ = ait_instruction) And
  1784. IsFoldableArithOp(Taicpu(hp1),Taicpu(p).oper[1].reg) And
  1785. GetNextInstruction(hp1,hp2) And
  1786. (hp2.typ = ait_instruction) And
  1787. (Taicpu(hp2).opcode = A_MOV) And
  1788. (Taicpu(hp2).oper[0].typ = top_reg) And
  1789. (Taicpu(hp2).oper[0].reg = Taicpu(p).oper[1].reg) And
  1790. (Taicpu(hp2).oper[1].typ = top_ref) Then
  1791. Begin
  1792. TmpUsedRegs := UsedRegs;
  1793. UpdateUsedRegs(TmpUsedRegs,Tai(hp1.next));
  1794. If (RefsEqual(Taicpu(hp2).oper[1].ref^, Taicpu(p).oper[0].ref^) And
  1795. Not(RegUsedAfterInstruction(Taicpu(p).oper[1].reg,
  1796. hp2, TmpUsedRegs)))
  1797. Then
  1798. { change mov (ref), reg }
  1799. { add/sub/or/... reg2/$const, reg }
  1800. { mov reg, (ref) }
  1801. { # release reg }
  1802. { to add/sub/or/... reg2/$const, (ref) }
  1803. Begin
  1804. case Taicpu(hp1).opcode of
  1805. A_INC,A_DEC:
  1806. Taicpu(hp1).LoadRef(0,newreference(Taicpu(p).oper[0].ref^))
  1807. else
  1808. Taicpu(hp1).LoadRef(1,newreference(Taicpu(p).oper[0].ref^));
  1809. end;
  1810. asml.Remove(p);
  1811. asml.Remove(hp2);
  1812. p.free;
  1813. hp2.free;
  1814. p := hp1
  1815. End;
  1816. End
  1817. End;
  1818. A_TEST, A_OR:
  1819. {removes the line marked with (x) from the sequence
  1820. And/or/xor/add/sub/... $x, %y
  1821. test/or %y, %y (x)
  1822. j(n)z _Label
  1823. as the first instruction already adjusts the ZF}
  1824. Begin
  1825. If OpsEqual(Taicpu(p).oper[0],Taicpu(p).oper[1]) Then
  1826. If GetLastInstruction(p, hp1) And
  1827. (Tai(hp1).typ = ait_instruction) Then
  1828. Case Taicpu(hp1).opcode Of
  1829. A_ADD, A_SUB, A_OR, A_XOR, A_AND{, A_SHL, A_SHR}:
  1830. Begin
  1831. If OpsEqual(Taicpu(hp1).oper[1],Taicpu(p).oper[0]) Then
  1832. Begin
  1833. hp1 := Tai(p.next);
  1834. asml.remove(p);
  1835. p.free;
  1836. p := Tai(hp1);
  1837. continue
  1838. End;
  1839. End;
  1840. A_DEC, A_INC, A_NEG:
  1841. Begin
  1842. If OpsEqual(Taicpu(hp1).oper[0],Taicpu(p).oper[0]) Then
  1843. Begin
  1844. Case Taicpu(hp1).opcode Of
  1845. A_DEC, A_INC:
  1846. {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
  1847. Begin
  1848. Case Taicpu(hp1).opcode Of
  1849. A_DEC: Taicpu(hp1).opcode := A_SUB;
  1850. A_INC: Taicpu(hp1).opcode := A_ADD;
  1851. End;
  1852. Taicpu(hp1).Loadoper(1,Taicpu(hp1).oper[0]);
  1853. Taicpu(hp1).LoadConst(0,1);
  1854. Taicpu(hp1).ops:=2;
  1855. End
  1856. End;
  1857. hp1 := Tai(p.next);
  1858. asml.remove(p);
  1859. p.free;
  1860. p := Tai(hp1);
  1861. continue
  1862. End;
  1863. End
  1864. End
  1865. End;
  1866. End;
  1867. End;
  1868. End;
  1869. p := Tai(p.next)
  1870. End;
  1871. End;
  Procedure PostPeepHoleOpts(AsmL: TAAsmOutput; BlockStart, BlockEnd: Tai);
  { Final peephole pass over the list items from BlockStart up to (but not
    including) BlockEnd.  Only items of type ait_instruction are looked at,
    and only three opcodes are rewritten:

      A_CALL  - when optimizing for a processor below ClassP6 and the next
                instruction is a jmp to a symbol:
                  "call X; jmp Y"  becomes  "push Y; jmp X"
      A_MOV   - "mov $0, %reg"  becomes  "xor %reg, %reg"
      A_MOVZX - for ClassP5 (Pentium and PentiumMMX), unless optimizing for
                size, a byte-to-long zero extension into a register accepted
                by IsGP32Reg is split into
                  "xorl %reg, %reg; movb <source>, %reg8"

    AsmL       : list that owns the instructions; new items are linked in
                 with InsertLLItem and obsolete ones removed from it
    BlockStart : first item to examine
    BlockEnd   : item at which the scan stops (not examined itself) }
  var
    p, nextInstr, newInstr: Tai;
    instr: Taicpu;
    destReg: TRegister;
  Begin
    p := BlockStart;
    While (p <> BlockEnd) Do
      Begin
        If p.typ = ait_instruction Then
          Begin
            { alias to avoid repeating the typecast below }
            instr := Taicpu(p);
            Case instr.opcode Of
              A_CALL:
                If (AktOptProcessor < ClassP6) And
                   GetNextInstruction(p, nextInstr) And
                   (nextInstr.typ = ait_instruction) And
                   (Taicpu(nextInstr).opcode = A_JMP) And
                   (Taicpu(nextInstr).oper[0].typ = top_symbol) Then
                  { change "call X; jmp Y" into "push Y; jmp X" }
                  Begin
                    { the push of the jmp target goes right in front of the call }
                    newInstr := Taicpu.Op_sym(A_PUSH, S_L,
                      Taicpu(nextInstr).oper[0].sym);
                    InsertLLItem(AsmL, p.previous, p, newInstr);
                    { the call itself becomes the jump }
                    instr.opcode := A_JMP;
                    instr.is_jmp := true;
                    { and the original jmp is no longer needed }
                    AsmL.Remove(nextInstr);
                    nextInstr.free;
                  End;
              A_MOV:
                If (instr.oper[0].typ = top_const) And
                   (instr.oper[0].val = 0) And
                   (instr.oper[1].typ = top_reg) Then
                  { change "mov $0, %reg" into "xor %reg, %reg" }
                  Begin
                    instr.opcode := A_XOR;
                    instr.LoadReg(0, instr.oper[1].reg);
                  End;
              A_MOVZX:
                If instr.oper[1].typ = top_reg Then
                  Begin
                    { remember the (still 32 bit) destination before operand 1
                      gets replaced by its 8 bit counterpart }
                    destReg := instr.oper[1].reg;
                    If instr.oper[0].typ = top_reg Then
                      Begin
                        { NOTE(review): nothing here checks that the source
                          register is not part of the destination register;
                          presumably that case never reaches this pass -
                          confirm }
                        If (instr.opsize = S_BL) And
                           IsGP32Reg(destReg) And
                           Not(CS_LittleSize in aktglobalswitches) And
                           (aktoptprocessor = ClassP5) Then
                          { change "movzbl %reg1, %reg2" into
                            "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium
                            and PentiumMMX }
                          Begin
                            newInstr := Taicpu.op_reg_reg(A_XOR, S_L,
                              destReg, destReg);
                            InsertLLItem(AsmL, p.previous, p, newInstr);
                            instr.opcode := A_MOV;
                            instr.changeopsize(S_B);
                            instr.LoadReg(1, Reg32ToReg8(destReg));
                          End;
                      End
                    Else
                      { memory source: the destination must not be used to
                        form the address, since it is cleared before the load }
                      If (instr.oper[0].typ = top_ref) And
                         (instr.oper[0].ref^.base <> destReg) And
                         (instr.oper[0].ref^.index <> destReg) And
                         Not(CS_LittleSize in aktglobalswitches) And
                         IsGP32Reg(destReg) And
                         (aktoptprocessor = ClassP5) And
                         (instr.opsize = S_BL) Then
                        { change "movzbl mem, %reg" into
                          "xorl %reg, %reg; movb mem, %reg8" for Pentium and
                          PentiumMMX }
                        Begin
                          newInstr := Taicpu.Op_reg_reg(A_XOR, S_L,
                            destReg, destReg);
                          instr.opcode := A_MOV;
                          instr.changeopsize(S_B);
                          instr.LoadReg(1, Reg32ToReg8(destReg));
                          InsertLLItem(AsmL, p.previous, p, newInstr);
                        End;
                  End;
            End;
          End;
        p := Tai(p.next)
      End;
  End;
  1957. End.
  1958. {
  1959. $Log$
  1960. Revision 1.8 2001-01-10 10:29:36 jonas
  1961. * really fixed problems with -Op2 opts (merged)
  1962. Revision 1.7 2001/01/07 15:49:49 jonas
  1963. * fixed bug in call/jmp optimization with -Op1 and -Op2
  1964. Revision 1.6 2000/12/25 00:07:33 peter
  1965. + new tlinkedlist class (merge of old tstringqueue,tcontainer and
  1966. tlinkedlist objects)
  1967. Revision 1.5 2000/12/16 16:00:12 jonas
  1968. * removed warnings about possible range check errors
  1969. Revision 1.4 2000/11/29 00:30:49 florian
  1970. * unused units removed from uses clause
  1971. * some changes for widestrings
  1972. Revision 1.3 2000/11/14 09:53:18 jonas
  1973. * added missing allocregbetween() (merged)
  1974. Revision 1.2 2000/10/24 10:40:54 jonas
  1975. + register renaming ("fixes" bug1088)
  1976. * changed command line options meanings for optimizer:
  1977. O2 now means peepholopts, CSE and register renaming in 1 pass
  1978. O3 is the same, but repeated until no further optimizations are
  1979. possible or until 5 passes have been done (to avoid endless loops)
  1980. * changed aopt386 so it does this looping
  1981. * added some procedures from csopt386 to the interface because they're
  1982. used by rropt386 as well
  1983. * some changes to csopt386 and daopt386 so that newly added instructions
  1984. by the CSE get optimizer info (they were simply skipped previously),
  1985. this fixes some bugs
  1986. Revision 1.1 2000/10/15 09:47:43 peter
  1987. * moved to i386/
  1988. Revision 1.13 2000/10/02 13:01:29 jonas
  1989. * fixed bug regarding removal of "test/or reg,reg": apparently, shr/shl
  1990. doesn't set the zero flag according to the contents of the register
  1991. after the shift :( (merged from fixes branch)
  1992. Revision 1.12 2000/09/24 15:06:23 peter
  1993. * use defines.inc
  1994. Revision 1.11 2000/09/18 11:28:36 jonas
  1995. * fixed web bug 1133 (merged from fixes branch)
  1996. Revision 1.10 2000/08/18 10:09:13 jonas
  1997. * fix for web bug1099 (merged from fixes branch)
  1998. Revision 1.9 2000/08/05 13:33:08 peter
  1999. * $ifdef go32v2 -> target_info.target=go32v2
  2000. Revision 1.8 2000/08/05 10:35:51 jonas
  2001. * readded l1 variable (between ifdef go32v2 to avoid hints/notes)
  2002. Revision 1.7 2000/08/04 22:00:52 peter
  2003. * merges from fixes
  2004. Revision 1.6 2000/07/31 08:44:05 jonas
  2005. - removed imul support from -dfoldarithops since "imull [reg32],[mem32]"
  2006. doesn't exist (merged from fixes branch)
  2007. Revision 1.5 2000/07/28 13:56:23 jonas
  2008. * fixed bug in shr/shl optimization when -Og is used (merged from fixes
  2009. branch)
  2010. Revision 1.4 2000/07/21 15:19:55 jonas
  2011. * daopt386: changes to getnextinstruction/getlastinstruction so they
  2012. ignore labels that have is_addr set
  2013. + daopt386/csopt386: remove loads of registers which are overwritten
  2014. before their contents are used (especially useful for removing superfluous
  2015. maybe_loadesi outputs and push/pops transformed by the optimization below)
  2016. + popt386: transform pop/pop/pop/.../push/push/push to sequences of
  2017. 'movl x(%esp),%reg' (only active when compiling a go32v2 compiler
  2018. currently because I don't know whether it's safe to do this under Win32/
  2019. Linux (because of problems we had when using esp as frame pointer on
  2020. those os'es))
  2021. Revision 1.3 2000/07/14 05:11:49 michael
  2022. + Patch to 1.1
  2023. Revision 1.2 2000/07/13 11:32:45 michael
  2024. + removed logs
  2025. }