popt386.pas 124 KB


  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit popt386;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses Aasmbase,aasmtai,aasmdata,aasmcpu,verbose;
  22. procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  23. procedure PeepHoleOptPass1(asml: TAsmList; BlockStart, BlockEnd: tai);
  24. procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);
  25. procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  26. implementation
  27. uses
  28. cutils,globtype,systems,
  29. globals,cgbase,procinfo,
  30. symsym,
  31. {$ifdef finaldestdebug}
  32. cobjects,
  33. {$endif finaldestdebug}
  34. cpuinfo,cpubase,cgutils,daopt386,
  35. cgx86,
  36. aoptx86;
  { Tells whether instruction hp1 is an arithmetic/logic operation whose
    destination is the register "reg" and whose source does not read "reg".

    hp1 : instruction to inspect
    reg : register that must be the destination operand

    Two-operand forms (add/sub/or/xor/and/shl/shr/sar) qualify when the
    source is a constant or a register other than "reg" and the destination
    is "reg".  One-operand forms (inc/dec/neg/not) qualify when their only
    operand is "reg".  Any other opcode yields false. }
  function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    var
      sourceOk: boolean;
    begin
      result := false;
      case hp1.opcode of
        A_INC,A_DEC,A_NEG,A_NOT:
          if hp1.oper[0]^.typ = top_reg then
            result := (hp1.oper[0]^.reg = reg);
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          begin
            { the source operand is examined first; the destination operand
              is only looked at when the source already qualifies }
            sourceOk := (hp1.oper[0]^.typ = top_const) or
                        ((hp1.oper[0]^.typ = top_reg) and
                         (hp1.oper[0]^.reg <> reg));
            if sourceOk then
              result := (hp1.oper[1]^.typ = top_reg) and
                        (hp1.oper[1]^.reg = reg);
          end;
      end;
    end;
  { Reports whether "reg" is still in use after instruction p.

    reg      : register to query (only its super register is considered)
    p        : instruction after which the register state is wanted
    UsedRegs : register set; updated in place via UpdateUsedRegs with the
               item following p

    Returns true when the super register is in the updated UsedRegs set and
    either there is no next instruction or that next instruction does not
    load the register with a new value (per regLoadedWithNewValue). }
  function RegUsedAfterInstruction(reg: Tregister; p: tai; var UsedRegs: TRegSet): Boolean;
    var
      superReg: tsuperregister;
      follower: tai;
    begin
      superReg := getsupreg(reg);
      UpdateUsedRegs(UsedRegs, tai(p.Next));
      result := false;
      if not (superReg in UsedRegs) then
        exit;
      if not getNextInstruction(p, follower) then
        result := true
      else
        result := not regLoadedWithNewValue(superReg, false, follower);
    end;
  { Reports whether p is the first instruction of a procedure exit sequence.

    Recognised sequences (AT&T operand order):
      ret
      leave; ret
      mov %ebp,%esp; pop %ebp; ret

    Anything that is not an instruction yields false. }
  function IsExitCode(p : tai) : boolean;
    var
      second, third : tai;
    begin
      result := false;
      if p.typ <> ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_RET:
          result := true;
        A_LEAVE:
          { "leave" must be immediately followed by "ret" }
          result := GetNextInstruction(p,second) and
                    (second.typ = ait_instruction) and
                    (taicpu(second).opcode = A_RET);
        A_MOV:
          { manual frame teardown: mov %ebp,%esp / pop %ebp / ret }
          result := (taicpu(p).oper[0]^.typ = top_reg) and
                    (taicpu(p).oper[0]^.reg = NR_EBP) and
                    (taicpu(p).oper[1]^.typ = top_reg) and
                    (taicpu(p).oper[1]^.reg = NR_ESP) and
                    GetNextInstruction(p,second) and
                    (second.typ = ait_instruction) and
                    (taicpu(second).opcode = A_POP) and
                    (taicpu(second).oper[0]^.typ = top_reg) and
                    (taicpu(second).oper[0]^.reg = NR_EBP) and
                    GetNextInstruction(second,third) and
                    (third.typ = ait_instruction) and
                    (taicpu(third).opcode = A_RET);
      end;
    end;
  { Tries to eliminate a redundant FPU "store then reload" pair starting at p.

    asmL : assembler list holding the instructions
    p    : on entry an fstp/fistp instruction; when the optimization fires it
           is moved to the instruction following the removed pair

    The pair "fstp mem; fld mem" (or "fistp mem; fild mem") with identical
    operand size and identical memory reference is removed entirely when
      - the operand size is S_FX,
      - the instruction after the pair starts the exit code (IsExitCode),
      - the reference is based on the frame pointer with no index register,
      - and it is not the case that a function result symbol exists and the
        reference offset lies below that symbol's location offset.

    Returns true if a "continue" should be done by the caller after this
    optimization, false when nothing was changed. }
  function doFpuLoadStoreOpt(asmL: TAsmList; var p: tai): boolean;
    var
      reload, exitIns: tai;
    begin
      result := false;

      { the store has to go to memory and be followed by an instruction }
      if taicpu(p).oper[0]^.typ <> top_ref then
        exit;
      if not getNextInstruction(p, reload) then
        exit;
      if reload.typ <> ait_instruction then
        exit;

      { only "fstp/fld" and "fistp/fild" pairs are of interest }
      if not (((taicpu(reload).opcode = A_FLD) and
               (taicpu(p).opcode = A_FSTP)) or
              ((taicpu(p).opcode = A_FISTP) and
               (taicpu(reload).opcode = A_FILD))) then
        exit;

      { both must access the very same memory location with the same size }
      if (taicpu(reload).oper[0]^.typ <> top_ref) or
         (taicpu(reload).opsize <> taicpu(p).opsize) or
         not RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(reload).oper[0]^.ref^) then
        exit;

      { replacing fstp f;fld f by fst f is only valid for extended because
        of rounding }
      if (taicpu(p).opsize=S_FX) and
         getNextInstruction(reload, exitIns) and
         (exitIns.typ = ait_instruction) and
         IsExitCode(exitIns) and
         (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
         not(assigned(current_procinfo.procdef.funcretsym) and
             (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
         (taicpu(p).oper[0]^.ref^.index = NR_NO) then
        begin
          asml.remove(p);
          asml.remove(reload);
          p.free;
          reload.free;
          p := exitIns;
          removeLastDeallocForFuncRes(asmL, p);
          result := true;
        end;

      (* Disabled alternative, kept for reference.  It can't be done because
         the store operation rounds:

           else
             { fst can't store an extended value! }
             if (taicpu(p).opsize <> S_FX) and
                (taicpu(p).opsize <> S_IQ) then
               begin
                 if (taicpu(p).opcode = A_FSTP) then
                   taicpu(p).opcode := A_FST
                 else taicpu(p).opcode := A_FIST;
                 asml.remove(hp1);
                 hp1.free;
               end
      *)
    end;
  { Returns true if instruction p has at least one memory operand whose
    reference carries a segment register, false otherwise. }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      opIdx: longint;
    begin
      result := false;
      for opIdx := 0 to p.opercnt-1 do
        if (p.oper[opIdx]^.typ = top_ref) and
           (p.oper[opIdx]^.ref^.segment <> NR_NO) then
          begin
            { first segmented reference settles the answer }
            result := true;
            exit;
          end;
    end;
  { Pre-pass of the peephole optimizer.

    asml       : assembler list that owns the instructions
    BlockStart : first item to examine
    BlockEnd   : item at which the walk stops (it is not examined itself)

    Walks the items from BlockStart up to BlockEnd and rewrites, in place:
      - "imul $const, reg[, reg2]" (32 bit) into mov/lea/add sequences for
        the constants 1, 3, 5, 6, 9, 10 and 12 (6, 10 and 12 only when the
        optimizer CPU type is at most cpu_386);
      - "shr/sar $c1, x; shl $c2, x" into a shorter shift/and combination;
      - "xor reg, reg" into "mov $0, reg".
    Instructions containing a segmented memory reference are skipped.

    Changes relative to the previous revision:
      - three-operand "imul $6, reg1, reg2": the second lea now writes reg2
        (it used to write reg1, contradicting the documented sequence and
        the parallel "imul $12" case);
      - the two-lea three-operand sequences for 6 and 12 are no longer
        applied when reg1 = reg2, because the first lea would overwrite the
        source needed by the second one. }
  procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
    var
      p,hp1: tai;
      l: aint;
      tmpRef: treference;
    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                { leave anything with a segment override alone }
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_IMUL:
                    {changes certain "imul const, %reg"'s to lea sequences}
                    begin
                      if (taicpu(p).oper[0]^.typ = Top_Const) and
                         (taicpu(p).oper[1]^.typ = Top_Reg) and
                         (taicpu(p).opsize = S_L) then
                        if (taicpu(p).oper[0]^.val = 1) then
                          if (taicpu(p).ops = 2) then
                            {remove "imul $1, reg"}
                            begin
                              hp1 := tai(p.Next);
                              asml.remove(p);
                              p.free;
                              p := hp1;
                              continue;
                            end
                          else
                            {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                            begin
                              hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                              InsertLLItem(asml, p.previous, p.next, hp1);
                              p.free;
                              p := hp1;
                            end
                        else if
                          ((taicpu(p).ops <= 2) or
                           (taicpu(p).oper[2]^.typ = Top_Reg)) and
                          (taicpu(p).oper[0]^.val <= 12) and
                          not(cs_opt_size in current_settings.optimizerswitches) and
                          { lea/add do not set the overflow flag the way imul
                            does, so keep imul when a jo/jno follows }
                          (not(GetNextInstruction(p, hp1)) or
                           {GetNextInstruction(p, hp1) and}
                           not((tai(hp1).typ = ait_instruction) and
                               ((taicpu(hp1).opcode=A_Jcc) and
                                (taicpu(hp1).condition in [C_O,C_NO])))) then
                          begin
                            reference_reset(tmpref,1);
                            case taicpu(p).oper[0]^.val Of
                              3: begin
                                   {imul 3, reg1, reg2 to
                                      lea (reg1,reg1,2), reg2
                                    imul 3, reg1 to
                                      lea (reg1,reg1,2), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 2;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(asml,p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              5: begin
                                   {imul 5, reg1, reg2 to
                                      lea (reg1,reg1,4), reg2
                                    imul 5, reg1 to
                                      lea (reg1,reg1,4), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 4;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(asml,p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              6: begin
                                   {imul 6, reg1, reg2 to
                                      lea (,reg1,2), reg2
                                      lea (reg2,reg1,4), reg2
                                    imul 6, reg1 to
                                      lea (reg1,reg1,2), reg1
                                      add reg1, reg1}
                                   { the three-operand sequence reads reg1
                                     after writing reg2, so it is only valid
                                     when the two registers differ }
                                   if (current_settings.optimizecputype <= cpu_386) and
                                      ((taicpu(p).ops <> 3) or
                                       (taicpu(p).oper[1]^.reg <> taicpu(p).oper[2]^.reg)) then
                                     begin
                                       { build the SECOND instruction first
                                         and insert it after p }
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := taicpu(p).oper[2]^.reg;
                                           TmpRef.ScaleFactor := 4;
                                           { destination is reg2 (was reg1: bug) }
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                             taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(asml,p, p.next, hp1);
                                       { now the FIRST instruction, which
                                         replaces p itself }
                                       reference_reset(tmpref,2);
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       TmpRef.ScaleFactor := 2;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := NR_NO;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                                             taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           TmpRef.base := taicpu(p).oper[1]^.reg;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(asml,p.previous, p.next, hp1);
                                       p.free;
                                       p := tai(hp1.next);
                                     end
                                 end;
                              9: begin
                                   {imul 9, reg1, reg2 to
                                      lea (reg1,reg1,8), reg2
                                    imul 9, reg1 to
                                      lea (reg1,reg1,8), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 8;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(asml,p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              10: begin
                                    {imul 10, reg1, reg2 to
                                       lea (reg1,reg1,4), reg2
                                       add reg2, reg2
                                     imul 10, reg1 to
                                       lea (reg1,reg1,4), reg1
                                       add reg1, reg1}
                                    if (current_settings.optimizecputype <= cpu_386) then
                                      begin
                                        { second instruction (add), inserted
                                          after p }
                                        if (taicpu(p).ops = 3) then
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                        else
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                        InsertLLItem(asml,p, p.next, hp1);
                                        { first instruction (lea), replaces p }
                                        TmpRef.base := taicpu(p).oper[1]^.reg;
                                        TmpRef.index := taicpu(p).oper[1]^.reg;
                                        TmpRef.ScaleFactor := 4;
                                        if (taicpu(p).ops = 3) then
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
                                        else
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(asml,p.previous, p.next, hp1);
                                        p.free;
                                        p := tai(hp1.next);
                                      end
                                  end;
                              12: begin
                                    {imul 12, reg1, reg2 to
                                       lea (,reg1,4), reg2
                                       lea (reg2,reg1,8), reg2
                                     imul 12, reg1 to
                                       lea (reg1,reg1,2), reg1
                                       lea (,reg1,4), reg1}
                                    { same reg1 <> reg2 restriction as for
                                      the three-operand "imul 6" sequence }
                                    if (current_settings.optimizecputype <= cpu_386) and
                                       ((taicpu(p).ops <> 3) or
                                        (taicpu(p).oper[1]^.reg <> taicpu(p).oper[2]^.reg)) then
                                      begin
                                        { second instruction, inserted after p }
                                        TmpRef.index := taicpu(p).oper[1]^.reg;
                                        if (taicpu(p).ops = 3) then
                                          begin
                                            TmpRef.base := taicpu(p).oper[2]^.reg;
                                            TmpRef.ScaleFactor := 8;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                          end
                                        else
                                          begin
                                            TmpRef.base := NR_NO;
                                            TmpRef.ScaleFactor := 4;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                          end;
                                        InsertLLItem(asml,p, p.next, hp1);
                                        { first instruction, replaces p }
                                        reference_reset(tmpref,2);
                                        TmpRef.index := taicpu(p).oper[1]^.reg;
                                        if (taicpu(p).ops = 3) then
                                          begin
                                            TmpRef.base := NR_NO;
                                            TmpRef.ScaleFactor := 4;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                          end
                                        else
                                          begin
                                            TmpRef.base := taicpu(p).oper[1]^.reg;
                                            TmpRef.ScaleFactor := 2;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                          end;
                                        InsertLLItem(asml,p.previous, p.next, hp1);
                                        p.free;
                                        p := tai(hp1.next);
                                      end
                                  end
                            end;
                          end;
                    end;
                  A_SAR, A_SHR:
                    {changes the code sequence
                       shr/sar const1, x
                       shl const2, x
                     to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
                    begin
                      if GetNextInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_SHL) and
                         (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(hp1).oper[0]^.typ = top_const) and
                         (taicpu(hp1).opsize = taicpu(p).opsize) and
                         (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                         OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                           not(cs_opt_size in current_settings.optimizerswitches) then
                          { shr/sar const1, %reg
                            shl     const2, %reg
                            with const1 > const2 }
                          begin
                            { p keeps shifting right by the difference, hp1
                              becomes the mask clearing the low const2 bits }
                            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                            taicpu(hp1).opcode := A_AND;
                            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
                            case taicpu(p).opsize Of
                              S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
                              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
                              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
                            end;
                          end
                        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                                not(cs_opt_size in current_settings.optimizerswitches) then
                          { shr/sar const1, %reg
                            shl     const2, %reg
                            with const1 < const2 }
                          begin
                            { hp1 keeps shifting left by the difference, p
                              becomes the mask clearing the low const1 bits }
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                            taicpu(p).opcode := A_AND;
                            l := (1 shl (taicpu(p).oper[0]^.val))-1;
                            case taicpu(p).opsize Of
                              S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                              S_B: taicpu(p).loadConst(0,l Xor $ff);
                              S_W: taicpu(p).loadConst(0,l Xor $ffff);
                            end;
                          end
                        else
                          { shr/sar const1, %reg
                            shl     const2, %reg
                            with const1 = const2 }
                          if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                            begin
                              { both shifts collapse into a single "and" }
                              taicpu(p).opcode := A_AND;
                              l := (1 shl (taicpu(p).oper[0]^.val))-1;
                              case taicpu(p).opsize Of
                                S_B: taicpu(p).loadConst(0,l Xor $ff);
                                S_W: taicpu(p).loadConst(0,l Xor $ffff);
                                S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                              end;
                              asml.remove(hp1);
                              hp1.free;
                            end;
                    end;
                  A_XOR:
                    if (taicpu(p).oper[0]^.typ = top_reg) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                      { temporarily change this to 'mov reg,0' to make it easier }
                      { for the CSE. Will be changed back in pass 2              }
                      begin
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).loadConst(0,0);
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  448. { First pass of peephole optimizations }
  449. procedure PeepHoleOptPass1(Asml: TAsmList; BlockStart, BlockEnd: tai);
  {$ifdef DEBUG_AOPTCPU}
  { Debug variant: inserts a comment item carrying the text s into the
    assembler list directly before item p. }
  procedure DebugMsg(const s: string;p : tai);
  begin
  asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  end;
  {$else DEBUG_AOPTCPU}
  { Non-debug variant: same signature, empty body, declared inline. }
  procedure DebugMsg(const s: string;p : tai);inline;
  begin
  end;
  {$endif DEBUG_AOPTCPU}
  { Debugging aid: prints the line "Ok" to standard output and always
    returns true. }
  function WriteOk : Boolean;
    begin
      WriteOk := True;
      writeln('Ok');
    end;
  465. var
  466. l : longint;
  467. p,hp1,hp2 : tai;
  468. hp3,hp4: tai;
  469. v:aint;
  470. TmpRef: TReference;
  471. UsedRegs, TmpUsedRegs: TRegSet;
  472. TmpBool1, TmpBool2: Boolean;
  { Skips everything after hp that is a label, an alignment item or a member
    of SkipInstr, and hands back the first item that is none of these.

    hp  : item to start from (hp itself is not tested, only its successors)
    hp2 : receives the item following the skipped run; when the list ends
          first it receives the last item reached instead

    Returns true when a following item was found, false when the end of the
    list was hit. }
  function SkipLabels(hp: tai; var hp2: tai): boolean;
    var
      cursor: tai;
    begin
      cursor := hp;
      while assigned(cursor.next) and
            (tai(cursor.next).typ in SkipInstr + [ait_label,ait_align]) do
        cursor := tai(cursor.next);
      result := assigned(cursor.next);
      if result then
        hp2 := tai(cursor.next)
      else
        hp2 := cursor;
    end;
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
    {traces successive jumps to their final destination and sets it, e.g.

       je l1                je l3
       <code>               <code>
       l1:       becomes    l1:
       je l2                je l3
       <code>               <code>
       l2:                  l2:
       jmp l3               jmp l3

     asml  : assembler list (used when a new label has to be inserted)
     hp    : the jump instruction whose target is to be resolved; its label
             operand is rewritten in place
     level : denotes how deep we have already followed the jump, to avoid
             endless loops with constructs such as "l5: ; jmp l5"; the
             search gives up (returning false) once level exceeds 20

     Returns false when the search was abandoned, true otherwise (also when
     nothing needed changing).

     Changes relative to the previous revision:
       - the "next label reused" branch now releases the reference on the
         label the jump used to point at, like the two sibling branches do;
       - the finaldestdebug statements had one closing parenthesis too many
         and could not compile when that symbol was defined. }
    var
      p1, p2: tai;
      l: tasmlabel;

    { Looks for a label directly after hp, ignoring SkipInstr members and
      alignment items.  Returns true and stores the label symbol in l when
      one is found; otherwise returns false and leaves l untouched. }
    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
      begin
        FindAnyLabel := false;
        while assigned(hp.next) and
              (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
          hp := tai(hp.next);
        if assigned(hp.next) and
           (tai(hp.next).typ = ait_label) then
          begin
            FindAnyLabel := true;
            l := tai_label(hp.next).labsym;
          end
      end;

    begin
      GetfinalDestination := false;
      if level > 20 then
        exit;
      { NOTE(review): dfa.getlabelwithsym presumably returns the label item
        for the given symbol, or nil when it is unknown - confirm }
      p1 := dfa.getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
      if assigned(p1) then
        begin
          { move p1 to the first real item after the target label }
          SkipLabels(p1,p1);
          if (tai(p1).typ = ait_instruction) and
             (taicpu(p1).is_jmp) then
            if { the next instruction after the label where the jump hp arrives}
               { is unconditional or of the same type as hp, so continue       }
               (taicpu(p1).condition in [C_None,hp.condition]) or
               { the next instruction after the label where the jump hp arrives}
               { is the opposite of hp (so this one is never taken), but after }
               { that one there is a branch that will be taken, so perform a   }
               { little hack: set p1 equal to this instruction (that's what the}
               { last SkipLabels is for, only works with short bool evaluation)}
               ((taicpu(p1).condition = inverse_cond(hp.condition)) and
                SkipLabels(p1,p2) and
                (p2.typ = ait_instruction) and
                (taicpu(p2).is_jmp) and
                (taicpu(p2).condition in [C_None,hp.condition]) and
                SkipLabels(p1,p1)) then
              begin
                { quick check for loops of the form "l5: ; jmp l5" }
                if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                    tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                  exit;
                { resolve the chained jump first, then adopt its target }
                if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                  exit;
                tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
                tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
              end
            else
              { the jump at the destination has the opposite condition, so
                it is never taken when arriving via hp: aim hp just past it }
              if (taicpu(p1).condition = inverse_cond(hp.condition)) then
                if not FindAnyLabel(p1,l) then
                  begin
      {$ifdef finaldestdebug}
                    insertllitem(asml,p1,p1.next,tai_comment.Create(
                      strpnew('previous label inserted')));
      {$endif finaldestdebug}
                    current_asmdata.getjumplabel(l);
                    insertllitem(asml,p1,p1.next,tai_label.Create(l));
                    tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    l.increfs;
                    { this won't work, since the new label isn't in the labeltable }
                    { so it will fail the rangecheck. Labeltable should become a   }
                    { hashtable to support this:                                   }
                    { GetFinalDestination(asml, hp);                               }
                  end
                else
                  begin
      {$ifdef finaldestdebug}
                    insertllitem(asml,p1,p1.next,tai_comment.Create(
                      strpnew('next label reused')));
      {$endif finaldestdebug}
                    l.increfs;
                    { hp stops referring to its old label: release that
                      reference so the label's use count stays accurate }
                    tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    if not GetFinalDestination(asml, hp,succ(level)) then
                      exit;
                  end;
        end;
      GetFinalDestination := true;
    end;
  { Folds an immediately preceding dec/sub/add on the same register into the
    constant of the instruction p.  The code reads p's oper[0] as a constant
    and oper[1] as a register; the merged constants correspond to p being
    "sub $const, reg".

    p : the instruction to fold into; when the folded constant becomes zero
        (add case only) p is removed and set to the preceding instruction,
        or to the following item when there is no preceding instruction

    Uses the enclosing routine's hp1 and asml.  hp1 is overwritten.

      dec reg        ; sub $c, reg   ->  sub $(c+1), reg
      sub $c1, reg   ; sub $c, reg   ->  sub $(c+c1), reg
      add $c1, reg   ; sub $c, reg   ->  sub $(c-c1), reg  (dropped if 0)

    Returns true only when p itself was removed, false otherwise. }
  function DoSubAddOpt(var p: tai): Boolean;

    { unlinks and disposes the predecessor instruction held in hp1 }
    procedure DropPredecessor;
      begin
        asml.remove(hp1);
        hp1.free;
      end;

    { true when hp1 has the shape "<op> $const, reg" with reg being the
      register in p's second operand }
    function PredIsConstOnSameReg: boolean;
      begin
        result := (taicpu(hp1).oper[0]^.typ = top_const) and
                  (taicpu(hp1).oper[1]^.typ = top_reg) and
                  (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg);
      end;

    begin
      result := False;
      { need a preceding instruction of identical operand size }
      if not GetLastInstruction(p, hp1) then
        exit;
      if hp1.typ <> ait_instruction then
        exit;
      if taicpu(hp1).opsize <> taicpu(p).opsize then
        exit;

      case taicpu(hp1).opcode Of
        A_DEC:
          if (taicpu(hp1).oper[0]^.typ = top_reg) and
             (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
              DropPredecessor;
            end;
        A_SUB:
          if PredIsConstOnSameReg then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
              DropPredecessor;
            end;
        A_ADD:
          if PredIsConstOnSameReg then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
              DropPredecessor;
              { the two instructions cancelled out completely }
              if (taicpu(p).oper[0]^.val = 0) then
                begin
                  hp1 := tai(p.next);
                  asml.remove(p);
                  p.free;
                  if not GetLastInstruction(hp1, p) then
                    p := hp1;
                  result := True;
                end
            end;
      end;
    end;
  627. begin
  628. p := BlockStart;
  629. UsedRegs := [];
  630. while (p <> BlockEnd) Do
  631. begin
  632. UpDateUsedRegs(UsedRegs, tai(p.next));
  633. case p.Typ Of
  634. ait_instruction:
  635. begin
  636. current_filepos:=taicpu(p).fileinfo;
  637. if InsContainsSegRef(taicpu(p)) then
  638. begin
  639. p := tai(p.next);
  640. continue;
  641. end;
  642. { Handle Jmp Optimizations }
  643. if taicpu(p).is_jmp then
  644. begin
  645. {the following if-block removes all code between a jmp and the next label,
  646. because it can never be executed}
  647. if (taicpu(p).opcode = A_JMP) then
  648. begin
  649. hp2:=p;
  650. while GetNextInstruction(hp2, hp1) and
  651. (hp1.typ <> ait_label) do
  652. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  653. begin
  654. { don't kill start/end of assembler block,
  655. no-line-info-start/end etc }
  656. if hp1.typ<>ait_marker then
  657. begin
  658. asml.remove(hp1);
  659. hp1.free;
  660. end
  661. else
  662. hp2:=hp1;
  663. end
  664. else break;
  665. end;
  666. { remove jumps to a label coming right after them }
  667. if GetNextInstruction(p, hp1) then
  668. begin
  669. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  670. { TODO: FIXME removing the first instruction fails}
  671. (p<>blockstart) then
  672. begin
  673. hp2:=tai(hp1.next);
  674. asml.remove(p);
  675. p.free;
  676. p:=hp2;
  677. continue;
  678. end
  679. else
  680. begin
  681. if hp1.typ = ait_label then
  682. SkipLabels(hp1,hp1);
  683. if (tai(hp1).typ=ait_instruction) and
  684. (taicpu(hp1).opcode=A_JMP) and
  685. GetNextInstruction(hp1, hp2) and
  686. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  687. begin
  688. if taicpu(p).opcode=A_Jcc then
  689. begin
  690. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  691. tai_label(hp2).labsym.decrefs;
  692. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  693. { when free'ing hp1, the ref. isn't decresed, so we don't
  694. increase it (FK)
  695. taicpu(p).oper[0]^.ref^.symbol.increfs;
  696. }
  697. asml.remove(hp1);
  698. hp1.free;
  699. GetFinalDestination(asml, taicpu(p),0);
  700. end
  701. else
  702. begin
  703. GetFinalDestination(asml, taicpu(p),0);
  704. p:=tai(p.next);
  705. continue;
  706. end;
  707. end
  708. else
  709. GetFinalDestination(asml, taicpu(p),0);
  710. end;
  711. end;
  712. end
  713. else
  714. { All other optimizes }
  715. begin
  716. for l := 0 to taicpu(p).ops-1 Do
  717. if (taicpu(p).oper[l]^.typ = top_ref) then
  718. With taicpu(p).oper[l]^.ref^ Do
  719. begin
  720. if (base = NR_NO) and
  721. (index <> NR_NO) and
  722. (scalefactor in [0,1]) then
  723. begin
  724. base := index;
  725. index := NR_NO
  726. end
  727. end;
  728. case taicpu(p).opcode Of
  729. A_AND:
  730. begin
  731. if (taicpu(p).oper[0]^.typ = top_const) and
  732. (taicpu(p).oper[1]^.typ = top_reg) and
  733. GetNextInstruction(p, hp1) and
  734. (tai(hp1).typ = ait_instruction) and
  735. (taicpu(hp1).opcode = A_AND) and
  736. (taicpu(hp1).oper[0]^.typ = top_const) and
  737. (taicpu(hp1).oper[1]^.typ = top_reg) and
  738. (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  739. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) then
  740. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  741. begin
  742. taicpu(hp1).loadConst(0,taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  743. asml.remove(p);
  744. p.free;
  745. p:=hp1;
  746. end
  747. else
  748. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  749. jump, but only if it's a conditional jump (PFV) }
  750. if (taicpu(p).oper[1]^.typ = top_reg) and
  751. GetNextInstruction(p, hp1) and
  752. (hp1.typ = ait_instruction) and
  753. (taicpu(hp1).is_jmp) and
  754. (taicpu(hp1).opcode<>A_JMP) and
  755. not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then
  756. taicpu(p).opcode := A_TEST;
  757. end;
  758. A_CMP:
  759. begin
  760. { cmp register,$8000 neg register
  761. je target --> jo target
  762. .... only if register is deallocated before jump.}
  763. case Taicpu(p).opsize of
  764. S_B: v:=$80;
  765. S_W: v:=$8000;
  766. S_L: v:=aint($80000000);
  767. else
  768. internalerror(2013112905);
  769. end;
  770. if (taicpu(p).oper[0]^.typ=Top_const) and
  771. (taicpu(p).oper[0]^.val=v) and
  772. (Taicpu(p).oper[1]^.typ=top_reg) and
  773. GetNextInstruction(p, hp1) and
  774. (hp1.typ=ait_instruction) and
  775. (taicpu(hp1).opcode=A_Jcc) and
  776. (Taicpu(hp1).condition in [C_E,C_NE]) and
  777. not(getsupreg(Taicpu(p).oper[1]^.reg) in usedregs) then
  778. begin
  779. Taicpu(p).opcode:=A_NEG;
  780. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  781. Taicpu(p).clearop(1);
  782. Taicpu(p).ops:=1;
  783. if Taicpu(hp1).condition=C_E then
  784. Taicpu(hp1).condition:=C_O
  785. else
  786. Taicpu(hp1).condition:=C_NO;
  787. continue;
  788. end;
  789. {
  790. @@2: @@2:
  791. .... ....
  792. cmp operand1,0
  793. jle/jbe @@1
  794. dec operand1 --> sub operand1,1
  795. jmp @@2 jge/jae @@2
  796. @@1: @@1:
  797. ... ....}
  798. if (taicpu(p).oper[0]^.typ = top_const) and
  799. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  800. (taicpu(p).oper[0]^.val = 0) and
  801. GetNextInstruction(p, hp1) and
  802. (hp1.typ = ait_instruction) and
  803. (taicpu(hp1).is_jmp) and
  804. (taicpu(hp1).opcode=A_Jcc) and
  805. (taicpu(hp1).condition in [C_LE,C_BE]) and
  806. GetNextInstruction(hp1,hp2) and
  807. (hp2.typ = ait_instruction) and
  808. (taicpu(hp2).opcode = A_DEC) and
  809. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  810. GetNextInstruction(hp2, hp3) and
  811. (hp3.typ = ait_instruction) and
  812. (taicpu(hp3).is_jmp) and
  813. (taicpu(hp3).opcode = A_JMP) and
  814. GetNextInstruction(hp3, hp4) and
  815. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  816. begin
  817. taicpu(hp2).Opcode := A_SUB;
  818. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  819. taicpu(hp2).loadConst(0,1);
  820. taicpu(hp2).ops:=2;
  821. taicpu(hp3).Opcode := A_Jcc;
  822. case taicpu(hp1).condition of
  823. C_LE: taicpu(hp3).condition := C_GE;
  824. C_BE: taicpu(hp3).condition := C_AE;
  825. end;
  826. asml.remove(p);
  827. asml.remove(hp1);
  828. p.free;
  829. hp1.free;
  830. p := hp2;
  831. continue;
  832. end
  833. end;
  834. A_FLD:
  835. begin
  836. if (taicpu(p).oper[0]^.typ = top_reg) and
  837. GetNextInstruction(p, hp1) and
  838. (hp1.typ = Ait_Instruction) and
  839. (taicpu(hp1).oper[0]^.typ = top_reg) and
  840. (taicpu(hp1).oper[1]^.typ = top_reg) and
  841. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  842. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  843. { change to
  844. fld reg fxxx reg,st
  845. fxxxp st, st1 (hp1)
  846. Remark: non commutative operations must be reversed!
  847. }
  848. begin
  849. case taicpu(hp1).opcode Of
  850. A_FMULP,A_FADDP,
  851. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  852. begin
  853. case taicpu(hp1).opcode Of
  854. A_FADDP: taicpu(hp1).opcode := A_FADD;
  855. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  856. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  857. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  858. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  859. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  860. end;
  861. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  862. taicpu(hp1).oper[1]^.reg := NR_ST;
  863. asml.remove(p);
  864. p.free;
  865. p := hp1;
  866. continue;
  867. end;
  868. end;
  869. end
  870. else
  871. if (taicpu(p).oper[0]^.typ = top_ref) and
  872. GetNextInstruction(p, hp2) and
  873. (hp2.typ = Ait_Instruction) and
  874. (taicpu(hp2).ops = 2) and
  875. (taicpu(hp2).oper[0]^.typ = top_reg) and
  876. (taicpu(hp2).oper[1]^.typ = top_reg) and
  877. (taicpu(p).opsize in [S_FS, S_FL]) and
  878. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  879. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  880. if GetLastInstruction(p, hp1) and
  881. (hp1.typ = Ait_Instruction) and
  882. ((taicpu(hp1).opcode = A_FLD) or
  883. (taicpu(hp1).opcode = A_FST)) and
  884. (taicpu(hp1).opsize = taicpu(p).opsize) and
  885. (taicpu(hp1).oper[0]^.typ = top_ref) and
  886. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  887. if ((taicpu(hp2).opcode = A_FMULP) or
  888. (taicpu(hp2).opcode = A_FADDP)) then
  889. { change to
  890. fld/fst mem1 (hp1) fld/fst mem1
  891. fld mem1 (p) fadd/
  892. faddp/ fmul st, st
  893. fmulp st, st1 (hp2) }
  894. begin
  895. asml.remove(p);
  896. p.free;
  897. p := hp1;
  898. if (taicpu(hp2).opcode = A_FADDP) then
  899. taicpu(hp2).opcode := A_FADD
  900. else
  901. taicpu(hp2).opcode := A_FMUL;
  902. taicpu(hp2).oper[1]^.reg := NR_ST;
  903. end
  904. else
  905. { change to
  906. fld/fst mem1 (hp1) fld/fst mem1
  907. fld mem1 (p) fld st}
  908. begin
  909. taicpu(p).changeopsize(S_FL);
  910. taicpu(p).loadreg(0,NR_ST);
  911. end
  912. else
  913. begin
  914. case taicpu(hp2).opcode Of
  915. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  916. { change to
  917. fld/fst mem1 (hp1) fld/fst mem1
  918. fld mem2 (p) fxxx mem2
  919. fxxxp st, st1 (hp2) }
  920. begin
  921. case taicpu(hp2).opcode Of
  922. A_FADDP: taicpu(p).opcode := A_FADD;
  923. A_FMULP: taicpu(p).opcode := A_FMUL;
  924. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  925. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  926. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  927. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  928. end;
  929. asml.remove(hp2);
  930. hp2.free;
  931. end
  932. end
  933. end
  934. end;
  935. A_FSTP,A_FISTP:
  936. if doFpuLoadStoreOpt(asmL,p) then
  937. continue;
  938. A_LEA:
  939. begin
  940. {removes seg register prefixes from LEA operations, as they
  941. don't do anything}
  942. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  943. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  944. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  945. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  946. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  947. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  948. begin
  949. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  950. (taicpu(p).oper[0]^.ref^.offset = 0) then
  951. begin
  952. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  953. taicpu(p).oper[1]^.reg);
  954. InsertLLItem(asml,p.previous,p.next, hp1);
  955. p.free;
  956. p := hp1;
  957. continue;
  958. end
  959. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  960. begin
  961. hp1 := tai(p.Next);
  962. asml.remove(p);
  963. p.free;
  964. p := hp1;
  965. continue;
  966. end
  967. { continue to use lea to adjust the stack pointer,
  968. it is the recommended way, but only if not optimizing for size }
  969. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  970. (cs_opt_size in current_settings.optimizerswitches) then
  971. with taicpu(p).oper[0]^.ref^ do
  972. if (base = taicpu(p).oper[1]^.reg) then
  973. begin
  974. l := offset;
  975. if (l=1) and UseIncDec then
  976. begin
  977. taicpu(p).opcode := A_INC;
  978. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  979. taicpu(p).ops := 1
  980. end
  981. else if (l=-1) and UseIncDec then
  982. begin
  983. taicpu(p).opcode := A_DEC;
  984. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  985. taicpu(p).ops := 1;
  986. end
  987. else
  988. begin
  989. if (l<0) and (l<>-2147483648) then
  990. begin
  991. taicpu(p).opcode := A_SUB;
  992. taicpu(p).loadConst(0,-l);
  993. end
  994. else
  995. begin
  996. taicpu(p).opcode := A_ADD;
  997. taicpu(p).loadConst(0,l);
  998. end;
  999. end;
  1000. end;
  1001. end
  1002. (*
  1003. This is unsafe, lea doesn't modify the flags but "add"
  1004. does. This breaks webtbs/tw15694.pp. The above
  1005. transformations are also unsafe, but they don't seem to
  1006. be triggered by code that FPC generates (or that at
  1007. least does not occur in the tests...). This needs to be
  1008. fixed by checking for the liveness of the flags register.
  1009. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1010. begin
  1011. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1012. taicpu(p).oper[0]^.ref^.base);
  1013. InsertLLItem(asml,p.previous,p.next, hp1);
  1014. DebugMsg('Peephole Lea2AddBase done',hp1);
  1015. p.free;
  1016. p:=hp1;
  1017. continue;
  1018. end
  1019. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1020. begin
  1021. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1022. taicpu(p).oper[0]^.ref^.index);
  1023. InsertLLItem(asml,p.previous,p.next,hp1);
  1024. DebugMsg('Peephole Lea2AddIndex done',hp1);
  1025. p.free;
  1026. p:=hp1;
  1027. continue;
  1028. end
  1029. *)
  1030. end;
  1031. A_MOV:
  1032. begin
  1033. TmpUsedRegs := UsedRegs;
  1034. if (taicpu(p).oper[1]^.typ = top_reg) and
  1035. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  1036. GetNextInstruction(p, hp1) and
  1037. (tai(hp1).typ = ait_instruction) and
  1038. (taicpu(hp1).opcode = A_MOV) and
  1039. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1040. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  1041. begin
  1042. {we have "mov x, %treg; mov %treg, y"}
  1043. if not(RegInOp(getsupreg(taicpu(p).oper[1]^.reg),taicpu(hp1).oper[1]^)) and
  1044. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1045. {we've got "mov x, %treg; mov %treg, y", where %treg is not used afterwards }
  1046. case taicpu(p).oper[0]^.typ Of
  1047. top_reg:
  1048. begin
  1049. { change "mov %reg, %treg; mov %treg, y"
  1050. to "mov %reg, y" }
  1051. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1052. asml.remove(hp1);
  1053. hp1.free;
  1054. continue;
  1055. end;
  1056. top_ref:
  1057. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  1058. begin
  1059. { change "mov mem, %treg; mov %treg, %reg"
  1060. to "mov mem, %reg" }
  1061. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1062. asml.remove(hp1);
  1063. hp1.free;
  1064. continue;
  1065. end;
  1066. end
  1067. end
  1068. else
  1069. {Change "mov %reg1, %reg2; xxx %reg2, ???" to
  1070. "mov %reg1, %reg2; xxx %reg1, ???" to avoid a write/read
  1071. penalty}
  1072. if (taicpu(p).oper[0]^.typ = top_reg) and
  1073. (taicpu(p).oper[1]^.typ = top_reg) and
  1074. GetNextInstruction(p,hp1) and
  1075. (tai(hp1).typ = ait_instruction) and
  1076. (taicpu(hp1).ops >= 1) and
  1077. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1078. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  1079. {we have "mov %reg1, %reg2; XXX %reg2, ???"}
  1080. begin
  1081. if ((taicpu(hp1).opcode = A_OR) or
  1082. (taicpu(hp1).opcode = A_TEST)) and
  1083. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1084. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  1085. {we have "mov %reg1, %reg2; test/or %reg2, %reg2"}
  1086. begin
  1087. TmpUsedRegs := UsedRegs;
  1088. { reg1 will be used after the first instruction, }
  1089. { so update the allocation info }
  1090. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1091. if GetNextInstruction(hp1, hp2) and
  1092. (hp2.typ = ait_instruction) and
  1093. taicpu(hp2).is_jmp and
  1094. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  1095. { change "mov %reg1, %reg2; test/or %reg2, %reg2; jxx" to
  1096. "test %reg1, %reg1; jxx" }
  1097. begin
  1098. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1099. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1100. asml.remove(p);
  1101. p.free;
  1102. p := hp1;
  1103. continue
  1104. end
  1105. else
  1106. {change "mov %reg1, %reg2; test/or %reg2, %reg2" to
  1107. "mov %reg1, %reg2; test/or %reg1, %reg1"}
  1108. begin
  1109. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1110. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1111. end;
  1112. end
  1113. { else
  1114. if (taicpu(p.next)^.opcode
  1115. in [A_PUSH, A_OR, A_XOR, A_AND, A_TEST])}
  1116. {change "mov %reg1, %reg2; push/or/xor/... %reg2, ???" to
  1117. "mov %reg1, %reg2; push/or/xor/... %reg1, ???"}
  1118. end
  1119. else
  1120. {leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1121. x >= RetOffset) as it doesn't do anything (it writes either to a
  1122. parameter or to the temporary storage room for the function
  1123. result)}
  1124. if GetNextInstruction(p, hp1) and
  1125. (tai(hp1).typ = ait_instruction) then
  1126. if IsExitCode(hp1) and
  1127. (taicpu(p).oper[1]^.typ = top_ref) and
  1128. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1129. not(assigned(current_procinfo.procdef.funcretsym) and
  1130. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1131. (taicpu(p).oper[1]^.ref^.index = NR_NO) and
  1132. (taicpu(p).oper[0]^.typ = top_reg) then
  1133. begin
  1134. asml.remove(p);
  1135. p.free;
  1136. p := hp1;
  1137. RemoveLastDeallocForFuncRes(asmL,p);
  1138. end
  1139. else
  1140. if (taicpu(p).oper[0]^.typ = top_reg) and
  1141. (taicpu(p).oper[1]^.typ = top_ref) and
  1142. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1143. (taicpu(hp1).opcode = A_CMP) and
  1144. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1145. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1146. {change "mov reg1, mem1; cmp x, mem1" to "mov reg1, mem1; cmp x, reg1"}
  1147. begin
  1148. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1149. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1150. end;
  1151. { Next instruction is also a MOV ? }
  1152. if GetNextInstruction(p, hp1) and
  1153. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  1154. begin
  1155. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1156. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1157. {mov reg1, mem1 or mov mem1, reg1
  1158. mov mem2, reg2 mov reg2, mem2}
  1159. begin
  1160. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1161. {mov reg1, mem1 or mov mem1, reg1
  1162. mov mem2, reg1 mov reg2, mem1}
  1163. begin
  1164. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1165. { Removes the second statement from
  1166. mov reg1, mem1/reg2
  1167. mov mem1/reg2, reg1 }
  1168. begin
  1169. if (taicpu(p).oper[0]^.typ = top_reg) then
  1170. AllocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1171. asml.remove(hp1);
  1172. hp1.free;
  1173. end
  1174. else
  1175. begin
  1176. TmpUsedRegs := UsedRegs;
  1177. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1178. if (taicpu(p).oper[1]^.typ = top_ref) and
  1179. { mov reg1, mem1
  1180. mov mem2, reg1 }
  1181. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1182. GetNextInstruction(hp1, hp2) and
  1183. (hp2.typ = ait_instruction) and
  1184. (taicpu(hp2).opcode = A_CMP) and
  1185. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1186. (taicpu(hp2).oper[0]^.typ = TOp_Ref) and
  1187. (taicpu(hp2).oper[1]^.typ = TOp_Reg) and
  1188. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(p).oper[1]^.ref^) and
  1189. (taicpu(hp2).oper[1]^.reg= taicpu(p).oper[0]^.reg) and
  1190. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1191. { change to
  1192. mov reg1, mem1 mov reg1, mem1
  1193. mov mem2, reg1 cmp reg1, mem2
  1194. cmp mem1, reg1 }
  1195. begin
  1196. asml.remove(hp2);
  1197. hp2.free;
  1198. taicpu(hp1).opcode := A_CMP;
  1199. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1200. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1201. end;
  1202. end;
  1203. end
  1204. else
  1205. begin
  1206. tmpUsedRegs := UsedRegs;
  1207. if GetNextInstruction(hp1, hp2) and
  1208. (taicpu(p).oper[0]^.typ = top_ref) and
  1209. (taicpu(p).oper[1]^.typ = top_reg) and
  1210. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1211. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  1212. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1213. (tai(hp2).typ = ait_instruction) and
  1214. (taicpu(hp2).opcode = A_MOV) and
  1215. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1216. (taicpu(hp2).oper[1]^.typ = top_reg) and
  1217. (taicpu(hp2).oper[0]^.typ = top_ref) and
  1218. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1219. if not regInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^) and
  1220. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1221. { mov mem1, %reg1
  1222. mov %reg1, mem2
  1223. mov mem2, reg2
  1224. to:
  1225. mov mem1, reg2
  1226. mov reg2, mem2}
  1227. begin
  1228. AllocRegBetween(asmL,taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1229. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1230. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1231. asml.remove(hp2);
  1232. hp2.free;
  1233. end
  1234. else
  1235. if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1236. not(RegInRef(getsupreg(taicpu(p).oper[1]^.reg),taicpu(p).oper[0]^.ref^)) and
  1237. not(RegInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^)) then
  1238. { mov mem1, reg1 mov mem1, reg1
  1239. mov reg1, mem2 mov reg1, mem2
  1240. mov mem2, reg2 mov mem2, reg1
  1241. to: to:
  1242. mov mem1, reg1 mov mem1, reg1
  1243. mov mem1, reg2 mov reg1, mem2
  1244. mov reg1, mem2
  1245. or (if mem1 depends on reg1
  1246. and/or if mem2 depends on reg2)
  1247. to:
  1248. mov mem1, reg1
  1249. mov reg1, mem2
  1250. mov reg1, reg2
  1251. }
  1252. begin
  1253. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1254. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1255. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1256. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1257. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1258. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1259. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1260. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1261. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1262. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1263. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1264. end
  1265. else
  1266. if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1267. begin
  1268. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1269. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1270. end
  1271. else
  1272. begin
  1273. asml.remove(hp2);
  1274. hp2.free;
  1275. end
  1276. end
  1277. end
  1278. else
  1279. (* {movl [mem1],reg1
  1280. movl [mem1],reg2
  1281. to:
  1282. movl [mem1],reg1
  1283. movl reg1,reg2 }
  1284. if (taicpu(p).oper[0]^.typ = top_ref) and
  1285. (taicpu(p).oper[1]^.typ = top_reg) and
  1286. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1287. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1288. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1289. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1290. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1291. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1292. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1293. else*)
  1294. { movl const1,[mem1]
  1295. movl [mem1],reg1
  1296. to:
  1297. movl const1,reg1
  1298. movl reg1,[mem1] }
  1299. if (taicpu(p).oper[0]^.typ = top_const) and
  1300. (taicpu(p).oper[1]^.typ = top_ref) and
  1301. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1302. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1303. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1304. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1305. not(reginref(getsupreg(taicpu(hp1).oper[1]^.reg),taicpu(hp1).oper[0]^.ref^)) then
  1306. begin
  1307. allocregbetween(asml,taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1308. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1309. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1310. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1311. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  1312. end
  1313. end;
  1314. if GetNextInstruction(p, hp1) and
  1315. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1316. GetNextInstruction(hp1, hp2) and
  1317. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1318. MatchOperand(Taicpu(p).oper[0]^,0) and
  1319. (Taicpu(p).oper[1]^.typ = top_reg) and
  1320. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1321. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1322. {mov reg1,0
  1323. bts reg1,operand1 --> mov reg1,operand2
  1324. or reg1,operand2 bts reg1,operand1}
  1325. begin
  1326. Taicpu(hp2).opcode:=A_MOV;
  1327. asml.remove(hp1);
  1328. insertllitem(asml,hp2,hp2.next,hp1);
  1329. asml.remove(p);
  1330. p.free;
  1331. p:=hp1;
  1332. end;
  1333. if GetNextInstruction(p, hp1) and
  1334. MatchInstruction(hp1,A_LEA,[S_L]) and
  1335. (Taicpu(p).oper[0]^.typ = top_ref) and
  1336. (Taicpu(p).oper[1]^.typ = top_reg) and
  1337. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1338. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1339. ) or
  1340. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1341. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1342. )
  1343. ) then
  1344. {mov reg1,ref
  1345. lea reg2,[reg1,reg2] --> add reg2,ref}
  1346. begin
  1347. TmpUsedRegs := UsedRegs;
  1348. { reg1 may not be used afterwards }
  1349. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1350. begin
  1351. Taicpu(hp1).opcode:=A_ADD;
  1352. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1353. DebugMsg('Peephole MovLea2Add done',hp1);
  1354. asml.remove(p);
  1355. p.free;
  1356. p:=hp1;
  1357. end;
  1358. end;
  1359. end;
  1360. A_MOVSX,
  1361. A_MOVZX :
  1362. begin
  1363. if (taicpu(p).oper[1]^.typ = top_reg) and
  1364. GetNextInstruction(p,hp1) and
  1365. (hp1.typ = ait_instruction) and
  1366. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1367. (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
  1368. GetNextInstruction(hp1,hp2) and
  1369. MatchInstruction(hp2,A_MOV,[]) and
  1370. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1371. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  1372. (((taicpu(hp1).ops=2) and
  1373. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  1374. ((taicpu(hp1).ops=1) and
  1375. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  1376. { reg2 must not be used after the sequence considered, so
  1377. it must be either deallocated or loaded with a new value }
  1378. (GetNextInstruction(hp2,hp3) and
  1379. (FindRegDealloc(getsupreg(taicpu(hp2).oper[0]^.reg),tai(hp3)) or
  1380. RegLoadedWithNewValue(getsupreg(taicpu(hp2).oper[0]^.reg), false, hp3))) then
  1381. { change movsX/movzX reg/ref, reg2 }
  1382. { add/sub/or/... reg3/$const, reg2 }
  1383. { mov reg2 reg/ref }
  1384. { to add/sub/or/... reg3/$const, reg/ref }
  1385. begin
  1386. { by example:
  1387. movswl %si,%eax movswl %si,%eax p
  1388. decl %eax addl %edx,%eax hp1
  1389. movw %ax,%si movw %ax,%si hp2
  1390. ->
  1391. movswl %si,%eax movswl %si,%eax p
  1392. decw %eax addw %edx,%eax hp1
  1393. movw %ax,%si movw %ax,%si hp2
  1394. }
  1395. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1396. {
  1397. ->
  1398. movswl %si,%eax movswl %si,%eax p
  1399. decw %si addw %dx,%si hp1
  1400. movw %ax,%si movw %ax,%si hp2
  1401. }
  1402. case taicpu(hp1).ops of
  1403. 1:
  1404. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1405. 2:
  1406. begin
  1407. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1408. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1409. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1410. end;
  1411. else
  1412. internalerror(2008042701);
  1413. end;
  1414. {
  1415. ->
  1416. decw %si addw %dx,%si p
  1417. }
  1418. asml.remove(p);
  1419. asml.remove(hp2);
  1420. p.free;
  1421. hp2.free;
  1422. p := hp1
  1423. end
  1424. { removes superfluous And's after movzx's }
  1425. else if taicpu(p).opcode=A_MOVZX then
  1426. begin
  1427. if (taicpu(p).oper[1]^.typ = top_reg) and
  1428. GetNextInstruction(p, hp1) and
  1429. (tai(hp1).typ = ait_instruction) and
  1430. (taicpu(hp1).opcode = A_AND) and
  1431. (taicpu(hp1).oper[0]^.typ = top_const) and
  1432. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1433. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1434. case taicpu(p).opsize Of
  1435. S_BL, S_BW:
  1436. if (taicpu(hp1).oper[0]^.val = $ff) then
  1437. begin
  1438. asml.remove(hp1);
  1439. hp1.free;
  1440. end;
  1441. S_WL:
  1442. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1443. begin
  1444. asml.remove(hp1);
  1445. hp1.free;
  1446. end;
  1447. end;
  1448. {changes some movzx constructs to faster synonyms (all examples
  1449. are given with eax/ax, but are also valid for other registers)}
  1450. if (taicpu(p).oper[1]^.typ = top_reg) then
  1451. if (taicpu(p).oper[0]^.typ = top_reg) then
  1452. case taicpu(p).opsize of
  1453. S_BW:
  1454. begin
  1455. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1456. not(cs_opt_size in current_settings.optimizerswitches) then
  1457. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1458. begin
  1459. taicpu(p).opcode := A_AND;
  1460. taicpu(p).changeopsize(S_W);
  1461. taicpu(p).loadConst(0,$ff);
  1462. end
  1463. else if GetNextInstruction(p, hp1) and
  1464. (tai(hp1).typ = ait_instruction) and
  1465. (taicpu(hp1).opcode = A_AND) and
  1466. (taicpu(hp1).oper[0]^.typ = top_const) and
  1467. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1468. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1469. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1470. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1471. begin
  1472. taicpu(p).opcode := A_MOV;
  1473. taicpu(p).changeopsize(S_W);
  1474. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1475. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1476. end;
  1477. end;
  1478. S_BL:
  1479. begin
  1480. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1481. not(cs_opt_size in current_settings.optimizerswitches) then
  1482. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1483. begin
  1484. taicpu(p).opcode := A_AND;
  1485. taicpu(p).changeopsize(S_L);
  1486. taicpu(p).loadConst(0,$ff)
  1487. end
  1488. else if GetNextInstruction(p, hp1) and
  1489. (tai(hp1).typ = ait_instruction) and
  1490. (taicpu(hp1).opcode = A_AND) and
  1491. (taicpu(hp1).oper[0]^.typ = top_const) and
  1492. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1493. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1494. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1495. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1496. begin
  1497. taicpu(p).opcode := A_MOV;
  1498. taicpu(p).changeopsize(S_L);
  1499. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1500. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1501. end
  1502. end;
  1503. S_WL:
  1504. begin
  1505. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1506. not(cs_opt_size in current_settings.optimizerswitches) then
  1507. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1508. begin
  1509. taicpu(p).opcode := A_AND;
  1510. taicpu(p).changeopsize(S_L);
  1511. taicpu(p).loadConst(0,$ffff);
  1512. end
  1513. else if GetNextInstruction(p, hp1) and
  1514. (tai(hp1).typ = ait_instruction) and
  1515. (taicpu(hp1).opcode = A_AND) and
  1516. (taicpu(hp1).oper[0]^.typ = top_const) and
  1517. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1518. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1519. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1520. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1521. begin
  1522. taicpu(p).opcode := A_MOV;
  1523. taicpu(p).changeopsize(S_L);
  1524. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1525. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1526. end;
  1527. end;
  1528. end
  1529. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1530. begin
  1531. if GetNextInstruction(p, hp1) and
  1532. (tai(hp1).typ = ait_instruction) and
  1533. (taicpu(hp1).opcode = A_AND) and
  1534. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1535. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1536. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1537. begin
  1538. taicpu(p).opcode := A_MOV;
  1539. case taicpu(p).opsize Of
  1540. S_BL:
  1541. begin
  1542. taicpu(p).changeopsize(S_L);
  1543. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1544. end;
  1545. S_WL:
  1546. begin
  1547. taicpu(p).changeopsize(S_L);
  1548. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1549. end;
  1550. S_BW:
  1551. begin
  1552. taicpu(p).changeopsize(S_W);
  1553. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1554. end;
  1555. end;
  1556. end;
  1557. end;
  1558. end;
  1559. end;
  1560. (* should not be generated anymore by the current code generator
  1561. A_POP:
  1562. begin
  1563. if target_info.system=system_i386_go32v2 then
  1564. begin
  1565. { Transform a series of pop/pop/pop/push/push/push to }
  1566. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1567. { because I'm not sure whether they can cope with }
  1568. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1569. { such a problem when using esp as frame pointer (JM) }
  1570. if (taicpu(p).oper[0]^.typ = top_reg) then
  1571. begin
  1572. hp1 := p;
  1573. hp2 := p;
  1574. l := 0;
  1575. while getNextInstruction(hp1,hp1) and
  1576. (hp1.typ = ait_instruction) and
  1577. (taicpu(hp1).opcode = A_POP) and
  1578. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1579. begin
  1580. hp2 := hp1;
  1581. inc(l,4);
  1582. end;
  1583. getLastInstruction(p,hp3);
  1584. l1 := 0;
  1585. while (hp2 <> hp3) and
  1586. assigned(hp1) and
  1587. (hp1.typ = ait_instruction) and
  1588. (taicpu(hp1).opcode = A_PUSH) and
  1589. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1590. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1591. begin
  1592. { change it to a two op operation }
  1593. taicpu(hp2).oper[1]^.typ:=top_none;
  1594. taicpu(hp2).ops:=2;
  1595. taicpu(hp2).opcode := A_MOV;
  1596. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1597. reference_reset(tmpref);
  1598. tmpRef.base.enum:=R_INTREGISTER;
  1599. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1600. convert_register_to_enum(tmpref.base);
  1601. tmpRef.offset := l;
  1602. taicpu(hp2).loadRef(0,tmpRef);
  1603. hp4 := hp1;
  1604. getNextInstruction(hp1,hp1);
  1605. asml.remove(hp4);
  1606. hp4.free;
  1607. getLastInstruction(hp2,hp2);
  1608. dec(l,4);
  1609. inc(l1);
  1610. end;
  1611. if l <> -4 then
  1612. begin
  1613. inc(l,4);
  1614. for l1 := l1 downto 1 do
  1615. begin
  1616. getNextInstruction(hp2,hp2);
  1617. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1618. end
  1619. end
  1620. end
  1621. end
  1622. else
  1623. begin
  1624. if (taicpu(p).oper[0]^.typ = top_reg) and
  1625. GetNextInstruction(p, hp1) and
  1626. (tai(hp1).typ=ait_instruction) and
  1627. (taicpu(hp1).opcode=A_PUSH) and
  1628. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1629. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1630. begin
  1631. { change it to a two op operation }
  1632. taicpu(p).oper[1]^.typ:=top_none;
  1633. taicpu(p).ops:=2;
  1634. taicpu(p).opcode := A_MOV;
  1635. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1636. reference_reset(tmpref);
  1637. TmpRef.base.enum := R_ESP;
  1638. taicpu(p).loadRef(0,TmpRef);
  1639. asml.remove(hp1);
  1640. hp1.free;
  1641. end;
  1642. end;
  1643. end;
  1644. *)
  1645. A_PUSH:
  1646. begin
  1647. if (taicpu(p).opsize = S_W) and
  1648. (taicpu(p).oper[0]^.typ = Top_Const) and
  1649. GetNextInstruction(p, hp1) and
  1650. (tai(hp1).typ = ait_instruction) and
  1651. (taicpu(hp1).opcode = A_PUSH) and
  1652. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1653. (taicpu(hp1).opsize = S_W) then
  1654. begin
  1655. taicpu(p).changeopsize(S_L);
  1656. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1657. asml.remove(hp1);
  1658. hp1.free;
  1659. end;
  1660. end;
  1661. A_SHL, A_SAL:
  1662. begin
  1663. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1664. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1665. (taicpu(p).opsize = S_L) and
  1666. (taicpu(p).oper[0]^.val <= 3) then
  1667. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1668. begin
  1669. TmpBool1 := True; {should we check the next instruction?}
  1670. TmpBool2 := False; {have we found an add/sub which could be
  1671. integrated in the lea?}
  1672. reference_reset(tmpref,2);
  1673. TmpRef.index := taicpu(p).oper[1]^.reg;
  1674. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1675. while TmpBool1 and
  1676. GetNextInstruction(p, hp1) and
  1677. (tai(hp1).typ = ait_instruction) and
  1678. ((((taicpu(hp1).opcode = A_ADD) or
  1679. (taicpu(hp1).opcode = A_SUB)) and
  1680. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1681. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1682. (((taicpu(hp1).opcode = A_INC) or
  1683. (taicpu(hp1).opcode = A_DEC)) and
  1684. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1685. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1686. (not GetNextInstruction(hp1,hp2) or
  1687. not instrReadsFlags(hp2)) Do
  1688. begin
  1689. TmpBool1 := False;
  1690. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1691. begin
  1692. TmpBool1 := True;
  1693. TmpBool2 := True;
  1694. case taicpu(hp1).opcode of
  1695. A_ADD:
  1696. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1697. A_SUB:
  1698. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1699. end;
  1700. asml.remove(hp1);
  1701. hp1.free;
  1702. end
  1703. else
  1704. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1705. (((taicpu(hp1).opcode = A_ADD) and
  1706. (TmpRef.base = NR_NO)) or
  1707. (taicpu(hp1).opcode = A_INC) or
  1708. (taicpu(hp1).opcode = A_DEC)) then
  1709. begin
  1710. TmpBool1 := True;
  1711. TmpBool2 := True;
  1712. case taicpu(hp1).opcode of
  1713. A_ADD:
  1714. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1715. A_INC:
  1716. inc(TmpRef.offset);
  1717. A_DEC:
  1718. dec(TmpRef.offset);
  1719. end;
  1720. asml.remove(hp1);
  1721. hp1.free;
  1722. end;
  1723. end;
  1724. if TmpBool2 or
  1725. ((current_settings.optimizecputype < cpu_Pentium2) and
  1726. (taicpu(p).oper[0]^.val <= 3) and
  1727. not(cs_opt_size in current_settings.optimizerswitches)) then
  1728. begin
  1729. if not(TmpBool2) and
  1730. (taicpu(p).oper[0]^.val = 1) then
  1731. begin
  1732. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1733. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1734. end
  1735. else
  1736. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1737. taicpu(p).oper[1]^.reg);
  1738. InsertLLItem(asml,p.previous, p.next, hp1);
  1739. p.free;
  1740. p := hp1;
  1741. end;
  1742. end
  1743. else
  1744. if (current_settings.optimizecputype < cpu_Pentium2) and
  1745. (taicpu(p).oper[0]^.typ = top_const) and
  1746. (taicpu(p).oper[1]^.typ = top_reg) then
  1747. if (taicpu(p).oper[0]^.val = 1) then
  1748. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1749. but faster on a 486, and pairable in both U and V pipes on the Pentium
  1750. (unlike shl, which is only pairable in the U pipe)}
  1751. begin
  1752. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1753. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1754. InsertLLItem(asml,p.previous, p.next, hp1);
  1755. p.free;
  1756. p := hp1;
  1757. end
  1758. else if (taicpu(p).opsize = S_L) and
  1759. (taicpu(p).oper[0]^.val<= 3) then
  1760. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1761. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1762. begin
  1763. reference_reset(tmpref,2);
  1764. TmpRef.index := taicpu(p).oper[1]^.reg;
  1765. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1766. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1767. InsertLLItem(asml,p.previous, p.next, hp1);
  1768. p.free;
  1769. p := hp1;
  1770. end
  1771. end;
  1772. A_SETcc :
  1773. { changes
  1774. setcc (funcres) setcc reg
  1775. movb (funcres), reg to leave/ret
  1776. leave/ret }
  1777. begin
  1778. if (taicpu(p).oper[0]^.typ = top_ref) and
  1779. GetNextInstruction(p, hp1) and
  1780. GetNextInstruction(hp1, hp2) and
  1781. IsExitCode(hp2) and
  1782. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1783. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1784. not(assigned(current_procinfo.procdef.funcretsym) and
  1785. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1786. (hp1.typ = ait_instruction) and
  1787. (taicpu(hp1).opcode = A_MOV) and
  1788. (taicpu(hp1).opsize = S_B) and
  1789. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1790. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1791. begin
  1792. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1793. asml.remove(hp1);
  1794. hp1.free;
  1795. end
  1796. end;
  1797. A_SUB:
  1798. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1799. { * change "sub/add const1, reg" or "dec reg" followed by
  1800. "sub const2, reg" to one "sub ..., reg" }
  1801. begin
  1802. if (taicpu(p).oper[0]^.typ = top_const) and
  1803. (taicpu(p).oper[1]^.typ = top_reg) then
  1804. if (taicpu(p).oper[0]^.val = 2) and
  1805. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1806. { Don't do the sub/push optimization if the sub }
  1807. { comes from setting up the stack frame (JM) }
  1808. (not getLastInstruction(p,hp1) or
  1809. (hp1.typ <> ait_instruction) or
  1810. (taicpu(hp1).opcode <> A_MOV) or
  1811. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1812. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1813. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1814. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1815. begin
  1816. hp1 := tai(p.next);
  1817. while Assigned(hp1) and
  1818. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1819. not regReadByInstruction(RS_ESP,hp1) and
  1820. not regModifiedByInstruction(RS_ESP,hp1) do
  1821. hp1 := tai(hp1.next);
  1822. if Assigned(hp1) and
  1823. (tai(hp1).typ = ait_instruction) and
  1824. (taicpu(hp1).opcode = A_PUSH) and
  1825. (taicpu(hp1).opsize = S_W) then
  1826. begin
  1827. taicpu(hp1).changeopsize(S_L);
  1828. if taicpu(hp1).oper[0]^.typ=top_reg then
  1829. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1830. hp1 := tai(p.next);
  1831. asml.remove(p);
  1832. p.free;
  1833. p := hp1;
  1834. continue
  1835. end;
  1836. if DoSubAddOpt(p) then
  1837. continue;
  1838. end
  1839. else if DoSubAddOpt(p) then
  1840. continue
  1841. end;
  1842. end;
  1843. end; { if is_jmp }
  1844. end;
  1845. end;
  1846. updateUsedRegs(UsedRegs,p);
  1847. p:=tai(p.next);
  1848. end;
  1849. end;
  { PeepHoleOptPass2: second peephole pass over asml.

    Walks the list from BlockStart up to (not including) BlockEnd. Instructions
    for which InsContainsSegRef is true are skipped. For the others the
    following rewrites are tried:
      - Jcc over a single inc/dec  -> (cmc +) adc/sbb operand,0
      - Jcc over 1..4 reg-to-reg movs, or a jcc/jmp diamond of such movs
                                   -> CMOVcc (only if the target cpu type
                                      advertises CPUX86_HAS_CMOV)
      - fstp/fistp                 -> delegated to doFpuLoadStoreOpt
      - mov reg1,reg2; imul y,reg2 -> imul y,reg1,reg2
      - mov reg1,reg2; mov/movzx/movsx (reg2,..),reg2
                                   -> mov/movzx/movsx (reg1,..),reg2
      - mov (ref),reg; op x,reg; mov reg,(ref)
                                   -> op x,(ref) if reg is not used afterwards }
  procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);

  {$ifdef DEBUG_AOPTCPU}
    { Inserts the text s as an assembler comment in front of p. }
    procedure DebugMsg(const s: string;p : tai);
      begin
        asml.insertbefore(tai_comment.Create(strpnew(s)), p);
      end;
  {$else DEBUG_AOPTCPU}
    { Debug output disabled: does nothing. }
    procedure DebugMsg(const s: string;p : tai);inline;
      begin
      end;
  {$endif DEBUG_AOPTCPU}

    { Returns true if p is a 16 or 32 bit "mov reg,reg", i.e. a mov that may
      be turned into a CMOVcc by the A_Jcc handling below. }
    function CanBeCMOV(p : tai) : boolean;
      begin
        CanBeCMOV:=assigned(p) and (p.typ=ait_instruction) and
          (taicpu(p).opcode=A_MOV) and
          (taicpu(p).opsize in [S_L,S_W]) and
          ((taicpu(p).oper[0]^.typ = top_reg)
           { we can't use cmov ref,reg because
             ref could be nil and cmov still throws an exception
             if ref=nil but the mov isn't done (FK)
            or ((taicpu(p).oper[0]^.typ = top_ref) and
             (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
           }
          ) and
          (taicpu(p).oper[1]^.typ in [top_reg]);
      end;

    var
      p,hp1,hp2,hp3: tai;
      l : longint;
      condition : tasmcond;
      UsedRegs, TmpUsedRegs: TRegSet;
      carryadd_opcode: Tasmop;

    begin
      p := BlockStart;
      UsedRegs := [];
      while (p <> BlockEnd) Do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_Jcc:
                    begin
                      { jb @@1                            cmc
                        inc/dec operand           -->     adc/sbb operand,0
                        @@1:
                        ... and ...
                        jnb @@1
                        inc/dec operand           -->     adc/sbb operand,0
                        @@1: }
                      if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
                         GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
                         (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
                        begin
                          carryadd_opcode:=A_NONE;
                          if Taicpu(p).condition in [C_NAE,C_B] then
                            begin
                              if Taicpu(hp1).opcode=A_INC then
                                carryadd_opcode:=A_ADC;
                              if Taicpu(hp1).opcode=A_DEC then
                                carryadd_opcode:=A_SBB;
                              if carryadd_opcode<>A_NONE then
                                begin
                                  { the inc/dec was executed when carry was
                                    clear: reuse the jump as "cmc" so that
                                    adc/sbb sees the inverted carry }
                                  Taicpu(p).clearop(0);
                                  Taicpu(p).ops:=0;
                                  Taicpu(p).is_jmp:=false;
                                  Taicpu(p).opcode:=A_CMC;
                                  Taicpu(p).condition:=C_NONE;
                                  Taicpu(hp1).ops:=2;
                                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                                  Taicpu(hp1).loadconst(0,0);
                                  Taicpu(hp1).opcode:=carryadd_opcode;
                                  continue;
                                end;
                            end;
                          if Taicpu(p).condition in [C_AE,C_NB] then
                            begin
                              if Taicpu(hp1).opcode=A_INC then
                                carryadd_opcode:=A_ADC;
                              if Taicpu(hp1).opcode=A_DEC then
                                carryadd_opcode:=A_SBB;
                              if carryadd_opcode<>A_NONE then
                                begin
                                  { the inc/dec was executed when carry was
                                    set: the jump can simply go away }
                                  asml.remove(p);
                                  p.free;
                                  Taicpu(hp1).ops:=2;
                                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                                  Taicpu(hp1).loadconst(0,0);
                                  Taicpu(hp1).opcode:=carryadd_opcode;
                                  p:=hp1;
                                  continue;
                                end;
                            end;
                        end;
                      if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
                        begin
                          { check for
                                 jCC   xxx
                                 <several movs>
                              xxx:
                          }
                          l:=0;
                          GetNextInstruction(p, hp1);
                          while assigned(hp1) and
                                CanBeCMOV(hp1) and
                                { stop on labels }
                                not(hp1.typ=ait_label) do
                            begin
                              inc(l);
                              GetNextInstruction(hp1,hp1);
                            end;
                          if assigned(hp1) then
                            begin
                              if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                begin
                                  if (l<=4) and (l>0) then
                                    begin
                                      condition:=inverse_cond(taicpu(p).condition);
                                      hp2:=p;
                                      GetNextInstruction(p,hp1);
                                      p:=hp1;
                                      repeat
                                        taicpu(hp1).opcode:=A_CMOVcc;
                                        taicpu(hp1).condition:=condition;
                                        GetNextInstruction(hp1,hp1);
                                      until not(assigned(hp1)) or
                                            not(CanBeCMOV(hp1));
                                      { wait with removing else GetNextInstruction could
                                        ignore the label if it was the only usage in the
                                        jump moved away }
                                      tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                      asml.remove(hp2);
                                      hp2.free;
                                      continue;
                                    end;
                                end
                              else
                                begin
                                  { check further for
                                         jCC   xxx
                                         <several movs 1>
                                         jmp   yyy
                                     xxx:
                                         <several movs 2>
                                     yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                     assigned(hp1) and
                                     { the conversion below uses repeat..until,
                                       which always runs once: with an empty
                                       first mov block it would retag (and p
                                       would end up pointing at) the jmp that
                                       is freed afterwards }
                                     (l>0) and
                                     (l<=3) and
                                     (hp2.typ=ait_instruction) and
                                     (taicpu(hp2).is_jmp) and
                                     (taicpu(hp2).condition=C_None) and
                                     { real label and jump, no further references to the
                                       label are allowed }
                                     (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
                                     FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                    begin
                                      l:=0;
                                      { skip hp1 to <several moves 2> }
                                      GetNextInstruction(hp1, hp1);
                                      while assigned(hp1) and
                                            CanBeCMOV(hp1) do
                                        begin
                                          inc(l);
                                          GetNextInstruction(hp1, hp1);
                                        end;
                                      { hp1 points to yyy: }
                                      { same reasoning as above: an empty second
                                        mov block would make the second
                                        repeat..until write to the yyy label }
                                      if (l>0) and
                                         assigned(hp1) and
                                         FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                        begin
                                          condition:=inverse_cond(taicpu(p).condition);
                                          GetNextInstruction(p,hp1);
                                          hp3:=p;
                                          p:=hp1;
                                          repeat
                                            taicpu(hp1).opcode:=A_CMOVcc;
                                            taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                                not(CanBeCMOV(hp1));
                                          { hp2 is still at jmp yyy }
                                          GetNextInstruction(hp2,hp1);
                                          { hp1 is now at xxx: }
                                          condition:=inverse_cond(condition);
                                          GetNextInstruction(hp1,hp1);
                                          { hp1 is now at <several movs 2> }
                                          repeat
                                            taicpu(hp1).opcode:=A_CMOVcc;
                                            taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                                not(CanBeCMOV(hp1));
                                          { remove jCC }
                                          tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp3);
                                          hp3.free;
                                          { remove jmp }
                                          tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp2);
                                          hp2.free;
                                          continue;
                                        end;
                                    end;
                                end;
                            end;
                        end;
                    end;
                  A_FSTP,A_FISTP:
                    if doFpuLoadStoreOpt(asmL,p) then
                      continue;
                  A_IMUL:
                    begin
                      if (taicpu(p).ops >= 2) and
                         ((taicpu(p).oper[0]^.typ = top_const) or
                          ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
                         (taicpu(p).oper[1]^.typ = top_reg) and
                         ((taicpu(p).ops = 2) or
                          ((taicpu(p).oper[2]^.typ = top_reg) and
                           (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
                         getLastInstruction(p,hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_MOV) and
                         (taicpu(hp1).oper[0]^.typ = top_reg) and
                         (taicpu(hp1).oper[1]^.typ = top_reg) and
                         (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
                        begin
                          taicpu(p).ops := 3;
                          taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
                          taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
                          asml.remove(hp1);
                          hp1.free;
                        end;
                    end;
                  A_MOV:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_reg) and
                         (taicpu(p).oper[1]^.typ = top_reg) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_Instruction) and
                         ((taicpu(hp1).opcode = A_MOV) or
                          (taicpu(hp1).opcode = A_MOVZX) or
                          (taicpu(hp1).opcode = A_MOVSX)) and
                         (taicpu(hp1).oper[0]^.typ = top_ref) and
                         (taicpu(hp1).oper[1]^.typ = top_reg) and
                         ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
                          (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
                         (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
                        { mov reg1, reg2
                          mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2 }
                        begin
                          if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
                            taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
                          if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
                            taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
                          asml.remove(p);
                          p.free;
                          p := hp1;
                          continue;
                        end
                      else if (taicpu(p).oper[0]^.typ = top_ref) and
                         GetNextInstruction(p,hp1) and
                         (hp1.typ = ait_instruction) and
                         (IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) or
                          ((taicpu(hp1).opcode=A_LEA) and
                           (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
                           ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
                             (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)) or
                            (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
                             (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg))
                           )
                          )
                         ) and
                         GetNextInstruction(hp1,hp2) and
                         MatchInstruction(hp2,A_MOV,[]) and
                         MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                         (taicpu(hp2).oper[1]^.typ = top_ref) then
                        begin
                          TmpUsedRegs := UsedRegs;
                          UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
                          if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
                              not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,
                                    hp2, TmpUsedRegs))) then
                            { change   mov            (ref), reg            }
                            {          add/sub/or/... reg2/$const, reg      }
                            {          mov            reg, (ref)            }
                            {          # release reg                        }
                            { to       add/sub/or/... reg2/$const, (ref)    }
                            begin
                              case taicpu(hp1).opcode of
                                A_INC,A_DEC,A_NOT,A_NEG:
                                  taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                                A_LEA:
                                  begin
                                    { lea (reg,other),reg acts as an add of
                                      "other" here: pick whichever of
                                      base/index is not reg as the source }
                                    taicpu(hp1).opcode:=A_ADD;
                                    if taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg then
                                      taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                                    else
                                      taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base);
                                    taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                                    DebugMsg('Peephole FoldLea done',hp1);
                                  end
                                else
                                  taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                              end;
                              asml.remove(p);
                              asml.remove(hp2);
                              p.free;
                              hp2.free;
                              p := hp1
                            end;
                        end
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { PostPeepHoleOpts: last peephole pass over asml.

    Visits every item from BlockStart up to (not including) BlockEnd.
    Instructions for which InsContainsSegRef is true are left alone. The
    remaining ones are checked for:
      - call x; jmp y          -> push y; jmp x   (cpus below Pentium2, no PIC)
      - call x; ret            -> jmp x           (optimization level 4, no PIC)
      - cmp $0,%reg            -> test %reg,%reg
      - movzbl src,%reg        -> xorl %reg,%reg; movb src,%reg8
                                  (Pentium only, not with regvars or -Os)
      - test/or %y,%y or test $-1,%y right after an instruction that already
        set the flags for %y   -> removed
      - test $-1,%reg          -> test %reg,%reg }
  procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
    var
      cur, prevIns, nextIns, pushIns: tai;
      TestAllOnes: boolean;

    { Unlinks cur from asml, frees it and lets cur refer to the item that
      followed it. }
    procedure DropCur;
      var
        following: tai;
      begin
        following := tai(cur.next);
        asml.remove(cur);
        cur.free;
        cur := following;
      end;

    { Turns the call held in cur into a jmp and deletes the instruction
      "follower" from the list. }
    procedure CallToJump(follower: tai);
      begin
        taicpu(cur).opcode := A_JMP;
        taicpu(cur).is_jmp := true;
        asml.remove(follower);
        follower.free;
      end;

    { Replaces the "movzbl src, %reg" held in cur by
      "xorl %reg, %reg; movb src, %reg8". }
    procedure SplitMovzxIntoXorAndMov;
      var
        clearIns: tai;
      begin
        { build the xor first: it needs the full 32 bit destination, which
          setsubreg below narrows to its low byte }
        clearIns := taicpu.op_reg_reg(A_XOR, S_L,
          taicpu(cur).oper[1]^.reg, taicpu(cur).oper[1]^.reg);
        InsertLLItem(asml, cur.previous, cur, clearIns);
        taicpu(cur).opcode := A_MOV;
        taicpu(cur).changeopsize(S_B);
        setsubreg(taicpu(cur).oper[1]^.reg, R_SUBL);
      end;

    { change "test $-1,%reg" into "test %reg,%reg" }
    procedure CanonicalizeTestAllOnes;
      begin
        if TestAllOnes and (taicpu(cur).oper[1]^.typ = top_reg) then
          taicpu(cur).loadoper(0, taicpu(cur).oper[1]^);
      end;

    begin
      cur := BlockStart;
      while cur <> BlockEnd do
        begin
          if cur.typ = ait_instruction then
            begin
              if InsContainsSegRef(taicpu(cur)) then
                begin
                  cur := tai(cur.next);
                  continue;
                end;
              case taicpu(cur).opcode of
                A_CALL:
                  begin
                    { call x; jmp y  ->  push y; jmp x
                      not on modern CPUs: it really hurts them because the
                      call/ret pairing gets broken }
                    if (current_settings.optimizecputype < cpu_Pentium2) and
                       not(cs_create_pic in current_settings.moduleswitches) and
                       GetNextInstruction(cur, nextIns) and
                       (nextIns.typ = ait_instruction) and
                       (taicpu(nextIns).opcode = A_JMP) and
                       (taicpu(nextIns).oper[0]^.typ = top_ref) and
                       (taicpu(nextIns).oper[0]^.ref^.refaddr = addr_full) then
                      begin
                        pushIns := taicpu.Op_sym(A_PUSH, S_L,
                          taicpu(nextIns).oper[0]^.ref^.symbol);
                        InsertLLItem(asml, cur.previous, cur, pushIns);
                        CallToJump(nextIns);
                      end
                    { call procname; ret  ->  jmp procname
                      should never hurt except with PIC (unclear how to handle
                      that case); restricted to level 4 because it destroys
                      stack back traces }
                    else if (cs_opt_level4 in current_settings.optimizerswitches) and
                       not(cs_create_pic in current_settings.moduleswitches) and
                       GetNextInstruction(cur, nextIns) and
                       (nextIns.typ = ait_instruction) and
                       (taicpu(nextIns).opcode = A_RET) and
                       (taicpu(nextIns).ops = 0) then
                      CallToJump(nextIns);
                  end;
                A_CMP:
                  begin
                    { cmp $0,%reg  ->  test %reg,%reg; the instruction is then
                      looked at again as a test }
                    if (taicpu(cur).oper[0]^.typ = top_const) and
                       (taicpu(cur).oper[0]^.val = 0) and
                       (taicpu(cur).oper[1]^.typ = top_reg) then
                      begin
                        taicpu(cur).opcode := A_TEST;
                        taicpu(cur).loadreg(0, taicpu(cur).oper[1]^.reg);
                        continue;
                      end;
                  end;
                { "mov $0,%reg" is deliberately not turned into
                  "xor %reg,%reg": that is not safe because xor clears the
                  carry flag, see test/tgadint64 in the test suite }
                A_MOVZX:
                  { with register variables there can be code like
                    "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx", and xor/mov
                    in place of the movzx would change the flags (JM) }
                  if not(cs_opt_regvar in current_settings.optimizerswitches) and
                     (taicpu(cur).oper[1]^.typ = top_reg) then
                    begin
                      if taicpu(cur).oper[0]^.typ = top_reg then
                        begin
                          { movzbl %reg1,%reg2, Pentium and PentiumMMX only }
                          if (taicpu(cur).opsize = S_BL) and
                             IsGP32Reg(getsupreg(taicpu(cur).oper[1]^.reg)) and
                             not(cs_opt_size in current_settings.optimizerswitches) and
                             (current_settings.optimizecputype = cpu_Pentium) then
                            SplitMovzxIntoXorAndMov;
                        end
                      { movzbl mem,%reg, Pentium and PentiumMMX only; the
                        destination must not be part of the address }
                      else if (taicpu(cur).oper[0]^.typ = top_ref) and
                         (taicpu(cur).oper[0]^.ref^.base <> taicpu(cur).oper[1]^.reg) and
                         (taicpu(cur).oper[0]^.ref^.index <> taicpu(cur).oper[1]^.reg) and
                         not(cs_opt_size in current_settings.optimizerswitches) and
                         IsGP32Reg(getsupreg(taicpu(cur).oper[1]^.reg)) and
                         (current_settings.optimizecputype = cpu_Pentium) and
                         (taicpu(cur).opsize = S_BL) then
                        SplitMovzxIntoXorAndMov;
                    end;
                A_TEST, A_OR:
                  { removes the line marked with (x) from the sequence
                      and/or/xor/add/sub/... $x, %y
                      test/or %y, %y  |  test $-1, %y    (x)
                      j(n)z _Label
                    as the first instruction already adjusts the ZF
                    %y operand may also be a reference }
                  begin
                    TestAllOnes := (taicpu(cur).opcode = A_TEST) and
                                   MatchOperand(taicpu(cur).oper[0]^, -1);
                    if (OpsEqual(taicpu(cur).oper[0]^, taicpu(cur).oper[1]^) or TestAllOnes) and
                       GetLastInstruction(cur, prevIns) and
                       (prevIns.typ = ait_instruction) and
                       GetNextInstruction(cur, nextIns) and
                       MatchInstruction(nextIns, A_SETcc, A_Jcc, A_CMOVcc, []) then
                      case taicpu(prevIns).opcode of
                        A_ADD, A_SUB, A_OR, A_XOR, A_AND:
                          begin
                            { after add/sub only the zero/equal conditions are
                              safe: overflow breaks G(E)/L(E)/O/NO and carry
                              breaks A(E)/B(E)/C/NC }
                            if OpsEqual(taicpu(prevIns).oper[1]^, taicpu(cur).oper[1]^) and
                               ((taicpu(nextIns).condition in [C_Z,C_NZ,C_E,C_NE]) or
                                ((taicpu(prevIns).opcode <> A_ADD) and
                                 (taicpu(prevIns).opcode <> A_SUB))) then
                              begin
                                DropCur;
                                continue;
                              end;
                          end;
                        A_SHL, A_SAL, A_SHR, A_SAR:
                          begin
                            { a shift by 0 leaves the flags alone, so only a
                              shift by a nonzero constant qualifies, and only
                              for the zero/equal conditions }
                            if OpsEqual(taicpu(prevIns).oper[1]^, taicpu(cur).oper[1]^) and
                               (taicpu(prevIns).oper[0]^.typ = top_const) and
                               (taicpu(prevIns).oper[0]^.val <> 0) and
                               (taicpu(nextIns).condition in [C_Z,C_NZ,C_E,C_NE]) then
                              begin
                                DropCur;
                                continue;
                              end;
                          end;
                        A_DEC, A_INC, A_NEG:
                          begin
                            { zero/equal conditions only, see above }
                            if OpsEqual(taicpu(prevIns).oper[0]^, taicpu(cur).oper[1]^) and
                               (taicpu(nextIns).condition in [C_Z,C_NZ,C_E,C_NE]) then
                              begin
                                if taicpu(prevIns).opcode <> A_NEG then
                                  begin
                                    { replace inc/dec with add/sub 1, because
                                      inc/dec doesn't set the carry flag }
                                    if taicpu(prevIns).opcode = A_DEC then
                                      taicpu(prevIns).opcode := A_SUB
                                    else
                                      taicpu(prevIns).opcode := A_ADD;
                                    taicpu(prevIns).loadoper(1, taicpu(prevIns).oper[0]^);
                                    taicpu(prevIns).loadConst(0, 1);
                                    taicpu(prevIns).ops := 2;
                                  end;
                                DropCur;
                                continue;
                              end;
                          end
                        else
                          CanonicalizeTestAllOnes;
                      end { case }
                    else
                      CanonicalizeTestAllOnes;
                  end;
              end;
            end;
          cur := tai(cur.next);
        end;
    end;
  2408. end.