popt386.pas 114 KB


  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit popt386;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses Aasmbase,aasmtai,aasmdata,aasmcpu,verbose;
  21. procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  22. procedure PeepHoleOptPass1(asml: TAsmList; BlockStart, BlockEnd: tai);
  23. procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);
  24. procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  25. implementation
  26. uses
  27. globtype,systems,
  28. globals,cgbase,procinfo,
  29. symsym,
  30. {$ifdef finaldestdebug}
  31. cobjects,
  32. {$endif finaldestdebug}
  33. cpuinfo,cpubase,cgutils,daopt386;
  { Tells whether hp1 is one of a fixed set of arithmetic/logical instructions
    acting on "reg":
      - for the two-operand opcodes, the second operand must be the register
        "reg" and the first operand must be a constant or a register other
        than "reg";
      - for inc/dec, the single operand must be the register "reg".
    Any other opcode yields false. }
  function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    var
      sourceIsIndependent: boolean;
    begin
      case hp1.opcode of
        A_INC,A_DEC:
          isFoldableArithOp :=
            (hp1.oper[0]^.typ = top_reg) and
            (hp1.oper[0]^.reg = reg);
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          begin
            { the first operand may not be "reg" itself }
            sourceIsIndependent :=
              (hp1.oper[0]^.typ = top_const) or
              ((hp1.oper[0]^.typ = top_reg) and
               (hp1.oper[0]^.reg <> reg));
            isFoldableArithOp :=
              sourceIsIndependent and
              (hp1.oper[1]^.typ = top_reg) and
              (hp1.oper[1]^.reg = reg);
          end;
        else
          isFoldableArithOp := False;
      end;
    end;
  { Advances UsedRegs past p (via UpdateUsedRegs on p.Next) and reports whether
    the super register of "reg" is still in use afterwards.

    Returns false when the super register is not in the updated UsedRegs, or
    when the instruction following p loads it with a new value (as judged by
    regLoadedWithNewValue). Returns true when it is in UsedRegs and either no
    following instruction exists or that instruction does not reload it.

    Note: UsedRegs is a var parameter and is modified by this call. }
  function RegUsedAfterInstruction(reg: Tregister; p: tai; var UsedRegs: TRegSet): Boolean;
    var
      superReg: tsuperregister;
      following: tai;
    begin
      superReg := getsupreg(reg);
      UpdateUsedRegs(UsedRegs, tai(p.Next));
      if not (superReg in UsedRegs) then
        begin
          RegUsedAfterInstruction := false;
          exit;
        end;
      { the register is marked as used: it only counts as "not used" if the
        very next instruction overwrites it }
      if not getNextInstruction(p, following) then
        RegUsedAfterInstruction := true
      else
        RegUsedAfterInstruction := not regLoadedWithNewValue(superReg, false, following);
    end;
  { Removes a redundant FPU store/reload pair at the end of a routine.

    p must be an fstp/fistp instruction. When p stores to a memory reference
    and is immediately followed by the matching fld/fild of the same reference
    and operand size, and
      - the operand size is S_FX,
      - the instruction after the reload is leave or ret,
      - the reference is based on the current frame pointer, has no index,
        and is not located below the function result symbol's offset (when
        there is such a symbol),
    then both instructions are deleted, p is set to the leave/ret instruction,
    removeLastDeallocForFuncRes is called for it, and true is returned.

    Returns true if the caller should "continue" (p already points at the next
    instruction to process); false if nothing was changed. }
  function doFpuLoadStoreOpt(asmL: TAsmList; var p: tai): boolean;
    var
      reload, afterReload: tai;
      isStoreReloadPair: boolean;
    begin
      doFpuLoadStoreOpt := false;

      { --- is this "fstp mem; fld mem" or "fistp mem; fild mem"? --- }
      if taicpu(p).oper[0]^.typ <> top_ref then
        exit;
      if not getNextInstruction(p, reload) then
        exit;
      if reload.typ <> ait_instruction then
        exit;
      isStoreReloadPair :=
        ((taicpu(reload).opcode = A_FLD) and (taicpu(p).opcode = A_FSTP)) or
        ((taicpu(p).opcode = A_FISTP) and (taicpu(reload).opcode = A_FILD));
      if not isStoreReloadPair then
        exit;
      if taicpu(reload).oper[0]^.typ <> top_ref then
        exit;
      if taicpu(reload).opsize <> taicpu(p).opsize then
        exit;
      if not refsEqual(taicpu(p).oper[0]^.ref^, taicpu(reload).oper[0]^.ref^) then
        exit;

      { --- only the extended-size case right before leave/ret is handled:
            replacing fstp f;fld f by fst f is only valid for extended
            because of rounding --- }
      if taicpu(p).opsize <> S_FX then
        exit;
      if not getNextInstruction(reload, afterReload) then
        exit;
      if afterReload.typ <> ait_instruction then
        exit;
      if (taicpu(afterReload).opcode <> A_LEAVE) and
         (taicpu(afterReload).opcode <> A_RET) then
        exit;
      if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
        exit;
      if assigned(current_procinfo.procdef.funcretsym) and
         (taicpu(p).oper[0]^.ref^.offset <
          tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
        exit;
      if taicpu(p).oper[0]^.ref^.index <> NR_NO then
        exit;

      { drop both the store and the reload, continue at the leave/ret }
      asmL.remove(p);
      asmL.remove(reload);
      p.free;
      reload.free;
      p := afterReload;
      removeLastDeallocForFuncRes(asmL, p);
      doFpuLoadStoreOpt := true;

      { Deliberately NOT done: turning the pair into a single fst/fist for
        operand sizes other than S_FX and S_IQ (fst cannot store an extended
        value). That rewrite is invalid because the store operation rounds. }
    end;
  { Pre-pass of the peephole optimizer.

    Walks the list items from BlockStart up to (not including) BlockEnd and
    rewrites the following instruction patterns in asml:

      * imul $const, %reg  (32 bit only)
          - const = 1: the two-operand form is deleted, the three-operand
            form becomes "mov reg1, reg2";
          - const in 3, 5, 9: replaced by a single lea;
          - const in 6, 10, 12: replaced by a two-instruction lea/add
            sequence, but only when optimizing for cpu_386 or lower.
        The lea rewrites are skipped when optimizing for size and when the
        imul is followed by a jo/jno (lea does not set the overflow flag).

      * shr/sar $c1, x ; shl $c2, x
          replaced by a shorter shift combined with an "and" mask, or by a
          single "and" when c1 = c2.

      * xor %reg, %reg
          temporarily turned into "mov $0, %reg" to make things easier for
          the CSE; it is changed back in pass 2.

    Parameters:
      asml       - assembler list that is modified in place
      BlockStart - first item to inspect
      BlockEnd   - item at which the walk stops (not inspected) }
  procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
    var
      p,hp1: tai;
      l: aint;
      tmpRef: treference;
      { true when a three-operand imul has a destination register that
        differs from its source register }
      distinctDest: boolean;
    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                case taicpu(p).opcode Of
                  A_IMUL:
                    { changes certain "imul const, %reg"'s to lea sequences }
                    begin
                      if (taicpu(p).oper[0]^.typ = Top_Const) and
                         (taicpu(p).oper[1]^.typ = Top_Reg) and
                         (taicpu(p).opsize = S_L) then
                        if (taicpu(p).oper[0]^.val = 1) then
                          if (taicpu(p).ops = 2) then
                            { remove "imul $1, reg" }
                            begin
                              hp1 := tai(p.Next);
                              asml.remove(p);
                              p.free;
                              p := hp1;
                              continue;
                            end
                          else
                            { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
                            begin
                              hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                              { linking hp1 between p's neighbours unlinks p }
                              InsertLLItem(asml, p.previous, p.next, hp1);
                              p.free;
                              p := hp1;
                            end
                        else if
                          ((taicpu(p).ops <= 2) or
                           (taicpu(p).oper[2]^.typ = Top_Reg)) and
                          (taicpu(p).oper[0]^.val <= 12) and
                          not(cs_opt_size in current_settings.optimizerswitches) and
                          { lea does not set the overflow flag, so leave the
                            imul alone if a jo/jno depends on it }
                          (not(GetNextInstruction(p, hp1)) or
                           not((tai(hp1).typ = ait_instruction) and
                               ((taicpu(hp1).opcode=A_Jcc) and
                                (taicpu(hp1).condition in [C_O,C_NO])))) then
                          begin
                            reference_reset(tmpref,1);
                            case taicpu(p).oper[0]^.val Of
                              3: begin
                                { imul 3, reg1, reg2 to
                                    lea (reg1,reg1,2), reg2
                                  imul 3, reg1 to
                                    lea (reg1,reg1,2), reg1 }
                                TmpRef.base := taicpu(p).oper[1]^.reg;
                                TmpRef.index := taicpu(p).oper[1]^.reg;
                                TmpRef.ScaleFactor := 2;
                                if (taicpu(p).ops = 2) then
                                  hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                else
                                  hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                InsertLLItem(asml,p.previous, p.next, hp1);
                                p.free;
                                p := hp1;
                              end;
                              5: begin
                                { imul 5, reg1, reg2 to
                                    lea (reg1,reg1,4), reg2
                                  imul 5, reg1 to
                                    lea (reg1,reg1,4), reg1 }
                                TmpRef.base := taicpu(p).oper[1]^.reg;
                                TmpRef.index := taicpu(p).oper[1]^.reg;
                                TmpRef.ScaleFactor := 4;
                                if (taicpu(p).ops = 2) then
                                  hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                else
                                  hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                InsertLLItem(asml,p.previous, p.next, hp1);
                                p.free;
                                p := hp1;
                              end;
                              6: begin
                                { imul 6, reg1, reg2 (reg1 <> reg2) to
                                    lea (,reg1,2), reg2
                                    lea (reg2,reg1,4), reg2
                                  imul 6, reg1 (or reg1 = reg2) to
                                    lea (reg1,reg1,2), reg1
                                    add reg1, reg1 }
                                if (current_settings.optimizecputype <= cpu_386) then
                                  begin
                                    { the three-operand sequence reads reg1 after
                                      writing reg2, so it is only usable when the
                                      two registers differ }
                                    distinctDest :=
                                      (taicpu(p).ops = 3) and
                                      (taicpu(p).oper[1]^.reg <> taicpu(p).oper[2]^.reg);
                                    { second instruction of the sequence: linked
                                      in directly after p }
                                    TmpRef.index := taicpu(p).oper[1]^.reg;
                                    if distinctDest then
                                      begin
                                        TmpRef.base := taicpu(p).oper[2]^.reg;
                                        TmpRef.ScaleFactor := 4;
                                        { the result has to end up in reg2, not in
                                          the source register }
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                      end
                                    else
                                      begin
                                        hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                          taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                      end;
                                    InsertLLItem(asml,p, p.next, hp1);
                                    { first instruction of the sequence: takes
                                      the place of p }
                                    reference_reset(tmpref,2);
                                    TmpRef.index := taicpu(p).oper[1]^.reg;
                                    TmpRef.ScaleFactor := 2;
                                    if distinctDest then
                                      begin
                                        TmpRef.base := NR_NO;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                                          taicpu(p).oper[2]^.reg);
                                      end
                                    else
                                      begin
                                        TmpRef.base := taicpu(p).oper[1]^.reg;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                      end;
                                    InsertLLItem(asml,p.previous, p.next, hp1);
                                    p.free;
                                    { continue after the second new instruction }
                                    p := tai(hp1.next);
                                  end
                              end;
                              9: begin
                                { imul 9, reg1, reg2 to
                                    lea (reg1,reg1,8), reg2
                                  imul 9, reg1 to
                                    lea (reg1,reg1,8), reg1 }
                                TmpRef.base := taicpu(p).oper[1]^.reg;
                                TmpRef.index := taicpu(p).oper[1]^.reg;
                                TmpRef.ScaleFactor := 8;
                                if (taicpu(p).ops = 2) then
                                  hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                else
                                  hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                InsertLLItem(asml,p.previous, p.next, hp1);
                                p.free;
                                p := hp1;
                              end;
                              10: begin
                                { imul 10, reg1, reg2 to
                                    lea (reg1,reg1,4), reg2
                                    add reg2, reg2
                                  imul 10, reg1 to
                                    lea (reg1,reg1,4), reg1
                                    add reg1, reg1 }
                                if (current_settings.optimizecputype <= cpu_386) then
                                  begin
                                    { second instruction: add dest, dest }
                                    if (taicpu(p).ops = 3) then
                                      hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                        taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                    else
                                      hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                        taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                    InsertLLItem(asml,p, p.next, hp1);
                                    { first instruction: the lea replacing p }
                                    TmpRef.base := taicpu(p).oper[1]^.reg;
                                    TmpRef.index := taicpu(p).oper[1]^.reg;
                                    TmpRef.ScaleFactor := 4;
                                    if (taicpu(p).ops = 3) then
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
                                    else
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                    InsertLLItem(asml,p.previous, p.next, hp1);
                                    p.free;
                                    p := tai(hp1.next);
                                  end
                              end;
                              12: begin
                                { imul 12, reg1, reg2 (reg1 <> reg2) to
                                    lea (,reg1,4), reg2
                                    lea (reg2,reg1,8), reg2
                                  imul 12, reg1 (or reg1 = reg2) to
                                    lea (reg1,reg1,2), reg1
                                    lea (,reg1,4), reg1 }
                                if (current_settings.optimizecputype <= cpu_386)
                                  then
                                  begin
                                    { see the remark for the factor 6: the
                                      three-operand sequence needs reg1 <> reg2 }
                                    distinctDest :=
                                      (taicpu(p).ops = 3) and
                                      (taicpu(p).oper[1]^.reg <> taicpu(p).oper[2]^.reg);
                                    { second instruction of the sequence }
                                    TmpRef.index := taicpu(p).oper[1]^.reg;
                                    if distinctDest then
                                      begin
                                        TmpRef.base := taicpu(p).oper[2]^.reg;
                                        TmpRef.ScaleFactor := 8;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                      end
                                    else
                                      begin
                                        TmpRef.base := NR_NO;
                                        TmpRef.ScaleFactor := 4;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                      end;
                                    InsertLLItem(asml,p, p.next, hp1);
                                    { first instruction of the sequence }
                                    reference_reset(tmpref,2);
                                    TmpRef.index := taicpu(p).oper[1]^.reg;
                                    if distinctDest then
                                      begin
                                        TmpRef.base := NR_NO;
                                        TmpRef.ScaleFactor := 4;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                      end
                                    else
                                      begin
                                        TmpRef.base := taicpu(p).oper[1]^.reg;
                                        TmpRef.ScaleFactor := 2;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                      end;
                                    InsertLLItem(asml,p.previous, p.next, hp1);
                                    p.free;
                                    p := tai(hp1.next);
                                  end
                              end
                            end;
                          end;
                    end;
                  A_SAR, A_SHR:
                    { changes the code sequence
                        shr/sar const1, x
                        shl const2, x
                      to either "sar/and", "shl/and" or just "and" depending on
                      const1 and const2 }
                    begin
                      if GetNextInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_SHL) and
                         (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(hp1).oper[0]^.typ = top_const) and
                         (taicpu(hp1).opsize = taicpu(p).opsize) and
                         (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                         OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                           not(cs_opt_size in current_settings.optimizerswitches) then
                          { shr/sar const1, %reg
                            shl const2, %reg
                            with const1 > const2:
                            shift right by the difference, then clear the low
                            const2 bits }
                          begin
                            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                            taicpu(hp1).opcode := A_AND;
                            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
                            case taicpu(p).opsize Of
                              S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
                              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
                              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
                            end;
                          end
                        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                                not(cs_opt_size in current_settings.optimizerswitches) then
                          { shr/sar const1, %reg
                            shl const2, %reg
                            with const1 < const2:
                            clear the low const1 bits, then shift left by the
                            difference }
                          begin
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                            taicpu(p).opcode := A_AND;
                            l := (1 shl (taicpu(p).oper[0]^.val))-1;
                            case taicpu(p).opsize Of
                              S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                              S_B: taicpu(p).loadConst(0,l Xor $ff);
                              S_W: taicpu(p).loadConst(0,l Xor $ffff);
                            end;
                          end
                        else
                          { shr/sar const1, %reg
                            shl const2, %reg
                            with const1 = const2:
                            a single "and" clearing the low bits is enough }
                          if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                            begin
                              taicpu(p).opcode := A_AND;
                              l := (1 shl (taicpu(p).oper[0]^.val))-1;
                              case taicpu(p).opsize Of
                                S_B: taicpu(p).loadConst(0,l Xor $ff);
                                S_W: taicpu(p).loadConst(0,l Xor $ffff);
                                S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                              end;
                              asml.remove(hp1);
                              hp1.free;
                            end;
                    end;
                  A_XOR:
                    if (taicpu(p).oper[0]^.typ = top_reg) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                      { temporarily change this to 'mov reg,0' to make it easier
                        for the CSE. Will be changed back in pass 2 }
                      begin
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).loadConst(0,0);
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  402. procedure PeepHoleOptPass1(Asml: TAsmList; BlockStart, BlockEnd: tai);
  403. {First pass of peepholeoptimizations}
  404. var
  405. l : longint;
  406. p,hp1,hp2 : tai;
  407. hp3,hp4: tai;
  408. v:aint;
  409. TmpRef: TReference;
  410. UsedRegs, TmpUsedRegs: TRegSet;
  411. TmpBool1, TmpBool2: Boolean;
  { Starting after hp, steps over every following item whose type is in
    SkipInstr or is a label or an alignment.

    Returns true and stores the first item that is not skipped in hp2.
    Returns false when the end of the list is reached first; hp2 then receives
    the last item that was visited (hp itself if nothing followed it). }
  function SkipLabels(hp: tai; var hp2: tai): boolean;
    var
      candidate: tai;
    begin
      candidate := tai(hp.next);
      while assigned(candidate) and
            (candidate.typ in SkipInstr + [ait_label,ait_align]) do
        begin
          hp := candidate;
          candidate := tai(hp.next);
        end;
      SkipLabels := assigned(candidate);
      if assigned(candidate) then
        hp2 := candidate
      else
        hp2 := hp;
    end;
  { Traces successive jumps to their final destination and retargets hp
    accordingly, e.g.

      je l1                je l3
      <code>               <code>
    l1:          becomes l1:
      je l2                je l3
      <code>               <code>
    l2:                  l2:
      jmp l3               jmp l3

    Parameters:
      asml  - assembler list; a new label may be inserted into it
      hp    - the jump instruction whose target symbol is updated in place
      level - how deep the chain of jumps has already been followed; the
              search gives up beyond 20 to avoid endless loops with
              constructs such as "l5: ; jmp l5"

    Returns false when the search was abandoned (depth limit reached, a
    self-referencing jump was found, or a recursive call failed); true
    otherwise, including when nothing had to be changed.

    Label reference counts are kept in step with every retargeting: the old
    target is decref'ed and the new one incref'ed. }
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
    var p1, p2: tai;
        l: tasmlabel;

    { Looks for a label directly following hp, ignoring items whose type is
      in SkipInstr and alignments. Returns true and sets l to the label's
      symbol if one is found; otherwise returns false and leaves l alone. }
    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
      begin
        FindAnyLabel := false;
        while assigned(hp.next) and
              (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
          hp := tai(hp.next);
        if assigned(hp.next) and
           (tai(hp.next).typ = ait_label) then
          begin
            FindAnyLabel := true;
            l := tai_label(hp.next).labsym;
          end
      end;

    begin
      GetfinalDestination := false;
      if level > 20 then
        exit;
      p1 := dfa.getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
      if assigned(p1) then
        begin
          { p1 becomes the first "real" item after the target label }
          SkipLabels(p1,p1);
          if (tai(p1).typ = ait_instruction) and
             (taicpu(p1).is_jmp) then
            if { the next instruction after the label where the jump hp arrives
                 is unconditional or of the same type as hp, so continue }
               (taicpu(p1).condition in [C_None,hp.condition]) or
               { the next instruction after the label where the jump hp arrives
                 is the opposite of hp (so this one is never taken), but after
                 that one there is a branch that will be taken, so perform a
                 little hack: set p1 equal to this instruction (that's what the
                 last SkipLabels is for, only works with short bool evaluation) }
               ((taicpu(p1).condition = inverse_cond(hp.condition)) and
                SkipLabels(p1,p2) and
                (p2.typ = ait_instruction) and
                (taicpu(p2).is_jmp) and
                (taicpu(p2).condition in [C_None,hp.condition]) and
                SkipLabels(p1,p1)) then
              begin
                { quick check for loops of the form "l5: ; jmp l5" }
                if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                    tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                  exit;
                if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                  exit;
                tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
                tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
              end
            else
              { the jump at the target is the inverse of hp and is therefore
                never taken when arriving via hp: jump to just behind it }
              if (taicpu(p1).condition = inverse_cond(hp.condition)) then
                if not FindAnyLabel(p1,l) then
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(asml,p1,p1.next,tai_comment.Create(
                      strpnew('previous label inserted')));
  {$endif finaldestdebug}
                    current_asmdata.getjumplabel(l);
                    insertllitem(asml,p1,p1.next,tai_label.Create(l));
                    tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    l.increfs;
                    { this won't work, since the new label isn't in the labeltable
                      so it will fail the rangecheck. Labeltable should become a
                      hashtable to support this:
                      GetFinalDestination(asml, hp); }
                  end
                else
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(asml,p1,p1.next,tai_comment.Create(
                      strpnew('next label reused')));
  {$endif finaldestdebug}
                    { take the new reference before dropping the old one, so
                      the count cannot pass through zero if both are the same
                      label }
                    l.increfs;
                    tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    if not GetFinalDestination(asml, hp,succ(level)) then
                      exit;
                  end;
        end;
      GetFinalDestination := true;
    end;
  { Folds the instruction preceding p into p when that instruction is a
    dec, sub-constant or add-constant on p's register operand.

    p is read as "op $const, %reg" (oper[0] is used as a constant, oper[1] as
    a register) - presumably a sub; confirm against the caller.

      dec %reg       ; p  ->  p with const+1
      sub $c, %reg   ; p  ->  p with const+c
      add $c, %reg   ; p  ->  p with const-c; if that becomes 0, p itself is
                              deleted as well

    The preceding instruction is only considered when it has the same operand
    size as p. Uses hp1 and asml of the enclosing procedure.

    Returns true only when p itself was removed; p then points to the
    instruction before the removed one, or to the one after it if there is no
    previous instruction. }
  function DoSubAddOpt(var p: tai): Boolean;
    var
      prevIsAdd: boolean;
    begin
      DoSubAddOpt := False;
      if not GetLastInstruction(p, hp1) then
        exit;
      if hp1.typ <> ait_instruction then
        exit;
      if taicpu(hp1).opsize <> taicpu(p).opsize then
        exit;

      case taicpu(hp1).opcode Of
        A_DEC:
          if (taicpu(hp1).oper[0]^.typ = top_reg) and
             (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
              asml.remove(hp1);
              hp1.free;
            end;
        A_SUB, A_ADD:
          if (taicpu(hp1).oper[0]^.typ = top_const) and
             (taicpu(hp1).oper[1]^.typ = top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              { remember the opcode: hp1 is freed below }
              prevIsAdd := (taicpu(hp1).opcode = A_ADD);
              if prevIsAdd then
                taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val)
              else
                taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
              asml.remove(hp1);
              hp1.free;
              { only the add case checks for a resulting no-op }
              if prevIsAdd and
                 (taicpu(p).oper[0]^.val = 0) then
                begin
                  hp1 := tai(p.next);
                  asml.remove(p);
                  p.free;
                  if not GetLastInstruction(hp1, p) then
                    p := hp1;
                  DoSubAddOpt := True;
                end;
            end;
      end;
    end;
  566. begin
  567. p := BlockStart;
  568. UsedRegs := [];
  569. while (p <> BlockEnd) Do
  570. begin
  571. UpDateUsedRegs(UsedRegs, tai(p.next));
  572. case p.Typ Of
  573. ait_instruction:
  574. begin
  575. { Handle Jmp Optimizations }
  576. if taicpu(p).is_jmp then
  577. begin
  578. {the following if-block removes all code between a jmp and the next label,
  579. because it can never be executed}
  580. if (taicpu(p).opcode = A_JMP) then
  581. begin
  582. while GetNextInstruction(p, hp1) and
  583. (hp1.typ <> ait_label) do
  584. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  585. begin
  586. asml.remove(hp1);
  587. hp1.free;
  588. end
  589. else break;
  590. end;
  591. { remove jumps to a label coming right after them }
  592. if GetNextInstruction(p, hp1) then
  593. begin
  594. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  595. { TODO: FIXME removing the first instruction fails}
  596. (p<>blockstart) then
  597. begin
  598. hp2:=tai(hp1.next);
  599. asml.remove(p);
  600. p.free;
  601. p:=hp2;
  602. continue;
  603. end
  604. else
  605. begin
  606. if hp1.typ = ait_label then
  607. SkipLabels(hp1,hp1);
  608. if (tai(hp1).typ=ait_instruction) and
  609. (taicpu(hp1).opcode=A_JMP) and
  610. GetNextInstruction(hp1, hp2) and
  611. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  612. begin
  613. if taicpu(p).opcode=A_Jcc then
  614. begin
  615. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  616. tai_label(hp2).labsym.decrefs;
  617. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  618. { when free'ing hp1, the ref. isn't decresed, so we don't
  619. increase it (FK)
  620. taicpu(p).oper[0]^.ref^.symbol.increfs;
  621. }
  622. asml.remove(hp1);
  623. hp1.free;
  624. GetFinalDestination(asml, taicpu(p),0);
  625. end
  626. else
  627. begin
  628. GetFinalDestination(asml, taicpu(p),0);
  629. p:=tai(p.next);
  630. continue;
  631. end;
  632. end
  633. else
  634. GetFinalDestination(asml, taicpu(p),0);
  635. end;
  636. end;
  637. end
  638. else
  639. { All other optimizes }
  640. begin
  641. for l := 0 to taicpu(p).ops-1 Do
  642. if (taicpu(p).oper[l]^.typ = top_ref) then
  643. With taicpu(p).oper[l]^.ref^ Do
  644. begin
  645. if (base = NR_NO) and
  646. (index <> NR_NO) and
  647. (scalefactor in [0,1]) then
  648. begin
  649. base := index;
  650. index := NR_NO
  651. end
  652. end;
  653. case taicpu(p).opcode Of
  654. A_AND:
  655. begin
  656. if (taicpu(p).oper[0]^.typ = top_const) and
  657. (taicpu(p).oper[1]^.typ = top_reg) and
  658. GetNextInstruction(p, hp1) and
  659. (tai(hp1).typ = ait_instruction) and
  660. (taicpu(hp1).opcode = A_AND) and
  661. (taicpu(hp1).oper[0]^.typ = top_const) and
  662. (taicpu(hp1).oper[1]^.typ = top_reg) and
  663. (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) then
  664. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  665. begin
  666. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  667. asml.remove(hp1);
  668. hp1.free;
  669. end
  670. else
  671. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  672. jump, but only if it's a conditional jump (PFV) }
  673. if (taicpu(p).oper[1]^.typ = top_reg) and
  674. GetNextInstruction(p, hp1) and
  675. (hp1.typ = ait_instruction) and
  676. (taicpu(hp1).is_jmp) and
  677. (taicpu(hp1).opcode<>A_JMP) and
  678. not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then
  679. taicpu(p).opcode := A_TEST;
  680. end;
  681. A_CMP:
  682. begin
  683. { cmp register,$8000 neg register
  684. je target --> jo target
  685. .... only if register is deallocated before jump.}
  686. case Taicpu(p).opsize of
  687. S_B: v:=$80;
  688. S_W: v:=$8000;
  689. S_L: v:=aint($80000000);
  690. end;
  691. if (taicpu(p).oper[0]^.typ=Top_const) and
  692. (taicpu(p).oper[0]^.val=v) and
  693. (Taicpu(p).oper[1]^.typ=top_reg) and
  694. GetNextInstruction(p, hp1) and
  695. (hp1.typ=ait_instruction) and
  696. (taicpu(hp1).opcode=A_Jcc) and
  697. (Taicpu(hp1).condition in [C_E,C_NE]) and
  698. not(getsupreg(Taicpu(p).oper[1]^.reg) in usedregs) then
  699. begin
  700. Taicpu(p).opcode:=A_NEG;
  701. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  702. Taicpu(p).clearop(1);
  703. Taicpu(p).ops:=1;
  704. if Taicpu(hp1).condition=C_E then
  705. Taicpu(hp1).condition:=C_O
  706. else
  707. Taicpu(hp1).condition:=C_NO;
  708. continue;
  709. end;
  710. {
  711. @@2: @@2:
  712. .... ....
  713. cmp operand1,0
  714. jle/jbe @@1
  715. dec operand1 --> sub operand1,1
  716. jmp @@2 jge/jae @@2
  717. @@1: @@1:
  718. ... ....}
  719. if (taicpu(p).oper[0]^.typ = top_const) and
  720. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  721. (taicpu(p).oper[0]^.val = 0) and
  722. GetNextInstruction(p, hp1) and
  723. (hp1.typ = ait_instruction) and
  724. (taicpu(hp1).is_jmp) and
  725. (taicpu(hp1).opcode=A_Jcc) and
  726. (taicpu(hp1).condition in [C_LE,C_BE]) and
  727. GetNextInstruction(hp1,hp2) and
  728. (hp2.typ = ait_instruction) and
  729. (taicpu(hp2).opcode = A_DEC) and
  730. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  731. GetNextInstruction(hp2, hp3) and
  732. (hp3.typ = ait_instruction) and
  733. (taicpu(hp3).is_jmp) and
  734. (taicpu(hp3).opcode = A_JMP) and
  735. GetNextInstruction(hp3, hp4) and
  736. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  737. begin
  738. taicpu(hp2).Opcode := A_SUB;
  739. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  740. taicpu(hp2).loadConst(0,1);
  741. taicpu(hp2).ops:=2;
  742. taicpu(hp3).Opcode := A_Jcc;
  743. case taicpu(hp1).condition of
  744. C_LE: taicpu(hp3).condition := C_GE;
  745. C_BE: taicpu(hp3).condition := C_AE;
  746. end;
  747. asml.remove(p);
  748. asml.remove(hp1);
  749. p.free;
  750. hp1.free;
  751. p := hp2;
  752. continue;
  753. end
  754. end;
  755. A_FLD:
  756. begin
  757. if (taicpu(p).oper[0]^.typ = top_reg) and
  758. GetNextInstruction(p, hp1) and
  759. (hp1.typ = Ait_Instruction) and
  760. (taicpu(hp1).oper[0]^.typ = top_reg) and
  761. (taicpu(hp1).oper[1]^.typ = top_reg) and
  762. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  763. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  764. { change to
  765. fld reg fxxx reg,st
  766. fxxxp st, st1 (hp1)
  767. Remark: non commutative operations must be reversed!
  768. }
  769. begin
  770. case taicpu(hp1).opcode Of
  771. A_FMULP,A_FADDP,
  772. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  773. begin
  774. case taicpu(hp1).opcode Of
  775. A_FADDP: taicpu(hp1).opcode := A_FADD;
  776. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  777. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  778. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  779. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  780. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  781. end;
  782. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  783. taicpu(hp1).oper[1]^.reg := NR_ST;
  784. asml.remove(p);
  785. p.free;
  786. p := hp1;
  787. continue;
  788. end;
  789. end;
  790. end
  791. else
  792. if (taicpu(p).oper[0]^.typ = top_ref) and
  793. GetNextInstruction(p, hp2) and
  794. (hp2.typ = Ait_Instruction) and
  795. (taicpu(hp2).ops = 2) and
  796. (taicpu(hp2).oper[0]^.typ = top_reg) and
  797. (taicpu(hp2).oper[1]^.typ = top_reg) and
  798. (taicpu(p).opsize in [S_FS, S_FL]) and
  799. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  800. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  801. if GetLastInstruction(p, hp1) and
  802. (hp1.typ = Ait_Instruction) and
  803. ((taicpu(hp1).opcode = A_FLD) or
  804. (taicpu(hp1).opcode = A_FST)) and
  805. (taicpu(hp1).opsize = taicpu(p).opsize) and
  806. (taicpu(hp1).oper[0]^.typ = top_ref) and
  807. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  808. if ((taicpu(hp2).opcode = A_FMULP) or
  809. (taicpu(hp2).opcode = A_FADDP)) then
  810. { change to
  811. fld/fst mem1 (hp1) fld/fst mem1
  812. fld mem1 (p) fadd/
  813. faddp/ fmul st, st
  814. fmulp st, st1 (hp2) }
  815. begin
  816. asml.remove(p);
  817. p.free;
  818. p := hp1;
  819. if (taicpu(hp2).opcode = A_FADDP) then
  820. taicpu(hp2).opcode := A_FADD
  821. else
  822. taicpu(hp2).opcode := A_FMUL;
  823. taicpu(hp2).oper[1]^.reg := NR_ST;
  824. end
  825. else
  826. { change to
  827. fld/fst mem1 (hp1) fld/fst mem1
  828. fld mem1 (p) fld st}
  829. begin
  830. taicpu(p).changeopsize(S_FL);
  831. taicpu(p).loadreg(0,NR_ST);
  832. end
  833. else
  834. begin
  835. case taicpu(hp2).opcode Of
  836. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  837. { change to
  838. fld/fst mem1 (hp1) fld/fst mem1
  839. fld mem2 (p) fxxx mem2
  840. fxxxp st, st1 (hp2) }
  841. begin
  842. case taicpu(hp2).opcode Of
  843. A_FADDP: taicpu(p).opcode := A_FADD;
  844. A_FMULP: taicpu(p).opcode := A_FMUL;
  845. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  846. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  847. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  848. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  849. end;
  850. asml.remove(hp2);
  851. hp2.free;
  852. end
  853. end
  854. end
  855. end;
  856. A_FSTP,A_FISTP:
  857. if doFpuLoadStoreOpt(asmL,p) then
  858. continue;
  859. A_LEA:
  860. begin
  861. {removes seg register prefixes from LEA operations, as they
  862. don't do anything}
  863. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  864. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  865. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  866. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  867. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  868. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  869. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  870. (taicpu(p).oper[0]^.ref^.offset = 0) then
  871. begin
  872. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  873. taicpu(p).oper[1]^.reg);
  874. InsertLLItem(asml,p.previous,p.next, hp1);
  875. p.free;
  876. p := hp1;
  877. continue;
  878. end
  879. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  880. begin
  881. hp1 := tai(p.Next);
  882. asml.remove(p);
  883. p.free;
  884. p := hp1;
  885. continue;
  886. end
  887. else
  888. with taicpu(p).oper[0]^.ref^ do
  889. if (base = taicpu(p).oper[1]^.reg) then
  890. begin
  891. l := offset;
  892. if (l=1) then
  893. begin
  894. taicpu(p).opcode := A_INC;
  895. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  896. taicpu(p).ops := 1
  897. end
  898. else if (l=-1) then
  899. begin
  900. taicpu(p).opcode := A_DEC;
  901. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  902. taicpu(p).ops := 1;
  903. end
  904. else
  905. begin
  906. taicpu(p).opcode := A_ADD;
  907. taicpu(p).loadConst(0,l);
  908. end;
  909. end;
  910. end;
  911. A_MOV:
  912. begin
  913. TmpUsedRegs := UsedRegs;
  914. if (taicpu(p).oper[1]^.typ = top_reg) and
  915. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  916. GetNextInstruction(p, hp1) and
  917. (tai(hp1).typ = ait_instruction) and
  918. (taicpu(hp1).opcode = A_MOV) and
  919. (taicpu(hp1).oper[0]^.typ = top_reg) and
  920. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  921. begin
  922. {we have "mov x, %treg; mov %treg, y}
  923. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  924. {we've got "mov x, %treg; mov %treg, y; with %treg is not used after }
  925. case taicpu(p).oper[0]^.typ Of
  926. top_reg:
  927. begin
  928. { change "mov %reg, %treg; mov %treg, y"
  929. to "mov %reg, y" }
  930. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  931. asml.remove(hp1);
  932. hp1.free;
  933. continue;
  934. end;
  935. top_ref:
  936. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  937. begin
  938. { change "mov mem, %treg; mov %treg, %reg"
  939. to "mov mem, %reg" }
  940. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  941. asml.remove(hp1);
  942. hp1.free;
  943. continue;
  944. end;
  945. end
  946. end
  947. else
  948. {Change "mov %reg1, %reg2; xxx %reg2, ???" to
  949. "mov %reg1, %reg2; xxx %reg1, ???" to avoid a write/read
  950. penalty}
  951. if (taicpu(p).oper[0]^.typ = top_reg) and
  952. (taicpu(p).oper[1]^.typ = top_reg) and
  953. GetNextInstruction(p,hp1) and
  954. (tai(hp1).typ = ait_instruction) and
  955. (taicpu(hp1).ops >= 1) and
  956. (taicpu(hp1).oper[0]^.typ = top_reg) and
  957. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  958. {we have "mov %reg1, %reg2; XXX %reg2, ???"}
  959. begin
  960. if ((taicpu(hp1).opcode = A_OR) or
  961. (taicpu(hp1).opcode = A_TEST)) and
  962. (taicpu(hp1).oper[1]^.typ = top_reg) and
  963. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  964. {we have "mov %reg1, %reg2; test/or %reg2, %reg2"}
  965. begin
  966. TmpUsedRegs := UsedRegs;
  967. { reg1 will be used after the first instruction, }
  968. { so update the allocation info }
  969. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  970. if GetNextInstruction(hp1, hp2) and
  971. (hp2.typ = ait_instruction) and
  972. taicpu(hp2).is_jmp and
  973. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  974. { change "mov %reg1, %reg2; test/or %reg2, %reg2; jxx" to
  975. "test %reg1, %reg1; jxx" }
  976. begin
  977. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  978. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  979. asml.remove(p);
  980. p.free;
  981. p := hp1;
  982. continue
  983. end
  984. else
  985. {change "mov %reg1, %reg2; test/or %reg2, %reg2" to
  986. "mov %reg1, %reg2; test/or %reg1, %reg1"}
  987. begin
  988. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  989. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  990. end;
  991. end
  992. { else
  993. if (taicpu(p.next)^.opcode
  994. in [A_PUSH, A_OR, A_XOR, A_AND, A_TEST])}
  995. {change "mov %reg1, %reg2; push/or/xor/... %reg2, ???" to
  996. "mov %reg1, %reg2; push/or/xor/... %reg1, ???"}
  997. end
  998. else
  999. {leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1000. x >= RetOffset) as it doesn't do anything (it writes either to a
  1001. parameter or to the temporary storage room for the function
  1002. result)}
  1003. if GetNextInstruction(p, hp1) and
  1004. (tai(hp1).typ = ait_instruction) then
  1005. if ((taicpu(hp1).opcode = A_LEAVE) or
  1006. (taicpu(hp1).opcode = A_RET)) and
  1007. (taicpu(p).oper[1]^.typ = top_ref) and
  1008. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1009. not(assigned(current_procinfo.procdef.funcretsym) and
  1010. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1011. (taicpu(p).oper[1]^.ref^.index = NR_NO) and
  1012. (taicpu(p).oper[0]^.typ = top_reg) then
  1013. begin
  1014. asml.remove(p);
  1015. p.free;
  1016. p := hp1;
  1017. RemoveLastDeallocForFuncRes(asmL,p);
  1018. end
  1019. else
  1020. if (taicpu(p).oper[0]^.typ = top_reg) and
  1021. (taicpu(p).oper[1]^.typ = top_ref) and
  1022. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1023. (taicpu(hp1).opcode = A_CMP) and
  1024. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1025. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1026. {change "mov reg1, mem1; cmp x, mem1" to "mov reg1, mem1; cmp x, reg1"}
  1027. begin
  1028. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1029. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1030. end;
  1031. { Next instruction is also a MOV ? }
  1032. if GetNextInstruction(p, hp1) and
  1033. (tai(hp1).typ = ait_instruction) and
  1034. (taicpu(hp1).opcode = A_MOV) and
  1035. (taicpu(hp1).opsize = taicpu(p).opsize) then
  1036. begin
  1037. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1038. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1039. {mov reg1, mem1 or mov mem1, reg1
  1040. mov mem2, reg2 mov reg2, mem2}
  1041. begin
  1042. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1043. {mov reg1, mem1 or mov mem1, reg1
  1044. mov mem2, reg1 mov reg2, mem1}
  1045. begin
  1046. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1047. { Removes the second statement from
  1048. mov reg1, mem1/reg2
  1049. mov mem1/reg2, reg1 }
  1050. begin
  1051. if (taicpu(p).oper[0]^.typ = top_reg) then
  1052. AllocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1053. asml.remove(hp1);
  1054. hp1.free;
  1055. end
  1056. else
  1057. begin
  1058. TmpUsedRegs := UsedRegs;
  1059. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1060. if (taicpu(p).oper[1]^.typ = top_ref) and
  1061. { mov reg1, mem1
  1062. mov mem2, reg1 }
  1063. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1064. GetNextInstruction(hp1, hp2) and
  1065. (hp2.typ = ait_instruction) and
  1066. (taicpu(hp2).opcode = A_CMP) and
  1067. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1068. (taicpu(hp2).oper[0]^.typ = TOp_Ref) and
  1069. (taicpu(hp2).oper[1]^.typ = TOp_Reg) and
  1070. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(p).oper[1]^.ref^) and
  1071. (taicpu(hp2).oper[1]^.reg= taicpu(p).oper[0]^.reg) and
  1072. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1073. { change to
  1074. mov reg1, mem1 mov reg1, mem1
  1075. mov mem2, reg1 cmp reg1, mem2
  1076. cmp mem1, reg1 }
  1077. begin
  1078. asml.remove(hp2);
  1079. hp2.free;
  1080. taicpu(hp1).opcode := A_CMP;
  1081. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1082. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1083. end;
  1084. end;
  1085. end
  1086. else
  1087. begin
  1088. tmpUsedRegs := UsedRegs;
  1089. if GetNextInstruction(hp1, hp2) and
  1090. (taicpu(p).oper[0]^.typ = top_ref) and
  1091. (taicpu(p).oper[1]^.typ = top_reg) and
  1092. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1093. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  1094. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1095. (tai(hp2).typ = ait_instruction) and
  1096. (taicpu(hp2).opcode = A_MOV) and
  1097. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1098. (taicpu(hp2).oper[1]^.typ = top_reg) and
  1099. (taicpu(hp2).oper[0]^.typ = top_ref) and
  1100. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1101. if not regInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^) and
  1102. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1103. { mov mem1, %reg1
  1104. mov %reg1, mem2
  1105. mov mem2, reg2
  1106. to:
  1107. mov mem1, reg2
  1108. mov reg2, mem2}
  1109. begin
  1110. AllocRegBetween(asmL,taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1111. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1112. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1113. asml.remove(hp2);
  1114. hp2.free;
  1115. end
  1116. else
  1117. if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1118. not(RegInRef(getsupreg(taicpu(p).oper[1]^.reg),taicpu(p).oper[0]^.ref^)) and
  1119. not(RegInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^)) then
  1120. { mov mem1, reg1 mov mem1, reg1
  1121. mov reg1, mem2 mov reg1, mem2
  1122. mov mem2, reg2 mov mem2, reg1
  1123. to: to:
  1124. mov mem1, reg1 mov mem1, reg1
  1125. mov mem1, reg2 mov reg1, mem2
  1126. mov reg1, mem2
  1127. or (if mem1 depends on reg1
  1128. and/or if mem2 depends on reg2)
  1129. to:
  1130. mov mem1, reg1
  1131. mov reg1, mem2
  1132. mov reg1, reg2
  1133. }
  1134. begin
  1135. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1136. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1137. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1138. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1139. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1140. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1141. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1142. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1143. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1144. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1145. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1146. end
  1147. else
  1148. if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1149. begin
  1150. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1151. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1152. end
  1153. else
  1154. begin
  1155. asml.remove(hp2);
  1156. hp2.free;
  1157. end
  1158. end
  1159. end
  1160. else
  1161. (* {movl [mem1],reg1
  1162. movl [mem1],reg2
  1163. to:
  1164. movl [mem1],reg1
  1165. movl reg1,reg2 }
  1166. if (taicpu(p).oper[0]^.typ = top_ref) and
  1167. (taicpu(p).oper[1]^.typ = top_reg) and
  1168. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1169. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1170. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1171. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1172. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1173. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1174. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1175. else*)
  1176. { movl const1,[mem1]
  1177. movl [mem1],reg1
  1178. to:
  1179. movl const1,reg1
  1180. movl reg1,[mem1] }
  1181. if (taicpu(p).oper[0]^.typ = top_const) and
  1182. (taicpu(p).oper[1]^.typ = top_ref) and
  1183. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1184. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1185. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1186. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1187. not(reginref(getsupreg(taicpu(hp1).oper[1]^.reg),taicpu(hp1).oper[0]^.ref^)) then
  1188. begin
  1189. allocregbetween(asml,taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1190. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1191. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1192. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1193. end
  1194. end;
  1195. if GetNextInstruction(p, hp1) and
  1196. (Tai(hp1).typ = ait_instruction) and
  1197. ((Taicpu(hp1).opcode = A_BTS) or (Taicpu(hp1).opcode = A_BTR)) and
  1198. (Taicpu(hp1).opsize = Taicpu(p).opsize) and
  1199. GetNextInstruction(hp1, hp2) and
  1200. (Tai(hp2).typ = ait_instruction) and
  1201. (Taicpu(hp2).opcode = A_OR) and
  1202. (Taicpu(hp1).opsize = Taicpu(p).opsize) and
  1203. (Taicpu(hp2).opsize = Taicpu(p).opsize) and
  1204. (Taicpu(p).oper[0]^.typ = top_const) and (Taicpu(p).oper[0]^.val=0) and
  1205. (Taicpu(p).oper[1]^.typ = top_reg) and
  1206. (Taicpu(hp1).oper[1]^.typ = top_reg) and
  1207. (Taicpu(p).oper[1]^.reg=Taicpu(hp1).oper[1]^.reg) and
  1208. (Taicpu(hp2).oper[1]^.typ = top_reg) and
  1209. (Taicpu(p).oper[1]^.reg=Taicpu(hp2).oper[1]^.reg) then
  1210. {mov reg1,0
  1211. bts reg1,operand1 --> mov reg1,operand2
  1212. or reg1,operand2 bts reg1,operand1}
  1213. begin
  1214. Taicpu(hp2).opcode:=A_MOV;
  1215. asml.remove(hp1);
  1216. insertllitem(asml,hp2,hp2.next,hp1);
  1217. asml.remove(p);
  1218. p.free;
  1219. end;
  1220. end;
  1221. A_MOVSX,
  1222. A_MOVZX :
  1223. begin
  1224. if (taicpu(p).oper[1]^.typ = top_reg) and
  1225. GetNextInstruction(p,hp1) and
  1226. (hp1.typ = ait_instruction) and
  1227. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1228. (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
  1229. GetNextInstruction(hp1,hp2) and
  1230. (hp2.typ = ait_instruction) and
  1231. (taicpu(hp2).opcode = A_MOV) and
  1232. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1233. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) then
  1234. { change movsX/movzX reg/ref, reg2 }
  1235. { add/sub/or/... reg3/$const, reg2 }
  1236. { mov reg2 reg/ref }
  1237. { to add/sub/or/... reg3/$const, reg/ref }
  1238. begin
  1239. { by example:
  1240. movswl %si,%eax movswl %si,%eax p
  1241. decl %eax addl %edx,%eax hp1
  1242. movw %ax,%si movw %ax,%si hp2
  1243. ->
  1244. movswl %si,%eax movswl %si,%eax p
  1245. decw %eax addw %edx,%eax hp1
  1246. movw %ax,%si movw %ax,%si hp2
  1247. }
  1248. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1249. {
  1250. ->
  1251. movswl %si,%eax movswl %si,%eax p
  1252. decw %si addw %dx,%si hp1
  1253. movw %ax,%si movw %ax,%si hp2
  1254. }
  1255. case taicpu(hp1).ops of
  1256. 1:
  1257. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1258. 2:
  1259. begin
  1260. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1261. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1262. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1263. end;
  1264. else
  1265. internalerror(2008042701);
  1266. end;
  1267. {
  1268. ->
  1269. decw %si addw %dx,%si p
  1270. }
  1271. asml.remove(p);
  1272. asml.remove(hp2);
  1273. p.free;
  1274. hp2.free;
  1275. p := hp1
  1276. end
  1277. { removes superfluous And's after movzx's }
  1278. else if taicpu(p).opcode=A_MOVZX then
  1279. begin
  1280. if (taicpu(p).oper[1]^.typ = top_reg) and
  1281. GetNextInstruction(p, hp1) and
  1282. (tai(hp1).typ = ait_instruction) and
  1283. (taicpu(hp1).opcode = A_AND) and
  1284. (taicpu(hp1).oper[0]^.typ = top_const) and
  1285. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1286. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1287. case taicpu(p).opsize Of
  1288. S_BL, S_BW:
  1289. if (taicpu(hp1).oper[0]^.val = $ff) then
  1290. begin
  1291. asml.remove(hp1);
  1292. hp1.free;
  1293. end;
  1294. S_WL:
  1295. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1296. begin
  1297. asml.remove(hp1);
  1298. hp1.free;
  1299. end;
  1300. end;
  1301. {changes some movzx constructs to faster synonyms (all examples
  1302. are given with eax/ax, but are also valid for other registers)}
  1303. if (taicpu(p).oper[1]^.typ = top_reg) then
  1304. if (taicpu(p).oper[0]^.typ = top_reg) then
  1305. case taicpu(p).opsize of
  1306. S_BW:
  1307. begin
  1308. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1309. not(cs_opt_size in current_settings.optimizerswitches) then
  1310. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1311. begin
  1312. taicpu(p).opcode := A_AND;
  1313. taicpu(p).changeopsize(S_W);
  1314. taicpu(p).loadConst(0,$ff);
  1315. end
  1316. else if GetNextInstruction(p, hp1) and
  1317. (tai(hp1).typ = ait_instruction) and
  1318. (taicpu(hp1).opcode = A_AND) and
  1319. (taicpu(hp1).oper[0]^.typ = top_const) and
  1320. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1321. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1322. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1323. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1324. begin
  1325. taicpu(p).opcode := A_MOV;
  1326. taicpu(p).changeopsize(S_W);
  1327. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1328. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1329. end;
  1330. end;
  1331. S_BL:
  1332. begin
  1333. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1334. not(cs_opt_size in current_settings.optimizerswitches) then
  1335. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1336. begin
  1337. taicpu(p).opcode := A_AND;
  1338. taicpu(p).changeopsize(S_L);
  1339. taicpu(p).loadConst(0,$ff)
  1340. end
  1341. else if GetNextInstruction(p, hp1) and
  1342. (tai(hp1).typ = ait_instruction) and
  1343. (taicpu(hp1).opcode = A_AND) and
  1344. (taicpu(hp1).oper[0]^.typ = top_const) and
  1345. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1346. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1347. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1348. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1349. begin
  1350. taicpu(p).opcode := A_MOV;
  1351. taicpu(p).changeopsize(S_L);
  1352. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1353. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1354. end
  1355. end;
  1356. S_WL:
  1357. begin
  1358. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1359. not(cs_opt_size in current_settings.optimizerswitches) then
  1360. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1361. begin
  1362. taicpu(p).opcode := A_AND;
  1363. taicpu(p).changeopsize(S_L);
  1364. taicpu(p).loadConst(0,$ffff);
  1365. end
  1366. else if GetNextInstruction(p, hp1) and
  1367. (tai(hp1).typ = ait_instruction) and
  1368. (taicpu(hp1).opcode = A_AND) and
  1369. (taicpu(hp1).oper[0]^.typ = top_const) and
  1370. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1371. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1372. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1373. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1374. begin
  1375. taicpu(p).opcode := A_MOV;
  1376. taicpu(p).changeopsize(S_L);
  1377. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1378. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1379. end;
  1380. end;
  1381. end
  1382. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1383. begin
  1384. if GetNextInstruction(p, hp1) and
  1385. (tai(hp1).typ = ait_instruction) and
  1386. (taicpu(hp1).opcode = A_AND) and
  1387. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1388. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1389. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1390. begin
  1391. taicpu(p).opcode := A_MOV;
  1392. case taicpu(p).opsize Of
  1393. S_BL:
  1394. begin
  1395. taicpu(p).changeopsize(S_L);
  1396. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1397. end;
  1398. S_WL:
  1399. begin
  1400. taicpu(p).changeopsize(S_L);
  1401. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1402. end;
  1403. S_BW:
  1404. begin
  1405. taicpu(p).changeopsize(S_W);
  1406. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1407. end;
  1408. end;
  1409. end;
  1410. end;
  1411. end;
  1412. end;
  1413. (* should not be generated anymore by the current code generator
  1414. A_POP:
  1415. begin
  1416. if target_info.system=system_i386_go32v2 then
  1417. begin
  1418. { Transform a series of pop/pop/pop/push/push/push to }
  1419. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1420. { because I'm not sure whether they can cope with }
  1421. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1422. { such a problem when using esp as frame pointer (JM) }
  1423. if (taicpu(p).oper[0]^.typ = top_reg) then
  1424. begin
  1425. hp1 := p;
  1426. hp2 := p;
  1427. l := 0;
  1428. while getNextInstruction(hp1,hp1) and
  1429. (hp1.typ = ait_instruction) and
  1430. (taicpu(hp1).opcode = A_POP) and
  1431. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1432. begin
  1433. hp2 := hp1;
  1434. inc(l,4);
  1435. end;
  1436. getLastInstruction(p,hp3);
  1437. l1 := 0;
  1438. while (hp2 <> hp3) and
  1439. assigned(hp1) and
  1440. (hp1.typ = ait_instruction) and
  1441. (taicpu(hp1).opcode = A_PUSH) and
  1442. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1443. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1444. begin
  1445. { change it to a two op operation }
  1446. taicpu(hp2).oper[1]^.typ:=top_none;
  1447. taicpu(hp2).ops:=2;
  1448. taicpu(hp2).opcode := A_MOV;
  1449. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1450. reference_reset(tmpref);
  1451. tmpRef.base.enum:=R_INTREGISTER;
  1452. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1453. convert_register_to_enum(tmpref.base);
  1454. tmpRef.offset := l;
  1455. taicpu(hp2).loadRef(0,tmpRef);
  1456. hp4 := hp1;
  1457. getNextInstruction(hp1,hp1);
  1458. asml.remove(hp4);
  1459. hp4.free;
  1460. getLastInstruction(hp2,hp2);
  1461. dec(l,4);
  1462. inc(l1);
  1463. end;
  1464. if l <> -4 then
  1465. begin
  1466. inc(l,4);
  1467. for l1 := l1 downto 1 do
  1468. begin
  1469. getNextInstruction(hp2,hp2);
  1470. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1471. end
  1472. end
  1473. end
  1474. end
  1475. else
  1476. begin
  1477. if (taicpu(p).oper[0]^.typ = top_reg) and
  1478. GetNextInstruction(p, hp1) and
  1479. (tai(hp1).typ=ait_instruction) and
  1480. (taicpu(hp1).opcode=A_PUSH) and
  1481. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1482. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1483. begin
  1484. { change it to a two op operation }
  1485. taicpu(p).oper[1]^.typ:=top_none;
  1486. taicpu(p).ops:=2;
  1487. taicpu(p).opcode := A_MOV;
  1488. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1489. reference_reset(tmpref);
  1490. TmpRef.base.enum := R_ESP;
  1491. taicpu(p).loadRef(0,TmpRef);
  1492. asml.remove(hp1);
  1493. hp1.free;
  1494. end;
  1495. end;
  1496. end;
  1497. *)
  1498. A_PUSH:
  1499. begin
  1500. if (taicpu(p).opsize = S_W) and
  1501. (taicpu(p).oper[0]^.typ = Top_Const) and
  1502. GetNextInstruction(p, hp1) and
  1503. (tai(hp1).typ = ait_instruction) and
  1504. (taicpu(hp1).opcode = A_PUSH) and
  1505. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1506. (taicpu(hp1).opsize = S_W) then
  1507. begin
  1508. taicpu(p).changeopsize(S_L);
  1509. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1510. asml.remove(hp1);
  1511. hp1.free;
  1512. end;
  1513. end;
  1514. A_SHL, A_SAL:
  1515. begin
  1516. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1517. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1518. (taicpu(p).opsize = S_L) and
  1519. (taicpu(p).oper[0]^.val <= 3) then
  1520. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1521. begin
  1522. TmpBool1 := True; {should we check the next instruction?}
  1523. TmpBool2 := False; {have we found an add/sub which could be
  1524. integrated in the lea?}
  1525. reference_reset(tmpref,2);
  1526. TmpRef.index := taicpu(p).oper[1]^.reg;
  1527. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1528. while TmpBool1 and
  1529. GetNextInstruction(p, hp1) and
  1530. (tai(hp1).typ = ait_instruction) and
  1531. ((((taicpu(hp1).opcode = A_ADD) or
  1532. (taicpu(hp1).opcode = A_SUB)) and
  1533. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1534. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1535. (((taicpu(hp1).opcode = A_INC) or
  1536. (taicpu(hp1).opcode = A_DEC)) and
  1537. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1538. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1539. (not GetNextInstruction(hp1,hp2) or
  1540. not instrReadsFlags(hp2)) Do
  1541. begin
  1542. TmpBool1 := False;
  1543. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1544. begin
  1545. TmpBool1 := True;
  1546. TmpBool2 := True;
  1547. case taicpu(hp1).opcode of
  1548. A_ADD:
  1549. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1550. A_SUB:
  1551. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1552. end;
  1553. asml.remove(hp1);
  1554. hp1.free;
  1555. end
  1556. else
  1557. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1558. (((taicpu(hp1).opcode = A_ADD) and
  1559. (TmpRef.base = NR_NO)) or
  1560. (taicpu(hp1).opcode = A_INC) or
  1561. (taicpu(hp1).opcode = A_DEC)) then
  1562. begin
  1563. TmpBool1 := True;
  1564. TmpBool2 := True;
  1565. case taicpu(hp1).opcode of
  1566. A_ADD:
  1567. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1568. A_INC:
  1569. inc(TmpRef.offset);
  1570. A_DEC:
  1571. dec(TmpRef.offset);
  1572. end;
  1573. asml.remove(hp1);
  1574. hp1.free;
  1575. end;
  1576. end;
  1577. if TmpBool2 or
  1578. ((current_settings.optimizecputype < cpu_Pentium2) and
  1579. (taicpu(p).oper[0]^.val <= 3) and
  1580. not(cs_opt_size in current_settings.optimizerswitches)) then
  1581. begin
  1582. if not(TmpBool2) and
  1583. (taicpu(p).oper[0]^.val = 1) then
  1584. begin
  1585. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1586. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1587. end
  1588. else
  1589. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1590. taicpu(p).oper[1]^.reg);
  1591. InsertLLItem(asml,p.previous, p.next, hp1);
  1592. p.free;
  1593. p := hp1;
  1594. end;
  1595. end
  1596. else
  1597. if (current_settings.optimizecputype < cpu_Pentium2) and
  1598. (taicpu(p).oper[0]^.typ = top_const) and
  1599. (taicpu(p).oper[1]^.typ = top_reg) then
  1600. if (taicpu(p).oper[0]^.val = 1) then
  1601. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1602. but faster on a 486, and pairable in both U and V pipes on the Pentium
  1603. (unlike shl, which is only pairable in the U pipe)}
  1604. begin
  1605. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1606. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1607. InsertLLItem(asml,p.previous, p.next, hp1);
  1608. p.free;
  1609. p := hp1;
  1610. end
  1611. else if (taicpu(p).opsize = S_L) and
  1612. (taicpu(p).oper[0]^.val<= 3) then
  1613. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1614. "shl $3, %reg" to "lea (,%reg,8), %reg"}
  1615. begin
  1616. reference_reset(tmpref,2);
  1617. TmpRef.index := taicpu(p).oper[1]^.reg;
  1618. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1619. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1620. InsertLLItem(asml,p.previous, p.next, hp1);
  1621. p.free;
  1622. p := hp1;
  1623. end
  1624. end;
  1625. A_SETcc :
  1626. { changes
  1627. setcc (funcres) setcc reg
  1628. movb (funcres), reg to leave/ret
  1629. leave/ret }
  1630. begin
  1631. if (taicpu(p).oper[0]^.typ = top_ref) and
  1632. GetNextInstruction(p, hp1) and
  1633. GetNextInstruction(hp1, hp2) and
  1634. (hp2.typ = ait_instruction) and
  1635. ((taicpu(hp2).opcode = A_LEAVE) or
  1636. (taicpu(hp2).opcode = A_RET)) and
  1637. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1638. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1639. not(assigned(current_procinfo.procdef.funcretsym) and
  1640. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1641. (hp1.typ = ait_instruction) and
  1642. (taicpu(hp1).opcode = A_MOV) and
  1643. (taicpu(hp1).opsize = S_B) and
  1644. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1645. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1646. begin
  1647. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1648. asml.remove(hp1);
  1649. hp1.free;
  1650. end
  1651. end;
  1652. A_SUB:
  1653. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1654. { * change "sub/add const1, reg" or "dec reg" followed by
  1655. "sub const2, reg" to one "sub ..., reg" }
  1656. begin
  1657. if (taicpu(p).oper[0]^.typ = top_const) and
  1658. (taicpu(p).oper[1]^.typ = top_reg) then
  1659. if (taicpu(p).oper[0]^.val = 2) and
  1660. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1661. { Don't do the sub/push optimization if the sub }
  1662. { comes from setting up the stack frame (JM) }
  1663. (not getLastInstruction(p,hp1) or
  1664. (hp1.typ <> ait_instruction) or
  1665. (taicpu(hp1).opcode <> A_MOV) or
  1666. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1667. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1668. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1669. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1670. begin
  1671. hp1 := tai(p.next);
  1672. while Assigned(hp1) and
  1673. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1674. not regReadByInstruction(RS_ESP,hp1) and
  1675. not regModifiedByInstruction(RS_ESP,hp1) do
  1676. hp1 := tai(hp1.next);
  1677. if Assigned(hp1) and
  1678. (tai(hp1).typ = ait_instruction) and
  1679. (taicpu(hp1).opcode = A_PUSH) and
  1680. (taicpu(hp1).opsize = S_W) then
  1681. begin
  1682. taicpu(hp1).changeopsize(S_L);
  1683. if taicpu(hp1).oper[0]^.typ=top_reg then
  1684. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1685. hp1 := tai(p.next);
  1686. asml.remove(p);
  1687. p.free;
  1688. p := hp1;
  1689. continue
  1690. end;
  1691. if DoSubAddOpt(p) then
  1692. continue;
  1693. end
  1694. else if DoSubAddOpt(p) then
  1695. continue
  1696. end;
  1697. end;
  1698. end; { if is_jmp }
  1699. end;
  1700. end;
  1701. updateUsedRegs(UsedRegs,p);
  1702. p:=tai(p.next);
  1703. end;
  1704. end;
  1705. procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);
  function CanBeCMOV(p : tai) : boolean;
    { Tells whether p is a MOV that may be rewritten as a CMOVcc.
      The answer is true only for an instruction entry whose opcode is A_MOV,
      whose operand size is S_L or S_W, and whose source and destination
      operands are both registers.
      Memory sources are rejected on purpose: cmov ref,reg still throws an
      exception when ref is nil, even though the move itself is not carried
      out (FK). For that reason the former alternative, accepting a top_ref
      source with refaddr = addr_no, remains disabled. }
    begin
      CanBeCMOV:=false;
      { nothing to inspect }
      if not assigned(p) then
        exit;
      { only real instructions can be converted }
      if p.typ<>ait_instruction then
        exit;
      if taicpu(p).opcode<>A_MOV then
        exit;
      if not(taicpu(p).opsize in [S_L,S_W]) then
        exit;
      { the source has to be a register, see the note above }
      if taicpu(p).oper[0]^.typ<>top_reg then
        exit;
      { ... and so has the destination }
      CanBeCMOV:=(taicpu(p).oper[1]^.typ=top_reg);
    end;
  1721. var
  1722. p,hp1,hp2: tai;
  1723. l : longint;
  1724. condition : tasmcond;
  1725. hp3: tai;
  1726. UsedRegs, TmpUsedRegs: TRegSet;
  1727. carryadd_opcode: Tasmop;
  1728. begin
  1729. p := BlockStart;
  1730. UsedRegs := [];
  1731. while (p <> BlockEnd) Do
  1732. begin
  1733. UpdateUsedRegs(UsedRegs, tai(p.next));
  1734. case p.Typ Of
  1735. Ait_Instruction:
  1736. begin
  1737. case taicpu(p).opcode Of
  1738. A_Jcc:
  1739. begin
  1740. { jb @@1 cmc
  1741. inc/dec operand --> adc/sbb operand,0
  1742. @@1:
  1743. ... and ...
  1744. jnb @@1
  1745. inc/dec operand --> adc/sbb operand,0
  1746. @@1: }
  1747. if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
  1748. GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
  1749. (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
  1750. begin
  1751. carryadd_opcode:=A_NONE;
  1752. if Taicpu(p).condition in [C_NAE,C_B] then
  1753. begin
  1754. if Taicpu(hp1).opcode=A_INC then
  1755. carryadd_opcode:=A_ADC;
  1756. if Taicpu(hp1).opcode=A_DEC then
  1757. carryadd_opcode:=A_SBB;
  1758. if carryadd_opcode<>A_NONE then
  1759. begin
  1760. Taicpu(p).clearop(0);
  1761. Taicpu(p).ops:=0;
  1762. Taicpu(p).is_jmp:=false;
  1763. Taicpu(p).opcode:=A_CMC;
  1764. Taicpu(p).condition:=C_NONE;
  1765. Taicpu(hp1).ops:=2;
  1766. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1767. Taicpu(hp1).loadconst(0,0);
  1768. Taicpu(hp1).opcode:=carryadd_opcode;
  1769. continue;
  1770. end;
  1771. end;
  1772. if Taicpu(p).condition in [C_AE,C_NB] then
  1773. begin
  1774. if Taicpu(hp1).opcode=A_INC then
  1775. carryadd_opcode:=A_ADC;
  1776. if Taicpu(hp1).opcode=A_DEC then
  1777. carryadd_opcode:=A_SBB;
  1778. if carryadd_opcode<>A_NONE then
  1779. begin
  1780. asml.remove(p);
  1781. p.free;
  1782. Taicpu(hp1).ops:=2;
  1783. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1784. Taicpu(hp1).loadconst(0,0);
  1785. Taicpu(hp1).opcode:=carryadd_opcode;
  1786. p:=hp1;
  1787. continue;
  1788. end;
  1789. end;
  1790. end;
  1791. if (current_settings.cputype>=cpu_Pentium2) then
  1792. begin
  1793. { check for
  1794. jCC xxx
  1795. <several movs>
  1796. xxx:
  1797. }
  1798. l:=0;
  1799. GetNextInstruction(p, hp1);
  1800. while assigned(hp1) and
  1801. CanBeCMOV(hp1) and
  1802. { stop on labels }
  1803. not(hp1.typ=ait_label) do
  1804. begin
  1805. inc(l);
  1806. GetNextInstruction(hp1,hp1);
  1807. end;
  1808. if assigned(hp1) then
  1809. begin
  1810. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1811. begin
  1812. if (l<=4) and (l>0) then
  1813. begin
  1814. condition:=inverse_cond(taicpu(p).condition);
  1815. hp2:=p;
  1816. GetNextInstruction(p,hp1);
  1817. p:=hp1;
  1818. repeat
  1819. taicpu(hp1).opcode:=A_CMOVcc;
  1820. taicpu(hp1).condition:=condition;
  1821. GetNextInstruction(hp1,hp1);
  1822. until not(assigned(hp1)) or
  1823. not(CanBeCMOV(hp1));
  1824. { delay removing the jump until here, otherwise GetNextInstruction
  1825. could skip the label if the jump being removed was the only
  1826. reference to it }
  1827. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1828. asml.remove(hp2);
  1829. hp2.free;
  1830. continue;
  1831. end;
  1832. end
  1833. else
  1834. begin
  1835. { check further for
  1836. jCC xxx
  1837. <several movs 1>
  1838. jmp yyy
  1839. xxx:
  1840. <several movs 2>
  1841. yyy:
  1842. }
  1843. { hp2 points to jmp yyy }
  1844. hp2:=hp1;
  1845. { skip hp1 to xxx }
  1846. GetNextInstruction(hp1, hp1);
  1847. if assigned(hp2) and
  1848. assigned(hp1) and
  1849. (l<=3) and
  1850. (hp2.typ=ait_instruction) and
  1851. (taicpu(hp2).is_jmp) and
  1852. (taicpu(hp2).condition=C_None) and
  1853. { real label and jump, no further references to the
  1854. label are allowed }
  1855. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
  1856. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1857. begin
  1858. l:=0;
  1859. { skip hp1 to <several moves 2> }
  1860. GetNextInstruction(hp1, hp1);
  1861. while assigned(hp1) and
  1862. CanBeCMOV(hp1) do
  1863. begin
  1864. inc(l);
  1865. GetNextInstruction(hp1, hp1);
  1866. end;
  1867. { hp1 points to yyy: }
  1868. if assigned(hp1) and
  1869. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  1870. begin
  1871. condition:=inverse_cond(taicpu(p).condition);
  1872. GetNextInstruction(p,hp1);
  1873. hp3:=p;
  1874. p:=hp1;
  1875. repeat
  1876. taicpu(hp1).opcode:=A_CMOVcc;
  1877. taicpu(hp1).condition:=condition;
  1878. GetNextInstruction(hp1,hp1);
  1879. until not(assigned(hp1)) or
  1880. not(CanBeCMOV(hp1));
  1881. { hp2 is still at jmp yyy }
  1882. GetNextInstruction(hp2,hp1);
  1883. { hp2 is now at xxx: }
  1884. condition:=inverse_cond(condition);
  1885. GetNextInstruction(hp1,hp1);
  1886. { hp1 is now at <several movs 2> }
  1887. repeat
  1888. taicpu(hp1).opcode:=A_CMOVcc;
  1889. taicpu(hp1).condition:=condition;
  1890. GetNextInstruction(hp1,hp1);
  1891. until not(assigned(hp1)) or
  1892. not(CanBeCMOV(hp1));
  1893. {
  1894. asml.remove(hp1.next)
  1895. hp1.next.free;
  1896. asml.remove(hp1);
  1897. hp1.free;
  1898. }
  1899. { remove jCC }
  1900. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  1901. asml.remove(hp3);
  1902. hp3.free;
  1903. { remove jmp }
  1904. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1905. asml.remove(hp2);
  1906. hp2.free;
  1907. continue;
  1908. end;
  1909. end;
  1910. end;
  1911. end;
  1912. end;
  1913. end;
  1914. A_FSTP,A_FISTP:
  1915. if doFpuLoadStoreOpt(asmL,p) then
  1916. continue;
  1917. A_IMUL:
  1918. begin
  1919. if (taicpu(p).ops >= 2) and
  1920. ((taicpu(p).oper[0]^.typ = top_const) or
  1921. ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
  1922. (taicpu(p).oper[1]^.typ = top_reg) and
  1923. ((taicpu(p).ops = 2) or
  1924. ((taicpu(p).oper[2]^.typ = top_reg) and
  1925. (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
  1926. getLastInstruction(p,hp1) and
  1927. (hp1.typ = ait_instruction) and
  1928. (taicpu(hp1).opcode = A_MOV) and
  1929. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1930. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1931. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1932. { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
  1933. begin
  1934. taicpu(p).ops := 3;
  1935. taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
  1936. taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
  1937. asml.remove(hp1);
  1938. hp1.free;
  1939. end;
  1940. end;
  1941. A_MOV:
  1942. begin
  1943. if (taicpu(p).oper[0]^.typ = top_reg) and
  1944. (taicpu(p).oper[1]^.typ = top_reg) and
  1945. GetNextInstruction(p, hp1) and
  1946. (hp1.typ = ait_Instruction) and
  1947. ((taicpu(hp1).opcode = A_MOV) or
  1948. (taicpu(hp1).opcode = A_MOVZX) or
  1949. (taicpu(hp1).opcode = A_MOVSX)) and
  1950. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1951. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1952. ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
  1953. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
  1954. (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
  1955. {mov reg1, reg2
  1956. mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
  1957. begin
  1958. if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
  1959. taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
  1960. if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
  1961. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
  1962. asml.remove(p);
  1963. p.free;
  1964. p := hp1;
  1965. continue;
  1966. end
  1967. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1968. GetNextInstruction(p,hp1) and
  1969. (hp1.typ = ait_instruction) and
  1970. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1971. GetNextInstruction(hp1,hp2) and
  1972. (hp2.typ = ait_instruction) and
  1973. (taicpu(hp2).opcode = A_MOV) and
  1974. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1975. (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  1976. (taicpu(hp2).oper[1]^.typ = top_ref) then
  1977. begin
  1978. TmpUsedRegs := UsedRegs;
  1979. UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
  1980. if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
  1981. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,
  1982. hp2, TmpUsedRegs))) then
  1983. { change mov (ref), reg }
  1984. { add/sub/or/... reg2/$const, reg }
  1985. { mov reg, (ref) }
  1986. { # release reg }
  1987. { to add/sub/or/... reg2/$const, (ref) }
  1988. begin
  1989. case taicpu(hp1).opcode of
  1990. A_INC,A_DEC:
  1991. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^)
  1992. else
  1993. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1994. end;
  1995. asml.remove(p);
  1996. asml.remove(hp2);
  1997. p.free;
  1998. hp2.free;
  1999. p := hp1
  2000. end;
  2001. end
  2002. end;
  2003. end;
  2004. end;
  2005. end;
  2006. p := tai(p.next)
  2007. end;
  2008. end;
  { Last peephole pass over a block of the assembler list.

    Visits every list entry from BlockStart up to, but not including, BlockEnd
    and rewrites instructions in place:

      * "call x; jmp y"       ->  "push y; jmp x"
                                  (optimizecputype below Pentium2, no PIC)
      * "cmp $0, %reg"        ->  "test %reg, %reg"
      * "movzbl src, %reg32"  ->  "xorl %reg32, %reg32; movb src, %reg8"
                                  (optimizecputype = Pentium, not optimizing
                                   for size, register variables disabled)
      * "test/or %y, %y" that directly follows an arithmetic instruction on %y
        and directly precedes a setcc/jcc/cmovcc is removed.

    asml       : list owning the entries; instructions are inserted into and
                 removed from it.
    BlockStart : first entry that is examined.
    BlockEnd   : entry at which the walk stops; it is not examined itself. }
  procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
    var
      p,hp1,hp2: tai;
    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                case taicpu(p).opcode Of
                  A_CALL:
                    { change "call x; jmp y" to "push y; jmp x": the pushed
                      symbol takes the place of the return address }
                    if (current_settings.optimizecputype < cpu_Pentium2) and
                       not(cs_create_pic in current_settings.moduleswitches) and
                       GetNextInstruction(p, hp1) and
                       (hp1.typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_JMP) and
                       ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
                      begin
                        hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
                        { the push goes in front of the former call }
                        InsertLLItem(asml, p.previous, p, hp2);
                        taicpu(p).opcode := A_JMP;
                        taicpu(p).is_jmp := true;
                        asml.remove(hp1);
                        hp1.free;
                      end;
                  A_CMP:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(p).oper[0]^.val = 0) and
                         (taicpu(p).oper[1]^.typ = top_reg) then
                        {change "cmp $0, %reg" to "test %reg, %reg"}
                        begin
                          taicpu(p).opcode := A_TEST;
                          taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                          { look at p again, it is an A_TEST now }
                          continue;
                        end;
                    end;
(*
                  Optimization is not safe; xor clears the carry flag.
                  See test/tgadint64 in the test suite.
                  A_MOV:
                    if (taicpu(p).oper[0]^.typ = Top_Const) and
                       (taicpu(p).oper[0]^.val = 0) and
                       (taicpu(p).oper[1]^.typ = Top_Reg) then
                      { change "mov $0, %reg" into "xor %reg, %reg" }
                      begin
                        taicpu(p).opcode := A_XOR;
                        taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
                      end;
*)
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like }
                    { "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"             }
                    { so we can't safely replace the movzx then with xor/mov,   }
                    { since that would change the flags (JM)                    }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) then
                      begin
                        if (taicpu(p).oper[1]^.typ = top_reg) then
                          begin
                            if (taicpu(p).oper[0]^.typ = top_reg) then
                              begin
                                { NOTE(review): IsGP32Reg presumably limits the
                                  destination to registers that have an 8 bit
                                  low part - confirm in daopt386 }
                                if (taicpu(p).opsize = S_BL) and
                                   IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
                                   { the inserted xor would wipe the source
                                     before the movb reads it if both operands
                                     live in the same super register, e.g.
                                     "movzbl %al, %eax" }
                                   (getsupreg(taicpu(p).oper[0]^.reg) <>
                                    getsupreg(taicpu(p).oper[1]^.reg)) and
                                   not(cs_opt_size in current_settings.optimizerswitches) and
                                   (current_settings.optimizecputype = cpu_Pentium) then
                                  {Change "movzbl %reg1, %reg2" to
                                   "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                   PentiumMMX}
                                  begin
                                    hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                             taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                    InsertLLItem(asml,p.previous, p, hp1);
                                    taicpu(p).opcode := A_MOV;
                                    taicpu(p).changeopsize(S_B);
                                    setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                  end;
                              end
                            else if (taicpu(p).oper[0]^.typ = top_ref) and
                               { the destination may not be part of the address,
                                 the xor would destroy it }
                               (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                               (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                               not(cs_opt_size in current_settings.optimizerswitches) and
                               IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
                               (current_settings.optimizecputype = cpu_Pentium) and
                               (taicpu(p).opsize = S_BL) then
                              {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                               Pentium and PentiumMMX}
                              begin
                                { build the xor with the 32 bit register before
                                  the operand is narrowed below }
                                hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                         taicpu(p).oper[1]^.reg);
                                taicpu(p).opcode := A_MOV;
                                taicpu(p).changeopsize(S_B);
                                setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                InsertLLItem(asml,p.previous, p, hp1);
                              end;
                          end;
                      end;
                  A_TEST, A_OR:
                    {removes the line marked with (x) from the sequence
                       and/or/xor/add/sub/... $x, %y
                       test/or %y, %y   (x)
                       j(n)z _Label
                     as the first instruction already adjusts the ZF}
                    begin
                      if OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
                        if GetLastInstruction(p, hp1) and
                           (tai(hp1).typ = ait_instruction) and
                           GetNextInstruction(p,hp2) and
                           (hp2.typ = ait_instruction) and
                           ((taicpu(hp2).opcode = A_SETcc) or
                            (taicpu(hp2).opcode = A_Jcc) or
                            (taicpu(hp2).opcode = A_CMOVcc)) then
                          case taicpu(hp1).opcode Of
                            A_ADD, A_SUB, A_OR, A_XOR, A_AND{, A_SHL, A_SHR}:
                              begin
                                if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
                                   { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                   { and in case of carry for A(E)/B(E)/C/NC                  }
                                   ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                                    ((taicpu(hp1).opcode <> A_ADD) and
                                     (taicpu(hp1).opcode <> A_SUB))) then
                                  begin
                                    { drop the test/or and carry on with its
                                      successor }
                                    hp1 := tai(p.next);
                                    asml.remove(p);
                                    p.free;
                                    p := tai(hp1);
                                    continue
                                  end;
                              end;
                            A_DEC, A_INC, A_NEG:
                              begin
                                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                                   { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                   { and in case of carry for A(E)/B(E)/C/NC                  }
                                   (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                  begin
                                    case taicpu(hp1).opcode Of
                                      A_DEC, A_INC:
                                        {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
                                        begin
                                          case taicpu(hp1).opcode Of
                                            A_DEC: taicpu(hp1).opcode := A_SUB;
                                            A_INC: taicpu(hp1).opcode := A_ADD;
                                          end;
                                          { former single operand becomes the
                                            destination, the constant 1 the source }
                                          taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                                          taicpu(hp1).loadConst(0,1);
                                          taicpu(hp1).ops:=2;
                                        end
                                    end;
                                    { drop the test/or and carry on with its
                                      successor }
                                    hp1 := tai(p.next);
                                    asml.remove(p);
                                    p.free;
                                    p := tai(hp1);
                                    continue
                                  end;
                              end
                          end
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  2174. end.