popt386.pas 124 KB


  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit popt386;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses Aasmbase,aasmtai,aasmdata,aasmcpu,verbose;
  22. procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  23. procedure PeepHoleOptPass1(asml: TAsmList; BlockStart, BlockEnd: tai);
  24. procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);
  25. procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  26. implementation
  27. uses
  28. cutils,globtype,systems,
  29. globals,cgbase,procinfo,
  30. symsym,
  31. {$ifdef finaldestdebug}
  32. cobjects,
  33. {$endif finaldestdebug}
  34. cpuinfo,cpubase,cgutils,daopt386,
  35. cgx86;
  function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  { Tells whether hp1 is an arithmetic/logic instruction that modifies "reg"
    in place:
      - add/sub/or/xor/and/shl/shr/sar whose destination (operand 1) is the
        register "reg" and whose source (operand 0) is either a constant or
        a register different from "reg";
      - inc/dec whose single operand is the register "reg".
    Every other opcode yields false. }
  begin
    result := false;
    case hp1.opcode of
      A_ADD, A_SUB, A_OR, A_XOR, A_AND, A_SHL, A_SHR, A_SAR:
        begin
          { the source must not depend on reg itself }
          if hp1.oper[0]^.typ <> top_const then
            begin
              if hp1.oper[0]^.typ <> top_reg then
                exit;
              if hp1.oper[0]^.reg = reg then
                exit;
            end;
          { the destination must be exactly reg }
          if hp1.oper[1]^.typ = top_reg then
            result := (hp1.oper[1]^.reg = reg);
        end;
      A_INC, A_DEC:
        if hp1.oper[0]^.typ = top_reg then
          result := (hp1.oper[0]^.reg = reg);
    end;
  end;
  function RegUsedAfterInstruction(reg: Tregister; p: tai; var UsedRegs: TRegSet): Boolean;
  { Updates UsedRegs with the items following p and reports whether the
    super register of "reg" is still in use afterwards.  The register counts
    as used when it is a member of UsedRegs and either there is no next
    instruction, or that next instruction is not reported by
    regLoadedWithNewValue as loading it with a new value. }
  var
    wantedReg: tsuperregister;
    following: tai;
  begin
    wantedReg := getsupreg(reg);
    UpdateUsedRegs(UsedRegs, tai(p.Next));
    result := false;
    if not (wantedReg in UsedRegs) then
      exit;
    if not getNextInstruction(p, following) then
      result := true
    else
      result := not regLoadedWithNewValue(wantedReg, false, following);
  end;
  function doFpuLoadStoreOpt(asmL: TAsmList; var p: tai): boolean;
  { Handles the pattern
        fstp  mem        (p)
        fld   mem        (next instruction)
    respectively fistp/fild, where both instructions use the same operand
    size and the same memory reference.

    The pair is removed only when the value is an extended (S_FX), the
    instruction after the pair is LEAVE or RET, and the reference is
    frame-pointer based without index and is not located below the function
    result (when the procedure has a funcretsym).  In that case p is set to
    the LEAVE/RET instruction, removeLastDeallocForFuncRes is called on it
    and the function returns true, meaning the caller should "continue".

    Turning "fstp f; fld f" into "fst f" for the smaller sizes is
    deliberately NOT done: the store rounds the value, so the reloaded value
    may differ from the one on the FPU stack. }
  var
    reload, tail: tai;
  begin
    result := false;

    { --- recognise the store/load pair --- }
    if taicpu(p).oper[0]^.typ <> top_ref then
      exit;
    if not getNextInstruction(p, reload) then
      exit;
    if reload.typ <> ait_instruction then
      exit;
    if not (((taicpu(reload).opcode = A_FLD) and
             (taicpu(p).opcode = A_FSTP)) or
            ((taicpu(p).opcode = A_FISTP) and
             (taicpu(reload).opcode = A_FILD))) then
      exit;
    if taicpu(reload).oper[0]^.typ <> top_ref then
      exit;
    if taicpu(reload).opsize <> taicpu(p).opsize then
      exit;
    if not refsEqual(taicpu(p).oper[0]^.ref^, taicpu(reload).oper[0]^.ref^) then
      exit;

    { --- only valid for extended values right before leaving --- }
    if taicpu(p).opsize <> S_FX then
      exit;
    if not getNextInstruction(reload, tail) then
      exit;
    if tail.typ <> ait_instruction then
      exit;
    if (taicpu(tail).opcode <> A_LEAVE) and
       (taicpu(tail).opcode <> A_RET) then
      exit;
    if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
      exit;
    if assigned(current_procinfo.procdef.funcretsym) and
       (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
      exit;
    if taicpu(p).oper[0]^.ref^.index <> NR_NO then
      exit;

    { --- drop both instructions and resume at the leave/ret --- }
    asml.remove(p);
    asml.remove(reload);
    p.free;
    reload.free;
    p := tail;
    removeLastDeallocForFuncRes(asmL, p);
    result := true;
  end;
  { Returns true if at least one operand of p is a memory reference whose
    segment register is set (i.e. differs from NR_NO). }
  function InsContainsSegRef(p: taicpu): boolean;
  var
    idx: longint;
  begin
    result := false;
    idx := 0;
    while idx < p.opercnt do
      begin
        if p.oper[idx]^.typ = top_ref then
          if p.oper[idx]^.ref^.segment <> NR_NO then
            begin
              result := true;
              exit;
            end;
        inc(idx);
      end;
  end;
  procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  { Pre-pass of the peephole optimizer.  Walks the items from BlockStart up
    to (not including) BlockEnd and rewrites instructions in place.
    Instructions having a memory operand with a segment register are
    skipped.  Handled patterns:

      * imul $const, %reg [, %reg2]  (32 bit only)
          - const = 1: removed (2 operands) or turned into a mov (3 operands)
          - const in 3,5,9: replaced by a single lea
          - const in 6,10,12: replaced by a two-instruction lea/add sequence,
            only when optimizing for a cpu <= cpu_386
        The lea forms are skipped when optimizing for size or when the next
        instruction is a jo/jno (lea does not set the overflow flag).

      * shr/sar $c1, x followed by shl $c2, x
          replaced by a shorter shift combined with an "and" mask (or only
          the "and" if c1 = c2).

      * xor %reg, %reg
          temporarily turned into "mov $0, %reg" to make things easier for
          the CSE; changed back in pass 2. }
  var
    p,hp1: tai;
    l: aint;
    tmpRef: treference;

    { Replaces the imul at p by "lea (reg1,reg1,scale), dest", where dest is
      reg1 for the 2-operand form and reg2 for the 3-operand form.
      On exit p points to the new lea. }
    procedure ReplaceImulBySingleLea(scale: byte);
    begin
      tmpRef.base := taicpu(p).oper[1]^.reg;
      tmpRef.index := taicpu(p).oper[1]^.reg;
      tmpRef.scalefactor := scale;
      if (taicpu(p).ops = 2) then
        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg)
      else
        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
      InsertLLItem(asml, p.previous, p.next, hp1);
      p.free;
      p := hp1;
    end;

    { Loads into operand 0 of instr the mask "lowBits xor all-ones", with
      all-ones sized by the operand size of the instruction at p (which is
      the same as that of the following shl).  Other operand sizes leave the
      operand untouched. }
    procedure LoadInvertedMask(instr: taicpu; lowBits: aint);
    begin
      case taicpu(p).opsize of
        S_L: instr.loadConst(0,lowBits xor aint($ffffffff));
        S_B: instr.loadConst(0,lowBits xor $ff);
        S_W: instr.loadConst(0,lowBits xor $ffff);
      end;
    end;

  begin
    p := BlockStart;
    while (p <> BlockEnd) do
      begin
        case p.typ of
          ait_instruction:
            begin
              if InsContainsSegRef(taicpu(p)) then
                begin
                  p := tai(p.next);
                  continue;
                end;
              case taicpu(p).opcode of
                A_IMUL:
                  { changes certain "imul const, %reg"'s to lea sequences }
                  begin
                    if (taicpu(p).oper[0]^.typ = top_const) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       (taicpu(p).opsize = S_L) then
                      if (taicpu(p).oper[0]^.val = 1) then
                        if (taicpu(p).ops = 2) then
                          { remove "imul $1, reg" }
                          begin
                            hp1 := tai(p.next);
                            asml.remove(p);
                            p.free;
                            p := hp1;
                            continue;
                          end
                        else
                          { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
                          begin
                            hp1 := taicpu.op_reg_reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                            InsertLLItem(asml, p.previous, p.next, hp1);
                            p.free;
                            p := hp1;
                          end
                      else if
                        ((taicpu(p).ops <= 2) or
                         (taicpu(p).oper[2]^.typ = top_reg)) and
                        (taicpu(p).oper[0]^.val <= 12) and
                        not(cs_opt_size in current_settings.optimizerswitches) and
                        { lea does not set the overflow flag, so keep the imul
                          when it is followed by an overflow check }
                        (not(GetNextInstruction(p, hp1)) or
                         not((tai(hp1).typ = ait_instruction) and
                             ((taicpu(hp1).opcode=A_Jcc) and
                              (taicpu(hp1).condition in [C_O,C_NO])))) then
                        begin
                          reference_reset(tmpRef,1);
                          case taicpu(p).oper[0]^.val of
                            3:
                              { imul 3, reg1, reg2 to
                                  lea (reg1,reg1,2), reg2
                                imul 3, reg1 to
                                  lea (reg1,reg1,2), reg1 }
                              ReplaceImulBySingleLea(2);
                            5:
                              { imul 5, reg1, reg2 to
                                  lea (reg1,reg1,4), reg2
                                imul 5, reg1 to
                                  lea (reg1,reg1,4), reg1 }
                              ReplaceImulBySingleLea(4);
                            6:
                              { imul 6, reg1, reg2 to
                                  lea (,reg1,2), reg2
                                  lea (reg2,reg1,4), reg2
                                imul 6, reg1 to
                                  lea (reg1,reg1,2), reg1
                                  add reg1, reg1 }
                              if (current_settings.optimizecputype <= cpu_386) then
                                begin
                                  { second instruction of the sequence, inserted after p }
                                  tmpRef.index := taicpu(p).oper[1]^.reg;
                                  if (taicpu(p).ops = 3) then
                                    begin
                                      tmpRef.base := taicpu(p).oper[2]^.reg;
                                      tmpRef.scalefactor := 4;
                                      { the result must end up in reg2 (operand 2);
                                        writing it to reg1 would clobber the source
                                        and leave reg2 = 2*reg1 }
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
                                    end
                                  else
                                    begin
                                      hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                        taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                    end;
                                  InsertLLItem(asml,p, p.next, hp1);
                                  { first instruction of the sequence, replaces p }
                                  reference_reset(tmpRef,2);
                                  tmpRef.index := taicpu(p).oper[1]^.reg;
                                  tmpRef.scalefactor := 2;
                                  if (taicpu(p).ops = 3) then
                                    begin
                                      tmpRef.base := NR_NO;
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef,
                                        taicpu(p).oper[2]^.reg);
                                    end
                                  else
                                    begin
                                      tmpRef.base := taicpu(p).oper[1]^.reg;
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg);
                                    end;
                                  InsertLLItem(asml,p.previous, p.next, hp1);
                                  p.free;
                                  p := tai(hp1.next);
                                end;
                            9:
                              { imul 9, reg1, reg2 to
                                  lea (reg1,reg1,8), reg2
                                imul 9, reg1 to
                                  lea (reg1,reg1,8), reg1 }
                              ReplaceImulBySingleLea(8);
                            10:
                              { imul 10, reg1, reg2 to
                                  lea (reg1,reg1,4), reg2
                                  add reg2, reg2
                                imul 10, reg1 to
                                  lea (reg1,reg1,4), reg1
                                  add reg1, reg1 }
                              if (current_settings.optimizecputype <= cpu_386) then
                                begin
                                  { the add, inserted after p }
                                  if (taicpu(p).ops = 3) then
                                    hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                      taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                  else
                                    hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                      taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                  InsertLLItem(asml,p, p.next, hp1);
                                  { the lea, replaces p }
                                  tmpRef.base := taicpu(p).oper[1]^.reg;
                                  tmpRef.index := taicpu(p).oper[1]^.reg;
                                  tmpRef.scalefactor := 4;
                                  if (taicpu(p).ops = 3) then
                                    hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg)
                                  else
                                    hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg);
                                  InsertLLItem(asml,p.previous, p.next, hp1);
                                  p.free;
                                  p := tai(hp1.next);
                                end;
                            12:
                              { imul 12, reg1, reg2 to
                                  lea (,reg1,4), reg2
                                  lea (reg2,reg1,8), reg2
                                imul 12, reg1 to
                                  lea (reg1,reg1,2), reg1
                                  lea (,reg1,4), reg1 }
                              if (current_settings.optimizecputype <= cpu_386) then
                                begin
                                  { second lea, inserted after p }
                                  tmpRef.index := taicpu(p).oper[1]^.reg;
                                  if (taicpu(p).ops = 3) then
                                    begin
                                      tmpRef.base := taicpu(p).oper[2]^.reg;
                                      tmpRef.scalefactor := 8;
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
                                    end
                                  else
                                    begin
                                      tmpRef.base := NR_NO;
                                      tmpRef.scalefactor := 4;
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg);
                                    end;
                                  InsertLLItem(asml,p, p.next, hp1);
                                  { first lea, replaces p }
                                  reference_reset(tmpRef,2);
                                  tmpRef.index := taicpu(p).oper[1]^.reg;
                                  if (taicpu(p).ops = 3) then
                                    begin
                                      tmpRef.base := NR_NO;
                                      tmpRef.scalefactor := 4;
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
                                    end
                                  else
                                    begin
                                      tmpRef.base := taicpu(p).oper[1]^.reg;
                                      tmpRef.scalefactor := 2;
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg);
                                    end;
                                  InsertLLItem(asml,p.previous, p.next, hp1);
                                  p.free;
                                  p := tai(hp1.next);
                                end;
                          end;
                        end;
                  end;
                A_SAR, A_SHR:
                  { changes the code sequence
                      shr/sar const1, x
                      shl const2, x
                    to either "sar/and", "shl/and" or just "and" depending on
                    const1 and const2 }
                  begin
                    if GetNextInstruction(p, hp1) and
                       (tai(hp1).typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_SHL) and
                       (taicpu(p).oper[0]^.typ = top_const) and
                       (taicpu(hp1).oper[0]^.typ = top_const) and
                       (taicpu(hp1).opsize = taicpu(p).opsize) and
                       (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                       OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                      if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                         not(cs_opt_size in current_settings.optimizerswitches) then
                        { const1 > const2: shift right by the difference, then
                          clear the low const2 bits }
                        begin
                          taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                          taicpu(hp1).opcode := A_AND;
                          l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
                          LoadInvertedMask(taicpu(hp1), l);
                        end
                      else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                              not(cs_opt_size in current_settings.optimizerswitches) then
                        { const1 < const2: clear the low const1 bits, then
                          shift left by the difference }
                        begin
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                          taicpu(p).opcode := A_AND;
                          l := (1 shl (taicpu(p).oper[0]^.val))-1;
                          LoadInvertedMask(taicpu(p), l);
                        end
                      else
                        { const1 = const2: only the mask remains }
                        if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                          begin
                            taicpu(p).opcode := A_AND;
                            l := (1 shl (taicpu(p).oper[0]^.val))-1;
                            LoadInvertedMask(taicpu(p), l);
                            asml.remove(hp1);
                            hp1.free;
                          end;
                  end;
                A_XOR:
                  if (taicpu(p).oper[0]^.typ = top_reg) and
                     (taicpu(p).oper[1]^.typ = top_reg) and
                     (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                    { temporarily change this to 'mov reg,0' to make it easier
                      for the CSE. Will be changed back in pass 2 }
                    begin
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).loadConst(0,0);
                    end;
              end;
            end;
        end;
        p := tai(p.next)
      end;
  end;
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  { True when instr is an instruction with opcode "op" whose operand size is
    a member of opsize; an empty opsize set accepts every size. }
  begin
    result := false;
    if instr.typ <> ait_instruction then
      exit;
    if taicpu(instr).opcode <> op then
      exit;
    if opsize = [] then
      result := true
    else
      result := taicpu(instr).opsize in opsize;
  end;
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  { True when instr is an instruction whose opcode is op1 or op2 and whose
    operand size is a member of opsize; an empty opsize set accepts every
    size. }
  begin
    result := false;
    if instr.typ <> ait_instruction then
      exit;
    if (taicpu(instr).opcode <> op1) and
       (taicpu(instr).opcode <> op2) then
      exit;
    if opsize = [] then
      result := true
    else
      result := taicpu(instr).opsize in opsize;
  end;
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  { True when instr is an instruction whose opcode is op1, op2 or op3 and
    whose operand size is a member of opsize; an empty opsize set accepts
    every size. }
  begin
    result := false;
    if instr.typ <> ait_instruction then
      exit;
    if (taicpu(instr).opcode <> op1) and
       (taicpu(instr).opcode <> op2) and
       (taicpu(instr).opcode <> op3) then
      exit;
    if opsize = [] then
      result := true
    else
      result := taicpu(instr).opsize in opsize;
  end;
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  { True when the operand is a register operand holding exactly "reg". }
  begin
    if oper.typ = top_reg then
      result := (oper.reg = reg)
    else
      result := false;
  end;
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  { True when the operand is a constant operand with the value "a". }
  begin
    if oper.typ = top_const then
      result := (oper.val = a)
    else
      result := false;
  end;
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  { Compares two operands.  Operands of different kinds never match.
    Constants are compared by value, registers by register and references
    with RefsEqual.  Two operands of the same but any other kind trigger
    internalerror 2013102801. }
  begin
    if oper1.typ <> oper2.typ then
      begin
        result := false;
        exit;
      end;
    result := true;
    case oper1.typ of
      top_reg:
        result := (oper1.reg = oper2.reg);
      top_const:
        result := (oper1.val = oper2.val);
      top_ref:
        result := RefsEqual(oper1.ref^, oper2.ref^);
      else
        internalerror(2013102801);
    end
  end;
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  { True when ref is a plain register reference: zero offset, scale factor
    0 or 1, no segment, no symbol and no relative symbol, and whose base and
    index registers equal the given ones.  Passing NR_INVALID for base
    and/or index accepts any register in that position. }
  begin
    result := false;
    if ref.offset <> 0 then
      exit;
    if not (ref.scalefactor in [0,1]) then
      exit;
    if (ref.segment <> NR_NO) or
       (ref.symbol <> nil) or
       (ref.relsymbol <> nil) then
      exit;
    if (base <> NR_INVALID) and (ref.base <> base) then
      exit;
    result := (index = NR_INVALID) or (ref.index = index);
  end;
  482. { First pass of peephole optimizations }
  483. procedure PeepHoleOptPass1(Asml: TAsmList; BlockStart, BlockEnd: tai);
  {$ifdef DEBUG_AOPTCPU}
    { Debug build: puts the text s as an assembler comment right before p. }
    procedure DebugMsg(const s: string;p : tai);
    var
      note: tai_comment;
    begin
      note := tai_comment.Create(strpnew(s));
      asml.insertbefore(note, p);
    end;
  {$else DEBUG_AOPTCPU}
    { Regular build: does nothing. }
    procedure DebugMsg(const s: string;p : tai);inline;
    begin
      { intentionally empty }
    end;
  {$endif DEBUG_AOPTCPU}
  494. function WriteOk : Boolean;
  495. begin
  496. writeln('Ok');
  497. Result:=True;
  498. end;
  499. var
  500. l : longint;
  501. p,hp1,hp2 : tai;
  502. hp3,hp4: tai;
  503. v:aint;
  504. TmpRef: TReference;
  505. UsedRegs, TmpUsedRegs: TRegSet;
  506. TmpBool1, TmpBool2: Boolean;
  507. function SkipLabels(hp: tai; var hp2: tai): boolean;
  508. {skips all labels and returns the next "real" instruction}
  509. begin
  510. while assigned(hp.next) and
  511. (tai(hp.next).typ in SkipInstr + [ait_label,ait_align]) Do
  512. hp := tai(hp.next);
  513. if assigned(hp.next) then
  514. begin
  515. SkipLabels := True;
  516. hp2 := tai(hp.next)
  517. end
  518. else
  519. begin
  520. hp2 := hp;
  521. SkipLabels := False
  522. end;
  523. end;
  524. function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
  525. {traces sucessive jumps to their final destination and sets it, e.g.
  526. je l1 je l3
  527. <code> <code>
  528. l1: becomes l1:
  529. je l2 je l3
  530. <code> <code>
  531. l2: l2:
  532. jmp l3 jmp l3
  533. the level parameter denotes how deeep we have already followed the jump,
  534. to avoid endless loops with constructs such as "l5: ; jmp l5" }
  535. var p1, p2: tai;
  536. l: tasmlabel;
  537. function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
  538. begin
  539. FindAnyLabel := false;
  540. while assigned(hp.next) and
  541. (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
  542. hp := tai(hp.next);
  543. if assigned(hp.next) and
  544. (tai(hp.next).typ = ait_label) then
  545. begin
  546. FindAnyLabel := true;
  547. l := tai_label(hp.next).labsym;
  548. end
  549. end;
  550. begin
  551. GetfinalDestination := false;
  552. if level > 20 then
  553. exit;
  554. p1 := dfa.getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
  555. if assigned(p1) then
  556. begin
  557. SkipLabels(p1,p1);
  558. if (tai(p1).typ = ait_instruction) and
  559. (taicpu(p1).is_jmp) then
  560. if { the next instruction after the label where the jump hp arrives}
  561. { is unconditional or of the same type as hp, so continue }
  562. (taicpu(p1).condition in [C_None,hp.condition]) or
  563. { the next instruction after the label where the jump hp arrives}
  564. { is the opposite of hp (so this one is never taken), but after }
  565. { that one there is a branch that will be taken, so perform a }
  566. { little hack: set p1 equal to this instruction (that's what the}
  567. { last SkipLabels is for, only works with short bool evaluation)}
  568. ((taicpu(p1).condition = inverse_cond(hp.condition)) and
  569. SkipLabels(p1,p2) and
  570. (p2.typ = ait_instruction) and
  571. (taicpu(p2).is_jmp) and
  572. (taicpu(p2).condition in [C_None,hp.condition]) and
  573. SkipLabels(p1,p1)) then
  574. begin
  575. { quick check for loops of the form "l5: ; jmp l5 }
  576. if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
  577. tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
  578. exit;
  579. if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
  580. exit;
  581. tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
  582. hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
  583. tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
  584. end
  585. else
  586. if (taicpu(p1).condition = inverse_cond(hp.condition)) then
  587. if not FindAnyLabel(p1,l) then
  588. begin
  589. {$ifdef finaldestdebug}
  590. insertllitem(asml,p1,p1.next,tai_comment.Create(
  591. strpnew('previous label inserted'))));
  592. {$endif finaldestdebug}
  593. current_asmdata.getjumplabel(l);
  594. insertllitem(asml,p1,p1.next,tai_label.Create(l));
  595. tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
  596. hp.oper[0]^.ref^.symbol := l;
  597. l.increfs;
  598. { this won't work, since the new label isn't in the labeltable }
  599. { so it will fail the rangecheck. Labeltable should become a }
  600. { hashtable to support this: }
  601. { GetFinalDestination(asml, hp); }
  602. end
  603. else
  604. begin
  605. {$ifdef finaldestdebug}
  606. insertllitem(asml,p1,p1.next,tai_comment.Create(
  607. strpnew('next label reused'))));
  608. {$endif finaldestdebug}
  609. l.increfs;
  610. hp.oper[0]^.ref^.symbol := l;
  611. if not GetFinalDestination(asml, hp,succ(level)) then
  612. exit;
  613. end;
  614. end;
  615. GetFinalDestination := true;
  616. end;
  617. function DoSubAddOpt(var p: tai): Boolean;
  618. begin
  619. DoSubAddOpt := False;
  620. if GetLastInstruction(p, hp1) and
  621. (hp1.typ = ait_instruction) and
  622. (taicpu(hp1).opsize = taicpu(p).opsize) then
  623. case taicpu(hp1).opcode Of
  624. A_DEC:
  625. if (taicpu(hp1).oper[0]^.typ = top_reg) and
  626. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  627. begin
  628. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
  629. asml.remove(hp1);
  630. hp1.free;
  631. end;
  632. A_SUB:
  633. if (taicpu(hp1).oper[0]^.typ = top_const) and
  634. (taicpu(hp1).oper[1]^.typ = top_reg) and
  635. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  636. begin
  637. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
  638. asml.remove(hp1);
  639. hp1.free;
  640. end;
  641. A_ADD:
  642. if (taicpu(hp1).oper[0]^.typ = top_const) and
  643. (taicpu(hp1).oper[1]^.typ = top_reg) and
  644. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  645. begin
  646. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
  647. asml.remove(hp1);
  648. hp1.free;
  649. if (taicpu(p).oper[0]^.val = 0) then
  650. begin
  651. hp1 := tai(p.next);
  652. asml.remove(p);
  653. p.free;
  654. if not GetLastInstruction(hp1, p) then
  655. p := hp1;
  656. DoSubAddOpt := True;
  657. end
  658. end;
  659. end;
  660. end;
  661. begin
  662. p := BlockStart;
  663. UsedRegs := [];
  664. while (p <> BlockEnd) Do
  665. begin
  666. UpDateUsedRegs(UsedRegs, tai(p.next));
  667. case p.Typ Of
  668. ait_instruction:
  669. begin
  670. current_filepos:=taicpu(p).fileinfo;
  671. if InsContainsSegRef(taicpu(p)) then
  672. begin
  673. p := tai(p.next);
  674. continue;
  675. end;
  676. { Handle Jmp Optimizations }
  677. if taicpu(p).is_jmp then
  678. begin
  679. {the following if-block removes all code between a jmp and the next label,
  680. because it can never be executed}
  681. if (taicpu(p).opcode = A_JMP) then
  682. begin
  683. hp2:=p;
  684. while GetNextInstruction(hp2, hp1) and
  685. (hp1.typ <> ait_label) do
  686. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  687. begin
  688. { don't kill start/end of assembler block,
  689. no-line-info-start/end etc }
  690. if hp1.typ<>ait_marker then
  691. begin
  692. asml.remove(hp1);
  693. hp1.free;
  694. end
  695. else
  696. hp2:=hp1;
  697. end
  698. else break;
  699. end;
  700. { remove jumps to a label coming right after them }
  701. if GetNextInstruction(p, hp1) then
  702. begin
  703. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  704. { TODO: FIXME removing the first instruction fails}
  705. (p<>blockstart) then
  706. begin
  707. hp2:=tai(hp1.next);
  708. asml.remove(p);
  709. p.free;
  710. p:=hp2;
  711. continue;
  712. end
  713. else
  714. begin
  715. if hp1.typ = ait_label then
  716. SkipLabels(hp1,hp1);
  717. if (tai(hp1).typ=ait_instruction) and
  718. (taicpu(hp1).opcode=A_JMP) and
  719. GetNextInstruction(hp1, hp2) and
  720. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  721. begin
  722. if taicpu(p).opcode=A_Jcc then
  723. begin
  724. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  725. tai_label(hp2).labsym.decrefs;
  726. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  727. { when free'ing hp1, the ref. isn't decresed, so we don't
  728. increase it (FK)
  729. taicpu(p).oper[0]^.ref^.symbol.increfs;
  730. }
  731. asml.remove(hp1);
  732. hp1.free;
  733. GetFinalDestination(asml, taicpu(p),0);
  734. end
  735. else
  736. begin
  737. GetFinalDestination(asml, taicpu(p),0);
  738. p:=tai(p.next);
  739. continue;
  740. end;
  741. end
  742. else
  743. GetFinalDestination(asml, taicpu(p),0);
  744. end;
  745. end;
  746. end
  747. else
  748. { All other optimizes }
  749. begin
  750. for l := 0 to taicpu(p).ops-1 Do
  751. if (taicpu(p).oper[l]^.typ = top_ref) then
  752. With taicpu(p).oper[l]^.ref^ Do
  753. begin
  754. if (base = NR_NO) and
  755. (index <> NR_NO) and
  756. (scalefactor in [0,1]) then
  757. begin
  758. base := index;
  759. index := NR_NO
  760. end
  761. end;
  762. case taicpu(p).opcode Of
  763. A_AND:
  764. begin
  765. if (taicpu(p).oper[0]^.typ = top_const) and
  766. (taicpu(p).oper[1]^.typ = top_reg) and
  767. GetNextInstruction(p, hp1) and
  768. (tai(hp1).typ = ait_instruction) and
  769. (taicpu(hp1).opcode = A_AND) and
  770. (taicpu(hp1).oper[0]^.typ = top_const) and
  771. (taicpu(hp1).oper[1]^.typ = top_reg) and
  772. (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  773. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) then
  774. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  775. begin
  776. taicpu(hp1).loadConst(0,taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  777. asml.remove(p);
  778. p.free;
  779. p:=hp1;
  780. end
  781. else
  782. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  783. jump, but only if it's a conditional jump (PFV) }
  784. if (taicpu(p).oper[1]^.typ = top_reg) and
  785. GetNextInstruction(p, hp1) and
  786. (hp1.typ = ait_instruction) and
  787. (taicpu(hp1).is_jmp) and
  788. (taicpu(hp1).opcode<>A_JMP) and
  789. not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then
  790. taicpu(p).opcode := A_TEST;
  791. end;
  792. A_CMP:
  793. begin
  794. { cmp register,$8000 neg register
  795. je target --> jo target
  796. .... only if register is deallocated before jump.}
  797. case Taicpu(p).opsize of
  798. S_B: v:=$80;
  799. S_W: v:=$8000;
  800. S_L: v:=aint($80000000);
  801. else
  802. internalerror(2013112905);
  803. end;
  804. if (taicpu(p).oper[0]^.typ=Top_const) and
  805. (taicpu(p).oper[0]^.val=v) and
  806. (Taicpu(p).oper[1]^.typ=top_reg) and
  807. GetNextInstruction(p, hp1) and
  808. (hp1.typ=ait_instruction) and
  809. (taicpu(hp1).opcode=A_Jcc) and
  810. (Taicpu(hp1).condition in [C_E,C_NE]) and
  811. not(getsupreg(Taicpu(p).oper[1]^.reg) in usedregs) then
  812. begin
  813. Taicpu(p).opcode:=A_NEG;
  814. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  815. Taicpu(p).clearop(1);
  816. Taicpu(p).ops:=1;
  817. if Taicpu(hp1).condition=C_E then
  818. Taicpu(hp1).condition:=C_O
  819. else
  820. Taicpu(hp1).condition:=C_NO;
  821. continue;
  822. end;
  823. {
  824. @@2: @@2:
  825. .... ....
  826. cmp operand1,0
  827. jle/jbe @@1
  828. dec operand1 --> sub operand1,1
  829. jmp @@2 jge/jae @@2
  830. @@1: @@1:
  831. ... ....}
  832. if (taicpu(p).oper[0]^.typ = top_const) and
  833. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  834. (taicpu(p).oper[0]^.val = 0) and
  835. GetNextInstruction(p, hp1) and
  836. (hp1.typ = ait_instruction) and
  837. (taicpu(hp1).is_jmp) and
  838. (taicpu(hp1).opcode=A_Jcc) and
  839. (taicpu(hp1).condition in [C_LE,C_BE]) and
  840. GetNextInstruction(hp1,hp2) and
  841. (hp2.typ = ait_instruction) and
  842. (taicpu(hp2).opcode = A_DEC) and
  843. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  844. GetNextInstruction(hp2, hp3) and
  845. (hp3.typ = ait_instruction) and
  846. (taicpu(hp3).is_jmp) and
  847. (taicpu(hp3).opcode = A_JMP) and
  848. GetNextInstruction(hp3, hp4) and
  849. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  850. begin
  851. taicpu(hp2).Opcode := A_SUB;
  852. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  853. taicpu(hp2).loadConst(0,1);
  854. taicpu(hp2).ops:=2;
  855. taicpu(hp3).Opcode := A_Jcc;
  856. case taicpu(hp1).condition of
  857. C_LE: taicpu(hp3).condition := C_GE;
  858. C_BE: taicpu(hp3).condition := C_AE;
  859. end;
  860. asml.remove(p);
  861. asml.remove(hp1);
  862. p.free;
  863. hp1.free;
  864. p := hp2;
  865. continue;
  866. end
  867. end;
  868. A_FLD:
  869. begin
  870. if (taicpu(p).oper[0]^.typ = top_reg) and
  871. GetNextInstruction(p, hp1) and
  872. (hp1.typ = Ait_Instruction) and
  873. (taicpu(hp1).oper[0]^.typ = top_reg) and
  874. (taicpu(hp1).oper[1]^.typ = top_reg) and
  875. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  876. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  877. { change to
  878. fld reg fxxx reg,st
  879. fxxxp st, st1 (hp1)
  880. Remark: non commutative operations must be reversed!
  881. }
  882. begin
  883. case taicpu(hp1).opcode Of
  884. A_FMULP,A_FADDP,
  885. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  886. begin
  887. case taicpu(hp1).opcode Of
  888. A_FADDP: taicpu(hp1).opcode := A_FADD;
  889. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  890. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  891. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  892. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  893. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  894. end;
  895. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  896. taicpu(hp1).oper[1]^.reg := NR_ST;
  897. asml.remove(p);
  898. p.free;
  899. p := hp1;
  900. continue;
  901. end;
  902. end;
  903. end
  904. else
  905. if (taicpu(p).oper[0]^.typ = top_ref) and
  906. GetNextInstruction(p, hp2) and
  907. (hp2.typ = Ait_Instruction) and
  908. (taicpu(hp2).ops = 2) and
  909. (taicpu(hp2).oper[0]^.typ = top_reg) and
  910. (taicpu(hp2).oper[1]^.typ = top_reg) and
  911. (taicpu(p).opsize in [S_FS, S_FL]) and
  912. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  913. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  914. if GetLastInstruction(p, hp1) and
  915. (hp1.typ = Ait_Instruction) and
  916. ((taicpu(hp1).opcode = A_FLD) or
  917. (taicpu(hp1).opcode = A_FST)) and
  918. (taicpu(hp1).opsize = taicpu(p).opsize) and
  919. (taicpu(hp1).oper[0]^.typ = top_ref) and
  920. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  921. if ((taicpu(hp2).opcode = A_FMULP) or
  922. (taicpu(hp2).opcode = A_FADDP)) then
  923. { change to
  924. fld/fst mem1 (hp1) fld/fst mem1
  925. fld mem1 (p) fadd/
  926. faddp/ fmul st, st
  927. fmulp st, st1 (hp2) }
  928. begin
  929. asml.remove(p);
  930. p.free;
  931. p := hp1;
  932. if (taicpu(hp2).opcode = A_FADDP) then
  933. taicpu(hp2).opcode := A_FADD
  934. else
  935. taicpu(hp2).opcode := A_FMUL;
  936. taicpu(hp2).oper[1]^.reg := NR_ST;
  937. end
  938. else
  939. { change to
  940. fld/fst mem1 (hp1) fld/fst mem1
  941. fld mem1 (p) fld st}
  942. begin
  943. taicpu(p).changeopsize(S_FL);
  944. taicpu(p).loadreg(0,NR_ST);
  945. end
  946. else
  947. begin
  948. case taicpu(hp2).opcode Of
  949. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  950. { change to
  951. fld/fst mem1 (hp1) fld/fst mem1
  952. fld mem2 (p) fxxx mem2
  953. fxxxp st, st1 (hp2) }
  954. begin
  955. case taicpu(hp2).opcode Of
  956. A_FADDP: taicpu(p).opcode := A_FADD;
  957. A_FMULP: taicpu(p).opcode := A_FMUL;
  958. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  959. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  960. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  961. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  962. end;
  963. asml.remove(hp2);
  964. hp2.free;
  965. end
  966. end
  967. end
  968. end;
  969. A_FSTP,A_FISTP:
  970. if doFpuLoadStoreOpt(asmL,p) then
  971. continue;
  972. A_LEA:
  973. begin
  974. {removes seg register prefixes from LEA operations, as they
  975. don't do anything}
  976. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  977. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  978. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  979. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  980. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  981. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  982. begin
  983. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  984. (taicpu(p).oper[0]^.ref^.offset = 0) then
  985. begin
  986. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  987. taicpu(p).oper[1]^.reg);
  988. InsertLLItem(asml,p.previous,p.next, hp1);
  989. p.free;
  990. p := hp1;
  991. continue;
  992. end
  993. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  994. begin
  995. hp1 := tai(p.Next);
  996. asml.remove(p);
  997. p.free;
  998. p := hp1;
  999. continue;
  1000. end
  1001. { continue to use lea to adjust the stack pointer,
  1002. it is the recommended way, but only if not optimizing for size }
  1003. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  1004. (cs_opt_size in current_settings.optimizerswitches) then
  1005. with taicpu(p).oper[0]^.ref^ do
  1006. if (base = taicpu(p).oper[1]^.reg) then
  1007. begin
  1008. l := offset;
  1009. if (l=1) and UseIncDec then
  1010. begin
  1011. taicpu(p).opcode := A_INC;
  1012. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1013. taicpu(p).ops := 1
  1014. end
  1015. else if (l=-1) and UseIncDec then
  1016. begin
  1017. taicpu(p).opcode := A_DEC;
  1018. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1019. taicpu(p).ops := 1;
  1020. end
  1021. else
  1022. begin
  1023. if (l<0) and (l<>-2147483648) then
  1024. begin
  1025. taicpu(p).opcode := A_SUB;
  1026. taicpu(p).loadConst(0,-l);
  1027. end
  1028. else
  1029. begin
  1030. taicpu(p).opcode := A_ADD;
  1031. taicpu(p).loadConst(0,l);
  1032. end;
  1033. end;
  1034. end;
  1035. end
  1036. (*
  1037. This is unsafe, lea doesn't modify the flags but "add"
  1038. does. This breaks webtbs/tw15694.pp. The above
  1039. transformations are also unsafe, but they don't seem to
  1040. be triggered by code that FPC generates (or that at
  1041. least does not occur in the tests...). This needs to be
  1042. fixed by checking for the liveness of the flags register.
  1043. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1044. begin
  1045. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1046. taicpu(p).oper[0]^.ref^.base);
  1047. InsertLLItem(asml,p.previous,p.next, hp1);
  1048. DebugMsg('Peephole Lea2AddBase done',hp1);
  1049. p.free;
  1050. p:=hp1;
  1051. continue;
  1052. end
  1053. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1054. begin
  1055. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1056. taicpu(p).oper[0]^.ref^.index);
  1057. InsertLLItem(asml,p.previous,p.next,hp1);
  1058. DebugMsg('Peephole Lea2AddIndex done',hp1);
  1059. p.free;
  1060. p:=hp1;
  1061. continue;
  1062. end
  1063. *)
  1064. end;
  1065. A_MOV:
  1066. begin
  1067. TmpUsedRegs := UsedRegs;
  1068. if (taicpu(p).oper[1]^.typ = top_reg) and
  1069. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  1070. GetNextInstruction(p, hp1) and
  1071. (tai(hp1).typ = ait_instruction) and
  1072. (taicpu(hp1).opcode = A_MOV) and
  1073. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1074. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  1075. begin
  1076. {we have "mov x, %treg; mov %treg, y"}
  1077. if not(RegInOp(getsupreg(taicpu(p).oper[1]^.reg),taicpu(hp1).oper[1]^)) and
  1078. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1079. {we've got "mov x, %treg; mov %treg, y" with %treg not used afterwards}
  1080. case taicpu(p).oper[0]^.typ Of
  1081. top_reg:
  1082. begin
  1083. { change "mov %reg, %treg; mov %treg, y"
  1084. to "mov %reg, y" }
  1085. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1086. asml.remove(hp1);
  1087. hp1.free;
  1088. continue;
  1089. end;
  1090. top_ref:
  1091. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  1092. begin
  1093. { change "mov mem, %treg; mov %treg, %reg"
  1094. to "mov mem, %reg" }
  1095. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1096. asml.remove(hp1);
  1097. hp1.free;
  1098. continue;
  1099. end;
  1100. end
  1101. end
  1102. else
  1103. {Change "mov %reg1, %reg2; xxx %reg2, ???" to
  1104. "mov %reg1, %reg2; xxx %reg1, ???" to avoid a write/read
  1105. penalty}
  1106. if (taicpu(p).oper[0]^.typ = top_reg) and
  1107. (taicpu(p).oper[1]^.typ = top_reg) and
  1108. GetNextInstruction(p,hp1) and
  1109. (tai(hp1).typ = ait_instruction) and
  1110. (taicpu(hp1).ops >= 1) and
  1111. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1112. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  1113. {we have "mov %reg1, %reg2; XXX %reg2, ???"}
  1114. begin
  1115. if ((taicpu(hp1).opcode = A_OR) or
  1116. (taicpu(hp1).opcode = A_TEST)) and
  1117. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1118. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  1119. {we have "mov %reg1, %reg2; test/or %reg2, %reg2"}
  1120. begin
  1121. TmpUsedRegs := UsedRegs;
  1122. { reg1 will be used after the first instruction, }
  1123. { so update the allocation info }
  1124. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1125. if GetNextInstruction(hp1, hp2) and
  1126. (hp2.typ = ait_instruction) and
  1127. taicpu(hp2).is_jmp and
  1128. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  1129. { change "mov %reg1, %reg2; test/or %reg2, %reg2; jxx" to
  1130. "test %reg1, %reg1; jxx" }
  1131. begin
  1132. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1133. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1134. asml.remove(p);
  1135. p.free;
  1136. p := hp1;
  1137. continue
  1138. end
  1139. else
  1140. {change "mov %reg1, %reg2; test/or %reg2, %reg2" to
  1141. "mov %reg1, %reg2; test/or %reg1, %reg1"}
  1142. begin
  1143. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1144. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1145. end;
  1146. end
  1147. { else
  1148. if (taicpu(p.next)^.opcode
  1149. in [A_PUSH, A_OR, A_XOR, A_AND, A_TEST])}
  1150. {change "mov %reg1, %reg2; push/or/xor/... %reg2, ???" to
  1151. "mov %reg1, %reg2; push/or/xor/... %reg1, ???"}
  1152. end
  1153. else
  1154. {leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1155. x >= RetOffset) as it doesn't do anything (it writes either to a
  1156. parameter or to the temporary storage room for the function
  1157. result)}
  1158. if GetNextInstruction(p, hp1) and
  1159. (tai(hp1).typ = ait_instruction) then
  1160. if ((taicpu(hp1).opcode = A_LEAVE) or
  1161. (taicpu(hp1).opcode = A_RET)) and
  1162. (taicpu(p).oper[1]^.typ = top_ref) and
  1163. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1164. not(assigned(current_procinfo.procdef.funcretsym) and
  1165. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1166. (taicpu(p).oper[1]^.ref^.index = NR_NO) and
  1167. (taicpu(p).oper[0]^.typ = top_reg) then
  1168. begin
  1169. asml.remove(p);
  1170. p.free;
  1171. p := hp1;
  1172. RemoveLastDeallocForFuncRes(asmL,p);
  1173. end
  1174. else
  1175. if (taicpu(p).oper[0]^.typ = top_reg) and
  1176. (taicpu(p).oper[1]^.typ = top_ref) and
  1177. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1178. (taicpu(hp1).opcode = A_CMP) and
  1179. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1180. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1181. {change "mov reg1, mem1; cmp x, mem1" to "mov reg1, mem1; cmp x, reg1"}
  1182. begin
  1183. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1184. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1185. end;
  1186. { Next instruction is also a MOV ? }
  1187. if GetNextInstruction(p, hp1) and
  1188. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  1189. begin
  1190. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1191. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1192. {mov reg1, mem1 or mov mem1, reg1
  1193. mov mem2, reg2 mov reg2, mem2}
  1194. begin
  1195. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1196. {mov reg1, mem1 or mov mem1, reg1
  1197. mov mem2, reg1 mov reg2, mem1}
  1198. begin
  1199. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1200. { Removes the second statement from
  1201. mov reg1, mem1/reg2
  1202. mov mem1/reg2, reg1 }
  1203. begin
  1204. if (taicpu(p).oper[0]^.typ = top_reg) then
  1205. AllocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1206. asml.remove(hp1);
  1207. hp1.free;
  1208. end
  1209. else
  1210. begin
  1211. TmpUsedRegs := UsedRegs;
  1212. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1213. if (taicpu(p).oper[1]^.typ = top_ref) and
  1214. { mov reg1, mem1
  1215. mov mem2, reg1 }
  1216. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1217. GetNextInstruction(hp1, hp2) and
  1218. (hp2.typ = ait_instruction) and
  1219. (taicpu(hp2).opcode = A_CMP) and
  1220. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1221. (taicpu(hp2).oper[0]^.typ = TOp_Ref) and
  1222. (taicpu(hp2).oper[1]^.typ = TOp_Reg) and
  1223. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(p).oper[1]^.ref^) and
  1224. (taicpu(hp2).oper[1]^.reg= taicpu(p).oper[0]^.reg) and
  1225. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1226. { change to
  1227. mov reg1, mem1 mov reg1, mem1
  1228. mov mem2, reg1 cmp reg1, mem2
  1229. cmp mem1, reg1 }
  1230. begin
  1231. asml.remove(hp2);
  1232. hp2.free;
  1233. taicpu(hp1).opcode := A_CMP;
  1234. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1235. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1236. end;
  1237. end;
  1238. end
  1239. else
  1240. begin
  1241. tmpUsedRegs := UsedRegs;
  1242. if GetNextInstruction(hp1, hp2) and
  1243. (taicpu(p).oper[0]^.typ = top_ref) and
  1244. (taicpu(p).oper[1]^.typ = top_reg) and
  1245. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1246. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  1247. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1248. (tai(hp2).typ = ait_instruction) and
  1249. (taicpu(hp2).opcode = A_MOV) and
  1250. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1251. (taicpu(hp2).oper[1]^.typ = top_reg) and
  1252. (taicpu(hp2).oper[0]^.typ = top_ref) and
  1253. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1254. if not regInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^) and
  1255. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1256. { mov mem1, %reg1
  1257. mov %reg1, mem2
  1258. mov mem2, reg2
  1259. to:
  1260. mov mem1, reg2
  1261. mov reg2, mem2}
  1262. begin
  1263. AllocRegBetween(asmL,taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1264. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1265. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1266. asml.remove(hp2);
  1267. hp2.free;
  1268. end
  1269. else
  1270. if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1271. not(RegInRef(getsupreg(taicpu(p).oper[1]^.reg),taicpu(p).oper[0]^.ref^)) and
  1272. not(RegInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^)) then
  1273. { mov mem1, reg1 mov mem1, reg1
  1274. mov reg1, mem2 mov reg1, mem2
  1275. mov mem2, reg2 mov mem2, reg1
  1276. to: to:
  1277. mov mem1, reg1 mov mem1, reg1
  1278. mov mem1, reg2 mov reg1, mem2
  1279. mov reg1, mem2
  1280. or (if mem1 depends on reg1
  1281. and/or if mem2 depends on reg2)
  1282. to:
  1283. mov mem1, reg1
  1284. mov reg1, mem2
  1285. mov reg1, reg2
  1286. }
  1287. begin
  1288. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1289. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1290. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1291. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1292. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1293. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1294. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1295. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1296. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1297. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1298. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1299. end
  1300. else
  1301. if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1302. begin
  1303. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1304. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1305. end
  1306. else
  1307. begin
  1308. asml.remove(hp2);
  1309. hp2.free;
  1310. end
  1311. end
  1312. end
  1313. else
  1314. (* {movl [mem1],reg1
  1315. movl [mem1],reg2
  1316. to:
  1317. movl [mem1],reg1
  1318. movl reg1,reg2 }
  1319. if (taicpu(p).oper[0]^.typ = top_ref) and
  1320. (taicpu(p).oper[1]^.typ = top_reg) and
  1321. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1322. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1323. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1324. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1325. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1326. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1327. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1328. else*)
  1329. { movl const1,[mem1]
  1330. movl [mem1],reg1
  1331. to:
  1332. movl const1,reg1
  1333. movl reg1,[mem1] }
  1334. if (taicpu(p).oper[0]^.typ = top_const) and
  1335. (taicpu(p).oper[1]^.typ = top_ref) and
  1336. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1337. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1338. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1339. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1340. not(reginref(getsupreg(taicpu(hp1).oper[1]^.reg),taicpu(hp1).oper[0]^.ref^)) then
  1341. begin
  1342. allocregbetween(asml,taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1343. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1344. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1345. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1346. end
  1347. end;
  1348. if GetNextInstruction(p, hp1) and
  1349. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1350. GetNextInstruction(hp1, hp2) and
  1351. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1352. MatchOperand(Taicpu(p).oper[0]^,0) and
  1353. (Taicpu(p).oper[1]^.typ = top_reg) and
  1354. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1355. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1356. {mov reg1,0
  1357. bts reg1,operand1 --> mov reg1,operand2
  1358. or reg1,operand2 bts reg1,operand1}
  1359. begin
  1360. Taicpu(hp2).opcode:=A_MOV;
  1361. asml.remove(hp1);
  1362. insertllitem(asml,hp2,hp2.next,hp1);
  1363. asml.remove(p);
  1364. p.free;
  1365. p:=hp1;
  1366. end;
  1367. if GetNextInstruction(p, hp1) and
  1368. MatchInstruction(hp1,A_LEA,[S_L]) and
  1369. (Taicpu(p).oper[0]^.typ = top_ref) and
  1370. (Taicpu(p).oper[1]^.typ = top_reg) and
  1371. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1372. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1373. ) or
  1374. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1375. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1376. )
  1377. ) then
  1378. {mov reg1,ref
  1379. lea reg2,[reg1,reg2] --> add reg2,ref}
  1380. begin
  1381. TmpUsedRegs := UsedRegs;
  1382. { reg1 may not be used afterwards }
  1383. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1384. begin
  1385. Taicpu(hp1).opcode:=A_ADD;
  1386. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1387. DebugMsg('Peephole MovLea2Add done',hp1);
  1388. asml.remove(p);
  1389. p.free;
  1390. p:=hp1;
  1391. end;
  1392. end;
  1393. end;
  1394. A_MOVSX,
  1395. A_MOVZX :
  1396. begin
  1397. if (taicpu(p).oper[1]^.typ = top_reg) and
  1398. GetNextInstruction(p,hp1) and
  1399. (hp1.typ = ait_instruction) and
  1400. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1401. (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
  1402. GetNextInstruction(hp1,hp2) and
  1403. MatchInstruction(hp2,A_MOV,[]) and
  1404. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1405. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  1406. (((taicpu(hp1).ops=2) and
  1407. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  1408. ((taicpu(hp1).ops=1) and
  1409. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  1410. { reg2 must not be used after the sequence considered, so
  1411. it must be either deallocated or loaded with a new value }
  1412. (GetNextInstruction(hp2,hp3) and
  1413. (FindRegDealloc(getsupreg(taicpu(hp2).oper[0]^.reg),tai(hp3)) or
  1414. RegLoadedWithNewValue(getsupreg(taicpu(hp2).oper[0]^.reg), false, hp3))) then
  1415. { change movsX/movzX reg/ref, reg2 }
  1416. { add/sub/or/... reg3/$const, reg2 }
  1417. { mov reg2, reg/ref }
  1418. { to add/sub/or/... reg3/$const, reg/ref }
  1419. begin
  1420. { by example:
  1421. movswl %si,%eax movswl %si,%eax p
  1422. decl %eax addl %edx,%eax hp1
  1423. movw %ax,%si movw %ax,%si hp2
  1424. ->
  1425. movswl %si,%eax movswl %si,%eax p
  1426. decw %eax addw %edx,%eax hp1
  1427. movw %ax,%si movw %ax,%si hp2
  1428. }
  1429. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1430. {
  1431. ->
  1432. movswl %si,%eax movswl %si,%eax p
  1433. decw %si addw %dx,%si hp1
  1434. movw %ax,%si movw %ax,%si hp2
  1435. }
  1436. case taicpu(hp1).ops of
  1437. 1:
  1438. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1439. 2:
  1440. begin
  1441. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1442. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1443. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1444. end;
  1445. else
  1446. internalerror(2008042701);
  1447. end;
  1448. {
  1449. ->
  1450. decw %si addw %dx,%si p
  1451. }
  1452. asml.remove(p);
  1453. asml.remove(hp2);
  1454. p.free;
  1455. hp2.free;
  1456. p := hp1
  1457. end
  1458. { removes superfluous And's after movzx's }
  1459. else if taicpu(p).opcode=A_MOVZX then
  1460. begin
  1461. if (taicpu(p).oper[1]^.typ = top_reg) and
  1462. GetNextInstruction(p, hp1) and
  1463. (tai(hp1).typ = ait_instruction) and
  1464. (taicpu(hp1).opcode = A_AND) and
  1465. (taicpu(hp1).oper[0]^.typ = top_const) and
  1466. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1467. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1468. case taicpu(p).opsize Of
  1469. S_BL, S_BW:
  1470. if (taicpu(hp1).oper[0]^.val = $ff) then
  1471. begin
  1472. asml.remove(hp1);
  1473. hp1.free;
  1474. end;
  1475. S_WL:
  1476. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1477. begin
  1478. asml.remove(hp1);
  1479. hp1.free;
  1480. end;
  1481. end;
  1482. {changes some movzx constructs to faster synonyms (all examples
  1483. are given with eax/ax, but are also valid for other registers)}
  1484. if (taicpu(p).oper[1]^.typ = top_reg) then
  1485. if (taicpu(p).oper[0]^.typ = top_reg) then
  1486. case taicpu(p).opsize of
  1487. S_BW:
  1488. begin
  1489. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1490. not(cs_opt_size in current_settings.optimizerswitches) then
  1491. {Change "movzbw %al, %ax" to "andw $0xff, %ax"}
  1492. begin
  1493. taicpu(p).opcode := A_AND;
  1494. taicpu(p).changeopsize(S_W);
  1495. taicpu(p).loadConst(0,$ff);
  1496. end
  1497. else if GetNextInstruction(p, hp1) and
  1498. (tai(hp1).typ = ait_instruction) and
  1499. (taicpu(hp1).opcode = A_AND) and
  1500. (taicpu(hp1).oper[0]^.typ = top_const) and
  1501. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1502. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1503. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1504. to "movw %reg1, %reg2; andw $(const and $ff), %reg2"}
  1505. begin
  1506. taicpu(p).opcode := A_MOV;
  1507. taicpu(p).changeopsize(S_W);
  1508. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1509. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1510. end;
  1511. end;
  1512. S_BL:
  1513. begin
  1514. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1515. not(cs_opt_size in current_settings.optimizerswitches) then
  1516. {Change "movzbl %al, %eax" to "andl $0xff, %eax"}
  1517. begin
  1518. taicpu(p).opcode := A_AND;
  1519. taicpu(p).changeopsize(S_L);
  1520. taicpu(p).loadConst(0,$ff)
  1521. end
  1522. else if GetNextInstruction(p, hp1) and
  1523. (tai(hp1).typ = ait_instruction) and
  1524. (taicpu(hp1).opcode = A_AND) and
  1525. (taicpu(hp1).oper[0]^.typ = top_const) and
  1526. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1527. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1528. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1529. to "movl %reg1, %reg2; andl $(const and $ff), %reg2"}
  1530. begin
  1531. taicpu(p).opcode := A_MOV;
  1532. taicpu(p).changeopsize(S_L);
  1533. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1534. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1535. end
  1536. end;
  1537. S_WL:
  1538. begin
  1539. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1540. not(cs_opt_size in current_settings.optimizerswitches) then
  1541. {Change "movzwl %ax, %eax" to "andl $0xffff, %eax"}
  1542. begin
  1543. taicpu(p).opcode := A_AND;
  1544. taicpu(p).changeopsize(S_L);
  1545. taicpu(p).loadConst(0,$ffff);
  1546. end
  1547. else if GetNextInstruction(p, hp1) and
  1548. (tai(hp1).typ = ait_instruction) and
  1549. (taicpu(hp1).opcode = A_AND) and
  1550. (taicpu(hp1).oper[0]^.typ = top_const) and
  1551. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1552. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1553. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1554. to "movl %reg1, %reg2; andl $(const and $ffff), %reg2"}
  1555. begin
  1556. taicpu(p).opcode := A_MOV;
  1557. taicpu(p).changeopsize(S_L);
  1558. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1559. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1560. end;
  1561. end;
  1562. end
  1563. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1564. begin
  1565. if GetNextInstruction(p, hp1) and
  1566. (tai(hp1).typ = ait_instruction) and
  1567. (taicpu(hp1).opcode = A_AND) and
  1568. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1569. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1570. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1571. begin
  1572. taicpu(p).opcode := A_MOV;
  1573. case taicpu(p).opsize Of
  1574. S_BL:
  1575. begin
  1576. taicpu(p).changeopsize(S_L);
  1577. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1578. end;
  1579. S_WL:
  1580. begin
  1581. taicpu(p).changeopsize(S_L);
  1582. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1583. end;
  1584. S_BW:
  1585. begin
  1586. taicpu(p).changeopsize(S_W);
  1587. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1588. end;
  1589. end;
  1590. end;
  1591. end;
  1592. end;
  1593. end;
  1594. (* should not be generated anymore by the current code generator
  1595. A_POP:
  1596. begin
  1597. if target_info.system=system_i386_go32v2 then
  1598. begin
  1599. { Transform a series of pop/pop/pop/push/push/push to }
  1600. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1601. { because I'm not sure whether they can cope with }
  1602. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1603. { such a problem when using esp as frame pointer (JM) }
  1604. if (taicpu(p).oper[0]^.typ = top_reg) then
  1605. begin
  1606. hp1 := p;
  1607. hp2 := p;
  1608. l := 0;
  1609. while getNextInstruction(hp1,hp1) and
  1610. (hp1.typ = ait_instruction) and
  1611. (taicpu(hp1).opcode = A_POP) and
  1612. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1613. begin
  1614. hp2 := hp1;
  1615. inc(l,4);
  1616. end;
  1617. getLastInstruction(p,hp3);
  1618. l1 := 0;
  1619. while (hp2 <> hp3) and
  1620. assigned(hp1) and
  1621. (hp1.typ = ait_instruction) and
  1622. (taicpu(hp1).opcode = A_PUSH) and
  1623. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1624. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1625. begin
  1626. { change it to a two op operation }
  1627. taicpu(hp2).oper[1]^.typ:=top_none;
  1628. taicpu(hp2).ops:=2;
  1629. taicpu(hp2).opcode := A_MOV;
  1630. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1631. reference_reset(tmpref);
  1632. tmpRef.base.enum:=R_INTREGISTER;
  1633. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1634. convert_register_to_enum(tmpref.base);
  1635. tmpRef.offset := l;
  1636. taicpu(hp2).loadRef(0,tmpRef);
  1637. hp4 := hp1;
  1638. getNextInstruction(hp1,hp1);
  1639. asml.remove(hp4);
  1640. hp4.free;
  1641. getLastInstruction(hp2,hp2);
  1642. dec(l,4);
  1643. inc(l1);
  1644. end;
  1645. if l <> -4 then
  1646. begin
  1647. inc(l,4);
  1648. for l1 := l1 downto 1 do
  1649. begin
  1650. getNextInstruction(hp2,hp2);
  1651. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1652. end
  1653. end
  1654. end
  1655. end
  1656. else
  1657. begin
  1658. if (taicpu(p).oper[0]^.typ = top_reg) and
  1659. GetNextInstruction(p, hp1) and
  1660. (tai(hp1).typ=ait_instruction) and
  1661. (taicpu(hp1).opcode=A_PUSH) and
  1662. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1663. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1664. begin
  1665. { change it to a two op operation }
  1666. taicpu(p).oper[1]^.typ:=top_none;
  1667. taicpu(p).ops:=2;
  1668. taicpu(p).opcode := A_MOV;
  1669. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1670. reference_reset(tmpref);
  1671. TmpRef.base.enum := R_ESP;
  1672. taicpu(p).loadRef(0,TmpRef);
  1673. asml.remove(hp1);
  1674. hp1.free;
  1675. end;
  1676. end;
  1677. end;
  1678. *)
  1679. A_PUSH:
  1680. begin
  1681. if (taicpu(p).opsize = S_W) and
  1682. (taicpu(p).oper[0]^.typ = Top_Const) and
  1683. GetNextInstruction(p, hp1) and
  1684. (tai(hp1).typ = ait_instruction) and
  1685. (taicpu(hp1).opcode = A_PUSH) and
  1686. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1687. (taicpu(hp1).opsize = S_W) then
  1688. begin
  1689. taicpu(p).changeopsize(S_L);
  1690. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1691. asml.remove(hp1);
  1692. hp1.free;
  1693. end;
  1694. end;
  1695. A_SHL, A_SAL:
  1696. begin
  1697. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1698. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1699. (taicpu(p).opsize = S_L) and
  1700. (taicpu(p).oper[0]^.val <= 3) then
  1701. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1702. begin
  1703. TmpBool1 := True; {should we check the next instruction?}
  1704. TmpBool2 := False; {have we found an add/sub which could be
  1705. integrated in the lea?}
  1706. reference_reset(tmpref,2);
  1707. TmpRef.index := taicpu(p).oper[1]^.reg;
  1708. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1709. while TmpBool1 and
  1710. GetNextInstruction(p, hp1) and
  1711. (tai(hp1).typ = ait_instruction) and
  1712. ((((taicpu(hp1).opcode = A_ADD) or
  1713. (taicpu(hp1).opcode = A_SUB)) and
  1714. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1715. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1716. (((taicpu(hp1).opcode = A_INC) or
  1717. (taicpu(hp1).opcode = A_DEC)) and
  1718. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1719. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1720. (not GetNextInstruction(hp1,hp2) or
  1721. not instrReadsFlags(hp2)) Do
  1722. begin
  1723. TmpBool1 := False;
  1724. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1725. begin
  1726. TmpBool1 := True;
  1727. TmpBool2 := True;
  1728. case taicpu(hp1).opcode of
  1729. A_ADD:
  1730. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1731. A_SUB:
  1732. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1733. end;
  1734. asml.remove(hp1);
  1735. hp1.free;
  1736. end
  1737. else
  1738. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1739. (((taicpu(hp1).opcode = A_ADD) and
  1740. (TmpRef.base = NR_NO)) or
  1741. (taicpu(hp1).opcode = A_INC) or
  1742. (taicpu(hp1).opcode = A_DEC)) then
  1743. begin
  1744. TmpBool1 := True;
  1745. TmpBool2 := True;
  1746. case taicpu(hp1).opcode of
  1747. A_ADD:
  1748. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1749. A_INC:
  1750. inc(TmpRef.offset);
  1751. A_DEC:
  1752. dec(TmpRef.offset);
  1753. end;
  1754. asml.remove(hp1);
  1755. hp1.free;
  1756. end;
  1757. end;
  1758. if TmpBool2 or
  1759. ((current_settings.optimizecputype < cpu_Pentium2) and
  1760. (taicpu(p).oper[0]^.val <= 3) and
  1761. not(cs_opt_size in current_settings.optimizerswitches)) then
  1762. begin
  1763. if not(TmpBool2) and
  1764. (taicpu(p).oper[0]^.val = 1) then
  1765. begin
  1766. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1767. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1768. end
  1769. else
  1770. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1771. taicpu(p).oper[1]^.reg);
  1772. InsertLLItem(asml,p.previous, p.next, hp1);
  1773. p.free;
  1774. p := hp1;
  1775. end;
  1776. end
  1777. else
  1778. if (current_settings.optimizecputype < cpu_Pentium2) and
  1779. (taicpu(p).oper[0]^.typ = top_const) and
  1780. (taicpu(p).oper[1]^.typ = top_reg) then
  1781. if (taicpu(p).oper[0]^.val = 1) then
  1782. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1783. but faster on a 486, and pairable in both U and V pipes on the Pentium
  1784. (unlike shl, which is only pairable in the U pipe)}
  1785. begin
  1786. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1787. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1788. InsertLLItem(asml,p.previous, p.next, hp1);
  1789. p.free;
  1790. p := hp1;
  1791. end
  1792. else if (taicpu(p).opsize = S_L) and
  1793. (taicpu(p).oper[0]^.val<= 3) then
  1794. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1795. "shl $3, %reg" to "lea (,%reg,8), %reg"}
  1796. begin
  1797. reference_reset(tmpref,2);
  1798. TmpRef.index := taicpu(p).oper[1]^.reg;
  1799. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1800. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1801. InsertLLItem(asml,p.previous, p.next, hp1);
  1802. p.free;
  1803. p := hp1;
  1804. end
  1805. end;
  1806. A_SETcc :
  1807. { changes
  1808. setcc (funcres) setcc reg
  1809. movb (funcres), reg to leave/ret
  1810. leave/ret }
  1811. begin
  1812. if (taicpu(p).oper[0]^.typ = top_ref) and
  1813. GetNextInstruction(p, hp1) and
  1814. GetNextInstruction(hp1, hp2) and
  1815. (hp2.typ = ait_instruction) and
  1816. ((taicpu(hp2).opcode = A_LEAVE) or
  1817. (taicpu(hp2).opcode = A_RET)) and
  1818. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1819. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1820. not(assigned(current_procinfo.procdef.funcretsym) and
  1821. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1822. (hp1.typ = ait_instruction) and
  1823. (taicpu(hp1).opcode = A_MOV) and
  1824. (taicpu(hp1).opsize = S_B) and
  1825. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1826. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1827. begin
  1828. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1829. asml.remove(hp1);
  1830. hp1.free;
  1831. end
  1832. end;
  1833. A_SUB:
  1834. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1835. { * change "sub/add const1, reg" or "dec reg" followed by
  1836. "sub const2, reg" to one "sub ..., reg" }
  1837. begin
  1838. if (taicpu(p).oper[0]^.typ = top_const) and
  1839. (taicpu(p).oper[1]^.typ = top_reg) then
  1840. if (taicpu(p).oper[0]^.val = 2) and
  1841. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1842. { Don't do the sub/push optimization if the sub }
  1843. { comes from setting up the stack frame (JM) }
  1844. (not getLastInstruction(p,hp1) or
  1845. (hp1.typ <> ait_instruction) or
  1846. (taicpu(hp1).opcode <> A_MOV) or
  1847. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1848. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1849. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1850. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1851. begin
  1852. hp1 := tai(p.next);
  1853. while Assigned(hp1) and
  1854. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1855. not regReadByInstruction(RS_ESP,hp1) and
  1856. not regModifiedByInstruction(RS_ESP,hp1) do
  1857. hp1 := tai(hp1.next);
  1858. if Assigned(hp1) and
  1859. (tai(hp1).typ = ait_instruction) and
  1860. (taicpu(hp1).opcode = A_PUSH) and
  1861. (taicpu(hp1).opsize = S_W) then
  1862. begin
  1863. taicpu(hp1).changeopsize(S_L);
  1864. if taicpu(hp1).oper[0]^.typ=top_reg then
  1865. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1866. hp1 := tai(p.next);
  1867. asml.remove(p);
  1868. p.free;
  1869. p := hp1;
  1870. continue
  1871. end;
  1872. if DoSubAddOpt(p) then
  1873. continue;
  1874. end
  1875. else if DoSubAddOpt(p) then
  1876. continue
  1877. end;
  1878. end;
  1879. end; { if is_jmp }
  1880. end;
  1881. end;
  1882. updateUsedRegs(UsedRegs,p);
  1883. p:=tai(p.next);
  1884. end;
  1885. end;
  procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);
  { Second peephole pass.

    Walks the list items from BlockStart up to, but not including, BlockEnd
    and rewrites instructions in place inside asml.  Instructions for which
    InsContainsSegRef returns true are skipped.  The rewrites are:

      * Jcc:   "jb/jnb lab; inc/dec op; lab:" becomes "[cmc;] adc/sbb $0,op"
      * Jcc:   when current_settings.cputype >= cpu_Pentium2, a jump over a
               short run of register-to-register movs (optionally followed by
               "jmp yyy" and a second run) becomes a run of CMOVcc
      * FSTP/FISTP: delegated to doFpuLoadStoreOpt
      * IMUL:  "mov reg1,reg2; imul y,reg2" becomes "imul y,reg1,reg2"
      * MOV:   "mov reg1,reg2; mov/zx/sx (reg2,..),reg2" drops the first mov
      * MOV:   "mov (ref),reg; <arith> x,reg; mov reg,(ref)" is folded into
               one arithmetic instruction on (ref) when reg is not used
               afterwards

    asml       list that owns the instructions; items are removed from and
               inserted into it
    BlockStart first item to examine
    BlockEnd   item at which the walk stops (not examined) }

  {$ifdef DEBUG_AOPTCPU}
    { Debug build: inserts a comment item carrying s in front of p. }
    procedure DebugMsg(const s: string;p : tai);
      begin
        asml.insertbefore(tai_comment.Create(strpnew(s)), p);
      end;
  {$else DEBUG_AOPTCPU}
    { Non-debug build: empty stub so that call sites need no ifdefs. }
    procedure DebugMsg(const s: string;p : tai);inline;
      begin
      end;
  {$endif DEBUG_AOPTCPU}

    { Returns true if p is assigned and is a "mov reg,reg" instruction of
      size S_L or S_W; only such instructions are converted to CMOVcc by the
      Jcc handling below. }
    function CanBeCMOV(p : tai) : boolean;
      begin
        CanBeCMOV:=assigned(p) and (p.typ=ait_instruction) and
          (taicpu(p).opcode=A_MOV) and
          (taicpu(p).opsize in [S_L,S_W]) and
          ((taicpu(p).oper[0]^.typ = top_reg)
            { we can't use cmov ref,reg because
              ref could be nil and cmov still throws an exception
              if ref=nil but the mov isn't done (FK)
             or ((taicpu(p).oper[0]^.typ = top_ref) and
              (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
            }
          ) and
          (taicpu(p).oper[1]^.typ in [top_reg]);
      end;

    var
      p,hp1,hp2,hp3: tai;
      l : longint;
      condition : tasmcond;
      UsedRegs, TmpUsedRegs: TRegSet;
      carryadd_opcode: Tasmop;
    begin
      p := BlockStart;
      UsedRegs := [];
      while (p <> BlockEnd) Do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_Jcc:
                    begin
                      { jb   @@1                         cmc
                        inc/dec operand         -->      adc/sbb operand,0
                        @@1:

                        ... and ...

                        jnb  @@1
                        inc/dec operand         -->      adc/sbb operand,0
                        @@1: }
                      if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
                         GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
                         (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
                        begin
                          carryadd_opcode:=A_NONE;
                          if Taicpu(p).condition in [C_NAE,C_B] then
                            begin
                              if Taicpu(hp1).opcode=A_INC then
                                carryadd_opcode:=A_ADC;
                              if Taicpu(hp1).opcode=A_DEC then
                                carryadd_opcode:=A_SBB;
                              if carryadd_opcode<>A_NONE then
                                begin
                                  { the jump itself is recycled as the cmc }
                                  Taicpu(p).clearop(0);
                                  Taicpu(p).ops:=0;
                                  Taicpu(p).is_jmp:=false;
                                  Taicpu(p).opcode:=A_CMC;
                                  Taicpu(p).condition:=C_NONE;
                                  { inc/dec op  ->  adc/sbb $0,op }
                                  Taicpu(hp1).ops:=2;
                                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                                  Taicpu(hp1).loadconst(0,0);
                                  Taicpu(hp1).opcode:=carryadd_opcode;
                                  continue;
                                end;
                            end;
                          if Taicpu(p).condition in [C_AE,C_NB] then
                            begin
                              if Taicpu(hp1).opcode=A_INC then
                                carryadd_opcode:=A_ADC;
                              if Taicpu(hp1).opcode=A_DEC then
                                carryadd_opcode:=A_SBB;
                              if carryadd_opcode<>A_NONE then
                                begin
                                  { no cmc needed here: just drop the jump }
                                  asml.remove(p);
                                  p.free;
                                  Taicpu(hp1).ops:=2;
                                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                                  Taicpu(hp1).loadconst(0,0);
                                  Taicpu(hp1).opcode:=carryadd_opcode;
                                  p:=hp1;
                                  continue;
                                end;
                            end;
                        end;
                      if (current_settings.cputype>=cpu_Pentium2) then
                        begin
                          { check for
                                 jCC   xxx
                                 <several movs>
                              xxx:
                          }
                          l:=0;
                          GetNextInstruction(p, hp1);
                          while assigned(hp1) and
                            CanBeCMOV(hp1) and
                            { stop on labels }
                            not(hp1.typ=ait_label) do
                            begin
                              inc(l);
                              GetNextInstruction(hp1,hp1);
                            end;
                          if assigned(hp1) then
                            begin
                              if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                begin
                                  if (l<=4) and (l>0) then
                                    begin
                                      condition:=inverse_cond(taicpu(p).condition);
                                      hp2:=p;
                                      GetNextInstruction(p,hp1);
                                      p:=hp1;
                                      repeat
                                        taicpu(hp1).opcode:=A_CMOVcc;
                                        taicpu(hp1).condition:=condition;
                                        GetNextInstruction(hp1,hp1);
                                      until not(assigned(hp1)) or
                                        not(CanBeCMOV(hp1));
                                      { wait with removing else GetNextInstruction could
                                        ignore the label if it was the only usage in the
                                        jump moved away }
                                      tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                      asml.remove(hp2);
                                      hp2.free;
                                      continue;
                                    end;
                                end
                              else
                                begin
                                  { check further for
                                         jCC   xxx
                                         <several movs 1>
                                         jmp   yyy
                                     xxx:
                                         <several movs 2>
                                     yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    { the first run must not be empty: the
                                      conversion loop below is a repeat..until
                                      and would otherwise rewrite the jmp
                                      itself and leave p pointing at the
                                      freed jmp }
                                    (l>0) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                    begin
                                      l:=0;
                                      { skip hp1 to <several moves 2> }
                                      GetNextInstruction(hp1, hp1);
                                      while assigned(hp1) and
                                        CanBeCMOV(hp1) do
                                        begin
                                          inc(l);
                                          GetNextInstruction(hp1, hp1);
                                        end;
                                      { hp1 points to yyy: }
                                      { the second run must not be empty
                                        either, otherwise the second
                                        repeat..until below would write to a
                                        non-mov item through a taicpu cast }
                                      if (l>0) and
                                        assigned(hp1) and
                                        FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                        begin
                                          condition:=inverse_cond(taicpu(p).condition);
                                          GetNextInstruction(p,hp1);
                                          hp3:=p;
                                          p:=hp1;
                                          repeat
                                            taicpu(hp1).opcode:=A_CMOVcc;
                                            taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCMOV(hp1));
                                          { hp2 is still at jmp yyy }
                                          GetNextInstruction(hp2,hp1);
                                          { hp1 is now at xxx: }
                                          condition:=inverse_cond(condition);
                                          GetNextInstruction(hp1,hp1);
                                          { hp1 is now at <several movs 2> }
                                          repeat
                                            taicpu(hp1).opcode:=A_CMOVcc;
                                            taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCMOV(hp1));
                                          {
                                          asml.remove(hp1.next)
                                          hp1.next.free;
                                          asml.remove(hp1);
                                          hp1.free;
                                          }
                                          { remove jCC }
                                          tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp3);
                                          hp3.free;
                                          { remove jmp }
                                          tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp2);
                                          hp2.free;
                                          continue;
                                        end;
                                    end;
                                end;
                            end;
                        end;
                    end;
                  A_FSTP,A_FISTP:
                    if doFpuLoadStoreOpt(asmL,p) then
                      continue;
                  A_IMUL:
                    begin
                      if (taicpu(p).ops >= 2) and
                         ((taicpu(p).oper[0]^.typ = top_const) or
                          ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
                         (taicpu(p).oper[1]^.typ = top_reg) and
                         ((taicpu(p).ops = 2) or
                          ((taicpu(p).oper[2]^.typ = top_reg) and
                           (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
                         getLastInstruction(p,hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_MOV) and
                         (taicpu(hp1).oper[0]^.typ = top_reg) and
                         (taicpu(hp1).oper[1]^.typ = top_reg) and
                         (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
                        begin
                          taicpu(p).ops := 3;
                          taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
                          taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
                          asml.remove(hp1);
                          hp1.free;
                        end;
                    end;
                  A_MOV:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_reg) and
                         (taicpu(p).oper[1]^.typ = top_reg) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_Instruction) and
                         ((taicpu(hp1).opcode = A_MOV) or
                          (taicpu(hp1).opcode = A_MOVZX) or
                          (taicpu(hp1).opcode = A_MOVSX)) and
                         (taicpu(hp1).oper[0]^.typ = top_ref) and
                         (taicpu(hp1).oper[1]^.typ = top_reg) and
                         ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
                          (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
                         (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
                        {mov reg1, reg2
                         mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2}
                        begin
                          if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
                            taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
                          if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
                            taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
                          asml.remove(p);
                          p.free;
                          p := hp1;
                          continue;
                        end
                      else if (taicpu(p).oper[0]^.typ = top_ref) and
                        GetNextInstruction(p,hp1) and
                        (hp1.typ = ait_instruction) and
                        (IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) or
                         ((taicpu(hp1).opcode=A_LEA) and
                          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
                          ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
                            (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)) or
                           (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
                            (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg))
                          )
                         )
                        ) and
                        GetNextInstruction(hp1,hp2) and
                        MatchInstruction(hp2,A_MOV,[]) and
                        MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                        (taicpu(hp2).oper[1]^.typ = top_ref) then
                        begin
                          TmpUsedRegs := UsedRegs;
                          UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
                          if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
                            not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,
                              hp2, TmpUsedRegs))) then
                            { change   mov            (ref), reg            }
                            {          add/sub/or/... reg2/$const, reg      }
                            {          mov            reg, (ref)            }
                            {          # release reg                        }
                            { to       add/sub/or/... reg2/$const, (ref)    }
                            begin
                              case taicpu(hp1).opcode of
                                A_INC,A_DEC:
                                  taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                                A_LEA:
                                  begin
                                    { lea (reg,other),reg -> add other,(ref):
                                      pick whichever of base/index is not reg }
                                    taicpu(hp1).opcode:=A_ADD;
                                    if taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg then
                                      taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                                    else
                                      taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base);
                                    taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                                    DebugMsg('Peephole FoldLea done',hp1);
                                  end
                                else
                                  taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                              end;
                              asml.remove(p);
                              asml.remove(hp2);
                              p.free;
                              hp2.free;
                              p := hp1
                            end;
                        end
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  { Final peephole pass.

    Walks the list items from BlockStart up to, but not including, BlockEnd
    and rewrites instructions in place inside asml.  Instructions for which
    InsContainsSegRef returns true are skipped.  The rewrites are:

      * CALL:  "call x; jmp y" becomes "push y; jmp x" when
               optimizecputype < cpu_Pentium2 and PIC is not being generated
      * CMP:   "cmp $0,%reg" becomes "test %reg,%reg"
      * MOVZX: "movzbl src,%reg" becomes "xorl %reg,%reg; movb src,%reg8"
               when optimizecputype = cpu_Pentium, neither cs_opt_regvar nor
               cs_opt_size is set, and src does not involve %reg
      * TEST/OR: "test/or %y,%y" is removed when the preceding instruction
               already leaves the flags needed by the following
               setcc/jcc/cmovcc

    asml       list that owns the instructions; items are removed from and
               inserted into it
    BlockStart first item to examine
    BlockEnd   item at which the walk stops (not examined) }
    var
      p,hp1,hp2: tai;
    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_CALL:
                    { don't do this on modern CPUs, this really hurts them due to
                      broken call/ret pairing }
                    if (current_settings.optimizecputype < cpu_Pentium2) and
                       not(cs_create_pic in current_settings.moduleswitches) and
                       GetNextInstruction(p, hp1) and
                       (hp1.typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_JMP) and
                       ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
                      begin
                        { push the jmp target, then turn the call into a jmp }
                        hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
                        InsertLLItem(asml, p.previous, p, hp2);
                        taicpu(p).opcode := A_JMP;
                        taicpu(p).is_jmp := true;
                        asml.remove(hp1);
                        hp1.free;
                      end;
                  A_CMP:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(p).oper[0]^.val = 0) and
                         (taicpu(p).oper[1]^.typ = top_reg) then
                        {change "cmp $0, %reg" to "test %reg, %reg"}
                        begin
                          taicpu(p).opcode := A_TEST;
                          taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                          { p is examined again, now as a TEST }
                          continue;
                        end;
                    end;
(*
Optimization is not safe; xor clears the carry flag.
See test/tgadint64 in the test suite.
                  A_MOV:
                    if (taicpu(p).oper[0]^.typ = Top_Const) and
                       (taicpu(p).oper[0]^.val = 0) and
                       (taicpu(p).oper[1]^.typ = Top_Reg) then
                      { change "mov $0, %reg" into "xor %reg, %reg" }
                      begin
                        taicpu(p).opcode := A_XOR;
                        taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
                      end;
*)
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like }
                    {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
                    { so we can't safely replace the movzx then with xor/mov,   }
                    { since that would change the flags (JM)                    }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) then
                      begin
                        if (taicpu(p).oper[1]^.typ = top_reg) then
                          if (taicpu(p).oper[0]^.typ = top_reg)
                            then
                              case taicpu(p).opsize of
                                S_BL:
                                  begin
                                    if IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
                                       { the inserted xor would wipe the source
                                         if it is part of the destination
                                         register (e.g. movzbl %al,%eax) }
                                       (getsupreg(taicpu(p).oper[0]^.reg) <>
                                        getsupreg(taicpu(p).oper[1]^.reg)) and
                                       not(cs_opt_size in current_settings.optimizerswitches) and
                                       (current_settings.optimizecputype = cpu_Pentium) then
                                      {Change "movzbl %reg1, %reg2" to
                                       "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                       PentiumMMX}
                                      begin
                                        hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                                    taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(asml,p.previous, p, hp1);
                                        taicpu(p).opcode := A_MOV;
                                        taicpu(p).changeopsize(S_B);
                                        setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                      end;
                                  end;
                              end
                            else if (taicpu(p).oper[0]^.typ = top_ref) and
                                (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                                (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                                not(cs_opt_size in current_settings.optimizerswitches) and
                                IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
                                (current_settings.optimizecputype = cpu_Pentium) and
                                (taicpu(p).opsize = S_BL) then
                              {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                               Pentium and PentiumMMX}
                              begin
                                hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                            taicpu(p).oper[1]^.reg);
                                taicpu(p).opcode := A_MOV;
                                taicpu(p).changeopsize(S_B);
                                setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                InsertLLItem(asml,p.previous, p, hp1);
                              end;
                      end;
                  A_TEST, A_OR:
                    {removes the line marked with (x) from the sequence
                     and/or/xor/add/sub/... $x, %y
                     test/or %y, %y   (x)
                     j(n)z _Label
                        as the first instruction already adjusts the ZF}
                    begin
                      if OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
                        if GetLastInstruction(p, hp1) and
                          (tai(hp1).typ = ait_instruction) and
                          GetNextInstruction(p,hp2) and
                          (hp2.typ = ait_instruction) and
                          ((taicpu(hp2).opcode = A_SETcc) or
                           (taicpu(hp2).opcode = A_Jcc) or
                           (taicpu(hp2).opcode = A_CMOVcc)) then
                          case taicpu(hp1).opcode Of
                            A_ADD, A_SUB, A_OR, A_XOR, A_AND:
                              begin
                                if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
                                  { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                  { and in case of carry for A(E)/B(E)/C/NC                  }
                                   ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                                    ((taicpu(hp1).opcode <> A_ADD) and
                                     (taicpu(hp1).opcode <> A_SUB))) then
                                  begin
                                    hp1 := tai(p.next);
                                    asml.remove(p);
                                    p.free;
                                    p := tai(hp1);
                                    continue
                                  end;
                              end;
                            A_SHL, A_SAL, A_SHR, A_SAR:
                              begin
                                if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
                                  { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
                                  { therefore, it's only safe to do this optimization for     }
                                  { shifts by a (nonzero) constant                            }
                                   (taicpu(hp1).oper[0]^.typ = top_const) and
                                   (taicpu(hp1).oper[0]^.val <> 0) and
                                  { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                  { and in case of carry for A(E)/B(E)/C/NC                  }
                                   (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                  begin
                                    hp1 := tai(p.next);
                                    asml.remove(p);
                                    p.free;
                                    p := tai(hp1);
                                    continue
                                  end;
                              end;
                            A_DEC, A_INC, A_NEG:
                              begin
                                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                                  { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                  { and in case of carry for A(E)/B(E)/C/NC                  }
                                  (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                  begin
                                    case taicpu(hp1).opcode Of
                                      A_DEC, A_INC:
                                        {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
                                        begin
                                          case taicpu(hp1).opcode Of
                                            A_DEC: taicpu(hp1).opcode := A_SUB;
                                            A_INC: taicpu(hp1).opcode := A_ADD;
                                          end;
                                          taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                                          taicpu(hp1).loadConst(0,1);
                                          taicpu(hp1).ops:=2;
                                        end
                                    end;
                                    hp1 := tai(p.next);
                                    asml.remove(p);
                                    p.free;
                                    p := tai(hp1);
                                    continue
                                  end;
                              end
                          end
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  2412. end.