popt386.pas 125 KB


  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit popt386;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses Aasmbase,aasmtai,aasmdata,aasmcpu,verbose;
  22. procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  23. procedure PeepHoleOptPass1(asml: TAsmList; BlockStart, BlockEnd: tai);
  24. procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);
  25. procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  26. implementation
  27. uses
  28. cutils,globtype,systems,
  29. globals,cgbase,procinfo,
  30. symsym,
  31. {$ifdef finaldestdebug}
  32. cobjects,
  33. {$endif finaldestdebug}
  34. cpuinfo,cpubase,cgutils,daopt386,
  35. cgx86;
  { Tells whether hp1 is an arithmetic/logical instruction that updates reg
    in place:
      - add/sub/or/xor/and/shl/shr/sar with reg as second operand and either
        a constant or a register other than reg as first operand;
      - inc/dec whose only operand is reg.
    Any other opcode yields false. }
  function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    var
      sourceOk: boolean;
    begin
      isFoldableArithOp := False;
      case hp1.opcode of
        A_INC,A_DEC:
          if hp1.oper[0]^.typ = top_reg then
            isFoldableArithOp := hp1.oper[0]^.reg = reg;
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          begin
            { the first operand is inspected before the second one is touched }
            if hp1.oper[0]^.typ = top_const then
              sourceOk := True
            else if hp1.oper[0]^.typ = top_reg then
              sourceOk := hp1.oper[0]^.reg <> reg
            else
              sourceOk := False;
            if sourceOk then
              isFoldableArithOp :=
                (hp1.oper[1]^.typ = top_reg) and
                (hp1.oper[1]^.reg = reg);
          end;
      end;
    end;
  { Updates UsedRegs with the items following p and reports whether the
    super register of reg is still in that set without being overwritten:
    the result is true when the super register is in UsedRegs and either
    there is no next instruction or that instruction does not load the
    register with a new value. UsedRegs is modified for the caller. }
  function RegUsedAfterInstruction(reg: Tregister; p: tai; var UsedRegs: TRegSet): Boolean;
    var
      sr: tsuperregister;
    begin
      sr := getsupreg(reg);
      UpdateUsedRegs(UsedRegs, tai(p.Next));
      if not (sr in UsedRegs) then
        RegUsedAfterInstruction := False
      else if not getNextInstruction(p,p) then
        { nothing follows that could overwrite the register }
        RegUsedAfterInstruction := True
      else
        RegUsedAfterInstruction := not regLoadedWithNewValue(sr,false,p);
    end;
  { Removes a redundant "fstp mem; fld mem" (or "fistp mem; fild mem") pair
    that is immediately followed by leave/ret.
    p is the store instruction; on success both the store and the reload are
    removed and freed, p is set to the leave/ret instruction and the last
    deallocation for the function result is removed.
    Returns true if a "continue" should be done after this optimization. }
  function doFpuLoadStoreOpt(asmL: TAsmList; var p: tai): boolean;
    var
      reload, follower: tai;
    begin
      doFpuLoadStoreOpt := false;

      { the store must target memory and be followed by an instruction }
      if (taicpu(p).oper[0]^.typ <> top_ref) or
         not getNextInstruction(p, reload) or
         (reload.typ <> ait_instruction) then
        exit;

      { only the pairs fstp/fld and fistp/fild qualify }
      if not (((taicpu(reload).opcode = A_FLD) and
               (taicpu(p).opcode = A_FSTP)) or
              ((taicpu(p).opcode = A_FISTP) and
               (taicpu(reload).opcode = A_FILD))) then
        exit;

      { the reload must read the same location with the same size }
      if (taicpu(reload).oper[0]^.typ <> top_ref) or
         (taicpu(reload).opsize <> taicpu(p).opsize) or
         not refsEqual(taicpu(p).oper[0]^.ref^, taicpu(reload).oper[0]^.ref^) then
        exit;

      { replacing fstp f;fld f by fst f is only valid for extended because of rounding }
      if (taicpu(p).opsize <> S_FX) or
         not getNextInstruction(reload, follower) or
         (follower.typ <> ait_instruction) then
        exit;
      if (taicpu(follower).opcode <> A_LEAVE) and
         (taicpu(follower).opcode <> A_RET) then
        exit;

      { the location must be addressed relative to the frame pointer, must not
        lie below the function result's offset, and must not use an index }
      if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
        exit;
      if assigned(current_procinfo.procdef.funcretsym) and
         (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
        exit;
      if taicpu(p).oper[0]^.ref^.index <> NR_NO then
        exit;

      asml.remove(p);
      asml.remove(reload);
      p.free;
      reload.free;
      p := follower;
      removeLastDeallocForFuncRes(asmL, p);
      doFpuLoadStoreOpt := true;

      { Deliberately not done for the other sizes: turning fstp/fistp into
        fst/fist and dropping the reload is invalid because the store
        operation rounds (and fst cannot store an extended value). }
    end;
  { returns true if p contains a memory operand with a segment set }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      opidx: longint;
    begin
      result:=false;
      for opidx:=0 to p.opercnt-1 do
        if (p.oper[opidx]^.typ=top_ref) and
           (p.oper[opidx]^.ref^.segment<>NR_NO) then
          begin
            { the first segmented reference settles the answer }
            result:=true;
            break;
          end;
    end;
  { Pre-pass of the peephole optimizer. Walks the list items from BlockStart
    up to (not including) BlockEnd and rewrites three instruction patterns:
      - "imul const, reg[, reg2]" (32 bit) with const in 1,3,5,6,9,10,12 into
        mov/lea/add sequences (the two-instruction forms only when optimizing
        for a 386 or older); skipped when optimizing for size or when the
        next instruction is a jo/jno;
      - "shr/sar const1, x; shl const2, x" into shift+and or a single and;
      - "xor reg, reg" into "mov $0, reg" (per the original note this is
        undone again in pass 2).
    Instructions that contain a segment-prefixed memory operand are skipped. }
  procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
    var
      p,hp1: tai;
      l: aint;
      tmpRef: treference;
      { true for a 3-operand imul whose destination differs from its source }
      distinctDest: boolean;
    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_IMUL:
                    {changes certain "imul const, %reg"'s to lea sequences}
                    begin
                      if (taicpu(p).oper[0]^.typ = Top_Const) and
                         (taicpu(p).oper[1]^.typ = Top_Reg) and
                         (taicpu(p).opsize = S_L) then
                        if (taicpu(p).oper[0]^.val = 1) then
                          if (taicpu(p).ops = 2) then
                            {remove "imul $1, reg"}
                            begin
                              hp1 := tai(p.Next);
                              asml.remove(p);
                              p.free;
                              p := hp1;
                              continue;
                            end
                          else
                            {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                            begin
                              hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                              InsertLLItem(asml, p.previous, p.next, hp1);
                              p.free;
                              p := hp1;
                            end
                        else if
                          ((taicpu(p).ops <= 2) or
                           (taicpu(p).oper[2]^.typ = Top_Reg)) and
                          (taicpu(p).oper[0]^.val <= 12) and
                          not(cs_opt_size in current_settings.optimizerswitches) and
                          { lea/add do not set the overflow flag like imul does,
                            so leave the imul alone when a jo/jno follows }
                          (not(GetNextInstruction(p, hp1)) or
                           not((tai(hp1).typ = ait_instruction) and
                               ((taicpu(hp1).opcode=A_Jcc) and
                                (taicpu(hp1).condition in [C_O,C_NO])))) then
                          begin
                            reference_reset(tmpref,1);
                            { The two-instruction 3-operand expansions for 6 and
                              12 read reg1 again after writing reg2, so they are
                              only valid when both registers differ. When they
                              are identical the in-place (2-operand) sequence
                              computes the same result. }
                            distinctDest :=
                              (taicpu(p).ops = 3) and
                              (taicpu(p).oper[1]^.reg <> taicpu(p).oper[2]^.reg);
                            case taicpu(p).oper[0]^.val Of
                              3: begin
                                   {imul 3, reg1, reg2 to
                                      lea (reg1,reg1,2), reg2
                                    imul 3, reg1 to
                                      lea (reg1,reg1,2), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 2;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(asml,p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              5: begin
                                   {imul 5, reg1, reg2 to
                                      lea (reg1,reg1,4), reg2
                                    imul 5, reg1 to
                                      lea (reg1,reg1,4), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 4;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(asml,p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              6: begin
                                   {imul 6, reg1, reg2 to
                                      lea (,reg1,2), reg2
                                      lea (reg2,reg1,4), reg2
                                    imul 6, reg1 to
                                      lea (reg1,reg1,2), reg1
                                      add reg1, reg1}
                                   if (current_settings.optimizecputype <= cpu_386) then
                                     begin
                                       { build the SECOND instruction first and
                                         insert it after p }
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       if distinctDest then
                                         begin
                                           TmpRef.base := taicpu(p).oper[2]^.reg;
                                           TmpRef.ScaleFactor := 4;
                                           { the result belongs in reg2 (oper[2]);
                                             writing it to reg1 clobbered the
                                             source and left 2*reg1 in reg2 }
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                             taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(asml,p, p.next, hp1);
                                       { now the FIRST instruction, which replaces p }
                                       reference_reset(tmpref,2);
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       TmpRef.ScaleFactor := 2;
                                       if distinctDest then
                                         begin
                                           TmpRef.base := NR_NO;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                                             taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           TmpRef.base := taicpu(p).oper[1]^.reg;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(asml,p.previous, p.next, hp1);
                                       p.free;
                                       { continue behind the inserted pair }
                                       p := tai(hp1.next);
                                     end
                                 end;
                              9: begin
                                   {imul 9, reg1, reg2 to
                                      lea (reg1,reg1,8), reg2
                                    imul 9, reg1 to
                                      lea (reg1,reg1,8), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 8;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(asml,p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              10: begin
                                    {imul 10, reg1, reg2 to
                                       lea (reg1,reg1,4), reg2
                                       add reg2, reg2
                                     imul 10, reg1 to
                                       lea (reg1,reg1,4), reg1
                                       add reg1, reg1}
                                    if (current_settings.optimizecputype <= cpu_386) then
                                      begin
                                        if (taicpu(p).ops = 3) then
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                        else
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                        InsertLLItem(asml,p, p.next, hp1);
                                        TmpRef.base := taicpu(p).oper[1]^.reg;
                                        TmpRef.index := taicpu(p).oper[1]^.reg;
                                        TmpRef.ScaleFactor := 4;
                                        if (taicpu(p).ops = 3) then
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
                                        else
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(asml,p.previous, p.next, hp1);
                                        p.free;
                                        p := tai(hp1.next);
                                      end
                                  end;
                              12: begin
                                    {imul 12, reg1, reg2 to
                                       lea (,reg1,4), reg2
                                       lea (reg2,reg1,8), reg2
                                     imul 12, reg1 to
                                       lea (reg1,reg1,2), reg1
                                       lea (,reg1,4), reg1}
                                    if (current_settings.optimizecputype <= cpu_386)
                                      then
                                      begin
                                        { second instruction first, inserted after p }
                                        TmpRef.index := taicpu(p).oper[1]^.reg;
                                        if distinctDest then
                                          begin
                                            TmpRef.base := taicpu(p).oper[2]^.reg;
                                            TmpRef.ScaleFactor := 8;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                          end
                                        else
                                          begin
                                            TmpRef.base := NR_NO;
                                            TmpRef.ScaleFactor := 4;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                          end;
                                        InsertLLItem(asml,p, p.next, hp1);
                                        { first instruction, replaces p }
                                        reference_reset(tmpref,2);
                                        TmpRef.index := taicpu(p).oper[1]^.reg;
                                        if distinctDest then
                                          begin
                                            TmpRef.base := NR_NO;
                                            TmpRef.ScaleFactor := 4;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                          end
                                        else
                                          begin
                                            TmpRef.base := taicpu(p).oper[1]^.reg;
                                            TmpRef.ScaleFactor := 2;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                          end;
                                        InsertLLItem(asml,p.previous, p.next, hp1);
                                        p.free;
                                        p := tai(hp1.next);
                                      end
                                  end
                            end;
                          end;
                    end;
                  A_SAR, A_SHR:
                    {changes the code sequence
                       shr/sar const1, x
                       shl const2, x
                     to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
                    begin
                      if GetNextInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_SHL) and
                         (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(hp1).oper[0]^.typ = top_const) and
                         (taicpu(hp1).opsize = taicpu(p).opsize) and
                         (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                         OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                           not(cs_opt_size in current_settings.optimizerswitches) then
                          { shr/sar const1, %reg
                            shl const2, %reg
                            with const1 > const2:
                            shift right by the difference, then clear the low
                            const2 bits with an and }
                          begin
                            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                            taicpu(hp1).opcode := A_AND;
                            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
                            case taicpu(p).opsize Of
                              S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
                              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
                              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
                            end;
                          end
                        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                                not(cs_opt_size in current_settings.optimizerswitches) then
                          { shr/sar const1, %reg
                            shl const2, %reg
                            with const1 < const2:
                            clear the low const1 bits with an and, then shift
                            left by the difference }
                          begin
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                            taicpu(p).opcode := A_AND;
                            l := (1 shl (taicpu(p).oper[0]^.val))-1;
                            case taicpu(p).opsize Of
                              S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                              S_B: taicpu(p).loadConst(0,l Xor $ff);
                              S_W: taicpu(p).loadConst(0,l Xor $ffff);
                            end;
                          end
                        else
                          { shr/sar const1, %reg
                            shl const2, %reg
                            with const1 = const2:
                            a single and that clears the low bits is enough }
                          if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                            begin
                              taicpu(p).opcode := A_AND;
                              l := (1 shl (taicpu(p).oper[0]^.val))-1;
                              case taicpu(p).opsize Of
                                S_B: taicpu(p).loadConst(0,l Xor $ff);
                                S_W: taicpu(p).loadConst(0,l Xor $ffff);
                                S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                              end;
                              asml.remove(hp1);
                              hp1.free;
                            end;
                    end;
                  A_XOR:
                    if (taicpu(p).oper[0]^.typ = top_reg) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                      { temporarily change this to 'mov reg,0' to make it easier }
                      { for the CSE. Will be changed back in pass 2 }
                      begin
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).loadConst(0,0);
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { True when instr is an instruction with opcode op whose operand size is
    in opsize; an empty opsize set accepts every size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if taicpu(instr).opcode <> op then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { True when instr is an instruction whose opcode is op1 or op2 and whose
    operand size is in opsize; an empty opsize set accepts every size. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if (taicpu(instr).opcode <> op1) and
         (taicpu(instr).opcode <> op2) then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { True when instr is an instruction whose opcode is op1, op2 or op3 and
    whose operand size is in opsize; an empty opsize set accepts every size. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if (taicpu(instr).opcode <> op1) and
         (taicpu(instr).opcode <> op2) and
         (taicpu(instr).opcode <> op3) then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { True when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ = top_reg then
        result := oper.reg = reg
      else
        result := false;
    end;
  { True when oper is a constant operand with value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ = top_const then
        result := oper.val = a
      else
        result := false;
    end;
  { Compares two operands: they match when they have the same operand type
    and equal constant value, register or memory reference respectively.
    Two operands of the same type other than const/reg/ref trigger
    internalerror 2013102801. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      if oper1.typ <> oper2.typ then
        result := false
      else
        case oper1.typ of
          top_ref:
            result := RefsEqual(oper1.ref^, oper2.ref^);
          top_reg:
            result := oper1.reg = oper2.reg;
          top_const:
            result := oper1.val = oper2.val;
          else
            begin
              { types are equal here; keep the result defined before bailing out }
              result := true;
              internalerror(2013102801);
            end;
        end;
    end;
  { Checks that ref is a plain register reference: zero offset, scale factor
    0 or 1, no segment, no symbol and no relative symbol. base and index are
    compared against ref.base / ref.index unless NR_INVALID is passed, which
    accepts any register in that position. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=False;
      if (ref.offset<>0) or
         not(ref.scalefactor in [0,1]) then
        exit;
      if (ref.segment<>NR_NO) or
         (ref.symbol<>nil) or
         (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      Result:=(index=NR_INVALID) or (ref.index=index);
    end;
  482. { First pass of peephole optimizations }
  483. procedure PeepHoleOptPass1(Asml: TAsmList; BlockStart, BlockEnd: tai);
  {$ifdef DEBUG_AOPTCPU}
    { Debug build: puts the text s as an assembler comment in front of p. }
    procedure DebugMsg(const s: string;p : tai);
      var
        note: tai_comment;
      begin
        note := tai_comment.Create(strpnew(s));
        asml.insertbefore(note, p);
      end;
  {$else DEBUG_AOPTCPU}
    { Regular build: does nothing. }
    procedure DebugMsg(const s: string;p : tai);inline;
      begin
      end;
  {$endif DEBUG_AOPTCPU}
  { Debugging aid: prints "Ok" on standard output and always returns true,
    so it can be chained into a boolean condition. }
  function WriteOk : Boolean;
    begin
      writeln('Ok');
      WriteOk:=True;
    end;
  499. var
  500. l : longint;
  501. p,hp1,hp2 : tai;
  502. hp3,hp4: tai;
  503. v:aint;
  504. TmpRef: TReference;
  505. UsedRegs, TmpUsedRegs: TRegSet;
  506. TmpBool1, TmpBool2: Boolean;
  { Starting behind hp, steps over every item whose type is in SkipInstr or
    is a label or an alignment. If an item follows the skipped run, it is
    stored in hp2 and the result is true; otherwise hp2 receives the last
    item reached and the result is false. }
  function SkipLabels(hp: tai; var hp2: tai): boolean;
    var
      following: tai;
    begin
      following := tai(hp.next);
      while assigned(following) and
            (following.typ in SkipInstr + [ait_label,ait_align]) do
        begin
          hp := following;
          following := tai(hp.next);
        end;
      SkipLabels := assigned(following);
      if assigned(following) then
        hp2 := following
      else
        hp2 := hp;
    end;
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
    {traces successive jumps to their final destination and sets it, e.g.
       je l1                je l3
       <code>               <code>
       l1:       becomes    l1:
       je l2                je l3
       <code>               <code>
       l2:                  l2:
       jmp l3               jmp l3

     hp is the jump to retarget; its label operand is rewritten in place and
     the reference counts of the old and the new label are adjusted.
     The level parameter denotes how deep we have already followed the jump,
     to avoid endless loops with constructs such as "l5: ; jmp l5"; the
     function gives up (returns false) once level exceeds 20.
     Returns true unless the trace had to be abandoned. }
    var p1, p2: tai;
        l: tasmlabel;

      { Looks behind hp, ignoring SkipInstr items and alignments, for a
        label; stores its symbol in l and returns true when one is found. }
      function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
        begin
          FindAnyLabel := false;
          while assigned(hp.next) and
                (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
            hp := tai(hp.next);
          if assigned(hp.next) and
             (tai(hp.next).typ = ait_label) then
            begin
              FindAnyLabel := true;
              l := tai_label(hp.next).labsym;
            end
        end;

    begin
      GetfinalDestination := false;
      if level > 20 then
        exit;
      p1 := dfa.getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
      if assigned(p1) then
        begin
          { move p1 to the first real item behind the target label }
          SkipLabels(p1,p1);
          if (tai(p1).typ = ait_instruction) and
             (taicpu(p1).is_jmp) then
            if { the next instruction after the label where the jump hp arrives}
               { is unconditional or of the same type as hp, so continue       }
               (taicpu(p1).condition in [C_None,hp.condition]) or
               { the next instruction after the label where the jump hp arrives}
               { is the opposite of hp (so this one is never taken), but after }
               { that one there is a branch that will be taken, so perform a   }
               { little hack: set p1 equal to this instruction (that's what the}
               { last SkipLabels is for, only works with short bool evaluation)}
               ((taicpu(p1).condition = inverse_cond(hp.condition)) and
                SkipLabels(p1,p2) and
                (p2.typ = ait_instruction) and
                (taicpu(p2).is_jmp) and
                (taicpu(p2).condition in [C_None,hp.condition]) and
                SkipLabels(p1,p1)) then
              begin
                { quick check for loops of the form "l5: ; jmp l5 }
                if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                     tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                  exit;
                if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                  exit;
                tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
                tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
              end
            else
              if (taicpu(p1).condition = inverse_cond(hp.condition)) then
                { p1 is never taken when reached through hp, so hp can jump
                  straight to whatever follows p1 }
                if not FindAnyLabel(p1,l) then
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(asml,p1,p1.next,tai_comment.Create(
                      strpnew('previous label inserted')));
  {$endif finaldestdebug}
                    current_asmdata.getjumplabel(l);
                    insertllitem(asml,p1,p1.next,tai_label.Create(l));
                    tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    l.increfs;
                    { this won't work, since the new label isn't in the labeltable }
                    { so it will fail the rangecheck. Labeltable should become a   }
                    { hashtable to support this:                                   }
                    { GetFinalDestination(asml, hp);                               }
                  end
                else
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(asml,p1,p1.next,tai_comment.Create(
                      strpnew('next label reused')));
  {$endif finaldestdebug}
                    l.increfs;
                    { hp no longer refers to its old label: drop that
                      reference, as the other retargeting branches do }
                    tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    if not GetFinalDestination(asml, hp,succ(level)) then
                      exit;
                  end;
        end;
      GetFinalDestination := true;
    end;
  { Folds the instruction preceding p into p. The code reads p's first
    operand as a constant and its second operand as a register
    (NOTE(review): presumably p is always "sub const, reg" - confirm at the
    call sites). The previous instruction must have the same operand size
    and act on that register:
      dec reg       -> p's constant is raised by 1
      sub c2, reg   -> p's constant is raised by c2
      add c2, reg   -> p's constant is lowered by c2; if it becomes 0, p
                       itself is removed as well
    The folded instruction is removed and freed. Returns true only when p
    was removed; p then refers to the previous instruction, or to the next
    item if there is none. Uses hp1 of the enclosing procedure as scratch. }
  function DoSubAddOpt(var p: tai): Boolean;
    begin
      DoSubAddOpt := False;
      if not GetLastInstruction(p, hp1) then
        exit;
      if (hp1.typ <> ait_instruction) or
         (taicpu(hp1).opsize <> taicpu(p).opsize) then
        exit;
      case taicpu(hp1).opcode Of
        A_ADD:
          if (taicpu(hp1).oper[0]^.typ = top_const) and
             (taicpu(hp1).oper[1]^.typ = top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
              asml.remove(hp1);
              hp1.free;
              if (taicpu(p).oper[0]^.val = 0) then
                begin
                  { add and sub cancel each other completely }
                  hp1 := tai(p.next);
                  asml.remove(p);
                  p.free;
                  if not GetLastInstruction(hp1, p) then
                    p := hp1;
                  DoSubAddOpt := True;
                end
            end;
        A_SUB:
          if (taicpu(hp1).oper[0]^.typ = top_const) and
             (taicpu(hp1).oper[1]^.typ = top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
              asml.remove(hp1);
              hp1.free;
            end;
        A_DEC:
          if (taicpu(hp1).oper[0]^.typ = top_reg) and
             (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
              asml.remove(hp1);
              hp1.free;
            end;
      end;
    end;
  661. begin
  662. p := BlockStart;
  663. UsedRegs := [];
  664. while (p <> BlockEnd) Do
  665. begin
  666. UpDateUsedRegs(UsedRegs, tai(p.next));
  667. case p.Typ Of
  668. ait_instruction:
  669. begin
  670. current_filepos:=taicpu(p).fileinfo;
  671. if InsContainsSegRef(taicpu(p)) then
  672. begin
  673. p := tai(p.next);
  674. continue;
  675. end;
  676. { Handle Jmp Optimizations }
  677. if taicpu(p).is_jmp then
  678. begin
  679. {the following if-block removes all code between a jmp and the next label,
  680. because it can never be executed}
  681. if (taicpu(p).opcode = A_JMP) then
  682. begin
  683. hp2:=p;
  684. while GetNextInstruction(hp2, hp1) and
  685. (hp1.typ <> ait_label) do
  686. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  687. begin
  688. { don't kill start/end of assembler block,
  689. no-line-info-start/end etc }
  690. if hp1.typ<>ait_marker then
  691. begin
  692. asml.remove(hp1);
  693. hp1.free;
  694. end
  695. else
  696. hp2:=hp1;
  697. end
  698. else break;
  699. end;
  700. { remove jumps to a label coming right after them }
  701. if GetNextInstruction(p, hp1) then
  702. begin
  703. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  704. { TODO: FIXME removing the first instruction fails}
  705. (p<>blockstart) then
  706. begin
  707. hp2:=tai(hp1.next);
  708. asml.remove(p);
  709. p.free;
  710. p:=hp2;
  711. continue;
  712. end
  713. else
  714. begin
  715. if hp1.typ = ait_label then
  716. SkipLabels(hp1,hp1);
  717. if (tai(hp1).typ=ait_instruction) and
  718. (taicpu(hp1).opcode=A_JMP) and
  719. GetNextInstruction(hp1, hp2) and
  720. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  721. begin
  722. if taicpu(p).opcode=A_Jcc then
  723. begin
  724. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  725. tai_label(hp2).labsym.decrefs;
  726. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  727. { when free'ing hp1, the ref. isn't decresed, so we don't
  728. increase it (FK)
  729. taicpu(p).oper[0]^.ref^.symbol.increfs;
  730. }
  731. asml.remove(hp1);
  732. hp1.free;
  733. GetFinalDestination(asml, taicpu(p),0);
  734. end
  735. else
  736. begin
  737. GetFinalDestination(asml, taicpu(p),0);
  738. p:=tai(p.next);
  739. continue;
  740. end;
  741. end
  742. else
  743. GetFinalDestination(asml, taicpu(p),0);
  744. end;
  745. end;
  746. end
  747. else
  748. { All other optimizes }
  749. begin
  750. for l := 0 to taicpu(p).ops-1 Do
  751. if (taicpu(p).oper[l]^.typ = top_ref) then
  752. With taicpu(p).oper[l]^.ref^ Do
  753. begin
  754. if (base = NR_NO) and
  755. (index <> NR_NO) and
  756. (scalefactor in [0,1]) then
  757. begin
  758. base := index;
  759. index := NR_NO
  760. end
  761. end;
  762. case taicpu(p).opcode Of
  763. A_AND:
  764. begin
  765. if (taicpu(p).oper[0]^.typ = top_const) and
  766. (taicpu(p).oper[1]^.typ = top_reg) and
  767. GetNextInstruction(p, hp1) and
  768. (tai(hp1).typ = ait_instruction) and
  769. (taicpu(hp1).opcode = A_AND) and
  770. (taicpu(hp1).oper[0]^.typ = top_const) and
  771. (taicpu(hp1).oper[1]^.typ = top_reg) and
  772. (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  773. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) then
  774. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  775. begin
  776. taicpu(hp1).loadConst(0,taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  777. asml.remove(p);
  778. p.free;
  779. p:=hp1;
  780. end
  781. else
  782. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  783. jump, but only if it's a conditional jump (PFV) }
  784. if (taicpu(p).oper[1]^.typ = top_reg) and
  785. GetNextInstruction(p, hp1) and
  786. (hp1.typ = ait_instruction) and
  787. (taicpu(hp1).is_jmp) and
  788. (taicpu(hp1).opcode<>A_JMP) and
  789. not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then
  790. taicpu(p).opcode := A_TEST;
  791. end;
  792. A_CMP:
  793. begin
  794. { cmp register,$8000 neg register
  795. je target --> jo target
  796. .... only if register is deallocated before jump.}
  797. case Taicpu(p).opsize of
  798. S_B: v:=$80;
  799. S_W: v:=$8000;
  800. S_L: v:=aint($80000000);
  801. else
  802. internalerror(2013112905);
  803. end;
  804. if (taicpu(p).oper[0]^.typ=Top_const) and
  805. (taicpu(p).oper[0]^.val=v) and
  806. (Taicpu(p).oper[1]^.typ=top_reg) and
  807. GetNextInstruction(p, hp1) and
  808. (hp1.typ=ait_instruction) and
  809. (taicpu(hp1).opcode=A_Jcc) and
  810. (Taicpu(hp1).condition in [C_E,C_NE]) and
  811. not(getsupreg(Taicpu(p).oper[1]^.reg) in usedregs) then
  812. begin
  813. Taicpu(p).opcode:=A_NEG;
  814. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  815. Taicpu(p).clearop(1);
  816. Taicpu(p).ops:=1;
  817. if Taicpu(hp1).condition=C_E then
  818. Taicpu(hp1).condition:=C_O
  819. else
  820. Taicpu(hp1).condition:=C_NO;
  821. continue;
  822. end;
  823. {
  824. @@2: @@2:
  825. .... ....
  826. cmp operand1,0
  827. jle/jbe @@1
  828. dec operand1 --> sub operand1,1
  829. jmp @@2 jge/jae @@2
  830. @@1: @@1:
  831. ... ....}
  832. if (taicpu(p).oper[0]^.typ = top_const) and
  833. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  834. (taicpu(p).oper[0]^.val = 0) and
  835. GetNextInstruction(p, hp1) and
  836. (hp1.typ = ait_instruction) and
  837. (taicpu(hp1).is_jmp) and
  838. (taicpu(hp1).opcode=A_Jcc) and
  839. (taicpu(hp1).condition in [C_LE,C_BE]) and
  840. GetNextInstruction(hp1,hp2) and
  841. (hp2.typ = ait_instruction) and
  842. (taicpu(hp2).opcode = A_DEC) and
  843. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  844. GetNextInstruction(hp2, hp3) and
  845. (hp3.typ = ait_instruction) and
  846. (taicpu(hp3).is_jmp) and
  847. (taicpu(hp3).opcode = A_JMP) and
  848. GetNextInstruction(hp3, hp4) and
  849. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  850. begin
  851. taicpu(hp2).Opcode := A_SUB;
  852. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  853. taicpu(hp2).loadConst(0,1);
  854. taicpu(hp2).ops:=2;
  855. taicpu(hp3).Opcode := A_Jcc;
  856. case taicpu(hp1).condition of
  857. C_LE: taicpu(hp3).condition := C_GE;
  858. C_BE: taicpu(hp3).condition := C_AE;
  859. end;
  860. asml.remove(p);
  861. asml.remove(hp1);
  862. p.free;
  863. hp1.free;
  864. p := hp2;
  865. continue;
  866. end
  867. end;
  868. A_FLD:
  869. begin
  870. if (taicpu(p).oper[0]^.typ = top_reg) and
  871. GetNextInstruction(p, hp1) and
  872. (hp1.typ = Ait_Instruction) and
  873. (taicpu(hp1).oper[0]^.typ = top_reg) and
  874. (taicpu(hp1).oper[1]^.typ = top_reg) and
  875. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  876. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  877. { change to
  878. fld reg fxxx reg,st
  879. fxxxp st, st1 (hp1)
  880. Remark: non commutative operations must be reversed!
  881. }
  882. begin
  883. case taicpu(hp1).opcode Of
  884. A_FMULP,A_FADDP,
  885. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  886. begin
  887. case taicpu(hp1).opcode Of
  888. A_FADDP: taicpu(hp1).opcode := A_FADD;
  889. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  890. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  891. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  892. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  893. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  894. end;
  895. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  896. taicpu(hp1).oper[1]^.reg := NR_ST;
  897. asml.remove(p);
  898. p.free;
  899. p := hp1;
  900. continue;
  901. end;
  902. end;
  903. end
  904. else
  905. if (taicpu(p).oper[0]^.typ = top_ref) and
  906. GetNextInstruction(p, hp2) and
  907. (hp2.typ = Ait_Instruction) and
  908. (taicpu(hp2).ops = 2) and
  909. (taicpu(hp2).oper[0]^.typ = top_reg) and
  910. (taicpu(hp2).oper[1]^.typ = top_reg) and
  911. (taicpu(p).opsize in [S_FS, S_FL]) and
  912. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  913. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  914. if GetLastInstruction(p, hp1) and
  915. (hp1.typ = Ait_Instruction) and
  916. ((taicpu(hp1).opcode = A_FLD) or
  917. (taicpu(hp1).opcode = A_FST)) and
  918. (taicpu(hp1).opsize = taicpu(p).opsize) and
  919. (taicpu(hp1).oper[0]^.typ = top_ref) and
  920. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  921. if ((taicpu(hp2).opcode = A_FMULP) or
  922. (taicpu(hp2).opcode = A_FADDP)) then
  923. { change to
  924. fld/fst mem1 (hp1) fld/fst mem1
  925. fld mem1 (p) fadd/
  926. faddp/ fmul st, st
  927. fmulp st, st1 (hp2) }
  928. begin
  929. asml.remove(p);
  930. p.free;
  931. p := hp1;
  932. if (taicpu(hp2).opcode = A_FADDP) then
  933. taicpu(hp2).opcode := A_FADD
  934. else
  935. taicpu(hp2).opcode := A_FMUL;
  936. taicpu(hp2).oper[1]^.reg := NR_ST;
  937. end
  938. else
  939. { change to
  940. fld/fst mem1 (hp1) fld/fst mem1
  941. fld mem1 (p) fld st}
  942. begin
  943. taicpu(p).changeopsize(S_FL);
  944. taicpu(p).loadreg(0,NR_ST);
  945. end
  946. else
  947. begin
  948. case taicpu(hp2).opcode Of
  949. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  950. { change to
  951. fld/fst mem1 (hp1) fld/fst mem1
  952. fld mem2 (p) fxxx mem2
  953. fxxxp st, st1 (hp2) }
  954. begin
  955. case taicpu(hp2).opcode Of
  956. A_FADDP: taicpu(p).opcode := A_FADD;
  957. A_FMULP: taicpu(p).opcode := A_FMUL;
  958. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  959. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  960. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  961. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  962. end;
  963. asml.remove(hp2);
  964. hp2.free;
  965. end
  966. end
  967. end
  968. end;
  969. A_FSTP,A_FISTP:
  970. if doFpuLoadStoreOpt(asmL,p) then
  971. continue;
  972. A_LEA:
  973. begin
  974. {removes seg register prefixes from LEA operations, as they
  975. don't do anything}
  976. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  977. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  978. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  979. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  980. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  981. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  982. begin
  983. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  984. (taicpu(p).oper[0]^.ref^.offset = 0) then
  985. begin
  986. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  987. taicpu(p).oper[1]^.reg);
  988. InsertLLItem(asml,p.previous,p.next, hp1);
  989. p.free;
  990. p := hp1;
  991. continue;
  992. end
  993. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  994. begin
  995. hp1 := tai(p.Next);
  996. asml.remove(p);
  997. p.free;
  998. p := hp1;
  999. continue;
  1000. end
  1001. { continue to use lea to adjust the stack pointer,
  1002. it is the recommended way, but only if not optimizing for size }
  1003. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  1004. (cs_opt_size in current_settings.optimizerswitches) then
  1005. with taicpu(p).oper[0]^.ref^ do
  1006. if (base = taicpu(p).oper[1]^.reg) then
  1007. begin
  1008. l := offset;
  1009. if (l=1) and UseIncDec then
  1010. begin
  1011. taicpu(p).opcode := A_INC;
  1012. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1013. taicpu(p).ops := 1
  1014. end
  1015. else if (l=-1) and UseIncDec then
  1016. begin
  1017. taicpu(p).opcode := A_DEC;
  1018. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1019. taicpu(p).ops := 1;
  1020. end
  1021. else
  1022. begin
  1023. if (l<0) and (l<>-2147483648) then
  1024. begin
  1025. taicpu(p).opcode := A_SUB;
  1026. taicpu(p).loadConst(0,-l);
  1027. end
  1028. else
  1029. begin
  1030. taicpu(p).opcode := A_ADD;
  1031. taicpu(p).loadConst(0,l);
  1032. end;
  1033. end;
  1034. end;
  1035. end
  1036. (*
  1037. This is unsafe, lea doesn't modify the flags but "add"
  1038. does. This breaks webtbs/tw15694.pp. The above
  1039. transformations are also unsafe, but they don't seem to
  1040. be triggered by code that FPC generates (or that at
  1041. least does not occur in the tests...). This needs to be
  1042. fixed by checking for the liveness of the flags register.
  1043. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1044. begin
  1045. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1046. taicpu(p).oper[0]^.ref^.base);
  1047. InsertLLItem(asml,p.previous,p.next, hp1);
  1048. DebugMsg('Peephole Lea2AddBase done',hp1);
  1049. p.free;
  1050. p:=hp1;
  1051. continue;
  1052. end
  1053. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1054. begin
  1055. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1056. taicpu(p).oper[0]^.ref^.index);
  1057. InsertLLItem(asml,p.previous,p.next,hp1);
  1058. DebugMsg('Peephole Lea2AddIndex done',hp1);
  1059. p.free;
  1060. p:=hp1;
  1061. continue;
  1062. end
  1063. *)
  1064. end;
  1065. A_MOV:
  1066. begin
  1067. TmpUsedRegs := UsedRegs;
  1068. if (taicpu(p).oper[1]^.typ = top_reg) and
  1069. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  1070. GetNextInstruction(p, hp1) and
  1071. (tai(hp1).typ = ait_instruction) and
  1072. (taicpu(hp1).opcode = A_MOV) and
  1073. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1074. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  1075. begin
  1076. {we have "mov x, %treg; mov %treg, y"}
  1077. if not(RegInOp(getsupreg(taicpu(p).oper[1]^.reg),taicpu(hp1).oper[1]^)) and
  1078. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1079. {we've got "mov x, %treg; mov %treg, y" where %treg is not used afterwards }
  1080. case taicpu(p).oper[0]^.typ Of
  1081. top_reg:
  1082. begin
  1083. { change "mov %reg, %treg; mov %treg, y"
  1084. to "mov %reg, y" }
  1085. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1086. asml.remove(hp1);
  1087. hp1.free;
  1088. continue;
  1089. end;
  1090. top_ref:
  1091. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  1092. begin
  1093. { change "mov mem, %treg; mov %treg, %reg"
  1094. to "mov mem, %reg" }
  1095. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1096. asml.remove(hp1);
  1097. hp1.free;
  1098. continue;
  1099. end;
  1100. end
  1101. end
  1102. else
  1103. {Change "mov %reg1, %reg2; xxx %reg2, ???" to
  1104. "mov %reg1, %reg2; xxx %reg1, ???" to avoid a write/read
  1105. penalty}
  1106. if (taicpu(p).oper[0]^.typ = top_reg) and
  1107. (taicpu(p).oper[1]^.typ = top_reg) and
  1108. GetNextInstruction(p,hp1) and
  1109. (tai(hp1).typ = ait_instruction) and
  1110. (taicpu(hp1).ops >= 1) and
  1111. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1112. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  1113. {we have "mov %reg1, %reg2; XXX %reg2, ???"}
  1114. begin
  1115. if ((taicpu(hp1).opcode = A_OR) or
  1116. (taicpu(hp1).opcode = A_TEST)) and
  1117. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1118. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  1119. {we have "mov %reg1, %reg2; test/or %reg2, %reg2"}
  1120. begin
  1121. TmpUsedRegs := UsedRegs;
  1122. { reg1 will be used after the first instruction, }
  1123. { so update the allocation info }
  1124. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1125. if GetNextInstruction(hp1, hp2) and
  1126. (hp2.typ = ait_instruction) and
  1127. taicpu(hp2).is_jmp and
  1128. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  1129. { change "mov %reg1, %reg2; test/or %reg2, %reg2; jxx" to
  1130. "test %reg1, %reg1; jxx" }
  1131. begin
  1132. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1133. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1134. asml.remove(p);
  1135. p.free;
  1136. p := hp1;
  1137. continue
  1138. end
  1139. else
  1140. {change "mov %reg1, %reg2; test/or %reg2, %reg2" to
  1141. "mov %reg1, %reg2; test/or %reg1, %reg1"}
  1142. begin
  1143. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1144. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1145. end;
  1146. end
  1147. { else
  1148. if (taicpu(p.next)^.opcode
  1149. in [A_PUSH, A_OR, A_XOR, A_AND, A_TEST])}
  1150. {change "mov %reg1, %reg2; push/or/xor/... %reg2, ???" to
  1151. "mov %reg1, %reg2; push/or/xor/... %reg1, ???"}
  1152. end
  1153. else
  1154. {leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1155. x >= RetOffset) as it doesn't do anything (it writes either to a
  1156. parameter or to the temporary storage room for the function
  1157. result)}
  1158. if GetNextInstruction(p, hp1) and
  1159. (tai(hp1).typ = ait_instruction) then
  1160. if ((taicpu(hp1).opcode = A_LEAVE) or
  1161. (taicpu(hp1).opcode = A_RET)) and
  1162. (taicpu(p).oper[1]^.typ = top_ref) and
  1163. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1164. not(assigned(current_procinfo.procdef.funcretsym) and
  1165. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1166. (taicpu(p).oper[1]^.ref^.index = NR_NO) and
  1167. (taicpu(p).oper[0]^.typ = top_reg) then
  1168. begin
  1169. asml.remove(p);
  1170. p.free;
  1171. p := hp1;
  1172. RemoveLastDeallocForFuncRes(asmL,p);
  1173. end
  1174. else
  1175. if (taicpu(p).oper[0]^.typ = top_reg) and
  1176. (taicpu(p).oper[1]^.typ = top_ref) and
  1177. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1178. (taicpu(hp1).opcode = A_CMP) and
  1179. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1180. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1181. {change "mov reg1, mem1; cmp x, mem1" to "mov reg1, mem1; cmp x, reg1"}
  1182. begin
  1183. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1184. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1185. end;
  1186. { Next instruction is also a MOV ? }
  1187. if GetNextInstruction(p, hp1) and
  1188. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  1189. begin
  1190. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1191. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1192. {mov reg1, mem1 or mov mem1, reg1
  1193. mov mem2, reg2 mov reg2, mem2}
  1194. begin
  1195. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1196. {mov reg1, mem1 or mov mem1, reg1
  1197. mov mem2, reg1 mov reg2, mem1}
  1198. begin
  1199. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1200. { Removes the second statement from
  1201. mov reg1, mem1/reg2
  1202. mov mem1/reg2, reg1 }
  1203. begin
  1204. if (taicpu(p).oper[0]^.typ = top_reg) then
  1205. AllocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1206. asml.remove(hp1);
  1207. hp1.free;
  1208. end
  1209. else
  1210. begin
  1211. TmpUsedRegs := UsedRegs;
  1212. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1213. if (taicpu(p).oper[1]^.typ = top_ref) and
  1214. { mov reg1, mem1
  1215. mov mem2, reg1 }
  1216. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1217. GetNextInstruction(hp1, hp2) and
  1218. (hp2.typ = ait_instruction) and
  1219. (taicpu(hp2).opcode = A_CMP) and
  1220. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1221. (taicpu(hp2).oper[0]^.typ = TOp_Ref) and
  1222. (taicpu(hp2).oper[1]^.typ = TOp_Reg) and
  1223. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(p).oper[1]^.ref^) and
  1224. (taicpu(hp2).oper[1]^.reg= taicpu(p).oper[0]^.reg) and
  1225. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1226. { change to
  1227. mov reg1, mem1 mov reg1, mem1
  1228. mov mem2, reg1 cmp reg1, mem2
  1229. cmp mem1, reg1 }
  1230. begin
  1231. asml.remove(hp2);
  1232. hp2.free;
  1233. taicpu(hp1).opcode := A_CMP;
  1234. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1235. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1236. end;
  1237. end;
  1238. end
  1239. else
  1240. begin
  1241. tmpUsedRegs := UsedRegs;
  1242. if GetNextInstruction(hp1, hp2) and
  1243. (taicpu(p).oper[0]^.typ = top_ref) and
  1244. (taicpu(p).oper[1]^.typ = top_reg) and
  1245. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1246. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  1247. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1248. (tai(hp2).typ = ait_instruction) and
  1249. (taicpu(hp2).opcode = A_MOV) and
  1250. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1251. (taicpu(hp2).oper[1]^.typ = top_reg) and
  1252. (taicpu(hp2).oper[0]^.typ = top_ref) and
  1253. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1254. if not regInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^) and
  1255. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1256. { mov mem1, %reg1
  1257. mov %reg1, mem2
  1258. mov mem2, reg2
  1259. to:
  1260. mov mem1, reg2
  1261. mov reg2, mem2}
  1262. begin
  1263. AllocRegBetween(asmL,taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1264. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1265. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1266. asml.remove(hp2);
  1267. hp2.free;
  1268. end
  1269. else
  1270. if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1271. not(RegInRef(getsupreg(taicpu(p).oper[1]^.reg),taicpu(p).oper[0]^.ref^)) and
  1272. not(RegInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^)) then
  1273. { mov mem1, reg1 mov mem1, reg1
  1274. mov reg1, mem2 mov reg1, mem2
  1275. mov mem2, reg2 mov mem2, reg1
  1276. to: to:
  1277. mov mem1, reg1 mov mem1, reg1
  1278. mov mem1, reg2 mov reg1, mem2
  1279. mov reg1, mem2
  1280. or (if mem1 depends on reg1
  1281. and/or if mem2 depends on reg2)
  1282. to:
  1283. mov mem1, reg1
  1284. mov reg1, mem2
  1285. mov reg1, reg2
  1286. }
  1287. begin
  1288. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1289. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1290. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1291. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1292. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1293. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1294. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1295. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1296. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1297. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1298. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1299. end
  1300. else
  1301. if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1302. begin
  1303. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1304. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1305. end
  1306. else
  1307. begin
  1308. asml.remove(hp2);
  1309. hp2.free;
  1310. end
  1311. end
  1312. end
  1313. else
  1314. (* {movl [mem1],reg1
  1315. movl [mem1],reg2
  1316. to:
  1317. movl [mem1],reg1
  1318. movl reg1,reg2 }
  1319. if (taicpu(p).oper[0]^.typ = top_ref) and
  1320. (taicpu(p).oper[1]^.typ = top_reg) and
  1321. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1322. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1323. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1324. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1325. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1326. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1327. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1328. else*)
  1329. { movl const1,[mem1]
  1330. movl [mem1],reg1
  1331. to:
  1332. movl const1,reg1
  1333. movl reg1,[mem1] }
  1334. if (taicpu(p).oper[0]^.typ = top_const) and
  1335. (taicpu(p).oper[1]^.typ = top_ref) and
  1336. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1337. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1338. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1339. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1340. not(reginref(getsupreg(taicpu(hp1).oper[1]^.reg),taicpu(hp1).oper[0]^.ref^)) then
  1341. begin
  1342. allocregbetween(asml,taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1343. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1344. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1345. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1346. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  1347. end
  1348. end;
  1349. if GetNextInstruction(p, hp1) and
  1350. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1351. GetNextInstruction(hp1, hp2) and
  1352. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1353. MatchOperand(Taicpu(p).oper[0]^,0) and
  1354. (Taicpu(p).oper[1]^.typ = top_reg) and
  1355. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1356. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1357. {mov reg1,0
  1358. bts reg1,operand1 --> mov reg1,operand2
  1359. or reg1,operand2 bts reg1,operand1}
  1360. begin
  1361. Taicpu(hp2).opcode:=A_MOV;
  1362. asml.remove(hp1);
  1363. insertllitem(asml,hp2,hp2.next,hp1);
  1364. asml.remove(p);
  1365. p.free;
  1366. p:=hp1;
  1367. end;
  1368. if GetNextInstruction(p, hp1) and
  1369. MatchInstruction(hp1,A_LEA,[S_L]) and
  1370. (Taicpu(p).oper[0]^.typ = top_ref) and
  1371. (Taicpu(p).oper[1]^.typ = top_reg) and
  1372. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1373. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1374. ) or
  1375. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1376. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1377. )
  1378. ) then
  1379. {mov reg1,ref
  1380. lea reg2,[reg1,reg2] --> add reg2,ref}
  1381. begin
  1382. TmpUsedRegs := UsedRegs;
  1383. { reg1 may not be used afterwards }
  1384. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1385. begin
  1386. Taicpu(hp1).opcode:=A_ADD;
  1387. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1388. DebugMsg('Peephole MovLea2Add done',hp1);
  1389. asml.remove(p);
  1390. p.free;
  1391. p:=hp1;
  1392. end;
  1393. end;
  1394. end;
  1395. A_MOVSX,
  1396. A_MOVZX :
  1397. begin
  1398. if (taicpu(p).oper[1]^.typ = top_reg) and
  1399. GetNextInstruction(p,hp1) and
  1400. (hp1.typ = ait_instruction) and
  1401. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1402. (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
  1403. GetNextInstruction(hp1,hp2) and
  1404. MatchInstruction(hp2,A_MOV,[]) and
  1405. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1406. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  1407. (((taicpu(hp1).ops=2) and
  1408. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  1409. ((taicpu(hp1).ops=1) and
  1410. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  1411. { reg2 must not be used after the sequence considered, so
  1412. it must be either deallocated or loaded with a new value }
  1413. (GetNextInstruction(hp2,hp3) and
  1414. (FindRegDealloc(getsupreg(taicpu(hp2).oper[0]^.reg),tai(hp3)) or
  1415. RegLoadedWithNewValue(getsupreg(taicpu(hp2).oper[0]^.reg), false, hp3))) then
  1416. { change movsX/movzX reg/ref, reg2 }
  1417. { add/sub/or/... reg3/$const, reg2 }
  1418. { mov reg2, reg/ref }
  1419. { to add/sub/or/... reg3/$const, reg/ref }
  1420. begin
  1421. { by example:
  1422. movswl %si,%eax movswl %si,%eax p
  1423. decl %eax addl %edx,%eax hp1
  1424. movw %ax,%si movw %ax,%si hp2
  1425. ->
  1426. movswl %si,%eax movswl %si,%eax p
  1427. decw %eax addw %edx,%eax hp1
  1428. movw %ax,%si movw %ax,%si hp2
  1429. }
  1430. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1431. {
  1432. ->
  1433. movswl %si,%eax movswl %si,%eax p
  1434. decw %si addw %dx,%si hp1
  1435. movw %ax,%si movw %ax,%si hp2
  1436. }
  1437. case taicpu(hp1).ops of
  1438. 1:
  1439. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1440. 2:
  1441. begin
  1442. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1443. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1444. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1445. end;
  1446. else
  1447. internalerror(2008042701);
  1448. end;
  1449. {
  1450. ->
  1451. decw %si addw %dx,%si p
  1452. }
  1453. asml.remove(p);
  1454. asml.remove(hp2);
  1455. p.free;
  1456. hp2.free;
  1457. p := hp1
  1458. end
  1459. { removes superfluous And's after movzx's }
  1460. else if taicpu(p).opcode=A_MOVZX then
  1461. begin
  1462. if (taicpu(p).oper[1]^.typ = top_reg) and
  1463. GetNextInstruction(p, hp1) and
  1464. (tai(hp1).typ = ait_instruction) and
  1465. (taicpu(hp1).opcode = A_AND) and
  1466. (taicpu(hp1).oper[0]^.typ = top_const) and
  1467. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1468. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1469. case taicpu(p).opsize Of
  1470. S_BL, S_BW:
  1471. if (taicpu(hp1).oper[0]^.val = $ff) then
  1472. begin
  1473. asml.remove(hp1);
  1474. hp1.free;
  1475. end;
  1476. S_WL:
  1477. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1478. begin
  1479. asml.remove(hp1);
  1480. hp1.free;
  1481. end;
  1482. end;
  1483. {changes some movzx constructs to faster synonyms (all examples
  1484. are given with eax/ax, but are also valid for other registers)}
  1485. if (taicpu(p).oper[1]^.typ = top_reg) then
  1486. if (taicpu(p).oper[0]^.typ = top_reg) then
  1487. case taicpu(p).opsize of
  1488. S_BW:
  1489. begin
  1490. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1491. not(cs_opt_size in current_settings.optimizerswitches) then
  1492. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1493. begin
  1494. taicpu(p).opcode := A_AND;
  1495. taicpu(p).changeopsize(S_W);
  1496. taicpu(p).loadConst(0,$ff);
  1497. end
  1498. else if GetNextInstruction(p, hp1) and
  1499. (tai(hp1).typ = ait_instruction) and
  1500. (taicpu(hp1).opcode = A_AND) and
  1501. (taicpu(hp1).oper[0]^.typ = top_const) and
  1502. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1503. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1504. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1505. to "movw %reg1, %reg2; andw $(const and $ff), %reg2"}
  1506. begin
  1507. taicpu(p).opcode := A_MOV;
  1508. taicpu(p).changeopsize(S_W);
  1509. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1510. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1511. end;
  1512. end;
  1513. S_BL:
  1514. begin
  1515. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1516. not(cs_opt_size in current_settings.optimizerswitches) then
  1517. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1518. begin
  1519. taicpu(p).opcode := A_AND;
  1520. taicpu(p).changeopsize(S_L);
  1521. taicpu(p).loadConst(0,$ff)
  1522. end
  1523. else if GetNextInstruction(p, hp1) and
  1524. (tai(hp1).typ = ait_instruction) and
  1525. (taicpu(hp1).opcode = A_AND) and
  1526. (taicpu(hp1).oper[0]^.typ = top_const) and
  1527. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1528. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1529. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1530. to "movl %reg1, %reg2; andl $(const and $ff), %reg2"}
  1531. begin
  1532. taicpu(p).opcode := A_MOV;
  1533. taicpu(p).changeopsize(S_L);
  1534. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1535. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1536. end
  1537. end;
  1538. S_WL:
  1539. begin
  1540. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1541. not(cs_opt_size in current_settings.optimizerswitches) then
  1542. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1543. begin
  1544. taicpu(p).opcode := A_AND;
  1545. taicpu(p).changeopsize(S_L);
  1546. taicpu(p).loadConst(0,$ffff);
  1547. end
  1548. else if GetNextInstruction(p, hp1) and
  1549. (tai(hp1).typ = ait_instruction) and
  1550. (taicpu(hp1).opcode = A_AND) and
  1551. (taicpu(hp1).oper[0]^.typ = top_const) and
  1552. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1553. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1554. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1555. to "movl %reg1, %reg2; andl $(const and $ffff), %reg2"}
  1556. begin
  1557. taicpu(p).opcode := A_MOV;
  1558. taicpu(p).changeopsize(S_L);
  1559. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1560. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1561. end;
  1562. end;
  1563. end
  1564. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1565. begin
  1566. if GetNextInstruction(p, hp1) and
  1567. (tai(hp1).typ = ait_instruction) and
  1568. (taicpu(hp1).opcode = A_AND) and
  1569. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1570. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1571. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1572. begin
  1573. taicpu(p).opcode := A_MOV;
  1574. case taicpu(p).opsize Of
  1575. S_BL:
  1576. begin
  1577. taicpu(p).changeopsize(S_L);
  1578. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1579. end;
  1580. S_WL:
  1581. begin
  1582. taicpu(p).changeopsize(S_L);
  1583. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1584. end;
  1585. S_BW:
  1586. begin
  1587. taicpu(p).changeopsize(S_W);
  1588. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1589. end;
  1590. end;
  1591. end;
  1592. end;
  1593. end;
  1594. end;
  1595. (* should not be generated anymore by the current code generator
  1596. A_POP:
  1597. begin
  1598. if target_info.system=system_i386_go32v2 then
  1599. begin
  1600. { Transform a series of pop/pop/pop/push/push/push to }
  1601. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1602. { because I'm not sure whether they can cope with }
  1603. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1604. { such a problem when using esp as frame pointer (JM) }
  1605. if (taicpu(p).oper[0]^.typ = top_reg) then
  1606. begin
  1607. hp1 := p;
  1608. hp2 := p;
  1609. l := 0;
  1610. while getNextInstruction(hp1,hp1) and
  1611. (hp1.typ = ait_instruction) and
  1612. (taicpu(hp1).opcode = A_POP) and
  1613. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1614. begin
  1615. hp2 := hp1;
  1616. inc(l,4);
  1617. end;
  1618. getLastInstruction(p,hp3);
  1619. l1 := 0;
  1620. while (hp2 <> hp3) and
  1621. assigned(hp1) and
  1622. (hp1.typ = ait_instruction) and
  1623. (taicpu(hp1).opcode = A_PUSH) and
  1624. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1625. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1626. begin
  1627. { change it to a two op operation }
  1628. taicpu(hp2).oper[1]^.typ:=top_none;
  1629. taicpu(hp2).ops:=2;
  1630. taicpu(hp2).opcode := A_MOV;
  1631. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1632. reference_reset(tmpref);
  1633. tmpRef.base.enum:=R_INTREGISTER;
  1634. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1635. convert_register_to_enum(tmpref.base);
  1636. tmpRef.offset := l;
  1637. taicpu(hp2).loadRef(0,tmpRef);
  1638. hp4 := hp1;
  1639. getNextInstruction(hp1,hp1);
  1640. asml.remove(hp4);
  1641. hp4.free;
  1642. getLastInstruction(hp2,hp2);
  1643. dec(l,4);
  1644. inc(l1);
  1645. end;
  1646. if l <> -4 then
  1647. begin
  1648. inc(l,4);
  1649. for l1 := l1 downto 1 do
  1650. begin
  1651. getNextInstruction(hp2,hp2);
  1652. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1653. end
  1654. end
  1655. end
  1656. end
  1657. else
  1658. begin
  1659. if (taicpu(p).oper[0]^.typ = top_reg) and
  1660. GetNextInstruction(p, hp1) and
  1661. (tai(hp1).typ=ait_instruction) and
  1662. (taicpu(hp1).opcode=A_PUSH) and
  1663. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1664. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1665. begin
  1666. { change it to a two op operation }
  1667. taicpu(p).oper[1]^.typ:=top_none;
  1668. taicpu(p).ops:=2;
  1669. taicpu(p).opcode := A_MOV;
  1670. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1671. reference_reset(tmpref);
  1672. TmpRef.base.enum := R_ESP;
  1673. taicpu(p).loadRef(0,TmpRef);
  1674. asml.remove(hp1);
  1675. hp1.free;
  1676. end;
  1677. end;
  1678. end;
  1679. *)
  1680. A_PUSH:
  1681. begin
  1682. if (taicpu(p).opsize = S_W) and
  1683. (taicpu(p).oper[0]^.typ = Top_Const) and
  1684. GetNextInstruction(p, hp1) and
  1685. (tai(hp1).typ = ait_instruction) and
  1686. (taicpu(hp1).opcode = A_PUSH) and
  1687. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1688. (taicpu(hp1).opsize = S_W) then
  1689. begin
  1690. taicpu(p).changeopsize(S_L);
  1691. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1692. asml.remove(hp1);
  1693. hp1.free;
  1694. end;
  1695. end;
  1696. A_SHL, A_SAL:
  1697. begin
  1698. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1699. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1700. (taicpu(p).opsize = S_L) and
  1701. (taicpu(p).oper[0]^.val <= 3) then
  1702. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1703. begin
  1704. TmpBool1 := True; {should we check the next instruction?}
  1705. TmpBool2 := False; {have we found an add/sub which could be
  1706. integrated in the lea?}
  1707. reference_reset(tmpref,2);
  1708. TmpRef.index := taicpu(p).oper[1]^.reg;
  1709. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1710. while TmpBool1 and
  1711. GetNextInstruction(p, hp1) and
  1712. (tai(hp1).typ = ait_instruction) and
  1713. ((((taicpu(hp1).opcode = A_ADD) or
  1714. (taicpu(hp1).opcode = A_SUB)) and
  1715. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1716. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1717. (((taicpu(hp1).opcode = A_INC) or
  1718. (taicpu(hp1).opcode = A_DEC)) and
  1719. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1720. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1721. (not GetNextInstruction(hp1,hp2) or
  1722. not instrReadsFlags(hp2)) Do
  1723. begin
  1724. TmpBool1 := False;
  1725. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1726. begin
  1727. TmpBool1 := True;
  1728. TmpBool2 := True;
  1729. case taicpu(hp1).opcode of
  1730. A_ADD:
  1731. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1732. A_SUB:
  1733. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1734. end;
  1735. asml.remove(hp1);
  1736. hp1.free;
  1737. end
  1738. else
  1739. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1740. (((taicpu(hp1).opcode = A_ADD) and
  1741. (TmpRef.base = NR_NO)) or
  1742. (taicpu(hp1).opcode = A_INC) or
  1743. (taicpu(hp1).opcode = A_DEC)) then
  1744. begin
  1745. TmpBool1 := True;
  1746. TmpBool2 := True;
  1747. case taicpu(hp1).opcode of
  1748. A_ADD:
  1749. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1750. A_INC:
  1751. inc(TmpRef.offset);
  1752. A_DEC:
  1753. dec(TmpRef.offset);
  1754. end;
  1755. asml.remove(hp1);
  1756. hp1.free;
  1757. end;
  1758. end;
  1759. if TmpBool2 or
  1760. ((current_settings.optimizecputype < cpu_Pentium2) and
  1761. (taicpu(p).oper[0]^.val <= 3) and
  1762. not(cs_opt_size in current_settings.optimizerswitches)) then
  1763. begin
  1764. if not(TmpBool2) and
  1765. (taicpu(p).oper[0]^.val = 1) then
  1766. begin
  1767. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1768. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1769. end
  1770. else
  1771. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1772. taicpu(p).oper[1]^.reg);
  1773. InsertLLItem(asml,p.previous, p.next, hp1);
  1774. p.free;
  1775. p := hp1;
  1776. end;
  1777. end
  1778. else
  1779. if (current_settings.optimizecputype < cpu_Pentium2) and
  1780. (taicpu(p).oper[0]^.typ = top_const) and
  1781. (taicpu(p).oper[1]^.typ = top_reg) then
  1782. if (taicpu(p).oper[0]^.val = 1) then
  1783. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1784. but faster on a 486, and pairable in both U and V pipes on the Pentium
  1785. (unlike shl, which is only pairable in the U pipe)}
  1786. begin
  1787. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1788. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1789. InsertLLItem(asml,p.previous, p.next, hp1);
  1790. p.free;
  1791. p := hp1;
  1792. end
  1793. else if (taicpu(p).opsize = S_L) and
  1794. (taicpu(p).oper[0]^.val<= 3) then
  1795. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1796. "shl $3, %reg" to "lea (,%reg,8), %reg"}
  1797. begin
  1798. reference_reset(tmpref,2);
  1799. TmpRef.index := taicpu(p).oper[1]^.reg;
  1800. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1801. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1802. InsertLLItem(asml,p.previous, p.next, hp1);
  1803. p.free;
  1804. p := hp1;
  1805. end
  1806. end;
  1807. A_SETcc :
  1808. { changes
  1809. setcc (funcres) setcc reg
  1810. movb (funcres), reg to leave/ret
  1811. leave/ret }
  1812. begin
  1813. if (taicpu(p).oper[0]^.typ = top_ref) and
  1814. GetNextInstruction(p, hp1) and
  1815. GetNextInstruction(hp1, hp2) and
  1816. (hp2.typ = ait_instruction) and
  1817. ((taicpu(hp2).opcode = A_LEAVE) or
  1818. (taicpu(hp2).opcode = A_RET)) and
  1819. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1820. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1821. not(assigned(current_procinfo.procdef.funcretsym) and
  1822. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1823. (hp1.typ = ait_instruction) and
  1824. (taicpu(hp1).opcode = A_MOV) and
  1825. (taicpu(hp1).opsize = S_B) and
  1826. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1827. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1828. begin
  1829. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1830. asml.remove(hp1);
  1831. hp1.free;
  1832. end
  1833. end;
  1834. A_SUB:
  1835. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1836. { * change "sub/add const1, reg" or "dec reg" followed by
  1837. "sub const2, reg" to one "sub ..., reg" }
  1838. begin
  1839. if (taicpu(p).oper[0]^.typ = top_const) and
  1840. (taicpu(p).oper[1]^.typ = top_reg) then
  1841. if (taicpu(p).oper[0]^.val = 2) and
  1842. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1843. { Don't do the sub/push optimization if the sub }
  1844. { comes from setting up the stack frame (JM) }
  1845. (not getLastInstruction(p,hp1) or
  1846. (hp1.typ <> ait_instruction) or
  1847. (taicpu(hp1).opcode <> A_MOV) or
  1848. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1849. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1850. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1851. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1852. begin
  1853. hp1 := tai(p.next);
  1854. while Assigned(hp1) and
  1855. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1856. not regReadByInstruction(RS_ESP,hp1) and
  1857. not regModifiedByInstruction(RS_ESP,hp1) do
  1858. hp1 := tai(hp1.next);
  1859. if Assigned(hp1) and
  1860. (tai(hp1).typ = ait_instruction) and
  1861. (taicpu(hp1).opcode = A_PUSH) and
  1862. (taicpu(hp1).opsize = S_W) then
  1863. begin
  1864. taicpu(hp1).changeopsize(S_L);
  1865. if taicpu(hp1).oper[0]^.typ=top_reg then
  1866. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1867. hp1 := tai(p.next);
  1868. asml.remove(p);
  1869. p.free;
  1870. p := hp1;
  1871. continue
  1872. end;
  1873. if DoSubAddOpt(p) then
  1874. continue;
  1875. end
  1876. else if DoSubAddOpt(p) then
  1877. continue
  1878. end;
  1879. end;
  1880. end; { if is_jmp }
  1881. end;
  1882. end;
  1883. updateUsedRegs(UsedRegs,p);
  1884. p:=tai(p.next);
  1885. end;
  1886. end;
  1887. procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);
  1888. {$ifdef DEBUG_AOPTCPU}
  1889. procedure DebugMsg(const s: string;p : tai);
  1890. begin
  1891. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  1892. end;
  1893. {$else DEBUG_AOPTCPU}
  1894. procedure DebugMsg(const s: string;p : tai);inline;
  1895. begin
  1896. end;
  1897. {$endif DEBUG_AOPTCPU}
  1898. function CanBeCMOV(p : tai) : boolean;
  1899. begin
  1900. CanBeCMOV:=assigned(p) and (p.typ=ait_instruction) and
  1901. (taicpu(p).opcode=A_MOV) and
  1902. (taicpu(p).opsize in [S_L,S_W]) and
  1903. ((taicpu(p).oper[0]^.typ = top_reg)
  1904. { we can't use cmov ref,reg because
  1905. ref could be nil and cmov still throws an exception
  1906. if ref=nil but the mov isn't done (FK)
  1907. or ((taicpu(p).oper[0]^.typ = top_ref) and
  1908. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  1909. }
  1910. ) and
  1911. (taicpu(p).oper[1]^.typ in [top_reg]);
  1912. end;
  1913. var
  1914. p,hp1,hp2,hp3: tai;
  1915. l : longint;
  1916. condition : tasmcond;
  1917. UsedRegs, TmpUsedRegs: TRegSet;
  1918. carryadd_opcode: Tasmop;
  1919. begin
  1920. p := BlockStart;
  1921. UsedRegs := [];
  1922. while (p <> BlockEnd) Do
  1923. begin
  1924. UpdateUsedRegs(UsedRegs, tai(p.next));
  1925. case p.Typ Of
  1926. Ait_Instruction:
  1927. begin
  1928. if InsContainsSegRef(taicpu(p)) then
  1929. begin
  1930. p := tai(p.next);
  1931. continue;
  1932. end;
  1933. case taicpu(p).opcode Of
  1934. A_Jcc:
  1935. begin
  1936. { jb @@1 cmc
  1937. inc/dec operand --> adc/sbb operand,0
  1938. @@1:
  1939. ... and ...
  1940. jnb @@1
  1941. inc/dec operand --> adc/sbb operand,0
  1942. @@1: }
  1943. if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
  1944. GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
  1945. (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
  1946. begin
  1947. carryadd_opcode:=A_NONE;
  1948. if Taicpu(p).condition in [C_NAE,C_B] then
  1949. begin
  1950. if Taicpu(hp1).opcode=A_INC then
  1951. carryadd_opcode:=A_ADC;
  1952. if Taicpu(hp1).opcode=A_DEC then
  1953. carryadd_opcode:=A_SBB;
  1954. if carryadd_opcode<>A_NONE then
  1955. begin
  1956. Taicpu(p).clearop(0);
  1957. Taicpu(p).ops:=0;
  1958. Taicpu(p).is_jmp:=false;
  1959. Taicpu(p).opcode:=A_CMC;
  1960. Taicpu(p).condition:=C_NONE;
  1961. Taicpu(hp1).ops:=2;
  1962. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1963. Taicpu(hp1).loadconst(0,0);
  1964. Taicpu(hp1).opcode:=carryadd_opcode;
  1965. continue;
  1966. end;
  1967. end;
  1968. if Taicpu(p).condition in [C_AE,C_NB] then
  1969. begin
  1970. if Taicpu(hp1).opcode=A_INC then
  1971. carryadd_opcode:=A_ADC;
  1972. if Taicpu(hp1).opcode=A_DEC then
  1973. carryadd_opcode:=A_SBB;
  1974. if carryadd_opcode<>A_NONE then
  1975. begin
  1976. asml.remove(p);
  1977. p.free;
  1978. Taicpu(hp1).ops:=2;
  1979. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1980. Taicpu(hp1).loadconst(0,0);
  1981. Taicpu(hp1).opcode:=carryadd_opcode;
  1982. p:=hp1;
  1983. continue;
  1984. end;
  1985. end;
  1986. end;
  1987. if (current_settings.cputype>=cpu_Pentium2) then
  1988. begin
  1989. { check for
  1990. jCC xxx
  1991. <several movs>
  1992. xxx:
  1993. }
  1994. l:=0;
  1995. GetNextInstruction(p, hp1);
  1996. while assigned(hp1) and
  1997. CanBeCMOV(hp1) and
  1998. { stop on labels }
  1999. not(hp1.typ=ait_label) do
  2000. begin
  2001. inc(l);
  2002. GetNextInstruction(hp1,hp1);
  2003. end;
  2004. if assigned(hp1) then
  2005. begin
  2006. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  2007. begin
  2008. if (l<=4) and (l>0) then
  2009. begin
  2010. condition:=inverse_cond(taicpu(p).condition);
  2011. hp2:=p;
  2012. GetNextInstruction(p,hp1);
  2013. p:=hp1;
  2014. repeat
  2015. taicpu(hp1).opcode:=A_CMOVcc;
  2016. taicpu(hp1).condition:=condition;
  2017. GetNextInstruction(hp1,hp1);
  2018. until not(assigned(hp1)) or
  2019. not(CanBeCMOV(hp1));
  2020. { wait with removing else GetNextInstruction could
  2021. ignore the label if it was the only usage in the
  2022. jump moved away }
  2023. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  2024. asml.remove(hp2);
  2025. hp2.free;
  2026. continue;
  2027. end;
  2028. end
  2029. else
  2030. begin
  2031. { check further for
  2032. jCC xxx
  2033. <several movs 1>
  2034. jmp yyy
  2035. xxx:
  2036. <several movs 2>
  2037. yyy:
  2038. }
  2039. { hp2 points to jmp yyy }
  2040. hp2:=hp1;
  2041. { skip hp1 to xxx }
  2042. GetNextInstruction(hp1, hp1);
  2043. if assigned(hp2) and
  2044. assigned(hp1) and
  2045. (l<=3) and
  2046. (hp2.typ=ait_instruction) and
  2047. (taicpu(hp2).is_jmp) and
  2048. (taicpu(hp2).condition=C_None) and
  2049. { real label and jump, no further references to the
  2050. label are allowed }
  2051. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
  2052. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  2053. begin
  2054. l:=0;
  2055. { skip hp1 to <several moves 2> }
  2056. GetNextInstruction(hp1, hp1);
  2057. while assigned(hp1) and
  2058. CanBeCMOV(hp1) do
  2059. begin
  2060. inc(l);
  2061. GetNextInstruction(hp1, hp1);
  2062. end;
  2063. { hp1 points to yyy: }
  2064. if assigned(hp1) and
  2065. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  2066. begin
  2067. condition:=inverse_cond(taicpu(p).condition);
  2068. GetNextInstruction(p,hp1);
  2069. hp3:=p;
  2070. p:=hp1;
  2071. repeat
  2072. taicpu(hp1).opcode:=A_CMOVcc;
  2073. taicpu(hp1).condition:=condition;
  2074. GetNextInstruction(hp1,hp1);
  2075. until not(assigned(hp1)) or
  2076. not(CanBeCMOV(hp1));
  2077. { hp2 is still at jmp yyy }
  2078. GetNextInstruction(hp2,hp1);
  2079. { hp2 is now at xxx: }
  2080. condition:=inverse_cond(condition);
  2081. GetNextInstruction(hp1,hp1);
  2082. { hp1 is now at <several movs 2> }
  2083. repeat
  2084. taicpu(hp1).opcode:=A_CMOVcc;
  2085. taicpu(hp1).condition:=condition;
  2086. GetNextInstruction(hp1,hp1);
  2087. until not(assigned(hp1)) or
  2088. not(CanBeCMOV(hp1));
  2089. {
  2090. asml.remove(hp1.next)
  2091. hp1.next.free;
  2092. asml.remove(hp1);
  2093. hp1.free;
  2094. }
  2095. { remove jCC }
  2096. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  2097. asml.remove(hp3);
  2098. hp3.free;
  2099. { remove jmp }
  2100. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  2101. asml.remove(hp2);
  2102. hp2.free;
  2103. continue;
  2104. end;
  2105. end;
  2106. end;
  2107. end;
  2108. end;
  2109. end;
  2110. A_FSTP,A_FISTP:
  2111. if doFpuLoadStoreOpt(asmL,p) then
  2112. continue;
  2113. A_IMUL:
  2114. begin
  2115. if (taicpu(p).ops >= 2) and
  2116. ((taicpu(p).oper[0]^.typ = top_const) or
  2117. ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
  2118. (taicpu(p).oper[1]^.typ = top_reg) and
  2119. ((taicpu(p).ops = 2) or
  2120. ((taicpu(p).oper[2]^.typ = top_reg) and
  2121. (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
  2122. getLastInstruction(p,hp1) and
  2123. (hp1.typ = ait_instruction) and
  2124. (taicpu(hp1).opcode = A_MOV) and
  2125. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2126. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2127. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2128. { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
  2129. begin
  2130. taicpu(p).ops := 3;
  2131. taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
  2132. taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
  2133. asml.remove(hp1);
  2134. hp1.free;
  2135. end;
  2136. end;
  2137. A_MOV:
  2138. begin
  2139. if (taicpu(p).oper[0]^.typ = top_reg) and
  2140. (taicpu(p).oper[1]^.typ = top_reg) and
  2141. GetNextInstruction(p, hp1) and
  2142. (hp1.typ = ait_Instruction) and
  2143. ((taicpu(hp1).opcode = A_MOV) or
  2144. (taicpu(hp1).opcode = A_MOVZX) or
  2145. (taicpu(hp1).opcode = A_MOVSX)) and
  2146. (taicpu(hp1).oper[0]^.typ = top_ref) and
  2147. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2148. ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
  2149. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
  2150. (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
  2151. {mov reg1, reg2
  2152. mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
  2153. begin
  2154. if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
  2155. taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
  2156. if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
  2157. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
  2158. asml.remove(p);
  2159. p.free;
  2160. p := hp1;
  2161. continue;
  2162. end
  2163. else if (taicpu(p).oper[0]^.typ = top_ref) and
  2164. GetNextInstruction(p,hp1) and
  2165. (hp1.typ = ait_instruction) and
  2166. (IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) or
  2167. ((taicpu(hp1).opcode=A_LEA) and
  2168. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
  2169. ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  2170. (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)) or
  2171. (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
  2172. (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg))
  2173. )
  2174. )
  2175. ) and
  2176. GetNextInstruction(hp1,hp2) and
  2177. MatchInstruction(hp2,A_MOV,[]) and
  2178. MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  2179. (taicpu(hp2).oper[1]^.typ = top_ref) then
  2180. begin
  2181. TmpUsedRegs := UsedRegs;
  2182. UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
  2183. if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
  2184. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,
  2185. hp2, TmpUsedRegs))) then
  2186. { change mov (ref), reg }
  2187. { add/sub/or/... reg2/$const, reg }
  2188. { mov reg, (ref) }
  2189. { # release reg }
  2190. { to add/sub/or/... reg2/$const, (ref) }
  2191. begin
  2192. case taicpu(hp1).opcode of
  2193. A_INC,A_DEC:
  2194. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  2195. A_LEA:
  2196. begin
  2197. taicpu(hp1).opcode:=A_ADD;
  2198. if taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg then
  2199. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
  2200. else
  2201. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base);
  2202. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  2203. DebugMsg('Peephole FoldLea done',hp1);
  2204. end
  2205. else
  2206. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  2207. end;
  2208. asml.remove(p);
  2209. asml.remove(hp2);
  2210. p.free;
  2211. hp2.free;
  2212. p := hp1
  2213. end;
  2214. end
  2215. end;
  2216. end;
  2217. end;
  2218. end;
  2219. p := tai(p.next)
  2220. end;
  2221. end;
  2222. procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  2223. var
  2224. p,hp1,hp2: tai;
  2225. IsTestConstX: boolean;
  2226. begin
  2227. p := BlockStart;
  2228. while (p <> BlockEnd) Do
  2229. begin
  2230. case p.Typ Of
  2231. Ait_Instruction:
  2232. begin
  2233. if InsContainsSegRef(taicpu(p)) then
  2234. begin
  2235. p := tai(p.next);
  2236. continue;
  2237. end;
  2238. case taicpu(p).opcode Of
  2239. A_CALL:
  2240. { don't do this on modern CPUs, this really hurts them due to
  2241. broken call/ret pairing }
  2242. if (current_settings.optimizecputype < cpu_Pentium2) and
  2243. not(cs_create_pic in current_settings.moduleswitches) and
  2244. GetNextInstruction(p, hp1) and
  2245. (hp1.typ = ait_instruction) and
  2246. (taicpu(hp1).opcode = A_JMP) and
  2247. ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
  2248. begin
  2249. hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
  2250. InsertLLItem(asml, p.previous, p, hp2);
  2251. taicpu(p).opcode := A_JMP;
  2252. taicpu(p).is_jmp := true;
  2253. asml.remove(hp1);
  2254. hp1.free;
  2255. end;
  2256. A_CMP:
  2257. begin
  2258. if (taicpu(p).oper[0]^.typ = top_const) and
  2259. (taicpu(p).oper[0]^.val = 0) and
  2260. (taicpu(p).oper[1]^.typ = top_reg) then
  2261. {change "cmp $0, %reg" to "test %reg, %reg"}
  2262. begin
  2263. taicpu(p).opcode := A_TEST;
  2264. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  2265. continue;
  2266. end;
  2267. end;
  2268. (*
  2269. Optimization is not safe; xor clears the carry flag.
  2270. See test/tgadint64 in the test suite.
  2271. A_MOV:
  2272. if (taicpu(p).oper[0]^.typ = Top_Const) and
  2273. (taicpu(p).oper[0]^.val = 0) and
  2274. (taicpu(p).oper[1]^.typ = Top_Reg) then
  2275. { change "mov $0, %reg" into "xor %reg, %reg" }
  2276. begin
  2277. taicpu(p).opcode := A_XOR;
  2278. taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
  2279. end;
  2280. *)
  2281. A_MOVZX:
  2282. { if register vars are on, it's possible there is code like }
  2283. { "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx" }
  2284. { so we can't safely replace the movzx then with xor/mov, }
  2285. { since that would change the flags (JM) }
  2286. if not(cs_opt_regvar in current_settings.optimizerswitches) then
  2287. begin
  2288. if (taicpu(p).oper[1]^.typ = top_reg) then
  2289. if (taicpu(p).oper[0]^.typ = top_reg)
  2290. then
  2291. case taicpu(p).opsize of
  2292. S_BL:
  2293. begin
  2294. if IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
  2295. not(cs_opt_size in current_settings.optimizerswitches) and
  2296. (current_settings.optimizecputype = cpu_Pentium) then
  2297. {Change "movzbl %reg1, %reg2" to
  2298. "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
  2299. PentiumMMX}
  2300. begin
  2301. hp1 := taicpu.op_reg_reg(A_XOR, S_L,
  2302. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  2303. InsertLLItem(asml,p.previous, p, hp1);
  2304. taicpu(p).opcode := A_MOV;
  2305. taicpu(p).changeopsize(S_B);
  2306. setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
  2307. end;
  2308. end;
  2309. end
  2310. else if (taicpu(p).oper[0]^.typ = top_ref) and
  2311. (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  2312. (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
  2313. not(cs_opt_size in current_settings.optimizerswitches) and
  2314. IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
  2315. (current_settings.optimizecputype = cpu_Pentium) and
  2316. (taicpu(p).opsize = S_BL) then
  2317. {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
  2318. Pentium and PentiumMMX}
  2319. begin
  2320. hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
  2321. taicpu(p).oper[1]^.reg);
  2322. taicpu(p).opcode := A_MOV;
  2323. taicpu(p).changeopsize(S_B);
  2324. setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
  2325. InsertLLItem(asml,p.previous, p, hp1);
  2326. end;
  2327. end;
  2328. A_TEST, A_OR:
  2329. {removes the line marked with (x) from the sequence
  2330. and/or/xor/add/sub/... $x, %y
  2331. test/or %y, %y | test $-1, %y (x)
  2332. j(n)z _Label
  2333. as the first instruction already adjusts the ZF
  2334. %y operand may also be a reference }
  2335. begin
  2336. IsTestConstX:=(taicpu(p).opcode=A_TEST) and
  2337. MatchOperand(taicpu(p).oper[0]^,-1);
  2338. if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
  2339. GetLastInstruction(p, hp1) and
  2340. (tai(hp1).typ = ait_instruction) and
  2341. GetNextInstruction(p,hp2) and
  2342. MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
  2343. case taicpu(hp1).opcode Of
  2344. A_ADD, A_SUB, A_OR, A_XOR, A_AND:
  2345. begin
  2346. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
  2347. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  2348. { and in case of carry for A(E)/B(E)/C/NC }
  2349. ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
  2350. ((taicpu(hp1).opcode <> A_ADD) and
  2351. (taicpu(hp1).opcode <> A_SUB))) then
  2352. begin
  2353. hp1 := tai(p.next);
  2354. asml.remove(p);
  2355. p.free;
  2356. p := tai(hp1);
  2357. continue
  2358. end;
  2359. end;
  2360. A_SHL, A_SAL, A_SHR, A_SAR:
  2361. begin
  2362. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
  2363. { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
  2364. { therefore, it's only safe to do this optimization for }
  2365. { shifts by a (nonzero) constant }
  2366. (taicpu(hp1).oper[0]^.typ = top_const) and
  2367. (taicpu(hp1).oper[0]^.val <> 0) and
  2368. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  2369. { and in case of carry for A(E)/B(E)/C/NC }
  2370. (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
  2371. begin
  2372. hp1 := tai(p.next);
  2373. asml.remove(p);
  2374. p.free;
  2375. p := tai(hp1);
  2376. continue
  2377. end;
  2378. end;
  2379. A_DEC, A_INC, A_NEG:
  2380. begin
  2381. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
  2382. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  2383. { and in case of carry for A(E)/B(E)/C/NC }
  2384. (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
  2385. begin
  2386. case taicpu(hp1).opcode Of
  2387. A_DEC, A_INC:
  2388. {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
  2389. begin
  2390. case taicpu(hp1).opcode Of
  2391. A_DEC: taicpu(hp1).opcode := A_SUB;
  2392. A_INC: taicpu(hp1).opcode := A_ADD;
  2393. end;
  2394. taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
  2395. taicpu(hp1).loadConst(0,1);
  2396. taicpu(hp1).ops:=2;
  2397. end
  2398. end;
  2399. hp1 := tai(p.next);
  2400. asml.remove(p);
  2401. p.free;
  2402. p := tai(hp1);
  2403. continue
  2404. end;
  2405. end
  2406. else
  2407. { change "test $-1,%reg" into "test %reg,%reg" }
  2408. if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
  2409. taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  2410. end { case }
  2411. else
  2412. { change "test $-1,%reg" into "test %reg,%reg" }
  2413. if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
  2414. taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  2415. end;
  2416. end;
  2417. end;
  2418. end;
  2419. p := tai(p.next)
  2420. end;
  2421. end;
  2422. end.