aoptcpu.pas 110 KB


  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aoptobj, aoptcpub, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  25. Type
  { i386 peephole optimizer class; specializes TX86AsmOptimizer. }
  26. TCpuAsmOptimizer = class(TX86AsmOptimizer)
  { Overridden entry point; its implementation is outside this part of the file. }
  27. procedure Optimize; override;
  { Walks BlockStart..BlockEnd and rewrites imul by a small constant, }
  { shr/sar followed by shl, and xor reg,reg before the main passes.  }
  28. procedure PrePeepHoleOpts; override;
  { First peephole pass: jump clean-up plus per-opcode rewrites. }
  29. procedure PeepHoleOptPass1; override;
  { NOTE(review): implementation not visible here - presumably the second peephole pass. }
  30. procedure PeepHoleOptPass2; override;
  { NOTE(review): implementation not visible here - presumably the final clean-up pass. }
  31. procedure PostPeepHoleOpts; override;
  { Removes an extended-size fstp/fld pair on the same frame-pointer slot right   }
  { before exit code; returns true when p was replaced and the caller should      }
  { restart its loop iteration.                                                   }
  32. function DoFpuLoadStoreOpt(var p : tai) : boolean;
  { Returns true when instruction hp reads reg (explicit operand, memory }
  { reference, or implicit register use listed in insprop).              }
  33. function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
  { Override that simply forwards to RegReadByInstruction. }
  34. function InstructionLoadsFromReg(const reg : TRegister;const hp : tai) : boolean;override;
  35. end;
  36. Var
  37. AsmOptimizer : TCpuAsmOptimizer;
  38. Implementation
  39. uses
  40. verbose,globtype,globals,
  41. cutils,
  42. aoptbase,
  43. cpuinfo,
  44. aasmcpu,
  45. procinfo,
  46. cgutils,cgx86,
  47. { units we should get rid of: }
  48. symsym,symconst;
  { Tells whether hp1 is an arithmetic/logic instruction whose destination is
    reg and which does not use reg as its source.

    Two-operand opcodes (add, sub, or, xor, and, shl, shr, sar) qualify when
    operand 0 is a constant or a register other than reg and operand 1 is reg.
    One-operand opcodes (inc, dec, neg, not) qualify when operand 0 is reg.
    Every other opcode yields False. }
  function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    var
      sourceOk: boolean;
    begin
      Result := False;
      case hp1.opcode of
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          begin
            { the source must not depend on the destination register }
            case hp1.oper[0]^.typ of
              top_const:
                sourceOk := True;
              top_reg:
                sourceOk := hp1.oper[0]^.reg <> reg;
              else
                sourceOk := False;
            end;
            if sourceOk then
              Result := (hp1.oper[1]^.typ = top_reg) and
                        (hp1.oper[1]^.reg = reg);
          end;
        A_INC,A_DEC,A_NEG,A_NOT:
          if hp1.oper[0]^.typ = top_reg then
            Result := hp1.oper[0]^.reg = reg;
      end;
    end;
  { Handles "fstp mem; fld mem" and "fistp mem; fild mem" pairs that use the
    same memory reference and operand size.

    The pair is deleted only when all of the following hold:
      - the operand size is S_FX,
      - the instruction after the pair is exit code,
      - the reference is based on the frame pointer and has no index,
      - the reference does not lie below the function result symbol.
    In that case p is moved to the exit instruction,
    removeLastDeallocForFuncRes is called on it and true is returned, which
    tells the caller to do a "continue".

    Turning the pair into a plain fst/fist is deliberately not attempted:
    the store rounds the value. }
  function TCPUAsmoptimizer.DoFpuLoadStoreOpt(var p: tai): boolean;
    var
      loadIns, exitIns: tai;
      isStoreLoadPair: boolean;
    begin
      Result := false;

      { p must store to memory and be followed by an instruction }
      if taicpu(p).oper[0]^.typ <> top_ref then
        exit;
      if not (getNextInstruction(p, loadIns) and
              (loadIns.typ = ait_instruction)) then
        exit;

      isStoreLoadPair :=
        ((taicpu(loadIns).opcode = A_FLD) and (taicpu(p).opcode = A_FSTP)) or
        ((taicpu(p).opcode = A_FISTP) and (taicpu(loadIns).opcode = A_FILD));
      if not isStoreLoadPair then
        exit;

      { the reload has to read exactly what was just stored }
      if (taicpu(loadIns).oper[0]^.typ <> top_ref) or
         (taicpu(loadIns).opsize <> taicpu(p).opsize) or
         not RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(loadIns).oper[0]^.ref^) then
        exit;

      { only the extended size is handled }
      if taicpu(p).opsize <> S_FX then
        exit;
      if not (getNextInstruction(loadIns, exitIns) and
              (exitIns.typ = ait_instruction) and
              IsExitCode(exitIns)) then
        exit;
      if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
        exit;
      if assigned(current_procinfo.procdef.funcretsym) and
         (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
        exit;
      if taicpu(p).oper[0]^.ref^.index <> NR_NO then
        exit;

      asml.remove(p);
      asml.remove(loadIns);
      p.free;
      loadIns.free;
      p := exitIns;
      removeLastDeallocForFuncRes(p);
      Result := true;
    end;
  { Maps a register-related tinschange value to the super register it names.

    Values up to CH_REDI index the table directly. Values in the following
    groups (up to CH_WEDI, CH_RWEDI and CH_MEDI) are first rebased by
    subtracting the ordinal of the last member of the preceding group.
    Anything above CH_MEDI raises internal error 2016041901. }
  function tch2reg(ch: tinschange): tsuperregister;
    const
      ch2reg: array[CH_REAX..CH_REDI] of tsuperregister = (RS_EAX,RS_ECX,RS_EDX,RS_EBX,RS_ESP,RS_EBP,RS_ESI,RS_EDI);
    var
      groupEnd: tinschange;
    begin
      if ch <= CH_REDI then
        begin
          Result := ch2reg[ch];
          exit;
        end;

      { pick the last element of the group preceding the one ch belongs to }
      if ch <= CH_WEDI then
        groupEnd := CH_REDI
      else if ch <= CH_RWEDI then
        groupEnd := CH_WEDI
      else if ch <= CH_MEDI then
        groupEnd := CH_RWEDI
      else
        begin
          InternalError(2016041901);
          exit;
        end;

      Result := ch2reg[tinschange(ord(ch) - ord(groupEnd))];
    end;
  { Returns true when reg is an integer register whose super register lies in
    the range RS_EAX..RS_EBX. The sub register (size) of reg is not examined. }
  function isgp32reg(reg: TRegister): boolean;
    var
      sup: tsuperregister;
    begin
      Result := false;
      if getregtype(reg) <> R_INTREGISTER then
        exit;
      sup := getsupreg(reg);
{$push}{$warnings off}
      Result := (sup >= RS_EAX) and (sup <= RS_EBX);
{$pop}
    end;
  { Override that answers "does hp load from reg?" by delegating to
    RegReadByInstruction. }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
    begin
      InstructionLoadsFromReg := RegReadByInstruction(reg, hp);
    end;
  { Returns true when hp is an instruction that reads reg.

    - Anything that is not an instruction: false.
    - call: always true.
    - imul: one operand form reads NR_EAX and operand 0; two and three operand
      forms read operands 0 and 1.
    - idiv, div, mul: read operand 0 plus the EAX and EDX super registers.
    - everything else: true when reg occurs in any memory reference, or when
      the insprop change list of the opcode marks a matching fixed register,
      operand or the flags as read/modified. }
  function TCpuAsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
    var
      ins: taicpu;
      idx: longint;
      change: tinschange;
      hit: boolean;
    begin
      Result := false;
      if hp.typ <> ait_instruction then
        exit;
      ins := taicpu(hp);
      case ins.opcode of
        A_CALL:
          Result := true;
        A_IMUL:
          case ins.ops of
            1:
              Result := (reg = NR_EAX) or RegInOp(reg, ins.oper[0]^);
            2,3:
              Result := RegInOp(reg, ins.oper[0]^) or
                        RegInOp(reg, ins.oper[1]^);
          end;
        A_IDIV,A_DIV,A_MUL:
          Result := RegInOp(reg, ins.oper[0]^) or
                    (getsupreg(reg) in [RS_EAX,RS_EDX]);
        else
          begin
            { a register used to form an address is always read }
            for idx := 0 to ins.ops-1 do
              if (ins.oper[idx]^.typ = top_ref) and
                 RegInRef(reg, ins.oper[idx]^.ref^) then
                begin
                  Result := true;
                  exit;
                end;

            { consult the per-opcode change descriptions }
            for idx := 1 to maxinschanges do
              begin
                change := insprop[ins.opcode].ch[idx];
                hit := false;
                case change of
                  CH_REAX..CH_REDI,CH_RWEAX..CH_MEDI:
                    hit := getsupreg(reg) = tch2reg(change);
                  CH_RWOP1,CH_ROP1,CH_MOP1:
                    hit := RegInOp(reg, ins.oper[0]^);
                  Ch_RWOP2,Ch_ROP2,Ch_MOP2:
                    hit := RegInOp(reg, ins.oper[1]^);
                  Ch_RWOP3,Ch_ROP3,Ch_MOP3:
                    hit := RegInOp(reg, ins.oper[2]^);
                  Ch_RFlags,Ch_RWFlags:
                    hit := reg = NR_DEFAULTFLAGS;
                end;
                if hit then
                  begin
                    Result := true;
                    exit;
                  end;
              end;
          end;
      end;
    end;
  { Returns true when at least one operand of p is a memory reference whose
    segment register is set (different from NR_NO). }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      opIdx: longint;
    begin
      Result := false;
      for opIdx := 0 to p.opercnt-1 do
        if (p.oper[opIdx]^.typ = top_ref) and
           (p.oper[opIdx]^.ref^.segment <> NR_NO) then
          begin
            Result := true;
            break;
          end;
    end;
  { Returns true when p has to be treated as reading the flags:
      - an instruction whose InsProp change list contains Ch_RFlags,
        Ch_RWFlags or Ch_All,
      - any label.
    Every other item yields false. }
  function InstrReadsFlags(p: tai): boolean;
    var
      slot: longint;
    begin
      Result := false;
      case p.typ of
        ait_instruction:
          for slot := 1 to maxinschanges do
            if InsProp[taicpu(p).opcode].Ch[slot] in [Ch_RFlags,Ch_RWFlags,Ch_All] then
              begin
                Result := true;
                exit;
              end;
        ait_label:
          Result := true;
      end;
    end;
  { Pre-pass over the items BlockStart..BlockEnd, run before the main peephole
    passes. Instructions that carry a segment override are skipped. For the
    remaining instructions the following rewrites are applied:

      imul  - "imul 1, reg" is removed, "imul 1, reg1, reg2" becomes
              "mov reg1, reg2"; multiplications by 3, 5 and 9 become a single
              lea; multiplications by 6, 10 and 12 become a two instruction
              lea/add sequence, but only when optimizing for cpu_386 or
              lower. None of the lea rewrites is done when optimizing for
              size or when the next instruction is a jo/jno (lea does not set
              the overflow flag).
      shr/sar followed by shl on the same operand with constant counts -
              replaced by a shorter shift and/or an "and" with a mask.
      xor reg,reg - temporarily turned into "mov 0, reg".

    Fix compared to the previous revision: the three operand form of
    "imul 6, reg1, reg2" emitted its second lea with reg1 as destination,
    which clobbered the source and left reg2 at 2*reg1. It now writes reg2,
    as the in-code description and the parallel "imul 12" case require.
    NOTE(review): like the other two-instruction three operand rewrites, this
    sequence assumes reg1 <> reg2 - confirm callers never produce
    "imul 6, reg, reg" in three operand form. }
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
    var
      p,hp1: tai;
      l: aint;
      tmpRef: treference;
    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                { leave anything with a segment prefix alone }
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_IMUL:
                    {changes certain "imul const, %reg"'s to lea sequences}
                    begin
                      if (taicpu(p).oper[0]^.typ = Top_Const) and
                         (taicpu(p).oper[1]^.typ = Top_Reg) and
                         (taicpu(p).opsize = S_L) then
                        if (taicpu(p).oper[0]^.val = 1) then
                          if (taicpu(p).ops = 2) then
                            {remove "imul $1, reg"}
                            begin
                              hp1 := tai(p.Next);
                              asml.remove(p);
                              p.free;
                              p := hp1;
                              continue;
                            end
                          else
                            {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                            begin
                              hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                              InsertLLItem(p.previous, p.next, hp1);
                              p.free;
                              p := hp1;
                            end
                        else if
                         ((taicpu(p).ops <= 2) or
                          (taicpu(p).oper[2]^.typ = Top_Reg)) and
                         (taicpu(p).oper[0]^.val <= 12) and
                         not(cs_opt_size in current_settings.optimizerswitches) and
                         { lea does not update the overflow flag, so keep the
                           imul when a jo/jno consumes it }
                         (not(GetNextInstruction(p, hp1)) or
                           {GetNextInstruction(p, hp1) and}
                           not((tai(hp1).typ = ait_instruction) and
                               ((taicpu(hp1).opcode=A_Jcc) and
                                (taicpu(hp1).condition in [C_O,C_NO])))) then
                          begin
                            reference_reset(tmpref,1);
                            case taicpu(p).oper[0]^.val Of
                              3: begin
                                 {imul 3, reg1, reg2 to
                                    lea (reg1,reg1,2), reg2
                                  imul 3, reg1 to
                                    lea (reg1,reg1,2), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 2;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              5: begin
                                 {imul 5, reg1, reg2 to
                                    lea (reg1,reg1,4), reg2
                                  imul 5, reg1 to
                                    lea (reg1,reg1,4), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 4;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              6: begin
                                 {imul 6, reg1, reg2 to
                                    lea (,reg1,2), reg2
                                    lea (reg2,reg1,4), reg2
                                  imul 6, reg1 to
                                    lea (reg1,reg1,2), reg1
                                    add reg1, reg1}
                                   if (current_settings.optimizecputype <= cpu_386) then
                                     begin
                                       { build the SECOND instruction first and
                                         put it behind p }
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := taicpu(p).oper[2]^.reg;
                                           TmpRef.ScaleFactor := 4;
                                           { destination must be reg2: the result
                                             2*reg1 + 4*reg1 belongs in reg2 and
                                             reg1 must stay intact }
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                             taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(p, p.next, hp1);
                                       { now the FIRST instruction, which takes
                                         the place of p }
                                       reference_reset(tmpref,2);
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       TmpRef.ScaleFactor := 2;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := NR_NO;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                                             taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           TmpRef.base := taicpu(p).oper[1]^.reg;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(p.previous, p.next, hp1);
                                       p.free;
                                       { continue behind the two new instructions }
                                       p := tai(hp1.next);
                                     end
                                 end;
                              9: begin
                                 {imul 9, reg1, reg2 to
                                    lea (reg1,reg1,8), reg2
                                  imul 9, reg1 to
                                    lea (reg1,reg1,8), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 8;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              10: begin
                                 {imul 10, reg1, reg2 to
                                    lea (reg1,reg1,4), reg2
                                    add reg2, reg2
                                  imul 10, reg1 to
                                    lea (reg1,reg1,4), reg1
                                    add reg1, reg1}
                                   if (current_settings.optimizecputype <= cpu_386) then
                                     begin
                                       { trailing add goes in behind p first }
                                       if (taicpu(p).ops = 3) then
                                         hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                           taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                       else
                                         hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                           taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                       InsertLLItem(p, p.next, hp1);
                                       TmpRef.base := taicpu(p).oper[1]^.reg;
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       TmpRef.ScaleFactor := 4;
                                       if (taicpu(p).ops = 3) then
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
                                       else
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                       InsertLLItem(p.previous, p.next, hp1);
                                       p.free;
                                       p := tai(hp1.next);
                                     end
                                 end;
                              12: begin
                                 {imul 12, reg1, reg2 to
                                    lea (,reg1,4), reg2
                                    lea (reg2,reg1,8), reg2
                                  imul 12, reg1 to
                                    lea (reg1,reg1,2), reg1
                                    lea (,reg1,4), reg1}
                                   if (current_settings.optimizecputype <= cpu_386)
                                     then
                                     begin
                                       { second instruction, inserted behind p }
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := taicpu(p).oper[2]^.reg;
                                           TmpRef.ScaleFactor := 8;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           TmpRef.base := NR_NO;
                                           TmpRef.ScaleFactor := 4;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(p, p.next, hp1);
                                       { first instruction, replaces p }
                                       reference_reset(tmpref,2);
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := NR_NO;
                                           TmpRef.ScaleFactor := 4;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           TmpRef.base := taicpu(p).oper[1]^.reg;
                                           TmpRef.ScaleFactor := 2;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(p.previous, p.next, hp1);
                                       p.free;
                                       p := tai(hp1.next);
                                     end
                                 end
                            end;
                          end;
                    end;
                  A_SAR, A_SHR:
                    {changes the code sequence
                       shr/sar const1, x
                       shl     const2, x
                     to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
                    begin
                      if GetNextInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_SHL) and
                         (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(hp1).oper[0]^.typ = top_const) and
                         (taicpu(hp1).opsize = taicpu(p).opsize) and
                         (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                         OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                           not(cs_opt_size in current_settings.optimizerswitches) then
                          { shr/sar const1, %reg
                            shl     const2, %reg
                            with const1 > const2:
                            keep a right shift by the difference, then clear
                            the low const2 bits with an "and" }
                          begin
                            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                            taicpu(hp1).opcode := A_AND;
                            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
                            case taicpu(p).opsize Of
                              S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
                              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
                              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
                            end;
                          end
                        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                                not(cs_opt_size in current_settings.optimizerswitches) then
                          { shr/sar const1, %reg
                            shl     const2, %reg
                            with const1 < const2:
                            clear the low const1 bits with an "and", then
                            shift left by the difference }
                          begin
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                            taicpu(p).opcode := A_AND;
                            l := (1 shl (taicpu(p).oper[0]^.val))-1;
                            case taicpu(p).opsize Of
                              S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                              S_B: taicpu(p).loadConst(0,l Xor $ff);
                              S_W: taicpu(p).loadConst(0,l Xor $ffff);
                            end;
                          end
                        else
                          { shr/sar const1, %reg
                            shl     const2, %reg
                            with const1 = const2:
                            a single "and" that clears the low bits suffices }
                          if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                            begin
                              taicpu(p).opcode := A_AND;
                              l := (1 shl (taicpu(p).oper[0]^.val))-1;
                              case taicpu(p).opsize Of
                                S_B: taicpu(p).loadConst(0,l Xor $ff);
                                S_W: taicpu(p).loadConst(0,l Xor $ffff);
                                S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                              end;
                              asml.remove(hp1);
                              hp1.free;
                            end;
                    end;
                  A_XOR:
                    if (taicpu(p).oper[0]^.typ = top_reg) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                      { temporarily change this to 'mov reg,0' to make it easier }
                      { for the CSE. Will be changed back in pass 2              }
                      begin
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).loadConst(0,0);
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Starting behind hp, steps over every item whose type is in SkipInstr or is
    a label or an alignment.

    Returns true and stores the first item that is not skipped in hp2. When
    the list ends first, returns false and hp2 receives the last item that
    was reached. }
  function SkipLabels(hp: tai; var hp2: tai): boolean;
    var
      follower: tai;
    begin
      follower := tai(hp.next);
      while assigned(follower) and
            (follower.typ in SkipInstr + [ait_label,ait_align]) do
        begin
          hp := follower;
          follower := tai(hp.next);
        end;

      SkipLabels := assigned(follower);
      if assigned(follower) then
        hp2 := follower
      else
        hp2 := hp;
    end;
  555. { First pass of peephole optimizations }
  556. procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
  { Debugging aid: prints the line 'Ok' and always returns True. }
  function WriteOk : Boolean;
    begin
      WriteOk := True;
      writeln('Ok');
    end;
  562. var
  563. l : longint;
  564. p,hp1,hp2 : tai;
  565. hp3,hp4: tai;
  566. v:aint;
  567. TmpRef: TReference;
  568. TmpUsedRegs: TAllUsedRegs;
  569. TmpBool1, TmpBool2: Boolean;
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
    {traces successive jumps to their final destination and sets it, e.g.
       je l1                je l3
       <code>               <code>
       l1:       becomes    l1:
       je l2                je l3
       <code>               <code>
       l2:                  l2:
       jmp l3               jmp l3

     hp    : the jump whose target symbol may get rewritten
     level : how deep the chain has been followed already; recursion stops
             (result false) once it exceeds 20, to avoid endless loops with
             constructs such as "l5: ; jmp l5"

     Returns false when the chain could not be followed to its end (depth
     limit, direct self loop or a failing recursive step) and true otherwise,
     including the case where the target label is unknown and nothing is
     changed.

     Label reference counts are kept balanced: whenever hp is pointed at a
     different label the old label is dereferenced and the new one
     referenced. (The "next label reused" branch used to skip the decrement
     of the old label.) }
    var
      p1, p2: tai;
      l: tasmlabel;

    { Steps over skippable items and alignments behind hp; when the next item
      is a label, returns true and hands back its symbol in l. }
    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
      begin
        FindAnyLabel := false;
        while assigned(hp.next) and
              (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
          hp := tai(hp.next);
        if assigned(hp.next) and
           (tai(hp.next).typ = ait_label) then
          begin
            FindAnyLabel := true;
            l := tai_label(hp.next).labsym;
          end
      end;

    begin
      GetfinalDestination := false;
      if level > 20 then
        exit;
      p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
      if assigned(p1) then
        begin
          { move to the first real instruction behind the target label }
          SkipLabels(p1,p1);
          if (tai(p1).typ = ait_instruction) and
             (taicpu(p1).is_jmp) then
            if { the next instruction after the label where the jump hp arrives}
               { is unconditional or of the same type as hp, so continue       }
               (taicpu(p1).condition in [C_None,hp.condition]) or
               { the next instruction after the label where the jump hp arrives}
               { is the opposite of hp (so this one is never taken), but after }
               { that one there is a branch that will be taken, so perform a   }
               { little hack: set p1 equal to this instruction (that's what the}
               { last SkipLabels is for, only works with short bool evaluation)}
               ((taicpu(p1).condition = inverse_cond(hp.condition)) and
                SkipLabels(p1,p2) and
                (p2.typ = ait_instruction) and
                (taicpu(p2).is_jmp) and
                (taicpu(p2).condition in [C_None,hp.condition]) and
                SkipLabels(p1,p1)) then
              begin
                { quick check for loops of the form "l5: ; jmp l5 }
                if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                     tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                  exit;
                if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                  exit;
                { take over the (already resolved) target of p1 }
                tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
                tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
              end
            else
              if (taicpu(p1).condition = inverse_cond(hp.condition)) then
                { p1 is never taken when reached through hp, so hp can jump
                  straight behind it }
                if not FindAnyLabel(p1,l) then
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(p1,p1.next,tai_comment.Create(
                      strpnew('previous label inserted')));
  {$endif finaldestdebug}
                    current_asmdata.getjumplabel(l);
                    insertllitem(p1,p1.next,tai_label.Create(l));
                    tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    l.increfs;
                    { this won't work, since the new label isn't in the labeltable }
                    { so it will fail the rangecheck. Labeltable should become a   }
                    { hashtable to support this:                                   }
                    { GetFinalDestination(asml, hp);                               }
                  end
                else
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(p1,p1.next,tai_comment.Create(
                      strpnew('next label reused')));
  {$endif finaldestdebug}
                    { reference the new label first, then release the old one,
                      so the count stays correct even if both are the same }
                    l.increfs;
                    tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    if not GetFinalDestination(asml, hp,succ(level)) then
                      exit;
                  end;
        end;
      GetFinalDestination := true;
    end;
  { Folds the instruction preceding p into p, where p is expected to be a
    "sub const, reg" (operand 0 constant, operand 1 register).

      dec reg        before p : p's constant is increased by one
      sub c, reg     before p : c is added to p's constant
      add c, reg     before p : c is subtracted from p's constant; if the
                                constant becomes 0, p itself is removed too

    In every handled case the preceding instruction is deleted. The result is
    true only when p was removed; p then refers to the previous instruction,
    or to the following item when there is no previous instruction.
    Uses hp1 of the enclosing procedure as scratch variable. }
  function DoSubAddOpt(var p: tai): Boolean;
    var
      prevWasAdd: boolean;
    begin
      Result := False;
      if not GetLastInstruction(p, hp1) then
        exit;
      if hp1.typ <> ait_instruction then
        exit;
      if taicpu(hp1).opsize <> taicpu(p).opsize then
        exit;

      case taicpu(hp1).opcode of
        A_DEC:
          if (taicpu(hp1).oper[0]^.typ = top_reg) and
             (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
              asml.remove(hp1);
              hp1.free;
            end;
        A_SUB,A_ADD:
          if (taicpu(hp1).oper[0]^.typ = top_const) and
             (taicpu(hp1).oper[1]^.typ = top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              { remember the kind of instruction before it is freed }
              prevWasAdd := taicpu(hp1).opcode = A_ADD;
              if prevWasAdd then
                taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val)
              else
                taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
              asml.remove(hp1);
              hp1.free;
              { add and sub cancelled each other completely }
              if prevWasAdd and (taicpu(p).oper[0]^.val = 0) then
                begin
                  hp1 := tai(p.next);
                  asml.remove(p);
                  p.free;
                  if not GetLastInstruction(hp1, p) then
                    p := hp1;
                  Result := True;
                end;
            end;
      end;
    end;
  707. begin
  708. p := BlockStart;
  709. ClearUsedRegs;
  710. while (p <> BlockEnd) Do
  711. begin
  712. UpDateUsedRegs(UsedRegs, tai(p.next));
  713. case p.Typ Of
  714. ait_instruction:
  715. begin
  716. current_filepos:=taicpu(p).fileinfo;
  717. if InsContainsSegRef(taicpu(p)) then
  718. begin
  719. p := tai(p.next);
  720. continue;
  721. end;
  722. { Handle Jmp Optimizations }
  723. if taicpu(p).is_jmp then
  724. begin
  725. {the following if-block removes all code between a jmp and the next label,
  726. because it can never be executed}
  727. if (taicpu(p).opcode = A_JMP) then
  728. begin
  729. hp2:=p;
  730. while GetNextInstruction(hp2, hp1) and
  731. (hp1.typ <> ait_label) do
  732. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  733. begin
  734. { don't kill start/end of assembler block,
  735. no-line-info-start/end etc }
  736. if hp1.typ<>ait_marker then
  737. begin
  738. asml.remove(hp1);
  739. hp1.free;
  740. end
  741. else
  742. hp2:=hp1;
  743. end
  744. else break;
  745. end;
  746. { remove jumps to a label coming right after them }
  747. if GetNextInstruction(p, hp1) then
  748. begin
  749. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  750. { TODO: FIXME removing the first instruction fails}
  751. (p<>blockstart) then
  752. begin
  753. hp2:=tai(hp1.next);
  754. asml.remove(p);
  755. p.free;
  756. p:=hp2;
  757. continue;
  758. end
  759. else
  760. begin
  761. if hp1.typ = ait_label then
  762. SkipLabels(hp1,hp1);
  763. if (tai(hp1).typ=ait_instruction) and
  764. (taicpu(hp1).opcode=A_JMP) and
  765. GetNextInstruction(hp1, hp2) and
  766. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  767. begin
  768. if taicpu(p).opcode=A_Jcc then
  769. begin
  770. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  771. tai_label(hp2).labsym.decrefs;
  772. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  773. { when free'ing hp1, the ref. isn't decresed, so we don't
  774. increase it (FK)
  775. taicpu(p).oper[0]^.ref^.symbol.increfs;
  776. }
  777. asml.remove(hp1);
  778. hp1.free;
  779. GetFinalDestination(asml, taicpu(p),0);
  780. end
  781. else
  782. begin
  783. GetFinalDestination(asml, taicpu(p),0);
  784. p:=tai(p.next);
  785. continue;
  786. end;
  787. end
  788. else
  789. GetFinalDestination(asml, taicpu(p),0);
  790. end;
  791. end;
  792. end
  793. else
  794. { All other optimizes }
  795. begin
  796. for l := 0 to taicpu(p).ops-1 Do
  797. if (taicpu(p).oper[l]^.typ = top_ref) then
  798. With taicpu(p).oper[l]^.ref^ Do
  799. begin
  800. if (base = NR_NO) and
  801. (index <> NR_NO) and
  802. (scalefactor in [0,1]) then
  803. begin
  804. base := index;
  805. index := NR_NO
  806. end
  807. end;
  808. case taicpu(p).opcode Of
  809. A_AND:
  810. begin
  811. if (taicpu(p).oper[0]^.typ = top_const) and
  812. (taicpu(p).oper[1]^.typ = top_reg) and
  813. GetNextInstruction(p, hp1) and
  814. (tai(hp1).typ = ait_instruction) and
  815. (taicpu(hp1).opcode = A_AND) and
  816. (taicpu(hp1).oper[0]^.typ = top_const) and
  817. (taicpu(hp1).oper[1]^.typ = top_reg) and
  818. (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  819. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) then
  820. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  821. begin
  822. taicpu(hp1).loadConst(0,taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  823. asml.remove(p);
  824. p.free;
  825. p:=hp1;
  826. end
  827. else
  828. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  829. jump, but only if it's a conditional jump (PFV) }
  830. if (taicpu(p).oper[1]^.typ = top_reg) and
  831. GetNextInstruction(p, hp1) and
  832. (hp1.typ = ait_instruction) and
  833. (taicpu(hp1).is_jmp) and
  834. (taicpu(hp1).opcode<>A_JMP) and
  835. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  836. taicpu(p).opcode := A_TEST;
  837. end;
  838. A_CMP:
  839. begin
  840. { cmp register,$8000 neg register
  841. je target --> jo target
  842. .... only if register is deallocated before jump.}
  843. case Taicpu(p).opsize of
  844. S_B: v:=$80;
  845. S_W: v:=$8000;
  846. S_L: v:=aint($80000000);
  847. else
  848. internalerror(2013112905);
  849. end;
  850. if (taicpu(p).oper[0]^.typ=Top_const) and
  851. (taicpu(p).oper[0]^.val=v) and
  852. (Taicpu(p).oper[1]^.typ=top_reg) and
  853. GetNextInstruction(p, hp1) and
  854. (hp1.typ=ait_instruction) and
  855. (taicpu(hp1).opcode=A_Jcc) and
  856. (Taicpu(hp1).condition in [C_E,C_NE]) and
  857. not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
  858. begin
  859. Taicpu(p).opcode:=A_NEG;
  860. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  861. Taicpu(p).clearop(1);
  862. Taicpu(p).ops:=1;
  863. if Taicpu(hp1).condition=C_E then
  864. Taicpu(hp1).condition:=C_O
  865. else
  866. Taicpu(hp1).condition:=C_NO;
  867. continue;
  868. end;
  869. {
  870. @@2: @@2:
  871. .... ....
  872. cmp operand1,0
  873. jle/jbe @@1
  874. dec operand1 --> sub operand1,1
  875. jmp @@2 jge/jae @@2
  876. @@1: @@1:
  877. ... ....}
  878. if (taicpu(p).oper[0]^.typ = top_const) and
  879. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  880. (taicpu(p).oper[0]^.val = 0) and
  881. GetNextInstruction(p, hp1) and
  882. (hp1.typ = ait_instruction) and
  883. (taicpu(hp1).is_jmp) and
  884. (taicpu(hp1).opcode=A_Jcc) and
  885. (taicpu(hp1).condition in [C_LE,C_BE]) and
  886. GetNextInstruction(hp1,hp2) and
  887. (hp2.typ = ait_instruction) and
  888. (taicpu(hp2).opcode = A_DEC) and
  889. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  890. GetNextInstruction(hp2, hp3) and
  891. (hp3.typ = ait_instruction) and
  892. (taicpu(hp3).is_jmp) and
  893. (taicpu(hp3).opcode = A_JMP) and
  894. GetNextInstruction(hp3, hp4) and
  895. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  896. begin
  897. taicpu(hp2).Opcode := A_SUB;
  898. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  899. taicpu(hp2).loadConst(0,1);
  900. taicpu(hp2).ops:=2;
  901. taicpu(hp3).Opcode := A_Jcc;
  902. case taicpu(hp1).condition of
  903. C_LE: taicpu(hp3).condition := C_GE;
  904. C_BE: taicpu(hp3).condition := C_AE;
  905. end;
  906. asml.remove(p);
  907. asml.remove(hp1);
  908. p.free;
  909. hp1.free;
  910. p := hp2;
  911. continue;
  912. end
  913. end;
  914. A_FLD:
  915. begin
  916. if (taicpu(p).oper[0]^.typ = top_reg) and
  917. GetNextInstruction(p, hp1) and
  918. (hp1.typ = Ait_Instruction) and
  919. (taicpu(hp1).oper[0]^.typ = top_reg) and
  920. (taicpu(hp1).oper[1]^.typ = top_reg) and
  921. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  922. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  923. { change to
  924. fld reg fxxx reg,st
  925. fxxxp st, st1 (hp1)
  926. Remark: non commutative operations must be reversed!
  927. }
  928. begin
  929. case taicpu(hp1).opcode Of
  930. A_FMULP,A_FADDP,
  931. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  932. begin
  933. case taicpu(hp1).opcode Of
  934. A_FADDP: taicpu(hp1).opcode := A_FADD;
  935. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  936. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  937. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  938. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  939. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  940. end;
  941. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  942. taicpu(hp1).oper[1]^.reg := NR_ST;
  943. asml.remove(p);
  944. p.free;
  945. p := hp1;
  946. continue;
  947. end;
  948. end;
  949. end
  950. else
  951. if (taicpu(p).oper[0]^.typ = top_ref) and
  952. GetNextInstruction(p, hp2) and
  953. (hp2.typ = Ait_Instruction) and
  954. (taicpu(hp2).ops = 2) and
  955. (taicpu(hp2).oper[0]^.typ = top_reg) and
  956. (taicpu(hp2).oper[1]^.typ = top_reg) and
  957. (taicpu(p).opsize in [S_FS, S_FL]) and
  958. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  959. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  960. if GetLastInstruction(p, hp1) and
  961. (hp1.typ = Ait_Instruction) and
  962. ((taicpu(hp1).opcode = A_FLD) or
  963. (taicpu(hp1).opcode = A_FST)) and
  964. (taicpu(hp1).opsize = taicpu(p).opsize) and
  965. (taicpu(hp1).oper[0]^.typ = top_ref) and
  966. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  967. if ((taicpu(hp2).opcode = A_FMULP) or
  968. (taicpu(hp2).opcode = A_FADDP)) then
  969. { change to
  970. fld/fst mem1 (hp1) fld/fst mem1
  971. fld mem1 (p) fadd/
  972. faddp/ fmul st, st
  973. fmulp st, st1 (hp2) }
  974. begin
  975. asml.remove(p);
  976. p.free;
  977. p := hp1;
  978. if (taicpu(hp2).opcode = A_FADDP) then
  979. taicpu(hp2).opcode := A_FADD
  980. else
  981. taicpu(hp2).opcode := A_FMUL;
  982. taicpu(hp2).oper[1]^.reg := NR_ST;
  983. end
  984. else
  985. { change to
  986. fld/fst mem1 (hp1) fld/fst mem1
  987. fld mem1 (p) fld st}
  988. begin
  989. taicpu(p).changeopsize(S_FL);
  990. taicpu(p).loadreg(0,NR_ST);
  991. end
  992. else
  993. begin
  994. case taicpu(hp2).opcode Of
  995. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  996. { change to
  997. fld/fst mem1 (hp1) fld/fst mem1
  998. fld mem2 (p) fxxx mem2
  999. fxxxp st, st1 (hp2) }
  1000. begin
  1001. case taicpu(hp2).opcode Of
  1002. A_FADDP: taicpu(p).opcode := A_FADD;
  1003. A_FMULP: taicpu(p).opcode := A_FMUL;
  1004. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  1005. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  1006. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  1007. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  1008. end;
  1009. asml.remove(hp2);
  1010. hp2.free;
  1011. end
  1012. end
  1013. end
  1014. end;
  1015. A_FSTP,A_FISTP:
  1016. if doFpuLoadStoreOpt(p) then
  1017. continue;
  1018. A_LEA:
  1019. begin
  1020. {removes seg register prefixes from LEA operations, as they
  1021. don't do anything}
  1022. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  1023. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  1024. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1025. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  1026. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1027. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  1028. begin
  1029. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  1030. (taicpu(p).oper[0]^.ref^.offset = 0) then
  1031. begin
  1032. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  1033. taicpu(p).oper[1]^.reg);
  1034. InsertLLItem(p.previous,p.next, hp1);
  1035. p.free;
  1036. p := hp1;
  1037. continue;
  1038. end
  1039. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  1040. begin
  1041. hp1 := tai(p.Next);
  1042. asml.remove(p);
  1043. p.free;
  1044. p := hp1;
  1045. continue;
  1046. end
  1047. { continue to use lea to adjust the stack pointer,
  1048. it is the recommended way, but only if not optimizing for size }
  1049. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  1050. (cs_opt_size in current_settings.optimizerswitches) then
  1051. with taicpu(p).oper[0]^.ref^ do
  1052. if (base = taicpu(p).oper[1]^.reg) then
  1053. begin
  1054. l := offset;
  1055. if (l=1) and UseIncDec then
  1056. begin
  1057. taicpu(p).opcode := A_INC;
  1058. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1059. taicpu(p).ops := 1
  1060. end
  1061. else if (l=-1) and UseIncDec then
  1062. begin
  1063. taicpu(p).opcode := A_DEC;
  1064. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1065. taicpu(p).ops := 1;
  1066. end
  1067. else
  1068. begin
  1069. if (l<0) and (l<>-2147483648) then
  1070. begin
  1071. taicpu(p).opcode := A_SUB;
  1072. taicpu(p).loadConst(0,-l);
  1073. end
  1074. else
  1075. begin
  1076. taicpu(p).opcode := A_ADD;
  1077. taicpu(p).loadConst(0,l);
  1078. end;
  1079. end;
  1080. end;
  1081. end
  1082. (*
  1083. This is unsafe, lea doesn't modify the flags but "add"
  1084. does. This breaks webtbs/tw15694.pp. The above
  1085. transformations are also unsafe, but they don't seem to
  1086. be triggered by code that FPC generates (or that at
  1087. least does not occur in the tests...). This needs to be
  1088. fixed by checking for the liveness of the flags register.
  1089. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1090. begin
  1091. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1092. taicpu(p).oper[0]^.ref^.base);
  1093. InsertLLItem(asml,p.previous,p.next, hp1);
  1094. DebugMsg('Peephole Lea2AddBase done',hp1);
  1095. p.free;
  1096. p:=hp1;
  1097. continue;
  1098. end
  1099. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1100. begin
  1101. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1102. taicpu(p).oper[0]^.ref^.index);
  1103. InsertLLItem(asml,p.previous,p.next,hp1);
  1104. DebugMsg('Peephole Lea2AddIndex done',hp1);
  1105. p.free;
  1106. p:=hp1;
  1107. continue;
  1108. end
  1109. *)
  1110. end;
  1111. A_MOV:
  1112. begin
  1113. If OptPass1MOV(p) then
  1114. Continue;
  1115. end;
  1116. A_MOVSX,
  1117. A_MOVZX :
  1118. begin
  1119. if (taicpu(p).oper[1]^.typ = top_reg) and
  1120. GetNextInstruction(p,hp1) and
  1121. (hp1.typ = ait_instruction) and
  1122. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1123. (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
  1124. GetNextInstruction(hp1,hp2) and
  1125. MatchInstruction(hp2,A_MOV,[]) and
  1126. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1127. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  1128. (((taicpu(hp1).ops=2) and
  1129. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  1130. ((taicpu(hp1).ops=1) and
  1131. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  1132. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  1133. { change movsX/movzX reg/ref, reg2 }
  1134. { add/sub/or/... reg3/$const, reg2 }
  1135. { mov reg2 reg/ref }
  1136. { to add/sub/or/... reg3/$const, reg/ref }
  1137. begin
  1138. { by example:
  1139. movswl %si,%eax movswl %si,%eax p
  1140. decl %eax addl %edx,%eax hp1
  1141. movw %ax,%si movw %ax,%si hp2
  1142. ->
  1143. movswl %si,%eax movswl %si,%eax p
  1144. decw %eax addw %edx,%eax hp1
  1145. movw %ax,%si movw %ax,%si hp2
  1146. }
  1147. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1148. {
  1149. ->
  1150. movswl %si,%eax movswl %si,%eax p
  1151. decw %si addw %dx,%si hp1
  1152. movw %ax,%si movw %ax,%si hp2
  1153. }
  1154. case taicpu(hp1).ops of
  1155. 1:
  1156. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1157. 2:
  1158. begin
  1159. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1160. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1161. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1162. end;
  1163. else
  1164. internalerror(2008042701);
  1165. end;
  1166. {
  1167. ->
  1168. decw %si addw %dx,%si p
  1169. }
  1170. asml.remove(p);
  1171. asml.remove(hp2);
  1172. p.free;
  1173. hp2.free;
  1174. p := hp1
  1175. end
  1176. { removes superfluous And's after movzx's }
  1177. else if taicpu(p).opcode=A_MOVZX then
  1178. begin
  1179. if (taicpu(p).oper[1]^.typ = top_reg) and
  1180. GetNextInstruction(p, hp1) and
  1181. (tai(hp1).typ = ait_instruction) and
  1182. (taicpu(hp1).opcode = A_AND) and
  1183. (taicpu(hp1).oper[0]^.typ = top_const) and
  1184. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1185. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1186. case taicpu(p).opsize Of
  1187. S_BL, S_BW:
  1188. if (taicpu(hp1).oper[0]^.val = $ff) then
  1189. begin
  1190. asml.remove(hp1);
  1191. hp1.free;
  1192. end;
  1193. S_WL:
  1194. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1195. begin
  1196. asml.remove(hp1);
  1197. hp1.free;
  1198. end;
  1199. end;
  1200. {changes some movzx constructs to faster synonyms (all examples
  1201. are given with eax/ax, but are also valid for other registers)}
  1202. if (taicpu(p).oper[1]^.typ = top_reg) then
  1203. if (taicpu(p).oper[0]^.typ = top_reg) then
  1204. case taicpu(p).opsize of
  1205. S_BW:
  1206. begin
  1207. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1208. not(cs_opt_size in current_settings.optimizerswitches) then
  1209. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1210. begin
  1211. taicpu(p).opcode := A_AND;
  1212. taicpu(p).changeopsize(S_W);
  1213. taicpu(p).loadConst(0,$ff);
  1214. end
  1215. else if GetNextInstruction(p, hp1) and
  1216. (tai(hp1).typ = ait_instruction) and
  1217. (taicpu(hp1).opcode = A_AND) and
  1218. (taicpu(hp1).oper[0]^.typ = top_const) and
  1219. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1220. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1221. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1222. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1223. begin
  1224. taicpu(p).opcode := A_MOV;
  1225. taicpu(p).changeopsize(S_W);
  1226. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1227. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1228. end;
  1229. end;
  1230. S_BL:
  1231. begin
  1232. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1233. not(cs_opt_size in current_settings.optimizerswitches) then
  1234. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1235. begin
  1236. taicpu(p).opcode := A_AND;
  1237. taicpu(p).changeopsize(S_L);
  1238. taicpu(p).loadConst(0,$ff)
  1239. end
  1240. else if GetNextInstruction(p, hp1) and
  1241. (tai(hp1).typ = ait_instruction) and
  1242. (taicpu(hp1).opcode = A_AND) and
  1243. (taicpu(hp1).oper[0]^.typ = top_const) and
  1244. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1245. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1246. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1247. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1248. begin
  1249. taicpu(p).opcode := A_MOV;
  1250. taicpu(p).changeopsize(S_L);
  1251. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1252. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1253. end
  1254. end;
  1255. S_WL:
  1256. begin
  1257. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1258. not(cs_opt_size in current_settings.optimizerswitches) then
  1259. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1260. begin
  1261. taicpu(p).opcode := A_AND;
  1262. taicpu(p).changeopsize(S_L);
  1263. taicpu(p).loadConst(0,$ffff);
  1264. end
  1265. else if GetNextInstruction(p, hp1) and
  1266. (tai(hp1).typ = ait_instruction) and
  1267. (taicpu(hp1).opcode = A_AND) and
  1268. (taicpu(hp1).oper[0]^.typ = top_const) and
  1269. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1270. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1271. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1272. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1273. begin
  1274. taicpu(p).opcode := A_MOV;
  1275. taicpu(p).changeopsize(S_L);
  1276. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1277. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1278. end;
  1279. end;
  1280. end
  1281. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1282. begin
  1283. if GetNextInstruction(p, hp1) and
  1284. (tai(hp1).typ = ait_instruction) and
  1285. (taicpu(hp1).opcode = A_AND) and
  1286. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1287. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1288. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1289. begin
  1290. taicpu(p).opcode := A_MOV;
  1291. case taicpu(p).opsize Of
  1292. S_BL:
  1293. begin
  1294. taicpu(p).changeopsize(S_L);
  1295. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1296. end;
  1297. S_WL:
  1298. begin
  1299. taicpu(p).changeopsize(S_L);
  1300. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1301. end;
  1302. S_BW:
  1303. begin
  1304. taicpu(p).changeopsize(S_W);
  1305. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1306. end;
  1307. end;
  1308. end;
  1309. end;
  1310. end;
  1311. end;
  1312. (* should not be generated anymore by the current code generator
  1313. A_POP:
  1314. begin
  1315. if target_info.system=system_i386_go32v2 then
  1316. begin
  1317. { Transform a series of pop/pop/pop/push/push/push to }
  1318. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1319. { because I'm not sure whether they can cope with }
  1320. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1321. { such a problem when using esp as frame pointer (JM) }
  1322. if (taicpu(p).oper[0]^.typ = top_reg) then
  1323. begin
  1324. hp1 := p;
  1325. hp2 := p;
  1326. l := 0;
  1327. while getNextInstruction(hp1,hp1) and
  1328. (hp1.typ = ait_instruction) and
  1329. (taicpu(hp1).opcode = A_POP) and
  1330. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1331. begin
  1332. hp2 := hp1;
  1333. inc(l,4);
  1334. end;
  1335. getLastInstruction(p,hp3);
  1336. l1 := 0;
  1337. while (hp2 <> hp3) and
  1338. assigned(hp1) and
  1339. (hp1.typ = ait_instruction) and
  1340. (taicpu(hp1).opcode = A_PUSH) and
  1341. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1342. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1343. begin
  1344. { change it to a two op operation }
  1345. taicpu(hp2).oper[1]^.typ:=top_none;
  1346. taicpu(hp2).ops:=2;
  1347. taicpu(hp2).opcode := A_MOV;
  1348. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1349. reference_reset(tmpref);
  1350. tmpRef.base.enum:=R_INTREGISTER;
  1351. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1352. convert_register_to_enum(tmpref.base);
  1353. tmpRef.offset := l;
  1354. taicpu(hp2).loadRef(0,tmpRef);
  1355. hp4 := hp1;
  1356. getNextInstruction(hp1,hp1);
  1357. asml.remove(hp4);
  1358. hp4.free;
  1359. getLastInstruction(hp2,hp2);
  1360. dec(l,4);
  1361. inc(l1);
  1362. end;
  1363. if l <> -4 then
  1364. begin
  1365. inc(l,4);
  1366. for l1 := l1 downto 1 do
  1367. begin
  1368. getNextInstruction(hp2,hp2);
  1369. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1370. end
  1371. end
  1372. end
  1373. end
  1374. else
  1375. begin
  1376. if (taicpu(p).oper[0]^.typ = top_reg) and
  1377. GetNextInstruction(p, hp1) and
  1378. (tai(hp1).typ=ait_instruction) and
  1379. (taicpu(hp1).opcode=A_PUSH) and
  1380. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1381. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1382. begin
  1383. { change it to a two op operation }
  1384. taicpu(p).oper[1]^.typ:=top_none;
  1385. taicpu(p).ops:=2;
  1386. taicpu(p).opcode := A_MOV;
  1387. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1388. reference_reset(tmpref);
  1389. TmpRef.base.enum := R_ESP;
  1390. taicpu(p).loadRef(0,TmpRef);
  1391. asml.remove(hp1);
  1392. hp1.free;
  1393. end;
  1394. end;
  1395. end;
  1396. *)
  1397. A_PUSH:
  1398. begin
  1399. if (taicpu(p).opsize = S_W) and
  1400. (taicpu(p).oper[0]^.typ = Top_Const) and
  1401. GetNextInstruction(p, hp1) and
  1402. (tai(hp1).typ = ait_instruction) and
  1403. (taicpu(hp1).opcode = A_PUSH) and
  1404. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1405. (taicpu(hp1).opsize = S_W) then
  1406. begin
  1407. taicpu(p).changeopsize(S_L);
  1408. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1409. asml.remove(hp1);
  1410. hp1.free;
  1411. end;
  1412. end;
  1413. A_SHL, A_SAL:
  1414. begin
  1415. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1416. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1417. (taicpu(p).opsize = S_L) and
  1418. (taicpu(p).oper[0]^.val <= 3) then
  1419. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1420. begin
  1421. TmpBool1 := True; {should we check the next instruction?}
  1422. TmpBool2 := False; {have we found an add/sub which could be
  1423. integrated in the lea?}
  1424. reference_reset(tmpref,2);
  1425. TmpRef.index := taicpu(p).oper[1]^.reg;
  1426. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1427. while TmpBool1 and
  1428. GetNextInstruction(p, hp1) and
  1429. (tai(hp1).typ = ait_instruction) and
  1430. ((((taicpu(hp1).opcode = A_ADD) or
  1431. (taicpu(hp1).opcode = A_SUB)) and
  1432. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1433. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1434. (((taicpu(hp1).opcode = A_INC) or
  1435. (taicpu(hp1).opcode = A_DEC)) and
  1436. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1437. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1438. (not GetNextInstruction(hp1,hp2) or
  1439. not instrReadsFlags(hp2)) Do
  1440. begin
  1441. TmpBool1 := False;
  1442. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1443. begin
  1444. TmpBool1 := True;
  1445. TmpBool2 := True;
  1446. case taicpu(hp1).opcode of
  1447. A_ADD:
  1448. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1449. A_SUB:
  1450. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1451. end;
  1452. asml.remove(hp1);
  1453. hp1.free;
  1454. end
  1455. else
  1456. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1457. (((taicpu(hp1).opcode = A_ADD) and
  1458. (TmpRef.base = NR_NO)) or
  1459. (taicpu(hp1).opcode = A_INC) or
  1460. (taicpu(hp1).opcode = A_DEC)) then
  1461. begin
  1462. TmpBool1 := True;
  1463. TmpBool2 := True;
  1464. case taicpu(hp1).opcode of
  1465. A_ADD:
  1466. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1467. A_INC:
  1468. inc(TmpRef.offset);
  1469. A_DEC:
  1470. dec(TmpRef.offset);
  1471. end;
  1472. asml.remove(hp1);
  1473. hp1.free;
  1474. end;
  1475. end;
  1476. if TmpBool2 or
  1477. ((current_settings.optimizecputype < cpu_Pentium2) and
  1478. (taicpu(p).oper[0]^.val <= 3) and
  1479. not(cs_opt_size in current_settings.optimizerswitches)) then
  1480. begin
  1481. if not(TmpBool2) and
  1482. (taicpu(p).oper[0]^.val = 1) then
  1483. begin
  1484. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1485. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1486. end
  1487. else
  1488. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1489. taicpu(p).oper[1]^.reg);
  1490. InsertLLItem(p.previous, p.next, hp1);
  1491. p.free;
  1492. p := hp1;
  1493. end;
  1494. end
  1495. else
  1496. if (current_settings.optimizecputype < cpu_Pentium2) and
  1497. (taicpu(p).oper[0]^.typ = top_const) and
  1498. (taicpu(p).oper[1]^.typ = top_reg) then
  1499. if (taicpu(p).oper[0]^.val = 1) then
  1500. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1501. but faster on a 486, and pairable in both U and V pipes on the Pentium
  1502. (unlike shl, which is only pairable in the U pipe)}
  1503. begin
  1504. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1505. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1506. InsertLLItem(p.previous, p.next, hp1);
  1507. p.free;
  1508. p := hp1;
  1509. end
  1510. else if (taicpu(p).opsize = S_L) and
  1511. (taicpu(p).oper[0]^.val<= 3) then
  1512. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1513. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1514. begin
  1515. reference_reset(tmpref,2);
  1516. TmpRef.index := taicpu(p).oper[1]^.reg;
  1517. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1518. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1519. InsertLLItem(p.previous, p.next, hp1);
  1520. p.free;
  1521. p := hp1;
  1522. end
  1523. end;
  1524. A_SETcc :
  1525. { changes
  1526. setcc (funcres) setcc reg
  1527. movb (funcres), reg to leave/ret
  1528. leave/ret }
  1529. begin
  1530. if (taicpu(p).oper[0]^.typ = top_ref) and
  1531. GetNextInstruction(p, hp1) and
  1532. GetNextInstruction(hp1, hp2) and
  1533. IsExitCode(hp2) and
  1534. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1535. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1536. not(assigned(current_procinfo.procdef.funcretsym) and
  1537. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1538. (hp1.typ = ait_instruction) and
  1539. (taicpu(hp1).opcode = A_MOV) and
  1540. (taicpu(hp1).opsize = S_B) and
  1541. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1542. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1543. begin
  1544. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1545. asml.remove(hp1);
  1546. hp1.free;
  1547. end
  1548. end;
  1549. A_SUB:
  1550. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1551. { * change "sub/add const1, reg" or "dec reg" followed by
  1552. "sub const2, reg" to one "sub ..., reg" }
  1553. begin
  1554. if (taicpu(p).oper[0]^.typ = top_const) and
  1555. (taicpu(p).oper[1]^.typ = top_reg) then
  1556. if (taicpu(p).oper[0]^.val = 2) and
  1557. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1558. { Don't do the sub/push optimization if the sub }
  1559. { comes from setting up the stack frame (JM) }
  1560. (not getLastInstruction(p,hp1) or
  1561. (hp1.typ <> ait_instruction) or
  1562. (taicpu(hp1).opcode <> A_MOV) or
  1563. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1564. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1565. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1566. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1567. begin
  1568. hp1 := tai(p.next);
  1569. while Assigned(hp1) and
  1570. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1571. not RegReadByInstruction(NR_ESP,hp1) and
  1572. not RegModifiedByInstruction(NR_ESP,hp1) do
  1573. hp1 := tai(hp1.next);
  1574. if Assigned(hp1) and
  1575. (tai(hp1).typ = ait_instruction) and
  1576. (taicpu(hp1).opcode = A_PUSH) and
  1577. (taicpu(hp1).opsize = S_W) then
  1578. begin
  1579. taicpu(hp1).changeopsize(S_L);
  1580. if taicpu(hp1).oper[0]^.typ=top_reg then
  1581. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1582. hp1 := tai(p.next);
  1583. asml.remove(p);
  1584. p.free;
  1585. p := hp1;
  1586. continue
  1587. end;
  1588. if DoSubAddOpt(p) then
  1589. continue;
  1590. end
  1591. else if DoSubAddOpt(p) then
  1592. continue
  1593. end;
  1594. A_VMOVAPS,
  1595. A_VMOVAPD:
  1596. if OptPass1VMOVAP(p) then
  1597. continue;
  1598. A_VDIVSD,
  1599. A_VDIVSS,
  1600. A_VSUBSD,
  1601. A_VSUBSS,
  1602. A_VMULSD,
  1603. A_VMULSS,
  1604. A_VADDSD,
  1605. A_VADDSS:
  1606. if OptPass1VOP(p) then
  1607. continue;
  1608. end;
  1609. end; { if is_jmp }
  1610. end;
  1611. end;
  1612. updateUsedRegs(UsedRegs,p);
  1613. p:=tai(p.next);
  1614. end;
  1615. end;
  1616. procedure TCPUAsmOptimizer.PeepHoleOptPass2;
  { DebugMsg: tracing hook local to PeepHoleOptPass2.
    s : text of the message
    p : list entry the message refers to
    Without DEBUG_AOPTCPU the hook is an empty inline procedure; with it,
    the message is inserted into asml as a tai_comment directly before p. }
  {$ifndef DEBUG_AOPTCPU}
  procedure DebugMsg(const s: string;p : tai);inline;
    begin
      { tracing disabled: intentionally does nothing }
    end;
  {$else DEBUG_AOPTCPU}
  procedure DebugMsg(const s: string;p : tai);
    begin
      { put the message as a comment entry in front of p }
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
  {$endif DEBUG_AOPTCPU}
  { CanBeCMOV: tells whether list entry p is a mov that the jCC handling
    below may turn into a CMOVcc.
    p      : entry to inspect, may be nil
    result : true only if p is an instruction with opcode A_MOV, operand
             size S_L or S_W, and both operand 0 and operand 1 are
             registers; false in every other case (including p = nil). }
  function CanBeCMOV(p : tai) : boolean;
    begin
      CanBeCMOV:=false;
      { nothing to convert if there is no entry or it is not an instruction }
      if not(assigned(p)) or (p.typ<>ait_instruction) then
        exit;
      { only word/long sized plain movs are candidates }
      if (taicpu(p).opcode<>A_MOV) or
         not(taicpu(p).opsize in [S_L,S_W]) then
        exit;
      { we can't use cmov ref,reg because
        ref could be nil and cmov still throws an exception
        if ref=nil but the mov isn't done (FK)
        so the alternative
          or ((taicpu(p).oper[0]^.typ = top_ref) and
              (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
        stays disabled and the source operand has to be a register }
      CanBeCMOV:=(taicpu(p).oper[0]^.typ=top_reg) and
                 (taicpu(p).oper[1]^.typ=top_reg);
    end;
  1642. var
  1643. p,hp1,hp2,hp3: tai;
  1644. l : longint;
  1645. condition : tasmcond;
  1646. TmpUsedRegs: TAllUsedRegs;
  1647. carryadd_opcode: Tasmop;
  1648. begin
  1649. p := BlockStart;
  1650. ClearUsedRegs;
  1651. while (p <> BlockEnd) Do
  1652. begin
  1653. UpdateUsedRegs(UsedRegs, tai(p.next));
  1654. case p.Typ Of
  1655. Ait_Instruction:
  1656. begin
  1657. if InsContainsSegRef(taicpu(p)) then
  1658. begin
  1659. p := tai(p.next);
  1660. continue;
  1661. end;
  1662. case taicpu(p).opcode Of
  1663. A_Jcc:
  1664. begin
  1665. { jb @@1 cmc
  1666. inc/dec operand --> adc/sbb operand,0
  1667. @@1:
  1668. ... and ...
  1669. jnb @@1
  1670. inc/dec operand --> adc/sbb operand,0
  1671. @@1: }
  1672. if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
  1673. GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
  1674. (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
  1675. begin
  1676. carryadd_opcode:=A_NONE;
  1677. if Taicpu(p).condition in [C_NAE,C_B] then
  1678. begin
  1679. if Taicpu(hp1).opcode=A_INC then
  1680. carryadd_opcode:=A_ADC;
  1681. if Taicpu(hp1).opcode=A_DEC then
  1682. carryadd_opcode:=A_SBB;
  1683. if carryadd_opcode<>A_NONE then
  1684. begin
  1685. Taicpu(p).clearop(0);
  1686. Taicpu(p).ops:=0;
  1687. Taicpu(p).is_jmp:=false;
  1688. Taicpu(p).opcode:=A_CMC;
  1689. Taicpu(p).condition:=C_NONE;
  1690. Taicpu(hp1).ops:=2;
  1691. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1692. Taicpu(hp1).loadconst(0,0);
  1693. Taicpu(hp1).opcode:=carryadd_opcode;
  1694. continue;
  1695. end;
  1696. end;
  1697. if Taicpu(p).condition in [C_AE,C_NB] then
  1698. begin
  1699. if Taicpu(hp1).opcode=A_INC then
  1700. carryadd_opcode:=A_ADC;
  1701. if Taicpu(hp1).opcode=A_DEC then
  1702. carryadd_opcode:=A_SBB;
  1703. if carryadd_opcode<>A_NONE then
  1704. begin
  1705. asml.remove(p);
  1706. p.free;
  1707. Taicpu(hp1).ops:=2;
  1708. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1709. Taicpu(hp1).loadconst(0,0);
  1710. Taicpu(hp1).opcode:=carryadd_opcode;
  1711. p:=hp1;
  1712. continue;
  1713. end;
  1714. end;
  1715. end;
  1716. if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
  1717. begin
  1718. { check for
  1719. jCC xxx
  1720. <several movs>
  1721. xxx:
  1722. }
  1723. l:=0;
  1724. GetNextInstruction(p, hp1);
  1725. while assigned(hp1) and
  1726. CanBeCMOV(hp1) and
  1727. { stop on labels }
  1728. not(hp1.typ=ait_label) do
  1729. begin
  1730. inc(l);
  1731. GetNextInstruction(hp1,hp1);
  1732. end;
  1733. if assigned(hp1) then
  1734. begin
  1735. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1736. begin
  1737. if (l<=4) and (l>0) then
  1738. begin
  1739. condition:=inverse_cond(taicpu(p).condition);
  1740. hp2:=p;
  1741. GetNextInstruction(p,hp1);
  1742. p:=hp1;
  1743. repeat
  1744. taicpu(hp1).opcode:=A_CMOVcc;
  1745. taicpu(hp1).condition:=condition;
  1746. GetNextInstruction(hp1,hp1);
  1747. until not(assigned(hp1)) or
  1748. not(CanBeCMOV(hp1));
  1749. { wait with removing else GetNextInstruction could
  1750. ignore the label if it was the only usage in the
  1751. jump moved away }
  1752. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1753. asml.remove(hp2);
  1754. hp2.free;
  1755. continue;
  1756. end;
  1757. end
  1758. else
  1759. begin
  1760. { check further for
  1761. jCC xxx
  1762. <several movs 1>
  1763. jmp yyy
  1764. xxx:
  1765. <several movs 2>
  1766. yyy:
  1767. }
  1768. { hp2 points to jmp yyy }
  1769. hp2:=hp1;
  1770. { skip hp1 to xxx }
  1771. GetNextInstruction(hp1, hp1);
  1772. if assigned(hp2) and
  1773. assigned(hp1) and
  1774. (l<=3) and
  1775. (hp2.typ=ait_instruction) and
  1776. (taicpu(hp2).is_jmp) and
  1777. (taicpu(hp2).condition=C_None) and
  1778. { real label and jump, no further references to the
  1779. label are allowed }
  1780. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
  1781. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1782. begin
  1783. l:=0;
  1784. { skip hp1 to <several moves 2> }
  1785. GetNextInstruction(hp1, hp1);
  1786. while assigned(hp1) and
  1787. CanBeCMOV(hp1) do
  1788. begin
  1789. inc(l);
  1790. GetNextInstruction(hp1, hp1);
  1791. end;
  1792. { hp1 points to yyy: }
  1793. if assigned(hp1) and
  1794. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  1795. begin
  1796. condition:=inverse_cond(taicpu(p).condition);
  1797. GetNextInstruction(p,hp1);
  1798. hp3:=p;
  1799. p:=hp1;
  1800. repeat
  1801. taicpu(hp1).opcode:=A_CMOVcc;
  1802. taicpu(hp1).condition:=condition;
  1803. GetNextInstruction(hp1,hp1);
  1804. until not(assigned(hp1)) or
  1805. not(CanBeCMOV(hp1));
  1806. { hp2 is still at jmp yyy }
  1807. GetNextInstruction(hp2,hp1);
  1808. { hp1 is now at xxx: }
  1809. condition:=inverse_cond(condition);
  1810. GetNextInstruction(hp1,hp1);
  1811. { hp1 is now at <several movs 2> }
  1812. repeat
  1813. taicpu(hp1).opcode:=A_CMOVcc;
  1814. taicpu(hp1).condition:=condition;
  1815. GetNextInstruction(hp1,hp1);
  1816. until not(assigned(hp1)) or
  1817. not(CanBeCMOV(hp1));
  1818. {
  1819. asml.remove(hp1.next)
  1820. hp1.next.free;
  1821. asml.remove(hp1);
  1822. hp1.free;
  1823. }
  1824. { remove jCC }
  1825. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  1826. asml.remove(hp3);
  1827. hp3.free;
  1828. { remove jmp }
  1829. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1830. asml.remove(hp2);
  1831. hp2.free;
  1832. continue;
  1833. end;
  1834. end;
  1835. end;
  1836. end;
  1837. end;
  1838. end;
  1839. A_FSTP,A_FISTP:
  1840. if DoFpuLoadStoreOpt(p) then
  1841. continue;
  1842. A_IMUL:
  1843. begin
  1844. if (taicpu(p).ops >= 2) and
  1845. ((taicpu(p).oper[0]^.typ = top_const) or
  1846. ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
  1847. (taicpu(p).oper[1]^.typ = top_reg) and
  1848. ((taicpu(p).ops = 2) or
  1849. ((taicpu(p).oper[2]^.typ = top_reg) and
  1850. (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
  1851. getLastInstruction(p,hp1) and
  1852. (hp1.typ = ait_instruction) and
  1853. (taicpu(hp1).opcode = A_MOV) and
  1854. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1855. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1856. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1857. { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
  1858. begin
  1859. taicpu(p).ops := 3;
  1860. taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
  1861. taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
  1862. asml.remove(hp1);
  1863. hp1.free;
  1864. end;
  1865. end;
  1866. A_JMP:
  1867. {
  1868. change
  1869. jmp .L1
  1870. ...
  1871. .L1:
  1872. ret
  1873. into
  1874. ret
  1875. }
  1876. if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) then
  1877. begin
  1878. hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
  1879. if assigned(hp1) and SkipLabels(hp1,hp1) and (hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_RET) and (taicpu(p).condition=C_None) then
  1880. begin
  1881. tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
  1882. taicpu(p).opcode:=A_RET;
  1883. taicpu(p).is_jmp:=false;
  1884. taicpu(p).ops:=taicpu(hp1).ops;
  1885. case taicpu(hp1).ops of
  1886. 0:
  1887. taicpu(p).clearop(0);
  1888. 1:
  1889. taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
  1890. else
  1891. internalerror(2016041301);
  1892. end;
  1893. continue;
  1894. end;
  1895. end;
  1896. A_MOV:
  1897. begin
  1898. if (taicpu(p).oper[0]^.typ = top_reg) and
  1899. (taicpu(p).oper[1]^.typ = top_reg) and
  1900. GetNextInstruction(p, hp1) and
  1901. (hp1.typ = ait_Instruction) and
  1902. ((taicpu(hp1).opcode = A_MOV) or
  1903. (taicpu(hp1).opcode = A_MOVZX) or
  1904. (taicpu(hp1).opcode = A_MOVSX)) and
  1905. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1906. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1907. ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
  1908. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
  1909. (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
  1910. {mov reg1, reg2
  1911. mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
  1912. begin
  1913. if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
  1914. taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
  1915. if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
  1916. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
  1917. asml.remove(p);
  1918. p.free;
  1919. p := hp1;
  1920. continue;
  1921. end
  1922. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1923. GetNextInstruction(p,hp1) and
  1924. (hp1.typ = ait_instruction) and
  1925. (IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) or
  1926. ((taicpu(hp1).opcode=A_LEA) and
  1927. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
  1928. ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  1929. (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)) or
  1930. (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
  1931. (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg))
  1932. )
  1933. )
  1934. ) and
  1935. GetNextInstruction(hp1,hp2) and
  1936. MatchInstruction(hp2,A_MOV,[]) and
  1937. MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1938. (taicpu(hp2).oper[1]^.typ = top_ref) then
  1939. begin
  1940. CopyUsedRegs(TmpUsedRegs);
  1941. UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
  1942. if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
  1943. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,
  1944. hp2, TmpUsedRegs))) then
  1945. { change mov (ref), reg }
  1946. { add/sub/or/... reg2/$const, reg }
  1947. { mov reg, (ref) }
  1948. { # release reg }
  1949. { to add/sub/or/... reg2/$const, (ref) }
  1950. begin
  1951. case taicpu(hp1).opcode of
  1952. A_INC,A_DEC,A_NOT,A_NEG:
  1953. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1954. A_LEA:
  1955. begin
  1956. taicpu(hp1).opcode:=A_ADD;
  1957. if taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg then
  1958. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
  1959. else
  1960. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base);
  1961. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1962. DebugMsg('Peephole FoldLea done',hp1);
  1963. end
  1964. else
  1965. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1966. end;
  1967. asml.remove(p);
  1968. asml.remove(hp2);
  1969. p.free;
  1970. hp2.free;
  1971. p := hp1
  1972. end;
  1973. ReleaseUsedRegs(TmpUsedRegs);
  1974. end
  1975. end;
  1976. end;
  1977. end;
  1978. end;
  1979. p := tai(p.next)
  1980. end;
  1981. end;
  { Last peephole pass over the current block.

    Walks every item from BlockStart up to (not including) BlockEnd and, for
    instructions that do not contain a segment reference, applies these
    rewrites depending on the opcode:

      A_CALL   - "call x; jmp y"  ->  "push y; jmp x"   (only when optimizing for
                 a CPU below Pentium2 and not generating PIC)
               - "call x; ret"    ->  "jmp x"           (only at optimization
                 level 4, not generating PIC, and the ret has no operand)
      A_CMP    - "cmp $0,%reg"    ->  "test %reg,%reg"; the instruction is then
                 looked at again as an A_TEST
      A_MOV    - handed to PostPeepholeOptMov
      A_MOVZX  - byte-to-long zero extension is split into "xor; mov byte" when
                 optimizing for cpu_Pentium, not for size, and register
                 variables are off
      A_TEST,
      A_OR     - a "test/or %y,%y" or "test $-1,%y" whose flags were already
                 produced by the preceding arithmetic instruction is removed;
                 otherwise "test $-1,%reg" is normalised to "test %reg,%reg"

    Locals:
      p             item currently being examined
      hp1, hp2      neighbouring items / newly created instructions
      IsTestConstX  true when p is "test $-1, <operand>"
  }
  procedure TCPUAsmOptimizer.PostPeepHoleOpts;
    var
      p,hp1,hp2: tai;
      IsTestConstX: boolean;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.typ of
            ait_instruction:
              begin
                { instructions containing a segment reference are left alone }
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode of
                  A_CALL:
                    begin
                      { don't do this on modern CPUs, this really hurts them due to
                        broken call/ret pairing }
                      if (current_settings.optimizecputype < cpu_Pentium2) and
                         not(cs_create_pic in current_settings.moduleswitches) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_JMP) and
                         ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
                        begin
                          { push the jump target in front of the call, turn the
                            call into a jmp and drop the original jmp }
                          hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
                          InsertLLItem(p.previous, p, hp2);
                          taicpu(p).opcode := A_JMP;
                          taicpu(p).is_jmp := true;
                          asml.remove(hp1);
                          hp1.free;
                        end
                      { replace
                          call   procname
                          ret
                        by
                          jmp    procname
                        this should never hurt except when pic is used, not sure
                        how to handle it then
                        but do it only on level 4 because it destroys stack back traces
                      }
                      else if (cs_opt_level4 in current_settings.optimizerswitches) and
                         not(cs_create_pic in current_settings.moduleswitches) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_RET) and
                         (taicpu(hp1).ops=0) then
                        begin
                          taicpu(p).opcode := A_JMP;
                          taicpu(p).is_jmp := true;
                          asml.remove(hp1);
                          hp1.free;
                        end;
                    end;
                  A_CMP:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(p).oper[0]^.val = 0) and
                         (taicpu(p).oper[1]^.typ = top_reg) then
                        {change "cmp $0, %reg" to "test %reg, %reg"}
                        begin
                          taicpu(p).opcode := A_TEST;
                          taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                          { p is not advanced: the new test is examined by the
                            A_TEST branch on the next loop iteration }
                          continue;
                        end;
                    end;
                  A_MOV:
                    PostPeepholeOptMov(p);
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like }
                    {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
                    { so we can't safely replace the movzx then with xor/mov,   }
                    { since that would change the flags (JM)                    }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) then
                      begin
                        if (taicpu(p).oper[1]^.typ = top_reg) then
                          if (taicpu(p).oper[0]^.typ = top_reg) then
                            case taicpu(p).opsize of
                              S_BL:
                                begin
                                  { The inserted xor clears the whole destination
                                    before the byte move reads its source, so the
                                    source must not be a sub-register of the
                                    destination (e.g. "movzbl %al,%eax"); the
                                    memory form below has the matching base/index
                                    check. }
                                  if IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                     (getsupreg(taicpu(p).oper[0]^.reg) <> getsupreg(taicpu(p).oper[1]^.reg)) and
                                     not(cs_opt_size in current_settings.optimizerswitches) and
                                     (current_settings.optimizecputype = cpu_Pentium) then
                                    {Change "movzbl %reg1, %reg2" to
                                     "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                     PentiumMMX}
                                    begin
                                      hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                        taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                      InsertLLItem(p.previous, p, hp1);
                                      taicpu(p).opcode := A_MOV;
                                      taicpu(p).changeopsize(S_B);
                                      setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                    end;
                                end;
                            end
                          else if (taicpu(p).oper[0]^.typ = top_ref) and
                             (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                             (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                             not(cs_opt_size in current_settings.optimizerswitches) and
                             IsGP32Reg(taicpu(p).oper[1]^.reg) and
                             (current_settings.optimizecputype = cpu_Pentium) and
                             (taicpu(p).opsize = S_BL) then
                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                             Pentium and PentiumMMX}
                            begin
                              hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                taicpu(p).oper[1]^.reg);
                              taicpu(p).opcode := A_MOV;
                              taicpu(p).changeopsize(S_B);
                              setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                              InsertLLItem(p.previous, p, hp1);
                            end;
                      end;
                  A_TEST, A_OR:
                    {removes the line marked with (x) from the sequence
                       and/or/xor/add/sub/... $x, %y
                       test/or %y, %y | test $-1, %y    (x)
                       j(n)z _Label
                     as the first instruction already adjusts the ZF
                     %y operand may also be a reference }
                    begin
                      IsTestConstX:=(taicpu(p).opcode=A_TEST) and
                        MatchOperand(taicpu(p).oper[0]^,-1);
                      { hp1 = previous instruction (the flag producer),
                        hp2 = next instruction (the flag consumer) }
                      if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
                         GetLastInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         GetNextInstruction(p,hp2) and
                         MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
                        case taicpu(hp1).opcode Of
                          A_ADD, A_SUB, A_OR, A_XOR, A_AND:
                            begin
                              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                                 { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                 { and in case of carry for A(E)/B(E)/C/NC                  }
                                 ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                                  ((taicpu(hp1).opcode <> A_ADD) and
                                   (taicpu(hp1).opcode <> A_SUB))) then
                                begin
                                  { drop p and carry on with its successor }
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end;
                          A_SHL, A_SAL, A_SHR, A_SAR:
                            begin
                              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                                 { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
                                 { therefore, it's only safe to do this optimization for     }
                                 { shifts by a (nonzero) constant                            }
                                 (taicpu(hp1).oper[0]^.typ = top_const) and
                                 (taicpu(hp1).oper[0]^.val <> 0) and
                                 { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                 { and in case of carry for A(E)/B(E)/C/NC                  }
                                 (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                begin
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end;
                          A_DEC, A_INC, A_NEG:
                            begin
                              if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
                                 { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                 { and in case of carry for A(E)/B(E)/C/NC                  }
                                 (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                begin
                                  case taicpu(hp1).opcode Of
                                    A_DEC, A_INC:
                                      {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
                                      begin
                                        case taicpu(hp1).opcode Of
                                          A_DEC: taicpu(hp1).opcode := A_SUB;
                                          A_INC: taicpu(hp1).opcode := A_ADD;
                                        end;
                                        { the old single operand becomes operand 1,
                                          the constant 1 becomes operand 0 }
                                        taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                                        taicpu(hp1).loadConst(0,1);
                                        taicpu(hp1).ops:=2;
                                      end
                                  end;
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end
                          else
                            { change "test $-1,%reg" into "test %reg,%reg" }
                            if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                              taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                        end { case }
                      else
                        { change "test $-1,%reg" into "test %reg,%reg" }
                        if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                          taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Driver for the peephole optimizer.

    Repeats the whole-list optimization until finalPass is set.  Inside one
    repetition the assembler list is processed block by block: pass_1 is
    called for each block of ordinary code (per the original notes it sets up
    the label table and leaves BlockEnd either nil or at a mark_AsmBlockStart
    marker), the peephole passes run on that block, and regions between
    mark_AsmBlockStart and mark_AsmBlockEnd markers are skipped.

    Locals:
      nextItem   item following an assembler-block end marker
      passNo     zero-based number of the current repetition
      multiPass  true when cs_opt_level3 is enabled
      anyChange  reset on every repetition; nothing in this procedure sets it
                 to true, so "not anyChange" always holds here
      finalPass  true when the current repetition is the last one
  }
  procedure TCpuAsmOptimizer.Optimize;
    var
      nextItem: tai;
      passNo: longint;
      multiPass, anyChange, finalPass: boolean;
    begin
      multiPass := cs_opt_level3 in current_settings.optimizerswitches;
      anyChange := false;
      passNo := 0;
      repeat
        { stop after this repetition when only one is wanted, when passNo has
          reached its hard limit (prevents endless loops), or when nothing
          changed and more than three repetitions have already run }
        finalPass :=
          not multiPass or
          (passNo = 4) or
          ((passNo > 2) and not anyChange);
        anyChange := false;

        { set up the label table, always necessary }
        BlockStart := tai(asml.first);
        pass_1;
        { BlockEnd is now either an ait_marker with Kind = mark_AsmBlockStart,
          or nil }
        while assigned(BlockStart) do
          begin
            if cs_opt_peephole in current_settings.optimizerswitches then
              begin
                if passNo = 0 then
                  begin
                    { very first repetition: pre-pass, then the first
                      peephole pass twice }
                    PrePeepHoleOpts;
                    PeepHoleOptPass1;
                    PeepHoleOptPass1;
                  end
                else
                  PeepHoleOptPass1;
              end;

            { more peephole optimizations }
            if cs_opt_peephole in current_settings.optimizerswitches then
              begin
                PeepHoleOptPass2;
                if finalPass then
                  PostPeepHoleOpts;
              end;

            { carry on where this block stopped: BlockEnd is either the start
              of an assembler block or nil }
            BlockStart := BlockEnd;
            while assigned(BlockStart) and
                  (BlockStart.typ = ait_marker) and
                  (tai_marker(BlockStart).Kind = mark_AsmBlockStart) do
              begin
                { step over the assembler block up to its end marker }
                repeat
                  BlockStart := tai(BlockStart.next);
                until (BlockStart.typ = ait_marker) and
                      (tai_marker(BlockStart).Kind = mark_AsmBlockEnd);

                { BlockStart is now the mark_AsmBlockEnd marker }
                if not GetNextInstruction(BlockStart, nextItem) or
                   ((nextItem.typ = ait_marker) and
                    (tai_marker(nextItem).Kind = mark_AsmBlockStart)) then
                  { nothing follows, or another assembler block follows
                    directly: continue from that item }
                  BlockStart := nextItem
                else
                  { ordinary instructions follow: prepare that block }
                  pass_1;
              end;
          end;
        inc(passNo);
      until finalPass;
      dfa.free;
    end;
  { Unit initialization: store the TCpuAsmOptimizer class in casmoptimizer. }
  begin
    casmoptimizer := TCpuAsmOptimizer;
  end.