aoptcpu.pas 103 KB


  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aoptobj, aoptcpub, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  25. Type
  26. TCpuAsmOptimizer = class(TX86AsmOptimizer) { i386 peephole optimizer built on top of the shared x86 optimizer class }
  27. procedure Optimize; override;
  28. procedure PrePeepHoleOpts; override; { rewrites "imul const,reg", "shr/sar + shl" pairs and "xor reg,reg" before the peephole passes }
  29. procedure PeepHoleOptPass1; override; { first pass of peephole optimizations }
  30. procedure PeepHoleOptPass2; override;
  31. procedure PostPeepHoleOpts; override;
  32. function DoFpuLoadStoreOpt(var p : tai) : boolean; { drops a dead fstp/fld (fistp/fild) pair in front of exit code; true means the caller should "continue" }
  33. function RegReadByInstruction(reg : TRegister; hp : tai) : boolean; { true if hp is an instruction that reads reg }
  34. function InstructionLoadsFromReg(const reg : TRegister;const hp : tai) : boolean;override; { forwards to RegReadByInstruction }
  35. end;
  36. Var
  37. AsmOptimizer : TCpuAsmOptimizer;
  38. Implementation
  39. uses
  40. verbose,globtype,globals,
  41. cutils,
  42. aoptbase,
  43. cpuinfo,
  44. aasmcpu,
  45. procinfo,
  46. cgutils,cgx86,
  47. { units we should get rid of: }
  48. symsym,symconst;
  function TCPUAsmoptimizer.DoFpuLoadStoreOpt(var p: tai): boolean;
    { Looks at the store instruction p (the code reads it as fstp/fistp to a
      memory reference) and the instruction that follows it.  When that
      follower reloads exactly the same reference (fld after fstp, fild after
      fistp, same operand size) AND the value is an extended (S_FX) stored
      relative to the frame pointer without index AND the next instruction
      is exit code, both instructions are removed.

      On success p is set to the exit-code instruction and the function
      returns true, meaning the caller should "continue" its loop.

      Turning "fstp f; fld f" into "fst f" for the other operand sizes is
      deliberately not attempted: the store operation rounds, so the
      reloaded value may differ from the one on the FPU stack. }
    var
      reload, tail: tai;
      isStoreLoadPair: boolean;
    begin
      Result := false;

      { --- stage 1: p must be followed by a matching reload ------------- }
      if taicpu(p).oper[0]^.typ <> top_ref then
        exit;
      if not getNextInstruction(p, reload) then
        exit;
      if reload.typ <> ait_instruction then
        exit;
      isStoreLoadPair :=
        ((taicpu(reload).opcode = A_FLD) and (taicpu(p).opcode = A_FSTP)) or
        ((taicpu(p).opcode = A_FISTP) and (taicpu(reload).opcode = A_FILD));
      if not isStoreLoadPair then
        exit;
      if taicpu(reload).oper[0]^.typ <> top_ref then
        exit;
      if taicpu(reload).opsize <> taicpu(p).opsize then
        exit;
      if not RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(reload).oper[0]^.ref^) then
        exit;

      { --- stage 2: only extended values right in front of exit code ---- }
      if taicpu(p).opsize <> S_FX then
        exit;
      if not getNextInstruction(reload, tail) then
        exit;
      if tail.typ <> ait_instruction then
        exit;
      if not IsExitCode(tail) then
        exit;
      if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
        exit;
      { leave the pair alone when the reference lies below the function
        result's location }
      if assigned(current_procinfo.procdef.funcretsym) and
         (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
        exit;
      if taicpu(p).oper[0]^.ref^.index <> NR_NO then
        exit;

      { --- both instructions are dead: unlink and release them ---------- }
      asml.remove(p);
      asml.remove(reload);
      p.free;
      reload.free;
      p := tail;
      removeLastDeallocForFuncRes(p);
      Result := true;
    end;
  { Maps a register-specific instruction-change code (the read, write,
    read/write and modify groups that end in CH_REDI, CH_WEDI, CH_RWEDI and
    CH_MEDI respectively) to the super register it talks about.
    Codes beyond CH_MEDI trigger internal error 2016041901. }
  function tch2reg(ch: tinschange): tsuperregister;
    const
      ch2reg: array[CH_REAX..CH_REDI] of tsuperregister = (RS_EAX,RS_ECX,RS_EDX,RS_EBX,RS_ESP,RS_EBP,RS_ESI,RS_EDI);
    var
      groupBias: longint;
    begin
      if ch > CH_MEDI then
        InternalError(2016041901);

      { every group is folded back onto the CH_REAX..CH_REDI range by
        subtracting the ordinal of the last member of the preceding group }
      if ch <= CH_REDI then
        groupBias := 0
      else if ch <= CH_WEDI then
        groupBias := ord(CH_REDI)
      else if ch <= CH_RWEDI then
        groupBias := ord(CH_WEDI)
      else
        groupBias := ord(CH_RWEDI);

      tch2reg := ch2reg[tinschange(ord(ch) - groupBias)];
    end;
  { Returns true when reg is an integer register whose super register lies
    in the range RS_EAX..RS_EBX. }
  function isgp32reg(reg: TRegister): boolean;
    begin
      { warnings are switched off around the range comparison, exactly as
        in the original single-expression form }
      {$push}{$warnings off}
      if getregtype(reg) <> R_INTREGISTER then
        isgp32reg := false
      else
        isgp32reg := (getsupreg(reg) >= RS_EAX) and (getsupreg(reg) <= RS_EBX);
      {$pop}
    end;
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
    { Thin adapter: answers the inherited "does hp load from reg" query by
      delegating to RegReadByInstruction. }
    begin
      InstructionLoadsFromReg := RegReadByInstruction(reg, hp);
    end;
  function TCpuAsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
    { Reports whether hp is an instruction that reads reg.

      - anything that is not an instruction: false
      - call: always true
      - imul: one-operand form reads EAX and its operand; the two- and
        three-operand forms are checked on their first two operands
      - idiv/div/mul: the operand plus any register whose super register
        is EAX or EDX
      - everything else: a register used inside a memory reference counts
        as read; after that the instruction property table (insprop)
        decides per change entry. }
    var
      insn: taicpu;
      idx: longint;
      change: tinschange;
      reads: boolean;
    begin
      Result := false;
      if hp.typ <> ait_instruction then
        exit;
      insn := taicpu(hp);

      case insn.opcode of
        A_CALL:
          Result := true;

        A_IMUL:
          if insn.ops = 1 then
            Result := (reg = NR_EAX) or RegInOp(reg, insn.oper[0]^)
          else if (insn.ops = 2) or (insn.ops = 3) then
            Result := RegInOp(reg, insn.oper[0]^) or
                      RegInOp(reg, insn.oper[1]^);

        A_IDIV, A_DIV, A_MUL:
          Result := RegInOp(reg, insn.oper[0]^) or
                    (getsupreg(reg) in [RS_EAX,RS_EDX]);

        else
          begin
            { registers that appear in an address are always read }
            for idx := 0 to insn.ops-1 do
              if (insn.oper[idx]^.typ = top_ref) and
                 RegInRef(reg, insn.oper[idx]^.ref^) then
                begin
                  Result := true;
                  exit;
                end;

            { otherwise consult the per-opcode change list }
            for idx := 1 to maxinschanges do
              begin
                change := insprop[insn.opcode].ch[idx];
                reads := false;
                case change of
                  CH_REAX..CH_REDI, CH_RWEAX..CH_MEDI:
                    reads := getsupreg(reg) = tch2reg(change);
                  CH_RWOP1, CH_ROP1, CH_MOP1:
                    reads := RegInOp(reg, insn.oper[0]^);
                  Ch_RWOP2, Ch_ROP2, Ch_MOP2:
                    reads := RegInOp(reg, insn.oper[1]^);
                  Ch_RWOP3, Ch_ROP3, Ch_MOP3:
                    reads := RegInOp(reg, insn.oper[2]^);
                  Ch_RFlags, Ch_RWFlags:
                    reads := reg = NR_DEFAULTFLAGS;
                end;
                if reads then
                  begin
                    Result := true;
                    exit;
                  end;
              end;
          end;
      end;
    end;
  { Tells whether any operand of p is a memory reference that carries a
    segment register (segment <> NR_NO). }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      opIdx: longint;
    begin
      InsContainsSegRef := false;
      for opIdx := 0 to p.opercnt-1 do
        if (p.oper[opIdx]^.typ = top_ref) and
           (p.oper[opIdx]^.ref^.segment <> NR_NO) then
          begin
            InsContainsSegRef := true;
            break;
          end;
    end;
  function InstrReadsFlags(p: tai): boolean;
    { True when p is a label, or when p is an instruction whose property
      table entry contains Ch_RFlags, Ch_RWFlags or Ch_All.  Every other
      kind of list item yields false. }
    var
      slot: longint;
    begin
      InstrReadsFlags := false;
      if p.typ = ait_label then
        InstrReadsFlags := true
      else if p.typ = ait_instruction then
        for slot := 1 to maxinschanges do
          if InsProp[taicpu(p).opcode].Ch[slot] in [Ch_RFlags,Ch_RWFlags,Ch_All] then
            begin
              InstrReadsFlags := true;
              break;
            end;
    end;
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
    { Walks the block BlockStart..BlockEnd once and applies three rewrites to
      instructions that carry no segment-prefixed memory operand:

        * "imul const, reg[, reg]" (32 bit) -> nothing / mov / lea sequences
        * "shr|sar c1, x; shl c2, x"        -> shift + and, or a single and
        * "xor reg, reg"                    -> "mov $0, reg" (temporary form
                                               for the CSE; changed back in
                                               pass 2)

      Fixes compared to the previous implementation (both only reachable
      with optimizecputype <= cpu_386):
        - "imul $6, reg1, reg2" emitted its second lea with reg1 as
          destination instead of reg2, clobbering the source and leaving
          reg2 = 2*reg1;
        - the three-operand sequences for 6 and 12 read the source register
          again after the destination has been written, which is wrong when
          both are the same register; that case now uses the two-operand
          sequence. }
    var
      p, hp1: tai;
      tmpRef: treference;

    { Puts newInstr in the place of p, frees p and makes p point to newInstr. }
    procedure ReplaceCurrent(newInstr: tai);
      begin
        InsertLLItem(p.previous, p.next, newInstr);
        p.free;
        p := newInstr;
      end;

    { Replaces p by the sequence "first; second" and leaves p on second. }
    procedure ReplaceCurrentWithPair(first, second: tai);
      begin
        InsertLLItem(p, p.next, second);
        InsertLLItem(p.previous, p.next, first);
        p.free;
        p := tai(first.next);
      end;

    { Replaces p by "lea (src,src,scale), dst", i.e. dst := src*(scale+1). }
    procedure ImulToSingleLea(src, dst: TRegister; scale: byte);
      begin
        tmpRef.base := src;
        tmpRef.index := src;
        tmpRef.ScaleFactor := scale;
        ReplaceCurrent(taicpu.op_ref_reg(A_LEA, S_L, tmpRef, dst));
      end;

    { Handles "imul const, reg[, reg]" at p.  Returns true when p has
      already been advanced to the next list item (caller must "continue"). }
    function OptImulByConst: boolean;
      var
        src, dst: TRegister;
        threeOp: boolean;
        first, second: tai;
      begin
        OptImulByConst := false;
        if (taicpu(p).oper[0]^.typ <> Top_Const) or
           (taicpu(p).oper[1]^.typ <> Top_Reg) or
           (taicpu(p).opsize <> S_L) then
          exit;

        if taicpu(p).oper[0]^.val = 1 then
          begin
            if taicpu(p).ops = 2 then
              begin
                { remove "imul $1, reg" }
                hp1 := tai(p.Next);
                asml.remove(p);
                p.free;
                p := hp1;
                OptImulByConst := true;
              end
            else
              { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
              ReplaceCurrent(taicpu.Op_Reg_Reg(A_MOV, S_L,
                taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.reg));
            exit;
          end;

        if not(((taicpu(p).ops <= 2) or
                (taicpu(p).oper[2]^.typ = Top_Reg)) and
               (taicpu(p).oper[0]^.val <= 12) and
               not(cs_opt_size in current_settings.optimizerswitches)) then
          exit;
        { lea/add do not set the overflow flag the way imul does, so keep
          the imul when a jo/jno follows directly }
        if GetNextInstruction(p, hp1) and
           (tai(hp1).typ = ait_instruction) and
           (taicpu(hp1).opcode = A_Jcc) and
           (taicpu(hp1).condition in [C_O,C_NO]) then
          exit;

        src := taicpu(p).oper[1]^.reg;
        if taicpu(p).ops = 3 then
          dst := taicpu(p).oper[2]^.reg
        else
          dst := src;
        { the three-operand sequences below read src after dst has been
          written, so they are only valid when the registers differ }
        threeOp := (taicpu(p).ops = 3) and (dst <> src);

        reference_reset(tmpRef,1,[]);
        case taicpu(p).oper[0]^.val of
          3: { lea (src,src,2), dst }
            ImulToSingleLea(src, dst, 2);
          5: { lea (src,src,4), dst }
            ImulToSingleLea(src, dst, 4);
          9: { lea (src,src,8), dst }
            ImulToSingleLea(src, dst, 8);
          6:
            { imul 6, reg1, reg2 to
                lea (,reg1,2), reg2
                lea (reg2,reg1,4), reg2
              imul 6, reg1 to
                lea (reg1,reg1,2), reg1
                add reg1, reg1 }
            if (current_settings.optimizecputype <= cpu_386) then
              begin
                if threeOp then
                  begin
                    tmpRef.base := dst;
                    tmpRef.index := src;
                    tmpRef.ScaleFactor := 4;
                    { destination must be reg2 (was reg1 before the fix) }
                    second := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, dst);
                    reference_reset(tmpRef,2,[]);
                    tmpRef.base := NR_NO;
                    tmpRef.index := src;
                    tmpRef.ScaleFactor := 2;
                    first := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, dst);
                  end
                else
                  begin
                    second := taicpu.op_reg_reg(A_ADD, S_L, src, src);
                    reference_reset(tmpRef,2,[]);
                    tmpRef.base := src;
                    tmpRef.index := src;
                    tmpRef.ScaleFactor := 2;
                    first := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, src);
                  end;
                ReplaceCurrentWithPair(first, second);
              end;
          10:
            { imul 10, reg1, reg2 to
                lea (reg1,reg1,4), reg2
                add reg2, reg2
              imul 10, reg1 to
                lea (reg1,reg1,4), reg1
                add reg1, reg1 }
            if (current_settings.optimizecputype <= cpu_386) then
              begin
                second := taicpu.op_reg_reg(A_ADD, S_L, dst, dst);
                tmpRef.base := src;
                tmpRef.index := src;
                tmpRef.ScaleFactor := 4;
                first := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, dst);
                ReplaceCurrentWithPair(first, second);
              end;
          12:
            { imul 12, reg1, reg2 to
                lea (,reg1,4), reg2
                lea (reg2,reg1,8), reg2
              imul 12, reg1 to
                lea (reg1,reg1,2), reg1
                lea (,reg1,4), reg1 }
            if (current_settings.optimizecputype <= cpu_386) then
              begin
                if threeOp then
                  begin
                    tmpRef.base := dst;
                    tmpRef.index := src;
                    tmpRef.ScaleFactor := 8;
                    second := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, dst);
                    reference_reset(tmpRef,2,[]);
                    tmpRef.base := NR_NO;
                    tmpRef.index := src;
                    tmpRef.ScaleFactor := 4;
                    first := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, dst);
                  end
                else
                  begin
                    tmpRef.base := NR_NO;
                    tmpRef.index := src;
                    tmpRef.ScaleFactor := 4;
                    second := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, src);
                    reference_reset(tmpRef,2,[]);
                    tmpRef.base := src;
                    tmpRef.index := src;
                    tmpRef.ScaleFactor := 2;
                    first := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, src);
                  end;
                ReplaceCurrentWithPair(first, second);
              end;
        end;
      end;

    { Turns the shift "ins" (count in operand 0) into an "and" whose mask
      clears the low <count> bits for the instruction's operand size. }
    procedure ShiftToClearLowBits(ins: taicpu);
      var
        lowBits: aint;
      begin
        ins.opcode := A_AND;
        lowBits := (1 shl (ins.oper[0]^.val)) - 1;
        case ins.opsize of
          S_L: ins.loadConst(0,lowBits Xor aint($ffffffff));
          S_B: ins.loadConst(0,lowBits Xor $ff);
          S_W: ins.loadConst(0,lowBits Xor $ffff);
        end;
      end;

    { Changes the code sequence
        shr/sar const1, x
        shl     const2, x
      to either "shr|sar/and", "and/shl" or just "and" depending on const1
      and const2. }
    procedure OptShiftPair;
      begin
        if not (GetNextInstruction(p, hp1) and
                (tai(hp1).typ = ait_instruction) and
                (taicpu(hp1).opcode = A_SHL) and
                (taicpu(p).oper[0]^.typ = top_const) and
                (taicpu(hp1).oper[0]^.typ = top_const) and
                (taicpu(hp1).opsize = taicpu(p).opsize) and
                (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^)) then
          exit;

        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
           not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { const1 > const2: shift right by the difference, then mask }
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
            ShiftToClearLowBits(taicpu(hp1));
          end
        else if (taicpu(p).oper[0]^.val < taicpu(hp1).oper[0]^.val) and
                not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { const1 < const2: mask first, then shift left by the difference }
            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
            ShiftToClearLowBits(taicpu(p));
          end
        else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
          begin
            { const1 = const2: a single and is enough }
            ShiftToClearLowBits(taicpu(p));
            asml.remove(hp1);
            hp1.free;
          end;
      end;

    begin
      p := BlockStart;
      while (p <> BlockEnd) do
        begin
          if p.typ = ait_instruction then
            begin
              { instructions with a segment override are left untouched }
              if InsContainsSegRef(taicpu(p)) then
                begin
                  p := tai(p.next);
                  continue;
                end;
              case taicpu(p).opcode of
                A_IMUL:
                  if OptImulByConst then
                    continue;
                A_SAR, A_SHR:
                  OptShiftPair;
                A_XOR:
                  if (taicpu(p).oper[0]^.typ = top_reg) and
                     (taicpu(p).oper[1]^.typ = top_reg) and
                     (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                    { temporarily change this to 'mov reg,0' to make it easier }
                    { for the CSE. Will be changed back in pass 2 }
                    begin
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).loadConst(0,0);
                    end;
              end;
            end;
          p := tai(p.next)
        end;
    end;
  { Steps over every list item after hp whose type is in SkipInstr or is a
    label or an alignment.  When something follows the skipped run, hp2
    receives it and the result is true; otherwise hp2 receives the last
    item reached and the result is false. }
  function SkipLabels(hp: tai; var hp2: tai): boolean;
    var
      cursor: tai;
      found: boolean;
    begin
      cursor := hp;
      while assigned(cursor.next) and
            (tai(cursor.next).typ in SkipInstr + [ait_label,ait_align]) do
        cursor := tai(cursor.next);

      found := assigned(cursor.next);
      if found then
        hp2 := tai(cursor.next)
      else
        hp2 := cursor;
      SkipLabels := found;
    end;
  538. { First pass of peephole optimizations }
  539. procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
  function WriteOk : Boolean;
    { Debug aid: prints 'Ok' on standard output and always yields True, so
      it can be spliced into a boolean expression. }
    begin
      writeln('Ok');
      WriteOk := True;
    end;
  545. var
  546. l : longint;
  547. p,hp1,hp2 : tai;
  548. hp3,hp4: tai;
  549. v:aint;
  550. TmpRef: TReference;
  551. TmpBool1, TmpBool2: Boolean;
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
    { Traces successive jumps to their final destination and sets it, e.g.

        je l1                je l3
        <code>               <code>
        l1:       becomes    l1:
        je l2                je l3
        <code>               <code>
        l2:                  l2:
        jmp l3               jmp l3

      hp is the jump to retarget.  The level parameter denotes how deep we
      have already followed the jump, to avoid endless loops with constructs
      such as "l5: ; jmp l5"; beyond 20 levels the function gives up.

      Returns false when tracing was abandoned (depth limit, a jump to
      itself, or a failed recursive call) and true otherwise.

      Label reference counts are kept in step with every retargeting: the
      old target is decremented and the new one incremented.  The "next
      label reused" branch used to skip the decrement, which left the old
      label with one reference too many. }
    var
      p1, p2: tai;
      l: tasmlabel;

    { Looks for a label directly after hp, skipping only items in SkipInstr
      and alignments.  Stores its symbol in l and returns true when found. }
    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
      begin
        FindAnyLabel := false;
        while assigned(hp.next) and
              (tai(hp.next).typ in (SkipInstr+[ait_align])) do
          hp := tai(hp.next);
        if assigned(hp.next) and
           (tai(hp.next).typ = ait_label) then
          begin
            FindAnyLabel := true;
            l := tai_label(hp.next).labsym;
          end
      end;

    begin
      GetFinalDestination := false;
      if level > 20 then
        exit;
      p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
      if assigned(p1) then
        begin
          SkipLabels(p1,p1);
          if (tai(p1).typ = ait_instruction) and
             (taicpu(p1).is_jmp) then
            if { the next instruction after the label where the jump hp arrives }
               { is unconditional or of the same type as hp, so continue }
               (taicpu(p1).condition in [C_None,hp.condition]) or
               { the next instruction after the label where the jump hp arrives }
               { is the opposite of hp (so this one is never taken), but after }
               { that one there is a branch that will be taken, so perform a }
               { little hack: set p1 equal to this instruction (that's what the }
               { last SkipLabels is for, only works with short bool evaluation) }
               ((taicpu(p1).condition = inverse_cond(hp.condition)) and
                SkipLabels(p1,p2) and
                (p2.typ = ait_instruction) and
                (taicpu(p2).is_jmp) and
                (taicpu(p2).condition in [C_None,hp.condition]) and
                SkipLabels(p1,p1)) then
              begin
                { quick check for loops of the form "l5: ; jmp l5" }
                if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                    tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                  exit;
                if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                  exit;
                tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
                tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
              end
            else
              if (taicpu(p1).condition = inverse_cond(hp.condition)) then
                { p1 is never taken when arriving via hp, so hp may just as
                  well jump to whatever follows p1 }
                if not FindAnyLabel(p1,l) then
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(p1,p1.next,tai_comment.Create(
                      strpnew('previous label inserted')));
  {$endif finaldestdebug}
                    current_asmdata.getjumplabel(l);
                    insertllitem(p1,p1.next,tai_label.Create(l));
                    tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    l.increfs;
                    { this won't work, since the new label isn't in the labeltable }
                    { so it will fail the rangecheck. Labeltable should become a }
                    { hashtable to support this: }
                    { GetFinalDestination(asml, hp); }
                  end
                else
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(p1,p1.next,tai_comment.Create(
                      strpnew('next label reused')));
  {$endif finaldestdebug}
                    l.increfs;
                    { hp no longer refers to its old target }
                    tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    if not GetFinalDestination(asml, hp,succ(level)) then
                      exit;
                  end;
        end;
      GetFinalDestination := true;
    end;
  function DoSubAddOpt(var p: tai): Boolean;
    { Folds the instruction in front of p into p's constant operand.
      NOTE(review): presumably p is "sub const, reg" - the code reads
      oper[0]^.val and oper[1]^.reg of p without checking; confirm at the
      call site.

        dec reg       + p  ->  p with const+1
        sub c, reg    + p  ->  p with const+c
        add c, reg    + p  ->  p with const-c; when that is 0, p itself is
                               removed as well, p is moved to the previous
                               instruction (or to the following item if
                               there is none) and true is returned.

      The predecessor is stored in hp1 of the enclosing procedure and must
      have the same operand size as p. }

    { unlinks and frees the predecessor held in hp1 }
    procedure DropPredecessor;
      begin
        asml.remove(hp1);
        hp1.free;
      end;

    begin
      DoSubAddOpt := False;
      if not GetLastInstruction(p, hp1) then
        exit;
      if hp1.typ <> ait_instruction then
        exit;
      if taicpu(hp1).opsize <> taicpu(p).opsize then
        exit;

      case taicpu(hp1).opcode of
        A_DEC:
          if (taicpu(hp1).oper[0]^.typ = top_reg) and
             (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
              DropPredecessor;
            end;
        A_SUB:
          if (taicpu(hp1).oper[0]^.typ = top_const) and
             (taicpu(hp1).oper[1]^.typ = top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
              DropPredecessor;
            end;
        A_ADD:
          if (taicpu(hp1).oper[0]^.typ = top_const) and
             (taicpu(hp1).oper[1]^.typ = top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
              DropPredecessor;
              if (taicpu(p).oper[0]^.val = 0) then
                begin
                  { the two instructions cancel out completely }
                  hp1 := tai(p.next);
                  asml.remove(p);
                  p.free;
                  if not GetLastInstruction(hp1, p) then
                    p := hp1;
                  DoSubAddOpt := True;
                end
            end;
      end;
    end;
  689. begin
  690. p := BlockStart;
  691. ClearUsedRegs;
  692. while (p <> BlockEnd) Do
  693. begin
  694. UpDateUsedRegs(UsedRegs, tai(p.next));
  695. case p.Typ Of
  696. ait_instruction:
  697. begin
  698. current_filepos:=taicpu(p).fileinfo;
  699. if InsContainsSegRef(taicpu(p)) then
  700. begin
  701. p := tai(p.next);
  702. continue;
  703. end;
  704. { Handle Jmp Optimizations }
  705. if taicpu(p).is_jmp then
  706. begin
  707. {the following if-block removes all code between a jmp and the next label,
  708. because it can never be executed}
  709. if (taicpu(p).opcode = A_JMP) then
  710. begin
  711. hp2:=p;
  712. while GetNextInstruction(hp2, hp1) and
  713. (hp1.typ <> ait_label) do
  714. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  715. begin
  716. { don't kill start/end of assembler block,
  717. no-line-info-start/end etc }
  718. if hp1.typ<>ait_marker then
  719. begin
  720. asml.remove(hp1);
  721. hp1.free;
  722. end
  723. else
  724. hp2:=hp1;
  725. end
  726. else break;
  727. end;
  728. { remove jumps to a label coming right after them }
  729. if GetNextInstruction(p, hp1) then
  730. begin
  731. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  732. { TODO: FIXME removing the first instruction fails}
  733. (p<>blockstart) then
  734. begin
  735. hp2:=tai(hp1.next);
  736. asml.remove(p);
  737. p.free;
  738. p:=hp2;
  739. continue;
  740. end
  741. else
  742. begin
  743. if hp1.typ = ait_label then
  744. SkipLabels(hp1,hp1);
  745. if (tai(hp1).typ=ait_instruction) and
  746. (taicpu(hp1).opcode=A_JMP) and
  747. GetNextInstruction(hp1, hp2) and
  748. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  749. begin
  750. if taicpu(p).opcode=A_Jcc then
  751. begin
  752. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  753. tai_label(hp2).labsym.decrefs;
  754. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  755. { when free'ing hp1, the ref. isn't decresed, so we don't
  756. increase it (FK)
  757. taicpu(p).oper[0]^.ref^.symbol.increfs;
  758. }
  759. asml.remove(hp1);
  760. hp1.free;
  761. GetFinalDestination(asml, taicpu(p),0);
  762. end
  763. else
  764. begin
  765. GetFinalDestination(asml, taicpu(p),0);
  766. p:=tai(p.next);
  767. continue;
  768. end;
  769. end
  770. else
  771. GetFinalDestination(asml, taicpu(p),0);
  772. end;
  773. end;
  774. end
  775. else
  776. { All other optimizes }
  777. begin
  778. for l := 0 to taicpu(p).ops-1 Do
  779. if (taicpu(p).oper[l]^.typ = top_ref) then
  780. With taicpu(p).oper[l]^.ref^ Do
  781. begin
  782. if (base = NR_NO) and
  783. (index <> NR_NO) and
  784. (scalefactor in [0,1]) then
  785. begin
  786. base := index;
  787. index := NR_NO
  788. end
  789. end;
  790. case taicpu(p).opcode Of
  791. A_AND:
  792. if OptPass1And(p) then
  793. continue;
  794. A_CMP:
  795. begin
  796. { cmp register,$8000 neg register
  797. je target --> jo target
  798. .... only if register is deallocated before jump.}
  799. case Taicpu(p).opsize of
  800. S_B: v:=$80;
  801. S_W: v:=$8000;
  802. S_L: v:=aint($80000000);
  803. else
  804. internalerror(2013112905);
  805. end;
  806. if (taicpu(p).oper[0]^.typ=Top_const) and
  807. (taicpu(p).oper[0]^.val=v) and
  808. (Taicpu(p).oper[1]^.typ=top_reg) and
  809. GetNextInstruction(p, hp1) and
  810. (hp1.typ=ait_instruction) and
  811. (taicpu(hp1).opcode=A_Jcc) and
  812. (Taicpu(hp1).condition in [C_E,C_NE]) and
  813. not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
  814. begin
  815. Taicpu(p).opcode:=A_NEG;
  816. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  817. Taicpu(p).clearop(1);
  818. Taicpu(p).ops:=1;
  819. if Taicpu(hp1).condition=C_E then
  820. Taicpu(hp1).condition:=C_O
  821. else
  822. Taicpu(hp1).condition:=C_NO;
  823. continue;
  824. end;
  825. {
  826. @@2: @@2:
  827. .... ....
  828. cmp operand1,0
  829. jle/jbe @@1
  830. dec operand1 --> sub operand1,1
  831. jmp @@2 jge/jae @@2
  832. @@1: @@1:
  833. ... ....}
  834. if (taicpu(p).oper[0]^.typ = top_const) and
  835. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  836. (taicpu(p).oper[0]^.val = 0) and
  837. GetNextInstruction(p, hp1) and
  838. (hp1.typ = ait_instruction) and
  839. (taicpu(hp1).is_jmp) and
  840. (taicpu(hp1).opcode=A_Jcc) and
  841. (taicpu(hp1).condition in [C_LE,C_BE]) and
  842. GetNextInstruction(hp1,hp2) and
  843. (hp2.typ = ait_instruction) and
  844. (taicpu(hp2).opcode = A_DEC) and
  845. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  846. GetNextInstruction(hp2, hp3) and
  847. (hp3.typ = ait_instruction) and
  848. (taicpu(hp3).is_jmp) and
  849. (taicpu(hp3).opcode = A_JMP) and
  850. GetNextInstruction(hp3, hp4) and
  851. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  852. begin
  853. taicpu(hp2).Opcode := A_SUB;
  854. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  855. taicpu(hp2).loadConst(0,1);
  856. taicpu(hp2).ops:=2;
  857. taicpu(hp3).Opcode := A_Jcc;
  858. case taicpu(hp1).condition of
  859. C_LE: taicpu(hp3).condition := C_GE;
  860. C_BE: taicpu(hp3).condition := C_AE;
  861. end;
  862. asml.remove(p);
  863. asml.remove(hp1);
  864. p.free;
  865. hp1.free;
  866. p := hp2;
  867. continue;
  868. end
  869. end;
  870. A_FLD:
  871. begin
  872. if (taicpu(p).oper[0]^.typ = top_reg) and
  873. GetNextInstruction(p, hp1) and
  874. (hp1.typ = Ait_Instruction) and
  875. (taicpu(hp1).oper[0]^.typ = top_reg) and
  876. (taicpu(hp1).oper[1]^.typ = top_reg) and
  877. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  878. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  879. { change to
  880. fld reg fxxx reg,st
  881. fxxxp st, st1 (hp1)
  882. Remark: non commutative operations must be reversed!
  883. }
  884. begin
  885. case taicpu(hp1).opcode Of
  886. A_FMULP,A_FADDP,
  887. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  888. begin
  889. case taicpu(hp1).opcode Of
  890. A_FADDP: taicpu(hp1).opcode := A_FADD;
  891. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  892. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  893. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  894. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  895. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  896. end;
  897. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  898. taicpu(hp1).oper[1]^.reg := NR_ST;
  899. asml.remove(p);
  900. p.free;
  901. p := hp1;
  902. continue;
  903. end;
  904. end;
  905. end
  906. else
  907. if (taicpu(p).oper[0]^.typ = top_ref) and
  908. GetNextInstruction(p, hp2) and
  909. (hp2.typ = Ait_Instruction) and
  910. (taicpu(hp2).ops = 2) and
  911. (taicpu(hp2).oper[0]^.typ = top_reg) and
  912. (taicpu(hp2).oper[1]^.typ = top_reg) and
  913. (taicpu(p).opsize in [S_FS, S_FL]) and
  914. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  915. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  916. if GetLastInstruction(p, hp1) and
  917. (hp1.typ = Ait_Instruction) and
  918. ((taicpu(hp1).opcode = A_FLD) or
  919. (taicpu(hp1).opcode = A_FST)) and
  920. (taicpu(hp1).opsize = taicpu(p).opsize) and
  921. (taicpu(hp1).oper[0]^.typ = top_ref) and
  922. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  923. if ((taicpu(hp2).opcode = A_FMULP) or
  924. (taicpu(hp2).opcode = A_FADDP)) then
  925. { change to
  926. fld/fst mem1 (hp1) fld/fst mem1
  927. fld mem1 (p) fadd/
  928. faddp/ fmul st, st
  929. fmulp st, st1 (hp2) }
  930. begin
  931. asml.remove(p);
  932. p.free;
  933. p := hp1;
  934. if (taicpu(hp2).opcode = A_FADDP) then
  935. taicpu(hp2).opcode := A_FADD
  936. else
  937. taicpu(hp2).opcode := A_FMUL;
  938. taicpu(hp2).oper[1]^.reg := NR_ST;
  939. end
  940. else
  941. { change to
  942. fld/fst mem1 (hp1) fld/fst mem1
  943. fld mem1 (p) fld st}
  944. begin
  945. taicpu(p).changeopsize(S_FL);
  946. taicpu(p).loadreg(0,NR_ST);
  947. end
  948. else
  949. begin
  950. case taicpu(hp2).opcode Of
  951. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  952. { change to
  953. fld/fst mem1 (hp1) fld/fst mem1
  954. fld mem2 (p) fxxx mem2
  955. fxxxp st, st1 (hp2) }
  956. begin
  957. case taicpu(hp2).opcode Of
  958. A_FADDP: taicpu(p).opcode := A_FADD;
  959. A_FMULP: taicpu(p).opcode := A_FMUL;
  960. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  961. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  962. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  963. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  964. end;
  965. asml.remove(hp2);
  966. hp2.free;
  967. end
  968. end
  969. end
  970. end;
  971. A_FSTP,A_FISTP:
  972. if doFpuLoadStoreOpt(p) then
  973. continue;
  974. A_LEA:
  975. begin
  976. {removes seg register prefixes from LEA operations, as they
  977. don't do anything}
  978. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  979. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  980. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  981. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  982. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  983. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  984. begin
  985. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  986. (taicpu(p).oper[0]^.ref^.offset = 0) then
  987. begin
  988. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  989. taicpu(p).oper[1]^.reg);
  990. InsertLLItem(p.previous,p.next, hp1);
  991. p.free;
  992. p := hp1;
  993. continue;
  994. end
  995. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  996. begin
  997. hp1 := tai(p.Next);
  998. asml.remove(p);
  999. p.free;
  1000. p := hp1;
  1001. continue;
  1002. end
  1003. { continue to use lea to adjust the stack pointer,
  1004. it is the recommended way, but only if not optimizing for size }
  1005. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  1006. (cs_opt_size in current_settings.optimizerswitches) then
  1007. with taicpu(p).oper[0]^.ref^ do
  1008. if (base = taicpu(p).oper[1]^.reg) then
  1009. begin
  1010. l := offset;
  1011. if (l=1) and UseIncDec then
  1012. begin
  1013. taicpu(p).opcode := A_INC;
  1014. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1015. taicpu(p).ops := 1
  1016. end
  1017. else if (l=-1) and UseIncDec then
  1018. begin
  1019. taicpu(p).opcode := A_DEC;
  1020. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1021. taicpu(p).ops := 1;
  1022. end
  1023. else
  1024. begin
  1025. if (l<0) and (l<>-2147483648) then
  1026. begin
  1027. taicpu(p).opcode := A_SUB;
  1028. taicpu(p).loadConst(0,-l);
  1029. end
  1030. else
  1031. begin
  1032. taicpu(p).opcode := A_ADD;
  1033. taicpu(p).loadConst(0,l);
  1034. end;
  1035. end;
  1036. end;
  1037. end
  1038. (*
  1039. This is unsafe, lea doesn't modify the flags but "add"
  1040. does. This breaks webtbs/tw15694.pp. The above
  1041. transformations are also unsafe, but they don't seem to
  1042. be triggered by code that FPC generators (or that at
  1043. least does not occur in the tests...). This needs to be
  1044. fixed by checking for the liveness of the flags register.
  1045. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1046. begin
  1047. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1048. taicpu(p).oper[0]^.ref^.base);
  1049. InsertLLItem(asml,p.previous,p.next, hp1);
  1050. DebugMsg('Peephole Lea2AddBase done',hp1);
  1051. p.free;
  1052. p:=hp1;
  1053. continue;
  1054. end
  1055. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1056. begin
  1057. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1058. taicpu(p).oper[0]^.ref^.index);
  1059. InsertLLItem(asml,p.previous,p.next,hp1);
  1060. DebugMsg('Peephole Lea2AddIndex done',hp1);
  1061. p.free;
  1062. p:=hp1;
  1063. continue;
  1064. end
  1065. *)
  1066. end;
  1067. A_MOV:
  1068. begin
  1069. If OptPass1MOV(p) then
  1070. Continue;
  1071. end;
  1072. A_MOVSX,
  1073. A_MOVZX :
  1074. begin
  1075. if (taicpu(p).oper[1]^.typ = top_reg) and
  1076. GetNextInstruction(p,hp1) and
  1077. (hp1.typ = ait_instruction) and
  1078. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1079. (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
  1080. GetNextInstruction(hp1,hp2) and
  1081. MatchInstruction(hp2,A_MOV,[]) and
  1082. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1083. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  1084. (((taicpu(hp1).ops=2) and
  1085. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  1086. ((taicpu(hp1).ops=1) and
  1087. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  1088. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  1089. { change movsX/movzX reg/ref, reg2 }
  1090. { add/sub/or/... reg3/$const, reg2 }
  1091. { mov reg2 reg/ref }
  1092. { to add/sub/or/... reg3/$const, reg/ref }
  1093. begin
  1094. { by example:
  1095. movswl %si,%eax movswl %si,%eax p
  1096. decl %eax addl %edx,%eax hp1
  1097. movw %ax,%si movw %ax,%si hp2
  1098. ->
  1099. movswl %si,%eax movswl %si,%eax p
  1100. decw %eax addw %edx,%eax hp1
  1101. movw %ax,%si movw %ax,%si hp2
  1102. }
  1103. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1104. {
  1105. ->
  1106. movswl %si,%eax movswl %si,%eax p
  1107. decw %si addw %dx,%si hp1
  1108. movw %ax,%si movw %ax,%si hp2
  1109. }
  1110. case taicpu(hp1).ops of
  1111. 1:
  1112. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1113. 2:
  1114. begin
  1115. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1116. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1117. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1118. end;
  1119. else
  1120. internalerror(2008042701);
  1121. end;
  1122. {
  1123. ->
  1124. decw %si addw %dx,%si p
  1125. }
  1126. asml.remove(p);
  1127. asml.remove(hp2);
  1128. p.free;
  1129. hp2.free;
  1130. p := hp1
  1131. end
  1132. { removes superfluous And's after movzx's }
  1133. else if taicpu(p).opcode=A_MOVZX then
  1134. begin
  1135. if (taicpu(p).oper[1]^.typ = top_reg) and
  1136. GetNextInstruction(p, hp1) and
  1137. (tai(hp1).typ = ait_instruction) and
  1138. (taicpu(hp1).opcode = A_AND) and
  1139. (taicpu(hp1).oper[0]^.typ = top_const) and
  1140. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1141. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1142. case taicpu(p).opsize Of
  1143. S_BL, S_BW:
  1144. if (taicpu(hp1).oper[0]^.val = $ff) then
  1145. begin
  1146. asml.remove(hp1);
  1147. hp1.free;
  1148. end;
  1149. S_WL:
  1150. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1151. begin
  1152. asml.remove(hp1);
  1153. hp1.free;
  1154. end;
  1155. end;
  1156. {changes some movzx constructs to faster synonims (all examples
  1157. are given with eax/ax, but are also valid for other registers)}
  1158. if (taicpu(p).oper[1]^.typ = top_reg) then
  1159. if (taicpu(p).oper[0]^.typ = top_reg) then
  1160. case taicpu(p).opsize of
  1161. S_BW:
  1162. begin
  1163. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1164. not(cs_opt_size in current_settings.optimizerswitches) then
  1165. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1166. begin
  1167. taicpu(p).opcode := A_AND;
  1168. taicpu(p).changeopsize(S_W);
  1169. taicpu(p).loadConst(0,$ff);
  1170. end
  1171. else if GetNextInstruction(p, hp1) and
  1172. (tai(hp1).typ = ait_instruction) and
  1173. (taicpu(hp1).opcode = A_AND) and
  1174. (taicpu(hp1).oper[0]^.typ = top_const) and
  1175. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1176. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1177. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1178. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1179. begin
  1180. taicpu(p).opcode := A_MOV;
  1181. taicpu(p).changeopsize(S_W);
  1182. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1183. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1184. end;
  1185. end;
  1186. S_BL:
  1187. begin
  1188. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1189. not(cs_opt_size in current_settings.optimizerswitches) then
  1190. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1191. begin
  1192. taicpu(p).opcode := A_AND;
  1193. taicpu(p).changeopsize(S_L);
  1194. taicpu(p).loadConst(0,$ff)
  1195. end
  1196. else if GetNextInstruction(p, hp1) and
  1197. (tai(hp1).typ = ait_instruction) and
  1198. (taicpu(hp1).opcode = A_AND) and
  1199. (taicpu(hp1).oper[0]^.typ = top_const) and
  1200. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1201. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1202. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1203. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1204. begin
  1205. taicpu(p).opcode := A_MOV;
  1206. taicpu(p).changeopsize(S_L);
  1207. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1208. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1209. end
  1210. end;
  1211. S_WL:
  1212. begin
  1213. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1214. not(cs_opt_size in current_settings.optimizerswitches) then
  1215. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1216. begin
  1217. taicpu(p).opcode := A_AND;
  1218. taicpu(p).changeopsize(S_L);
  1219. taicpu(p).loadConst(0,$ffff);
  1220. end
  1221. else if GetNextInstruction(p, hp1) and
  1222. (tai(hp1).typ = ait_instruction) and
  1223. (taicpu(hp1).opcode = A_AND) and
  1224. (taicpu(hp1).oper[0]^.typ = top_const) and
  1225. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1226. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1227. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1228. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1229. begin
  1230. taicpu(p).opcode := A_MOV;
  1231. taicpu(p).changeopsize(S_L);
  1232. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1233. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1234. end;
  1235. end;
  1236. end
  1237. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1238. begin
  1239. if GetNextInstruction(p, hp1) and
  1240. (tai(hp1).typ = ait_instruction) and
  1241. (taicpu(hp1).opcode = A_AND) and
  1242. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1243. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1244. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1245. begin
  1246. taicpu(p).opcode := A_MOV;
  1247. case taicpu(p).opsize Of
  1248. S_BL:
  1249. begin
  1250. taicpu(p).changeopsize(S_L);
  1251. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1252. end;
  1253. S_WL:
  1254. begin
  1255. taicpu(p).changeopsize(S_L);
  1256. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1257. end;
  1258. S_BW:
  1259. begin
  1260. taicpu(p).changeopsize(S_W);
  1261. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1262. end;
  1263. end;
  1264. end;
  1265. end;
  1266. end;
  1267. end;
  1268. (* should not be generated anymore by the current code generator
  1269. A_POP:
  1270. begin
  1271. if target_info.system=system_i386_go32v2 then
  1272. begin
  1273. { Transform a series of pop/pop/pop/push/push/push to }
  1274. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1275. { because I'm not sure whether they can cope with }
  1276. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1277. { such a problem when using esp as frame pointer (JM) }
  1278. if (taicpu(p).oper[0]^.typ = top_reg) then
  1279. begin
  1280. hp1 := p;
  1281. hp2 := p;
  1282. l := 0;
  1283. while getNextInstruction(hp1,hp1) and
  1284. (hp1.typ = ait_instruction) and
  1285. (taicpu(hp1).opcode = A_POP) and
  1286. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1287. begin
  1288. hp2 := hp1;
  1289. inc(l,4);
  1290. end;
  1291. getLastInstruction(p,hp3);
  1292. l1 := 0;
  1293. while (hp2 <> hp3) and
  1294. assigned(hp1) and
  1295. (hp1.typ = ait_instruction) and
  1296. (taicpu(hp1).opcode = A_PUSH) and
  1297. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1298. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1299. begin
  1300. { change it to a two op operation }
  1301. taicpu(hp2).oper[1]^.typ:=top_none;
  1302. taicpu(hp2).ops:=2;
  1303. taicpu(hp2).opcode := A_MOV;
  1304. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1305. reference_reset(tmpref);
  1306. tmpRef.base.enum:=R_INTREGISTER;
  1307. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1308. convert_register_to_enum(tmpref.base);
  1309. tmpRef.offset := l;
  1310. taicpu(hp2).loadRef(0,tmpRef);
  1311. hp4 := hp1;
  1312. getNextInstruction(hp1,hp1);
  1313. asml.remove(hp4);
  1314. hp4.free;
  1315. getLastInstruction(hp2,hp2);
  1316. dec(l,4);
  1317. inc(l1);
  1318. end;
  1319. if l <> -4 then
  1320. begin
  1321. inc(l,4);
  1322. for l1 := l1 downto 1 do
  1323. begin
  1324. getNextInstruction(hp2,hp2);
  1325. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1326. end
  1327. end
  1328. end
  1329. end
  1330. else
  1331. begin
  1332. if (taicpu(p).oper[0]^.typ = top_reg) and
  1333. GetNextInstruction(p, hp1) and
  1334. (tai(hp1).typ=ait_instruction) and
  1335. (taicpu(hp1).opcode=A_PUSH) and
  1336. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1337. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1338. begin
  1339. { change it to a two op operation }
  1340. taicpu(p).oper[1]^.typ:=top_none;
  1341. taicpu(p).ops:=2;
  1342. taicpu(p).opcode := A_MOV;
  1343. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1344. reference_reset(tmpref);
  1345. TmpRef.base.enum := R_ESP;
  1346. taicpu(p).loadRef(0,TmpRef);
  1347. asml.remove(hp1);
  1348. hp1.free;
  1349. end;
  1350. end;
  1351. end;
  1352. *)
  1353. A_PUSH:
  1354. begin
  1355. if (taicpu(p).opsize = S_W) and
  1356. (taicpu(p).oper[0]^.typ = Top_Const) and
  1357. GetNextInstruction(p, hp1) and
  1358. (tai(hp1).typ = ait_instruction) and
  1359. (taicpu(hp1).opcode = A_PUSH) and
  1360. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1361. (taicpu(hp1).opsize = S_W) then
  1362. begin
  1363. taicpu(p).changeopsize(S_L);
  1364. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1365. asml.remove(hp1);
  1366. hp1.free;
  1367. end;
  1368. end;
  1369. A_SHL, A_SAL:
  1370. begin
  1371. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1372. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1373. (taicpu(p).opsize = S_L) and
  1374. (taicpu(p).oper[0]^.val <= 3) then
  1375. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1376. begin
  1377. TmpBool1 := True; {should we check the next instruction?}
  1378. TmpBool2 := False; {have we found an add/sub which could be
  1379. integrated in the lea?}
  1380. reference_reset(tmpref,2,[]);
  1381. TmpRef.index := taicpu(p).oper[1]^.reg;
  1382. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1383. while TmpBool1 and
  1384. GetNextInstruction(p, hp1) and
  1385. (tai(hp1).typ = ait_instruction) and
  1386. ((((taicpu(hp1).opcode = A_ADD) or
  1387. (taicpu(hp1).opcode = A_SUB)) and
  1388. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1389. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1390. (((taicpu(hp1).opcode = A_INC) or
  1391. (taicpu(hp1).opcode = A_DEC)) and
  1392. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1393. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1394. (not GetNextInstruction(hp1,hp2) or
  1395. not instrReadsFlags(hp2)) Do
  1396. begin
  1397. TmpBool1 := False;
  1398. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1399. begin
  1400. TmpBool1 := True;
  1401. TmpBool2 := True;
  1402. case taicpu(hp1).opcode of
  1403. A_ADD:
  1404. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1405. A_SUB:
  1406. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1407. end;
  1408. asml.remove(hp1);
  1409. hp1.free;
  1410. end
  1411. else
  1412. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1413. (((taicpu(hp1).opcode = A_ADD) and
  1414. (TmpRef.base = NR_NO)) or
  1415. (taicpu(hp1).opcode = A_INC) or
  1416. (taicpu(hp1).opcode = A_DEC)) then
  1417. begin
  1418. TmpBool1 := True;
  1419. TmpBool2 := True;
  1420. case taicpu(hp1).opcode of
  1421. A_ADD:
  1422. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1423. A_INC:
  1424. inc(TmpRef.offset);
  1425. A_DEC:
  1426. dec(TmpRef.offset);
  1427. end;
  1428. asml.remove(hp1);
  1429. hp1.free;
  1430. end;
  1431. end;
  1432. if TmpBool2 or
  1433. ((current_settings.optimizecputype < cpu_Pentium2) and
  1434. (taicpu(p).oper[0]^.val <= 3) and
  1435. not(cs_opt_size in current_settings.optimizerswitches)) then
  1436. begin
  1437. if not(TmpBool2) and
  1438. (taicpu(p).oper[0]^.val = 1) then
  1439. begin
  1440. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1441. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1442. end
  1443. else
  1444. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1445. taicpu(p).oper[1]^.reg);
  1446. InsertLLItem(p.previous, p.next, hp1);
  1447. p.free;
  1448. p := hp1;
  1449. end;
  1450. end
  1451. else
  1452. if (current_settings.optimizecputype < cpu_Pentium2) and
  1453. (taicpu(p).oper[0]^.typ = top_const) and
  1454. (taicpu(p).oper[1]^.typ = top_reg) then
  1455. if (taicpu(p).oper[0]^.val = 1) then
  1456. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1457. but faster on a 486, and Tairable in both U and V pipes on the Pentium
  1458. (unlike shl, which is only Tairable in the U pipe)}
  1459. begin
  1460. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1461. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1462. InsertLLItem(p.previous, p.next, hp1);
  1463. p.free;
  1464. p := hp1;
  1465. end
  1466. else if (taicpu(p).opsize = S_L) and
  1467. (taicpu(p).oper[0]^.val<= 3) then
  1468. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1469. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1470. begin
  1471. reference_reset(tmpref,2,[]);
  1472. TmpRef.index := taicpu(p).oper[1]^.reg;
  1473. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1474. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1475. InsertLLItem(p.previous, p.next, hp1);
  1476. p.free;
  1477. p := hp1;
  1478. end
  1479. end;
  1480. A_SETcc :
  1481. { changes
  1482. setcc (funcres) setcc reg
  1483. movb (funcres), reg to leave/ret
  1484. leave/ret }
  1485. begin
  1486. if (taicpu(p).oper[0]^.typ = top_ref) and
  1487. GetNextInstruction(p, hp1) and
  1488. GetNextInstruction(hp1, hp2) and
  1489. IsExitCode(hp2) and
  1490. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1491. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1492. not(assigned(current_procinfo.procdef.funcretsym) and
  1493. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1494. (hp1.typ = ait_instruction) and
  1495. (taicpu(hp1).opcode = A_MOV) and
  1496. (taicpu(hp1).opsize = S_B) and
  1497. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1498. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1499. begin
  1500. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1501. asml.remove(hp1);
  1502. hp1.free;
  1503. end
  1504. end;
  1505. A_SUB:
  1506. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1507. { * change "sub/add const1, reg" or "dec reg" followed by
  1508. "sub const2, reg" to one "sub ..., reg" }
  1509. begin
  1510. if (taicpu(p).oper[0]^.typ = top_const) and
  1511. (taicpu(p).oper[1]^.typ = top_reg) then
  1512. if (taicpu(p).oper[0]^.val = 2) and
  1513. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1514. { Don't do the sub/push optimization if the sub }
  1515. { comes from setting up the stack frame (JM) }
  1516. (not getLastInstruction(p,hp1) or
  1517. (hp1.typ <> ait_instruction) or
  1518. (taicpu(hp1).opcode <> A_MOV) or
  1519. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1520. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1521. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1522. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1523. begin
  1524. hp1 := tai(p.next);
  1525. while Assigned(hp1) and
  1526. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1527. not RegReadByInstruction(NR_ESP,hp1) and
  1528. not RegModifiedByInstruction(NR_ESP,hp1) do
  1529. hp1 := tai(hp1.next);
  1530. if Assigned(hp1) and
  1531. (tai(hp1).typ = ait_instruction) and
  1532. (taicpu(hp1).opcode = A_PUSH) and
  1533. (taicpu(hp1).opsize = S_W) then
  1534. begin
  1535. taicpu(hp1).changeopsize(S_L);
  1536. if taicpu(hp1).oper[0]^.typ=top_reg then
  1537. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1538. hp1 := tai(p.next);
  1539. asml.remove(p);
  1540. p.free;
  1541. p := hp1;
  1542. continue
  1543. end;
  1544. if DoSubAddOpt(p) then
  1545. continue;
  1546. end
  1547. else if DoSubAddOpt(p) then
  1548. continue
  1549. end;
  1550. A_VMOVAPS,
  1551. A_VMOVAPD:
  1552. if OptPass1VMOVAP(p) then
  1553. continue;
  1554. A_VDIVSD,
  1555. A_VDIVSS,
  1556. A_VSUBSD,
  1557. A_VSUBSS,
  1558. A_VMULSD,
  1559. A_VMULSS,
  1560. A_VADDSD,
  1561. A_VADDSS:
  1562. if OptPass1VOP(p) then
  1563. continue;
  1564. end;
  1565. end; { if is_jmp }
  1566. end;
  1567. end;
  1568. updateUsedRegs(UsedRegs,p);
  1569. p:=tai(p.next);
  1570. end;
  1571. end;
  { Second peephole pass over the instructions from BlockStart up to (not
    including) BlockEnd.

    Instructions for which InsContainsSegRef returns true are skipped.
    For the remaining instructions the following rewrites are attempted:

      Jcc        - "jb/jc/jnae L; inc/dec x; L:"  ->  "cmc; adc/sbb $0,x"
                   "jnb/jnc/jae L; inc/dec x; L:" ->  "adc/sbb $0,x"
                 - on CPUs with CMOV: short runs of reg->reg movs guarded
                   by the jump are turned into cmovcc (one- and two-armed
                   form)
      FSTP/FISTP - delegated to DoFpuLoadStoreOpt
      IMUL       - "mov reg1,reg2; imul y,reg2"   ->  "imul y,reg1,reg2"
      JMP        - a direct jmp to a label that is followed by "ret" is
                   replaced by that ret
      MOV        - delegated to OptPass2MOV

    Whenever a rewrite replaces or re-targets p the loop "continue"s so that
    the new instruction at p is examined again. }
  procedure TCPUAsmOptimizer.PeepHoleOptPass2;

  {$ifdef DEBUG_AOPTCPU}
    procedure DebugMsg(const s: string;p : tai);
      begin
        asml.insertbefore(tai_comment.Create(strpnew(s)), p);
      end;
  {$else DEBUG_AOPTCPU}
    procedure DebugMsg(const s: string;p : tai);inline;
      begin
      end;
  {$endif DEBUG_AOPTCPU}

    { True if p is a 16 or 32 bit "mov reg,reg", i.e. a mov that may be
      replaced by a cmovcc. }
    function CanBeCMOV(p : tai) : boolean;
      begin
        CanBeCMOV:=assigned(p) and (p.typ=ait_instruction) and
          (taicpu(p).opcode=A_MOV) and
          (taicpu(p).opsize in [S_L,S_W]) and
          ((taicpu(p).oper[0]^.typ = top_reg)
            { we can't use cmov ref,reg because
              ref could be nil and cmov still throws an exception
              if ref=nil but the mov isn't done (FK)
             or ((taicpu(p).oper[0]^.typ = top_ref) and
              (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
            }
          ) and
          (taicpu(p).oper[1]^.typ in [top_reg]);
      end;

    { Turns the run of consecutive movs starting at hp into "cmov<cond>".
      On return hp is the first item after the run (or nil).
      The caller must guarantee CanBeCMOV(hp) on entry: the first
      instruction is rewritten unconditionally. }
    procedure MovRunToCMOV(var hp : tai; cond : tasmcond);
      begin
        repeat
          taicpu(hp).opcode:=A_CMOVcc;
          taicpu(hp).condition:=cond;
          GetNextInstruction(hp,hp);
        until not(assigned(hp)) or
          not(CanBeCMOV(hp));
      end;

    { Rewrites the one-operand "inc/dec x" in hp into "carryop $0,x";
      carryop is expected to be A_ADC or A_SBB. }
    procedure IncDecToCarryArith(hp : taicpu; carryop : tasmop);
      begin
        hp.ops:=2;
        hp.loadoper(1,hp.oper[0]^);
        hp.loadconst(0,0);
        hp.opcode:=carryop;
      end;

    var
      p,hp1,hp2,hp3: tai;
      l : longint;
      condition : tasmcond;
      carryadd_opcode: Tasmop;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) Do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_Jcc:
                    begin
                      { jb   @@1                            cmc
                        inc/dec operand           -->     adc/sbb operand,0
                        @@1:

                        ... and ...

                        jnb  @@1
                        inc/dec operand           -->     adc/sbb operand,0
                        @@1: }
                      if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
                        GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
                        (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
                        begin
                          if Taicpu(hp1).opcode=A_INC then
                            carryadd_opcode:=A_ADC
                          else if Taicpu(hp1).opcode=A_DEC then
                            carryadd_opcode:=A_SBB
                          else
                            carryadd_opcode:=A_NONE;
                          { NOTE(review): inc/dec leave the carry flag untouched
                            while adc/sbb (and cmc) write it; nothing here checks
                            that the carry flag is dead after the label. }
                          if carryadd_opcode<>A_NONE then
                            begin
                              { C_C/C_NC are the same conditions as C_B/C_NB }
                              if Taicpu(p).condition in [C_NAE,C_B,C_C] then
                                begin
                                  { the jump disappears, so release its label
                                    reference like the other rewrites in this
                                    pass do; must happen before the operand is
                                    cleared }
                                  tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
                                  { inc/dec was executed when carry was clear:
                                    invert the carry first, then add/subtract it }
                                  Taicpu(p).clearop(0);
                                  Taicpu(p).ops:=0;
                                  Taicpu(p).is_jmp:=false;
                                  Taicpu(p).opcode:=A_CMC;
                                  Taicpu(p).condition:=C_NONE;
                                  IncDecToCarryArith(taicpu(hp1),carryadd_opcode);
                                  continue;
                                end
                              else if Taicpu(p).condition in [C_AE,C_NB,C_NC] then
                                begin
                                  { release the label reference before the jump
                                    (and with it the operand) is freed }
                                  tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
                                  asml.remove(p);
                                  p.free;
                                  IncDecToCarryArith(taicpu(hp1),carryadd_opcode);
                                  p:=hp1;
                                  continue;
                                end;
                            end;
                        end;
                      if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
                        begin
                          { check for
                                 jCC   xxx
                                 <several movs>
                              xxx:
                          }
                          { l counts the movs following the jump }
                          l:=0;
                          GetNextInstruction(p, hp1);
                          while assigned(hp1) and
                            CanBeCMOV(hp1) and
                            { stop on labels }
                            not(hp1.typ=ait_label) do
                            begin
                              inc(l);
                              GetNextInstruction(hp1,hp1);
                            end;
                          if assigned(hp1) then
                            begin
                              if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                begin
                                  if (l<=4) and (l>0) then
                                    begin
                                      { the movs are executed when the jump is
                                        NOT taken }
                                      condition:=inverse_cond(taicpu(p).condition);
                                      hp2:=p;
                                      GetNextInstruction(p,hp1);
                                      p:=hp1;
                                      MovRunToCMOV(hp1,condition);
                                      { wait with removing else GetNextInstruction could
                                        ignore the label if it was the only usage in the
                                        jump moved away }
                                      tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                      asml.remove(hp2);
                                      hp2.free;
                                      continue;
                                    end;
                                end
                              else
                                begin
                                  { check further for
                                         jCC   xxx
                                         <several movs 1>
                                         jmp   yyy
                                     xxx:
                                         <several movs 2>
                                     yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    { <several movs 1> must not be empty: the
                                      conversion below rewrites the instruction
                                      after the jCC unconditionally, which would
                                      otherwise be the jmp itself }
                                    (l>0) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { only a direct jump carries a label in
                                      oper[0]^.ref^ (same test as in the A_JMP
                                      branch below) }
                                    (taicpu(hp2).oper[0]^.typ=top_ref) and
                                    (taicpu(hp2).oper[0]^.ref^.refaddr=addr_full) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                    begin
                                      { l now counts <several movs 2> }
                                      l:=0;
                                      { skip hp1 to <several moves 2> }
                                      GetNextInstruction(hp1, hp1);
                                      while assigned(hp1) and
                                        CanBeCMOV(hp1) do
                                        begin
                                          inc(l);
                                          GetNextInstruction(hp1, hp1);
                                        end;
                                      { hp1 points to yyy: }
                                      { <several movs 2> must not be empty either,
                                        otherwise the second conversion would write
                                        to whatever follows xxx: }
                                      if (l>0) and
                                        assigned(hp1) and
                                        FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                        begin
                                          condition:=inverse_cond(taicpu(p).condition);
                                          GetNextInstruction(p,hp1);
                                          hp3:=p;
                                          p:=hp1;
                                          { <several movs 1>: executed when the
                                            jCC is not taken }
                                          MovRunToCMOV(hp1,condition);
                                          { hp2 is still at jmp yyy }
                                          GetNextInstruction(hp2,hp1);
                                          { hp1 is now at xxx: }
                                          condition:=inverse_cond(condition);
                                          GetNextInstruction(hp1,hp1);
                                          { hp1 is now at <several movs 2>:
                                            executed when the jCC is taken }
                                          MovRunToCMOV(hp1,condition);
                                          { remove jCC }
                                          tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp3);
                                          hp3.free;
                                          { remove jmp }
                                          tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp2);
                                          hp2.free;
                                          continue;
                                        end;
                                    end;
                                end;
                            end;
                        end;
                    end;
                  A_FSTP,A_FISTP:
                    if DoFpuLoadStoreOpt(p) then
                      continue;
                  A_IMUL:
                    begin
                      if (taicpu(p).ops >= 2) and
                        ((taicpu(p).oper[0]^.typ = top_const) or
                         ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
                        (taicpu(p).oper[1]^.typ = top_reg) and
                        ((taicpu(p).ops = 2) or
                         ((taicpu(p).oper[2]^.typ = top_reg) and
                          (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
                        getLastInstruction(p,hp1) and
                        (hp1.typ = ait_instruction) and
                        (taicpu(hp1).opcode = A_MOV) and
                        (taicpu(hp1).oper[0]^.typ = top_reg) and
                        (taicpu(hp1).oper[1]^.typ = top_reg) and
                        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
                        begin
                          taicpu(p).ops := 3;
                          taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
                          taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
                          asml.remove(hp1);
                          hp1.free;
                        end;
                    end;
                  A_JMP:
                    {
                      change
                             jmp .L1
                             ...
                         .L1:
                             ret
                      into
                             ret
                    }
                    if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) then
                      begin
                        hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
                        if assigned(hp1) and
                          SkipLabels(hp1,hp1) and
                          (hp1.typ=ait_instruction) and
                          (taicpu(hp1).opcode=A_RET) and
                          (taicpu(p).condition=C_None) then
                          begin
                            tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
                            taicpu(p).opcode:=A_RET;
                            taicpu(p).is_jmp:=false;
                            { copy the operand (if any) of the ret }
                            taicpu(p).ops:=taicpu(hp1).ops;
                            case taicpu(hp1).ops of
                              0:
                                taicpu(p).clearop(0);
                              1:
                                taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
                              else
                                internalerror(2016041301);
                            end;
                            continue;
                          end;
                      end;
                  A_MOV:
                    if OptPass2MOV(p) then
                      continue;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  1860. procedure TCPUAsmOptimizer.PostPeepHoleOpts;
  1861. var
  1862. p,hp1,hp2: tai;
  1863. IsTestConstX: boolean;
  1864. begin
  1865. p := BlockStart;
  1866. ClearUsedRegs;
  1867. while (p <> BlockEnd) Do
  1868. begin
  1869. UpdateUsedRegs(UsedRegs, tai(p.next));
  1870. case p.Typ Of
  1871. Ait_Instruction:
  1872. begin
  1873. if InsContainsSegRef(taicpu(p)) then
  1874. begin
  1875. p := tai(p.next);
  1876. continue;
  1877. end;
  1878. case taicpu(p).opcode Of
  1879. A_CALL:
  1880. begin
  1881. { don't do this on modern CPUs, this really hurts them due to
  1882. broken call/ret pairing }
  1883. if (current_settings.optimizecputype < cpu_Pentium2) and
  1884. not(cs_create_pic in current_settings.moduleswitches) and
  1885. GetNextInstruction(p, hp1) and
  1886. (hp1.typ = ait_instruction) and
  1887. (taicpu(hp1).opcode = A_JMP) and
  1888. ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
  1889. begin
  1890. hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
  1891. InsertLLItem(p.previous, p, hp2);
  1892. taicpu(p).opcode := A_JMP;
  1893. taicpu(p).is_jmp := true;
  1894. asml.remove(hp1);
  1895. hp1.free;
  1896. end
  1897. { replace
  1898. call procname
  1899. ret
  1900. by
  1901. jmp procname
  1902. this should never hurt except when pic is used, not sure
  1903. how to handle it then
  1904. but do it only on level 4 because it destroys stack back traces
  1905. }
  1906. else if (cs_opt_level4 in current_settings.optimizerswitches) and
  1907. not(cs_create_pic in current_settings.moduleswitches) and
  1908. GetNextInstruction(p, hp1) and
  1909. (hp1.typ = ait_instruction) and
  1910. (taicpu(hp1).opcode = A_RET) and
  1911. (taicpu(hp1).ops=0) then
  1912. begin
  1913. taicpu(p).opcode := A_JMP;
  1914. taicpu(p).is_jmp := true;
  1915. asml.remove(hp1);
  1916. hp1.free;
  1917. end;
  1918. end;
  1919. A_CMP:
  1920. begin
  1921. if (taicpu(p).oper[0]^.typ = top_const) and
  1922. (taicpu(p).oper[0]^.val = 0) and
  1923. (taicpu(p).oper[1]^.typ = top_reg) then
  1924. {change "cmp $0, %reg" to "test %reg, %reg"}
  1925. begin
  1926. taicpu(p).opcode := A_TEST;
  1927. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1928. continue;
  1929. end;
  1930. end;
  1931. A_MOV:
  1932. PostPeepholeOptMov(p);
  1933. A_MOVZX:
  1934. { if register vars are on, it's possible there is code like }
  1935. { "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx" }
  1936. { so we can't safely replace the movzx then with xor/mov, }
  1937. { since that would change the flags (JM) }
  1938. if not(cs_opt_regvar in current_settings.optimizerswitches) then
  1939. begin
  1940. if (taicpu(p).oper[1]^.typ = top_reg) then
  1941. if (taicpu(p).oper[0]^.typ = top_reg)
  1942. then
  1943. case taicpu(p).opsize of
  1944. S_BL:
  1945. begin
  1946. if IsGP32Reg(taicpu(p).oper[1]^.reg) and
  1947. not(cs_opt_size in current_settings.optimizerswitches) and
  1948. (current_settings.optimizecputype = cpu_Pentium) then
  1949. {Change "movzbl %reg1, %reg2" to
  1950. "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
  1951. PentiumMMX}
  1952. begin
  1953. hp1 := taicpu.op_reg_reg(A_XOR, S_L,
  1954. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1955. InsertLLItem(p.previous, p, hp1);
  1956. taicpu(p).opcode := A_MOV;
  1957. taicpu(p).changeopsize(S_B);
  1958. setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
  1959. end;
  1960. end;
  1961. end
  1962. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1963. (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  1964. (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
  1965. not(cs_opt_size in current_settings.optimizerswitches) and
  1966. IsGP32Reg(taicpu(p).oper[1]^.reg) and
  1967. (current_settings.optimizecputype = cpu_Pentium) and
  1968. (taicpu(p).opsize = S_BL) then
  1969. {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
  1970. Pentium and PentiumMMX}
  1971. begin
  1972. hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
  1973. taicpu(p).oper[1]^.reg);
  1974. taicpu(p).opcode := A_MOV;
  1975. taicpu(p).changeopsize(S_B);
  1976. setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
  1977. InsertLLItem(p.previous, p, hp1);
  1978. end;
  1979. end;
  1980. A_TEST, A_OR:
  1981. {removes the line marked with (x) from the sequence
  1982. and/or/xor/add/sub/... $x, %y
  1983. test/or %y, %y | test $-1, %y (x)
  1984. j(n)z _Label
  1985. as the first instruction already adjusts the ZF
  1986. %y operand may also be a reference }
  1987. begin
  1988. IsTestConstX:=(taicpu(p).opcode=A_TEST) and
  1989. MatchOperand(taicpu(p).oper[0]^,-1);
  1990. if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
  1991. GetLastInstruction(p, hp1) and
  1992. (tai(hp1).typ = ait_instruction) and
  1993. GetNextInstruction(p,hp2) and
  1994. MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
  1995. case taicpu(hp1).opcode Of
  1996. A_ADD, A_SUB, A_OR, A_XOR, A_AND:
  1997. begin
  1998. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
  1999. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  2000. { and in case of carry for A(E)/B(E)/C/NC }
  2001. ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
  2002. ((taicpu(hp1).opcode <> A_ADD) and
  2003. (taicpu(hp1).opcode <> A_SUB))) then
  2004. begin
  2005. hp1 := tai(p.next);
  2006. asml.remove(p);
  2007. p.free;
  2008. p := tai(hp1);
  2009. continue
  2010. end;
  2011. end;
  2012. A_SHL, A_SAL, A_SHR, A_SAR:
  2013. begin
  2014. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
  2015. { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
  2016. { therefore, it's only safe to do this optimization for }
  2017. { shifts by a (nonzero) constant }
  2018. (taicpu(hp1).oper[0]^.typ = top_const) and
  2019. (taicpu(hp1).oper[0]^.val <> 0) and
  2020. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  2021. { and in case of carry for A(E)/B(E)/C/NC }
  2022. (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
  2023. begin
  2024. hp1 := tai(p.next);
  2025. asml.remove(p);
  2026. p.free;
  2027. p := tai(hp1);
  2028. continue
  2029. end;
  2030. end;
  2031. A_DEC, A_INC, A_NEG:
  2032. begin
  2033. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
  2034. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  2035. { and in case of carry for A(E)/B(E)/C/NC }
  2036. (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
  2037. begin
  2038. case taicpu(hp1).opcode Of
  2039. A_DEC, A_INC:
  2040. {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
  2041. begin
  2042. case taicpu(hp1).opcode Of
  2043. A_DEC: taicpu(hp1).opcode := A_SUB;
  2044. A_INC: taicpu(hp1).opcode := A_ADD;
  2045. end;
  2046. taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
  2047. taicpu(hp1).loadConst(0,1);
  2048. taicpu(hp1).ops:=2;
  2049. end
  2050. end;
  2051. hp1 := tai(p.next);
  2052. asml.remove(p);
  2053. p.free;
  2054. p := tai(hp1);
  2055. continue
  2056. end;
  2057. end
  2058. else
  2059. { change "test $-1,%reg" into "test %reg,%reg" }
  2060. if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
  2061. taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  2062. end { case }
  2063. else
  2064. { change "test $-1,%reg" into "test %reg,%reg" }
  2065. if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
  2066. taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  2067. end;
  2068. end;
  2069. end;
  2070. end;
  2071. p := tai(p.next)
  2072. end;
  2073. end;
  { Top-level driver of the i386 assembler optimizer.

    The instruction list (asml) is handled block by block: pass_1 is run for
    the block beginning at BlockStart, the peephole passes are applied (only
    when cs_opt_peephole is enabled), and any inline assembler blocks that
    follow (bracketed by mark_AsmBlockStart / mark_AsmBlockEnd markers) are
    stepped over.  The whole list is swept once, or several times when
    cs_opt_level3 is enabled.  PrePeepHoleOpts only runs during the first
    sweep and PostPeepHoleOpts only during the final one. }
  procedure TCpuAsmOptimizer.Optimize;
    var
      followingItem: tai;
      sweep: longint;
      multiSweep, changed, finalSweep: boolean;

    { Runs the peephole passes for the current block. }
    procedure RunPeepholePasses;
      begin
        if cs_opt_peephole in current_settings.optimizerswitches then
          begin
            if sweep = 0 then
              PrePeepHoleOpts;
            PeepHoleOptPass1;
            { the first sweep performs pass 1 a second time }
            if sweep = 0 then
              PeepHoleOptPass1;
          end;
        { more peephole optimizations }
        if cs_opt_peephole in current_settings.optimizerswitches then
          begin
            PeepHoleOptPass2;
            if finalSweep then
              PostPeepHoleOpts;
          end;
      end;

    { Moves BlockStart past every inline assembler block that begins at the
      current position. }
    procedure StepOverAsmBlocks;
      begin
        while Assigned(BlockStart) and
              (BlockStart.typ = ait_marker) and
              (tai_marker(BlockStart).Kind = mark_AsmBlockStart) do
          begin
            { advance to the marker that closes this assembler block }
            repeat
              BlockStart := tai(BlockStart.Next);
            until (BlockStart.typ = ait_marker) and
                  (tai_marker(BlockStart).Kind = mark_AsmBlockEnd);
            { BlockStart is now the mark_AsmBlockEnd marker }
            if not GetNextInstruction(BlockStart, followingItem) or
               ((followingItem.typ = ait_marker) and
                (tai_marker(followingItem).Kind = mark_AsmBlockStart)) then
              { nothing follows, or another assembler block follows directly:
                continue from there so that block gets skipped as well }
              BlockStart := followingItem
            else
              { ordinary instructions follow the assembler block, so prepare
                the next block of "normal" instructions }
              pass_1;
          end;
      end;

    begin
      multiSweep := cs_opt_level3 in current_settings.optimizerswitches;
      changed := false;
      sweep := 0;
      repeat
        { Decide up front whether this sweep is the last one.
          Note: nothing in this procedure ever sets "changed" to true, so the
          middle clause currently reduces to "sweep > 2"; the "sweep = 4"
          clause is the guard against endless loops. }
        finalSweep := (not multiSweep) or
                      ((not changed) and (sweep > 2)) or
                      (sweep = 4);
        changed := false;

        { set up the label table, always necessary }
        BlockStart := tai(asml.first);
        pass_1;
        { BlockEnd is now either an ait_marker of kind mark_AsmBlockStart
          or nil }
        while Assigned(BlockStart) do
          begin
            RunPeepholePasses;
            { continue where we left off: BlockEnd is either the start of an
              assembler block or nil }
            BlockStart := BlockEnd;
            StepOverAsmBlocks;
          end;

        inc(sweep);
      until finalSweep;
      dfa.free;
    end;
  { Unit initialization: publish TCpuAsmOptimizer through casmoptimizer.
    NOTE(review): casmoptimizer is declared outside this unit's visible
    source; presumably it is the class reference the compiler uses to
    instantiate the CPU-specific assembler optimizer - confirm. }
  initialization
    casmoptimizer := TCpuAsmOptimizer;
  end.