aoptcpu.pas 95 KB


  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aoptobj, aoptcpub, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  25. Type
  26. TCpuAsmOptimizer = class(TX86AsmOptimizer)
  27. procedure Optimize; override;
  28. procedure PrePeepHoleOpts; override;
  29. procedure PeepHoleOptPass1; override;
  30. procedure PeepHoleOptPass2; override;
  31. procedure PostPeepHoleOpts; override;
  32. function DoFpuLoadStoreOpt(var p : tai) : boolean;
  33. function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
  34. function InstructionLoadsFromReg(const reg : TRegister;const hp : tai) : boolean;override;
  35. end;
  36. Var
  37. AsmOptimizer : TCpuAsmOptimizer;
  38. Implementation
  39. uses
  40. verbose,globtype,globals,
  41. cutils,
  42. aoptbase,
  43. cpuinfo,
  44. aasmcpu,
  45. aoptutils,
  46. procinfo,
  47. cgutils,cgx86,
  48. { units we should get rid of: }
  49. symsym,symconst;
  function TCPUAsmoptimizer.DoFpuLoadStoreOpt(var p: tai): boolean;
    { Handles "fstp mem; fld mem" and "fistp mem; fild mem" pairs that use the
      same memory reference and operand size.

      p is the store instruction (the caller dispatches on A_FSTP/A_FISTP).
      When the pair is an extended (S_FX) store/load that is directly followed
      by the exit code, and the reference is frame-pointer based without an
      index, both instructions are removed and p is moved to the exit code.

      Returns true if a "continue" should be done after this optimization. }
    var
      next_ins, exit_ins: tai;
      store, load: taicpu;
    begin
      Result := false;
      store := taicpu(p);

      if store.oper[0]^.typ <> top_ref then
        exit;
      if not getNextInstruction(p, next_ins) then
        exit;
      if next_ins.typ <> ait_instruction then
        exit;
      load := taicpu(next_ins);

      { only fstp/fld and fistp/fild pairs qualify }
      if not (((load.opcode = A_FLD) and (store.opcode = A_FSTP)) or
              ((store.opcode = A_FISTP) and (load.opcode = A_FILD))) then
        exit;

      { both must access the very same memory operand with the same size }
      if (load.oper[0]^.typ <> top_ref) or
         (load.opsize <> store.opsize) or
         not RefsEqual(store.oper[0]^.ref^, load.oper[0]^.ref^) then
        exit;

      { replacing fstp f;fld f by fst f is only valid for extended because of
        rounding, so everything below is restricted to S_FX }
      if store.opsize <> S_FX then
        exit;
      if not getNextInstruction(next_ins, exit_ins) then
        exit;
      if (exit_ins.typ <> ait_instruction) or not IsExitCode(exit_ins) then
        exit;
      if store.oper[0]^.ref^.base <> current_procinfo.FramePointer then
        exit;
      { keep the pair if a function result symbol exists and the reference
        lies below its location }
      if assigned(current_procinfo.procdef.funcretsym) and
         (store.oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
        exit;
      if store.oper[0]^.ref^.index <> NR_NO then
        exit;

      asml.remove(p);
      asml.remove(next_ins);
      p.free;
      next_ins.free;
      p := exit_ins;
      removeLastDeallocForFuncRes(p);
      Result := true;

      { Note: the weaker rewrite "fstp/fistp mem; fld/fild mem" -> "fst/fist mem"
        for the non-extended sizes is intentionally not performed, because the
        store operation rounds (and fst can't store an extended value). }
    end;
  { converts a register related TInsChange value (read, write, read/write or
    modify variant of EAX..EDI) to the corresponding super register }
  function tch2reg(ch: tinschange): tsuperregister;
    const
      ch2reg: array[CH_REAX..CH_REDI] of tsuperregister = (RS_EAX,RS_ECX,RS_EDX,RS_EBX,RS_ESP,RS_EBP,RS_ESI,RS_EDI);
    var
      group_base: longint;
    begin
      { ordinal that has to be subtracted from ch to land in the
        CH_REAX..CH_REDI range the table is indexed with }
      group_base := 0;
      if (ch > CH_REDI) then
        begin
          if (ch <= CH_WEDI) then
            group_base := ord(CH_REDI)
          else if (ch <= CH_RWEDI) then
            group_base := ord(CH_WEDI)
          else if (ch <= CH_MEDI) then
            group_base := ord(CH_RWEDI)
          else
            InternalError(2016041901);
        end;
      tch2reg := ch2reg[tinschange(ord(ch) - group_base)];
    end;
  { Checks if the register is an integer register whose super register lies
    in the range RS_EAX..RS_EBX. Only the register type and the super register
    are inspected, the sub register (size) is not. }
  function isgp32reg(reg: TRegister): boolean;
    begin
      isgp32reg:=false;
      if getregtype(reg)<>R_INTREGISTER then
        exit;
{$push}{$warnings off}
      isgp32reg:=(getsupreg(reg)>=RS_EAX) and (getsupreg(reg)<=RS_EBX);
{$pop}
    end;
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
    { Override that simply forwards to RegReadByInstruction. }
    begin
      InstructionLoadsFromReg:=RegReadByInstruction(reg,hp);
    end;
  function TCpuAsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
    { Returns true if the instruction hp reads reg, either through one of its
      operands or implicitly according to the instruction property table.
      Anything that is not an instruction never reads a register; a call is
      always treated as reading every register. }
    var
      ins: taicpu;
      opidx: longint;
      changes,
      implicit: set of TInsChange;
      flag: TSubRegister;

    function ReadsAccumulatorPart: boolean;
      { implicit source of the one operand (i)mul: reg belongs to EAX, except
        for the high byte sub register when the operation is byte sized }
      begin
        ReadsAccumulatorPart :=
          ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
          ((getsubreg(reg)<>R_SUBH) or (ins.opsize<>S_B));
      end;

    begin
      Result := false;
      if hp.typ <> ait_instruction then
        exit;
      ins := taicpu(hp);
      case ins.opcode of
        A_CALL:
          Result := true;
        A_IMUL:
          case ins.ops of
            1:
              Result := RegInOp(reg,ins.oper[0]^) or ReadsAccumulatorPart;
            2,3:
              Result := RegInOp(reg,ins.oper[0]^) or RegInOp(reg,ins.oper[1]^);
          end;
        A_MUL:
          Result := RegInOp(reg,ins.oper[0]^) or ReadsAccumulatorPart;
        A_IDIV,A_DIV:
          { EAX is always an implicit source, EDX only for non-byte sizes }
          Result := RegInOp(reg,ins.oper[0]^) or
            (
              (getregtype(reg)=R_INTREGISTER) and
              (
                (getsupreg(reg)=RS_EAX) or
                ((getsupreg(reg)=RS_EDX) and (ins.opsize<>S_B))
              )
            );
        else
          begin
            { lea never reads a segment register; Result is still false here }
            if (ins.opcode=A_LEA) and is_segment_reg(reg) then
              exit;

            { a register used inside any memory reference is always read }
            for opidx := 0 to ins.ops-1 do
              if (ins.oper[opidx]^.typ = top_ref) and
                 RegInRef(reg,ins.oper[opidx]^.ref^) then
                begin
                  Result := true;
                  exit;
                end;

            { special handling for SSE MOVSD }
            if (ins.opcode=A_MOVSD) and (ins.ops>0) then
              begin
                if ins.ops<>2 then
                  internalerror(2017042702);
                Result := RegInOp(reg,ins.oper[0]^) or
                  (
                    (ins.oper[1]^.typ=top_reg) and (ins.oper[0]^.typ=top_reg) and
                    RegInOp(reg,ins.oper[1]^)
                  );
                exit;
              end;

            changes := insprop[ins.opcode].Ch;

            { implicit reads of the eight integer registers }
            if getregtype(reg)=R_INTREGISTER then
              begin
                case getsupreg(reg) of
                  RS_EAX: implicit := [Ch_REAX,Ch_RWEAX,Ch_MEAX];
                  RS_ECX: implicit := [Ch_RECX,Ch_RWECX,Ch_MECX];
                  RS_EDX: implicit := [Ch_REDX,Ch_RWEDX,Ch_MEDX];
                  RS_EBX: implicit := [Ch_REBX,Ch_RWEBX,Ch_MEBX];
                  RS_ESP: implicit := [Ch_RESP,Ch_RWESP,Ch_MESP];
                  RS_EBP: implicit := [Ch_REBP,Ch_RWEBP,Ch_MEBP];
                  RS_ESI: implicit := [Ch_RESI,Ch_RWESI,Ch_MESI];
                  RS_EDI: implicit := [Ch_REDI,Ch_RWEDI,Ch_MEDI];
                  else
                    implicit := [];
                end;
                if implicit*changes<>[] then
                  begin
                    Result := true;
                    exit;
                  end;
              end;

            { flags register, either as a whole or a single flag }
            if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
              begin
                flag := getsubreg(reg);
                { conditional instructions: which single flags does the
                  condition depend on? }
                if (Ch_RFLAGScc in changes) and not(flag in [R_SUBW,R_SUBD,R_SUBQ]) then
                  begin
                    case ins.condition of
                      C_A,C_NBE,       { CF=0 and ZF=0  }
                      C_BE,C_NA:       { CF=1 or ZF=1   }
                        Result := flag in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
                      C_AE,C_NB,C_NC,  { CF=0           }
                      C_B,C_NAE,C_C:   { CF=1           }
                        Result := flag in [R_SUBFLAGCARRY];
                      C_NE,C_NZ,       { ZF=0           }
                      C_E,C_Z:         { ZF=1           }
                        Result := flag in [R_SUBFLAGZERO];
                      C_G,C_NLE,       { ZF=0 and SF=OF }
                      C_LE,C_NG:       { ZF=1 or SF<>OF }
                        Result := flag in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                      C_GE,C_NL,       { SF=OF          }
                      C_L,C_NGE:       { SF<>OF         }
                        Result := flag in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                      C_NO,            { OF=0           }
                      C_O:             { OF=1           }
                        Result := flag in [R_SUBFLAGOVERFLOW];
                      C_NP,C_PO,       { PF=0           }
                      C_P,C_PE:        { PF=1           }
                        Result := flag in [R_SUBFLAGPARITY];
                      C_NS,            { SF=0           }
                      C_S:             { SF=1           }
                        Result := flag in [R_SUBFLAGSIGN];
                      else
                        internalerror(2017042701);
                    end;
                    if Result then
                      exit;
                  end;

                case flag of
                  R_SUBW,R_SUBD,R_SUBQ:
                    Result :=
                      [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
                       Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                       Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*changes<>[];
                  R_SUBFLAGCARRY:
                    Result := [Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*changes<>[];
                  R_SUBFLAGPARITY:
                    Result := [Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*changes<>[];
                  R_SUBFLAGAUXILIARY:
                    Result := [Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*changes<>[];
                  R_SUBFLAGZERO:
                    Result := [Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*changes<>[];
                  R_SUBFLAGSIGN:
                    Result := [Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*changes<>[];
                  R_SUBFLAGOVERFLOW:
                    Result := [Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*changes<>[];
                  R_SUBFLAGINTERRUPT:
                    Result := [Ch_RFlags,Ch_RWFlags]*changes<>[];
                  R_SUBFLAGDIRECTION:
                    Result := [Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*changes<>[];
                  else
                    internalerror(2017042601);
                end;
                exit;
              end;

            { "op reg,reg" with both operands identical does not count as a
              read for instructions flagged Ch_NoReadIfEqualRegs }
            if (Ch_NoReadIfEqualRegs in changes) and (ins.ops=2) and
               (ins.oper[0]^.typ=top_reg) and (ins.oper[1]^.typ=top_reg) and
               (ins.oper[0]^.reg=ins.oper[1]^.reg) then
              exit;

            { explicit operands, only looked at when the property table says
              that the operand is read }
            Result :=
              (([Ch_RWOP1,Ch_ROP1,Ch_MOP1]*changes<>[]) and RegInOp(reg,ins.oper[0]^)) or
              (([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*changes<>[]) and RegInOp(reg,ins.oper[1]^)) or
              (([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*changes<>[]) and RegInOp(reg,ins.oper[2]^));
          end;
      end;
    end;
  { returns true if p contains a memory operand with a segment set }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      idx: longint;
    begin
      result:=false;
      for idx:=0 to p.opercnt-1 do
        if (p.oper[idx]^.typ=top_ref) and
           (p.oper[idx]^.ref^.segment<>NR_NO) then
          begin
            { one segment override is enough }
            result:=true;
            exit;
          end;
    end;
  function InstrReadsFlags(p: tai): boolean;
    { Returns true if p is an instruction whose property set contains any
      flag-reading change (or Ch_All), or if p is a label; false for every
      other kind of tai. }
    begin
      case p.typ of
        ait_instruction:
          InstrReadsFlags :=
            InsProp[taicpu(p).opcode].Ch*
              [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
               Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
               Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[];
        ait_label:
          { labels are reported as reading the flags }
          InstrReadsFlags := true;
        else
          InstrReadsFlags := false;
      end;
    end;
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
    { Pre-pass over all entries from BlockStart up to (not including) BlockEnd.

      Instructions that carry a segment in a memory operand are skipped.
      For the remaining instructions:
        - 32 bit "imul const, reg [, reg]" is removed (const = 1, two
          operands), turned into a mov (const = 1, three operands) or
          replaced by lea/add sequences for some constants up to 12
        - sar/shr are handed to PrePeepholeOptSxx
        - "xor reg,reg" is temporarily rewritten into a mov of constant 0
          (changed back in pass 2 according to the original comment) }
    var
      p,hp1: tai;
      tmpRef: treference;

    procedure ImulToSingleLea(scale: byte);
      { imul (scale+1), reg1 [, reg2]  ->  lea (reg1,reg1,scale), dest
        where dest is reg1 for the two operand and reg2 for the three operand
        form. Replaces p by the new instruction. }
      begin
        TmpRef.base := taicpu(p).oper[1]^.reg;
        TmpRef.index := taicpu(p).oper[1]^.reg;
        TmpRef.ScaleFactor := scale;
        if (taicpu(p).ops = 2) then
          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
        else
          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
        InsertLLItem(p.previous, p.next, hp1);
        p.free;
        p := hp1;
      end;

    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_IMUL:
                    {changes certain "imul const, %reg"'s to lea sequences}
                    begin
                      if (taicpu(p).oper[0]^.typ = Top_Const) and
                         (taicpu(p).oper[1]^.typ = Top_Reg) and
                         (taicpu(p).opsize = S_L) then
                        if (taicpu(p).oper[0]^.val = 1) then
                          if (taicpu(p).ops = 2) then
                            {remove "imul $1, reg"}
                            begin
                              hp1 := tai(p.Next);
                              asml.remove(p);
                              p.free;
                              p := hp1;
                              continue;
                            end
                          else
                            {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                            begin
                              hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                              InsertLLItem(p.previous, p.next, hp1);
                              p.free;
                              p := hp1;
                            end
                        else if
                          ((taicpu(p).ops <= 2) or
                           (taicpu(p).oper[2]^.typ = Top_Reg)) and
                          (taicpu(p).oper[0]^.val <= 12) and
                          not(cs_opt_size in current_settings.optimizerswitches) and
                          { a following jo/jno consumes the overflow flag set
                            by imul, so the imul has to stay in that case }
                          (not(GetNextInstruction(p, hp1)) or
                           not((tai(hp1).typ = ait_instruction) and
                               ((taicpu(hp1).opcode=A_Jcc) and
                                (taicpu(hp1).condition in [C_O,C_NO])))) then
                          begin
                            { NOTE(review): the two instruction sequences for
                              the three operand forms of 6 and 12 read reg1
                              after writing reg2, so they look wrong if both
                              are the same register - confirm such an imul
                              cannot reach this point }
                            reference_reset(tmpref,1,[]);
                            case taicpu(p).oper[0]^.val Of
                              3:
                                {imul 3, reg1, reg2 to
                                   lea (reg1,reg1,2), reg2
                                 imul 3, reg1 to
                                   lea (reg1,reg1,2), reg1}
                                ImulToSingleLea(2);
                              5:
                                {imul 5, reg1, reg2 to
                                   lea (reg1,reg1,4), reg2
                                 imul 5, reg1 to
                                   lea (reg1,reg1,4), reg1}
                                ImulToSingleLea(4);
                              6:
                                begin
                                  {imul 6, reg1, reg2 to
                                     lea (,reg1,2), reg2
                                     lea (reg2,reg1,4), reg2
                                   imul 6, reg1 to
                                     lea (reg1,reg1,2), reg1
                                     add reg1, reg1}
                                  if (current_settings.optimizecputype <= cpu_386) then
                                    begin
                                      { the second instruction of the sequence
                                        is created first and linked in after p }
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      if (taicpu(p).ops = 3) then
                                        begin
                                          TmpRef.base := taicpu(p).oper[2]^.reg;
                                          TmpRef.ScaleFactor := 4;
                                          { the result has to end up in reg2
                                            (oper[2]); writing it to reg1 would
                                            destroy the source and leave reg2
                                            at 2*reg1 }
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                        end
                                      else
                                        begin
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                        end;
                                      InsertLLItem(p, p.next, hp1);
                                      { now the first instruction, which takes
                                        the place of the imul itself }
                                      reference_reset(tmpref,2,[]);
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      TmpRef.ScaleFactor := 2;
                                      if (taicpu(p).ops = 3) then
                                        begin
                                          TmpRef.base := NR_NO;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                                            taicpu(p).oper[2]^.reg);
                                        end
                                      else
                                        begin
                                          TmpRef.base := taicpu(p).oper[1]^.reg;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        end;
                                      InsertLLItem(p.previous, p.next, hp1);
                                      p.free;
                                      p := tai(hp1.next);
                                    end
                                end;
                              9:
                                {imul 9, reg1, reg2 to
                                   lea (reg1,reg1,8), reg2
                                 imul 9, reg1 to
                                   lea (reg1,reg1,8), reg1}
                                ImulToSingleLea(8);
                              10:
                                begin
                                  {imul 10, reg1, reg2 to
                                     lea (reg1,reg1,4), reg2
                                     add reg2, reg2
                                   imul 10, reg1 to
                                     lea (reg1,reg1,4), reg1
                                     add reg1, reg1}
                                  if (current_settings.optimizecputype <= cpu_386) then
                                    begin
                                      if (taicpu(p).ops = 3) then
                                        hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                          taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                      else
                                        hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                          taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                      InsertLLItem(p, p.next, hp1);
                                      TmpRef.base := taicpu(p).oper[1]^.reg;
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      TmpRef.ScaleFactor := 4;
                                      if (taicpu(p).ops = 3) then
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
                                      else
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                      InsertLLItem(p.previous, p.next, hp1);
                                      p.free;
                                      p := tai(hp1.next);
                                    end
                                end;
                              12:
                                begin
                                  {imul 12, reg1, reg2 to
                                     lea (,reg1,4), reg2
                                     lea (reg2,reg1,8), reg2
                                   imul 12, reg1 to
                                     lea (reg1,reg1,2), reg1
                                     lea (,reg1,4), reg1}
                                  if (current_settings.optimizecputype <= cpu_386) then
                                    begin
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      if (taicpu(p).ops = 3) then
                                        begin
                                          TmpRef.base := taicpu(p).oper[2]^.reg;
                                          TmpRef.ScaleFactor := 8;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                        end
                                      else
                                        begin
                                          TmpRef.base := NR_NO;
                                          TmpRef.ScaleFactor := 4;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        end;
                                      InsertLLItem(p, p.next, hp1);
                                      reference_reset(tmpref,2,[]);
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      if (taicpu(p).ops = 3) then
                                        begin
                                          TmpRef.base := NR_NO;
                                          TmpRef.ScaleFactor := 4;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                        end
                                      else
                                        begin
                                          TmpRef.base := taicpu(p).oper[1]^.reg;
                                          TmpRef.ScaleFactor := 2;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        end;
                                      InsertLLItem(p.previous, p.next, hp1);
                                      p.free;
                                      p := tai(hp1.next);
                                    end
                                end
                            end;
                          end;
                    end;
                  A_SAR,A_SHR:
                    if PrePeepholeOptSxx(p) then
                      continue;
                  A_XOR:
                    if (taicpu(p).oper[0]^.typ = top_reg) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                      { temporarily change this to a mov of constant 0 to make }
                      { it easier for the CSE. Will be changed back in pass 2  }
                      begin
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).loadConst(0,0);
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  600. { First pass of peephole optimizations }
  601. procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
  function WriteOk : Boolean;
    { debugging aid: prints 'Ok' on standard output and always yields true,
      so it can be spliced into a boolean expression }
    begin
      writeln('Ok');
      WriteOk:=True;
    end;
  607. var
  608. l : longint;
  609. p,hp1,hp2 : tai;
  610. hp3,hp4: tai;
  611. v:aint;
  612. TmpRef: TReference;
  613. TmpBool1, TmpBool2: Boolean;
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
    {traces successive jumps to their final destination and sets it, e.g.

         je l1                je l3
         <code>               <code>
       l1:        becomes   l1:
         je l2                je l3
         <code>               <code>
       l2:                  l2:
         jmp l3               jmp l3

     the level parameter denotes how deep we have already followed the jump,
     to avoid endless loops with constructs such as "l5: ; jmp l5"

     Returns false if following the chain was given up (nesting deeper than
     20 levels, a jump to itself, or a failed recursive call), true otherwise.
     Every time the target of hp is replaced, the reference count of the old
     label is decreased and that of the new label is increased. }
    var
      p1, p2: tai;
      l: tasmlabel;

    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
      { looks for a label following hp, skipping only entries whose type is in
        SkipInstr or ait_align; returns it in l if there is one }
      begin
        FindAnyLabel := false;
        while assigned(hp.next) and
              (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
          hp := tai(hp.next);
        if assigned(hp.next) and
           (tai(hp.next).typ = ait_label) then
          begin
            FindAnyLabel := true;
            l := tai_label(hp.next).labsym;
          end
      end;

    begin
      GetfinalDestination := false;
      if level > 20 then
        exit;
      p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
      if assigned(p1) then
        begin
          SkipLabels(p1,p1);
          if (tai(p1).typ = ait_instruction) and
             (taicpu(p1).is_jmp) then
            if { the next instruction after the label where the jump hp arrives}
               { is unconditional or of the same type as hp, so continue       }
               (taicpu(p1).condition in [C_None,hp.condition]) or
               { the next instruction after the label where the jump hp arrives}
               { is the opposite of hp (so this one is never taken), but after }
               { that one there is a branch that will be taken, so perform a   }
               { little hack: set p1 equal to this instruction (that's what the}
               { last SkipLabels is for, only works with short bool evaluation)}
               ((taicpu(p1).condition = inverse_cond(hp.condition)) and
                SkipLabels(p1,p2) and
                (p2.typ = ait_instruction) and
                (taicpu(p2).is_jmp) and
                (taicpu(p2).condition in [C_None,hp.condition]) and
                SkipLabels(p1,p1)) then
              begin
                { quick check for loops of the form "l5: ; jmp l5 }
                if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                    tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                  exit;
                if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                  exit;
                tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
                tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
              end
            else
              if (taicpu(p1).condition = inverse_cond(hp.condition)) then
                if not FindAnyLabel(p1,l) then
                  begin
{$ifdef finaldestdebug}
                    insertllitem(p1,p1.next,tai_comment.Create(
                      strpnew('previous label inserted')));
{$endif finaldestdebug}
                    current_asmdata.getjumplabel(l);
                    insertllitem(p1,p1.next,tai_label.Create(l));
                    tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    l.increfs;
                    { this won't work, since the new label isn't in the labeltable }
                    { so it will fail the rangecheck. Labeltable should become a   }
                    { hashtable to support this:                                   }
                    { GetFinalDestination(asml, hp);                               }
                  end
                else
                  begin
{$ifdef finaldestdebug}
                    insertllitem(p1,p1.next,tai_comment.Create(
                      strpnew('next label reused')));
{$endif finaldestdebug}
                    l.increfs;
                    { hp no longer jumps to its old target, so release that
                      reference as well (like the other two branches do) }
                    tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    if not GetFinalDestination(asml, hp,succ(level)) then
                      exit;
                  end;
        end;
      GetFinalDestination := true;
    end;
  function DoSubAddOpt(var p: tai): Boolean;
    { Folds a preceding dec/sub/add on the same register into the constant
      operand of p and removes that preceding instruction.

      NOTE(review): p is read as "op const, reg" without any check here
      (oper[0]^.val and oper[1]^.reg), presumably a sub - confirm at the
      call site. hp1 is the variable of the enclosing procedure.

      Returns true only when, after folding an add, the constant of p became
      zero and p itself was removed; p then points to the instruction before
      the removed one, or to the one following it if there is none. }
    var
      cur, prev: taicpu;

    procedure DropPrevious;
      { unlinks and destroys the instruction that was merged into p }
      begin
        asml.remove(hp1);
        hp1.free;
      end;

    begin
      DoSubAddOpt := False;
      if not GetLastInstruction(p, hp1) then
        exit;
      if hp1.typ <> ait_instruction then
        exit;
      if taicpu(hp1).opsize <> taicpu(p).opsize then
        exit;

      cur := taicpu(p);
      prev := taicpu(hp1);
      case prev.opcode of
        A_DEC:
          if (prev.oper[0]^.typ = top_reg) and
             (prev.oper[0]^.reg = cur.oper[1]^.reg) then
            begin
              cur.loadConst(0,cur.oper[0]^.val+1);
              DropPrevious;
            end;
        A_SUB:
          if (prev.oper[0]^.typ = top_const) and
             (prev.oper[1]^.typ = top_reg) and
             (prev.oper[1]^.reg = cur.oper[1]^.reg) then
            begin
              cur.loadConst(0,cur.oper[0]^.val+prev.oper[0]^.val);
              DropPrevious;
            end;
        A_ADD:
          if (prev.oper[0]^.typ = top_const) and
             (prev.oper[1]^.typ = top_reg) and
             (prev.oper[1]^.reg = cur.oper[1]^.reg) then
            begin
              cur.loadConst(0,cur.oper[0]^.val-prev.oper[0]^.val);
              DropPrevious;
              { both instructions cancelled each other out completely }
              if (taicpu(p).oper[0]^.val = 0) then
                begin
                  hp1 := tai(p.next);
                  asml.remove(p);
                  p.free;
                  if not GetLastInstruction(hp1, p) then
                    p := hp1;
                  DoSubAddOpt := True;
                end
            end;
      end;
    end;
  751. begin
  752. p := BlockStart;
  753. ClearUsedRegs;
  754. while (p <> BlockEnd) Do
  755. begin
  756. UpDateUsedRegs(UsedRegs, tai(p.next));
  757. case p.Typ Of
  758. ait_instruction:
  759. begin
  760. current_filepos:=taicpu(p).fileinfo;
  761. if InsContainsSegRef(taicpu(p)) then
  762. begin
  763. p := tai(p.next);
  764. continue;
  765. end;
  766. { Handle Jmp Optimizations }
  767. if taicpu(p).is_jmp then
  768. begin
  769. {the following if-block removes all code between a jmp and the next label,
  770. because it can never be executed}
  771. if (taicpu(p).opcode = A_JMP) then
  772. begin
  773. hp2:=p;
  774. while GetNextInstruction(hp2, hp1) and
  775. (hp1.typ <> ait_label) do
  776. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  777. begin
  778. { don't kill start/end of assembler block,
  779. no-line-info-start/end etc }
  780. if hp1.typ<>ait_marker then
  781. begin
  782. asml.remove(hp1);
  783. hp1.free;
  784. end
  785. else
  786. hp2:=hp1;
  787. end
  788. else break;
  789. end;
  790. { remove jumps to a label coming right after them }
  791. if GetNextInstruction(p, hp1) then
  792. begin
  793. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  794. { TODO: FIXME removing the first instruction fails}
  795. (p<>blockstart) then
  796. begin
  797. hp2:=tai(hp1.next);
  798. asml.remove(p);
  799. p.free;
  800. p:=hp2;
  801. continue;
  802. end
  803. else
  804. begin
  805. if hp1.typ = ait_label then
  806. SkipLabels(hp1,hp1);
  807. if (tai(hp1).typ=ait_instruction) and
  808. (taicpu(hp1).opcode=A_JMP) and
  809. GetNextInstruction(hp1, hp2) and
  810. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  811. begin
  812. if taicpu(p).opcode=A_Jcc then
  813. begin
  814. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  815. tai_label(hp2).labsym.decrefs;
  816. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  817. { when free'ing hp1, the ref. isn't decresed, so we don't
  818. increase it (FK)
  819. taicpu(p).oper[0]^.ref^.symbol.increfs;
  820. }
  821. asml.remove(hp1);
  822. hp1.free;
  823. GetFinalDestination(asml, taicpu(p),0);
  824. end
  825. else
  826. begin
  827. GetFinalDestination(asml, taicpu(p),0);
  828. p:=tai(p.next);
  829. continue;
  830. end;
  831. end
  832. else
  833. GetFinalDestination(asml, taicpu(p),0);
  834. end;
  835. end;
  836. end
  837. else
  838. { All other optimizes }
  839. begin
  840. for l := 0 to taicpu(p).ops-1 Do
  841. if (taicpu(p).oper[l]^.typ = top_ref) then
  842. With taicpu(p).oper[l]^.ref^ Do
  843. begin
  844. if (base = NR_NO) and
  845. (index <> NR_NO) and
  846. (scalefactor in [0,1]) then
  847. begin
  848. base := index;
  849. index := NR_NO
  850. end
  851. end;
  852. case taicpu(p).opcode Of
  853. A_AND:
  854. if OptPass1And(p) then
  855. continue;
  856. A_CMP:
  857. begin
  858. { cmp register,$8000 neg register
  859. je target --> jo target
  860. .... only if register is deallocated before jump.}
  861. case Taicpu(p).opsize of
  862. S_B: v:=$80;
  863. S_W: v:=$8000;
  864. S_L: v:=aint($80000000);
  865. else
  866. internalerror(2013112905);
  867. end;
  868. if (taicpu(p).oper[0]^.typ=Top_const) and
  869. (taicpu(p).oper[0]^.val=v) and
  870. (Taicpu(p).oper[1]^.typ=top_reg) and
  871. GetNextInstruction(p, hp1) and
  872. (hp1.typ=ait_instruction) and
  873. (taicpu(hp1).opcode=A_Jcc) and
  874. (Taicpu(hp1).condition in [C_E,C_NE]) and
  875. not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
  876. begin
  877. Taicpu(p).opcode:=A_NEG;
  878. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  879. Taicpu(p).clearop(1);
  880. Taicpu(p).ops:=1;
  881. if Taicpu(hp1).condition=C_E then
  882. Taicpu(hp1).condition:=C_O
  883. else
  884. Taicpu(hp1).condition:=C_NO;
  885. continue;
  886. end;
  887. {
  888. @@2: @@2:
  889. .... ....
  890. cmp operand1,0
  891. jle/jbe @@1
  892. dec operand1 --> sub operand1,1
  893. jmp @@2 jge/jae @@2
  894. @@1: @@1:
  895. ... ....}
  896. if (taicpu(p).oper[0]^.typ = top_const) and
  897. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  898. (taicpu(p).oper[0]^.val = 0) and
  899. GetNextInstruction(p, hp1) and
  900. (hp1.typ = ait_instruction) and
  901. (taicpu(hp1).is_jmp) and
  902. (taicpu(hp1).opcode=A_Jcc) and
  903. (taicpu(hp1).condition in [C_LE,C_BE]) and
  904. GetNextInstruction(hp1,hp2) and
  905. (hp2.typ = ait_instruction) and
  906. (taicpu(hp2).opcode = A_DEC) and
  907. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  908. GetNextInstruction(hp2, hp3) and
  909. (hp3.typ = ait_instruction) and
  910. (taicpu(hp3).is_jmp) and
  911. (taicpu(hp3).opcode = A_JMP) and
  912. GetNextInstruction(hp3, hp4) and
  913. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  914. begin
  915. taicpu(hp2).Opcode := A_SUB;
  916. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  917. taicpu(hp2).loadConst(0,1);
  918. taicpu(hp2).ops:=2;
  919. taicpu(hp3).Opcode := A_Jcc;
  920. case taicpu(hp1).condition of
  921. C_LE: taicpu(hp3).condition := C_GE;
  922. C_BE: taicpu(hp3).condition := C_AE;
  923. end;
  924. asml.remove(p);
  925. asml.remove(hp1);
  926. p.free;
  927. hp1.free;
  928. p := hp2;
  929. continue;
  930. end
  931. end;
  932. A_FLD:
  933. begin
  934. if (taicpu(p).oper[0]^.typ = top_reg) and
  935. GetNextInstruction(p, hp1) and
  936. (hp1.typ = Ait_Instruction) and
  937. (taicpu(hp1).oper[0]^.typ = top_reg) and
  938. (taicpu(hp1).oper[1]^.typ = top_reg) and
  939. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  940. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  941. { change to
  942. fld reg fxxx reg,st
  943. fxxxp st, st1 (hp1)
  944. Remark: non commutative operations must be reversed!
  945. }
  946. begin
  947. case taicpu(hp1).opcode Of
  948. A_FMULP,A_FADDP,
  949. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  950. begin
  951. case taicpu(hp1).opcode Of
  952. A_FADDP: taicpu(hp1).opcode := A_FADD;
  953. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  954. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  955. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  956. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  957. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  958. end;
  959. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  960. taicpu(hp1).oper[1]^.reg := NR_ST;
  961. asml.remove(p);
  962. p.free;
  963. p := hp1;
  964. continue;
  965. end;
  966. end;
  967. end
  968. else
  969. if (taicpu(p).oper[0]^.typ = top_ref) and
  970. GetNextInstruction(p, hp2) and
  971. (hp2.typ = Ait_Instruction) and
  972. (taicpu(hp2).ops = 2) and
  973. (taicpu(hp2).oper[0]^.typ = top_reg) and
  974. (taicpu(hp2).oper[1]^.typ = top_reg) and
  975. (taicpu(p).opsize in [S_FS, S_FL]) and
  976. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  977. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  978. if GetLastInstruction(p, hp1) and
  979. (hp1.typ = Ait_Instruction) and
  980. ((taicpu(hp1).opcode = A_FLD) or
  981. (taicpu(hp1).opcode = A_FST)) and
  982. (taicpu(hp1).opsize = taicpu(p).opsize) and
  983. (taicpu(hp1).oper[0]^.typ = top_ref) and
  984. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  985. if ((taicpu(hp2).opcode = A_FMULP) or
  986. (taicpu(hp2).opcode = A_FADDP)) then
  987. { change to
  988. fld/fst mem1 (hp1) fld/fst mem1
  989. fld mem1 (p) fadd/
  990. faddp/ fmul st, st
  991. fmulp st, st1 (hp2) }
  992. begin
  993. asml.remove(p);
  994. p.free;
  995. p := hp1;
  996. if (taicpu(hp2).opcode = A_FADDP) then
  997. taicpu(hp2).opcode := A_FADD
  998. else
  999. taicpu(hp2).opcode := A_FMUL;
  1000. taicpu(hp2).oper[1]^.reg := NR_ST;
  1001. end
  1002. else
  1003. { change to
  1004. fld/fst mem1 (hp1) fld/fst mem1
  1005. fld mem1 (p) fld st}
  1006. begin
  1007. taicpu(p).changeopsize(S_FL);
  1008. taicpu(p).loadreg(0,NR_ST);
  1009. end
  1010. else
  1011. begin
  1012. case taicpu(hp2).opcode Of
  1013. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  1014. { change to
  1015. fld/fst mem1 (hp1) fld/fst mem1
  1016. fld mem2 (p) fxxx mem2
  1017. fxxxp st, st1 (hp2) }
  1018. begin
  1019. case taicpu(hp2).opcode Of
  1020. A_FADDP: taicpu(p).opcode := A_FADD;
  1021. A_FMULP: taicpu(p).opcode := A_FMUL;
  1022. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  1023. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  1024. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  1025. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  1026. end;
  1027. asml.remove(hp2);
  1028. hp2.free;
  1029. end
  1030. end
  1031. end
  1032. end;
  1033. A_FSTP,A_FISTP:
  1034. if doFpuLoadStoreOpt(p) then
  1035. continue;
  1036. A_LEA:
  1037. begin
  1038. {removes seg register prefixes from LEA operations, as they
  1039. don't do anything}
  1040. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  1041. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  1042. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1043. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  1044. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1045. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  1046. begin
  1047. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  1048. (taicpu(p).oper[0]^.ref^.offset = 0) then
  1049. begin
  1050. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  1051. taicpu(p).oper[1]^.reg);
  1052. InsertLLItem(p.previous,p.next, hp1);
  1053. p.free;
  1054. p := hp1;
  1055. continue;
  1056. end
  1057. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  1058. begin
  1059. hp1 := tai(p.Next);
  1060. asml.remove(p);
  1061. p.free;
  1062. p := hp1;
  1063. continue;
  1064. end
  1065. { continue to use lea to adjust the stack pointer,
  1066. it is the recommended way, but only if not optimizing for size }
  1067. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  1068. (cs_opt_size in current_settings.optimizerswitches) then
  1069. with taicpu(p).oper[0]^.ref^ do
  1070. if (base = taicpu(p).oper[1]^.reg) then
  1071. begin
  1072. l := offset;
  1073. if (l=1) and UseIncDec then
  1074. begin
  1075. taicpu(p).opcode := A_INC;
  1076. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1077. taicpu(p).ops := 1
  1078. end
  1079. else if (l=-1) and UseIncDec then
  1080. begin
  1081. taicpu(p).opcode := A_DEC;
  1082. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1083. taicpu(p).ops := 1;
  1084. end
  1085. else
  1086. begin
  1087. if (l<0) and (l<>-2147483648) then
  1088. begin
  1089. taicpu(p).opcode := A_SUB;
  1090. taicpu(p).loadConst(0,-l);
  1091. end
  1092. else
  1093. begin
  1094. taicpu(p).opcode := A_ADD;
  1095. taicpu(p).loadConst(0,l);
  1096. end;
  1097. end;
  1098. end;
  1099. end
  1100. (*
  1101. This is unsafe, lea doesn't modify the flags but "add"
  1102. does. This breaks webtbs/tw15694.pp. The above
  1103. transformations are also unsafe, but they don't seem to
  1104. be triggered by code that FPC generates (or that at
  1105. least does not occur in the tests...). This needs to be
  1106. fixed by checking for the liveness of the flags register.
  1107. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1108. begin
  1109. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1110. taicpu(p).oper[0]^.ref^.base);
  1111. InsertLLItem(asml,p.previous,p.next, hp1);
  1112. DebugMsg('Peephole Lea2AddBase done',hp1);
  1113. p.free;
  1114. p:=hp1;
  1115. continue;
  1116. end
  1117. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1118. begin
  1119. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1120. taicpu(p).oper[0]^.ref^.index);
  1121. InsertLLItem(asml,p.previous,p.next,hp1);
  1122. DebugMsg('Peephole Lea2AddIndex done',hp1);
  1123. p.free;
  1124. p:=hp1;
  1125. continue;
  1126. end
  1127. *)
  1128. end;
  1129. A_MOV:
  1130. begin
  1131. If OptPass1MOV(p) then
  1132. Continue;
  1133. end;
  1134. A_MOVSX,
  1135. A_MOVZX :
  1136. begin
  1137. if (taicpu(p).oper[1]^.typ = top_reg) and
  1138. GetNextInstruction(p,hp1) and
  1139. (hp1.typ = ait_instruction) and
  1140. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1141. (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
  1142. GetNextInstruction(hp1,hp2) and
  1143. MatchInstruction(hp2,A_MOV,[]) and
  1144. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1145. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  1146. (((taicpu(hp1).ops=2) and
  1147. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  1148. ((taicpu(hp1).ops=1) and
  1149. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  1150. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  1151. { change movsX/movzX reg/ref, reg2 }
  1152. { add/sub/or/... reg3/$const, reg2 }
  1153. { mov reg2 reg/ref }
  1154. { to add/sub/or/... reg3/$const, reg/ref }
  1155. begin
  1156. { by example:
  1157. movswl %si,%eax movswl %si,%eax p
  1158. decl %eax addl %edx,%eax hp1
  1159. movw %ax,%si movw %ax,%si hp2
  1160. ->
  1161. movswl %si,%eax movswl %si,%eax p
  1162. decw %eax addw %edx,%eax hp1
  1163. movw %ax,%si movw %ax,%si hp2
  1164. }
  1165. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1166. {
  1167. ->
  1168. movswl %si,%eax movswl %si,%eax p
  1169. decw %si addw %dx,%si hp1
  1170. movw %ax,%si movw %ax,%si hp2
  1171. }
  1172. case taicpu(hp1).ops of
  1173. 1:
  1174. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1175. 2:
  1176. begin
  1177. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1178. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1179. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1180. end;
  1181. else
  1182. internalerror(2008042701);
  1183. end;
  1184. {
  1185. ->
  1186. decw %si addw %dx,%si p
  1187. }
  1188. asml.remove(p);
  1189. asml.remove(hp2);
  1190. p.free;
  1191. hp2.free;
  1192. p := hp1
  1193. end
  1194. { removes superfluous And's after movzx's }
  1195. else if taicpu(p).opcode=A_MOVZX then
  1196. begin
  1197. if (taicpu(p).oper[1]^.typ = top_reg) and
  1198. GetNextInstruction(p, hp1) and
  1199. (tai(hp1).typ = ait_instruction) and
  1200. (taicpu(hp1).opcode = A_AND) and
  1201. (taicpu(hp1).oper[0]^.typ = top_const) and
  1202. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1203. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1204. case taicpu(p).opsize Of
  1205. S_BL, S_BW:
  1206. if (taicpu(hp1).oper[0]^.val = $ff) then
  1207. begin
  1208. asml.remove(hp1);
  1209. hp1.free;
  1210. end;
  1211. S_WL:
  1212. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1213. begin
  1214. asml.remove(hp1);
  1215. hp1.free;
  1216. end;
  1217. end;
  1218. {changes some movzx constructs to faster synonyms (all examples
  1219. are given with eax/ax, but are also valid for other registers)}
  1220. if (taicpu(p).oper[1]^.typ = top_reg) then
  1221. if (taicpu(p).oper[0]^.typ = top_reg) then
  1222. case taicpu(p).opsize of
  1223. S_BW:
  1224. begin
  1225. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1226. not(cs_opt_size in current_settings.optimizerswitches) then
  1227. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1228. begin
  1229. taicpu(p).opcode := A_AND;
  1230. taicpu(p).changeopsize(S_W);
  1231. taicpu(p).loadConst(0,$ff);
  1232. end
  1233. else if GetNextInstruction(p, hp1) and
  1234. (tai(hp1).typ = ait_instruction) and
  1235. (taicpu(hp1).opcode = A_AND) and
  1236. (taicpu(hp1).oper[0]^.typ = top_const) and
  1237. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1238. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1239. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1240. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1241. begin
  1242. taicpu(p).opcode := A_MOV;
  1243. taicpu(p).changeopsize(S_W);
  1244. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1245. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1246. end;
  1247. end;
  1248. S_BL:
  1249. begin
  1250. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1251. not(cs_opt_size in current_settings.optimizerswitches) then
  1252. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1253. begin
  1254. taicpu(p).opcode := A_AND;
  1255. taicpu(p).changeopsize(S_L);
  1256. taicpu(p).loadConst(0,$ff)
  1257. end
  1258. else if GetNextInstruction(p, hp1) and
  1259. (tai(hp1).typ = ait_instruction) and
  1260. (taicpu(hp1).opcode = A_AND) and
  1261. (taicpu(hp1).oper[0]^.typ = top_const) and
  1262. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1263. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1264. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1265. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1266. begin
  1267. taicpu(p).opcode := A_MOV;
  1268. taicpu(p).changeopsize(S_L);
  1269. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1270. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1271. end
  1272. end;
  1273. S_WL:
  1274. begin
  1275. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1276. not(cs_opt_size in current_settings.optimizerswitches) then
  1277. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1278. begin
  1279. taicpu(p).opcode := A_AND;
  1280. taicpu(p).changeopsize(S_L);
  1281. taicpu(p).loadConst(0,$ffff);
  1282. end
  1283. else if GetNextInstruction(p, hp1) and
  1284. (tai(hp1).typ = ait_instruction) and
  1285. (taicpu(hp1).opcode = A_AND) and
  1286. (taicpu(hp1).oper[0]^.typ = top_const) and
  1287. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1288. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1289. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1290. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1291. begin
  1292. taicpu(p).opcode := A_MOV;
  1293. taicpu(p).changeopsize(S_L);
  1294. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1295. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1296. end;
  1297. end;
  1298. end
  1299. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1300. begin
  1301. if GetNextInstruction(p, hp1) and
  1302. (tai(hp1).typ = ait_instruction) and
  1303. (taicpu(hp1).opcode = A_AND) and
  1304. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1305. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1306. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1307. begin
  1308. taicpu(p).opcode := A_MOV;
  1309. case taicpu(p).opsize Of
  1310. S_BL:
  1311. begin
  1312. taicpu(p).changeopsize(S_L);
  1313. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1314. end;
  1315. S_WL:
  1316. begin
  1317. taicpu(p).changeopsize(S_L);
  1318. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1319. end;
  1320. S_BW:
  1321. begin
  1322. taicpu(p).changeopsize(S_W);
  1323. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1324. end;
  1325. end;
  1326. end;
  1327. end;
  1328. end;
  1329. end;
  1330. (* should not be generated anymore by the current code generator
  1331. A_POP:
  1332. begin
  1333. if target_info.system=system_i386_go32v2 then
  1334. begin
  1335. { Transform a series of pop/pop/pop/push/push/push to }
  1336. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1337. { because I'm not sure whether they can cope with }
  1338. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1339. { such a problem when using esp as frame pointer (JM) }
  1340. if (taicpu(p).oper[0]^.typ = top_reg) then
  1341. begin
  1342. hp1 := p;
  1343. hp2 := p;
  1344. l := 0;
  1345. while getNextInstruction(hp1,hp1) and
  1346. (hp1.typ = ait_instruction) and
  1347. (taicpu(hp1).opcode = A_POP) and
  1348. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1349. begin
  1350. hp2 := hp1;
  1351. inc(l,4);
  1352. end;
  1353. getLastInstruction(p,hp3);
  1354. l1 := 0;
  1355. while (hp2 <> hp3) and
  1356. assigned(hp1) and
  1357. (hp1.typ = ait_instruction) and
  1358. (taicpu(hp1).opcode = A_PUSH) and
  1359. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1360. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1361. begin
  1362. { change it to a two op operation }
  1363. taicpu(hp2).oper[1]^.typ:=top_none;
  1364. taicpu(hp2).ops:=2;
  1365. taicpu(hp2).opcode := A_MOV;
  1366. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1367. reference_reset(tmpref);
  1368. tmpRef.base.enum:=R_INTREGISTER;
  1369. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1370. convert_register_to_enum(tmpref.base);
  1371. tmpRef.offset := l;
  1372. taicpu(hp2).loadRef(0,tmpRef);
  1373. hp4 := hp1;
  1374. getNextInstruction(hp1,hp1);
  1375. asml.remove(hp4);
  1376. hp4.free;
  1377. getLastInstruction(hp2,hp2);
  1378. dec(l,4);
  1379. inc(l1);
  1380. end;
  1381. if l <> -4 then
  1382. begin
  1383. inc(l,4);
  1384. for l1 := l1 downto 1 do
  1385. begin
  1386. getNextInstruction(hp2,hp2);
  1387. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1388. end
  1389. end
  1390. end
  1391. end
  1392. else
  1393. begin
  1394. if (taicpu(p).oper[0]^.typ = top_reg) and
  1395. GetNextInstruction(p, hp1) and
  1396. (tai(hp1).typ=ait_instruction) and
  1397. (taicpu(hp1).opcode=A_PUSH) and
  1398. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1399. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1400. begin
  1401. { change it to a two op operation }
  1402. taicpu(p).oper[1]^.typ:=top_none;
  1403. taicpu(p).ops:=2;
  1404. taicpu(p).opcode := A_MOV;
  1405. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1406. reference_reset(tmpref);
  1407. TmpRef.base.enum := R_ESP;
  1408. taicpu(p).loadRef(0,TmpRef);
  1409. asml.remove(hp1);
  1410. hp1.free;
  1411. end;
  1412. end;
  1413. end;
  1414. *)
  1415. A_PUSH:
  1416. begin
  1417. if (taicpu(p).opsize = S_W) and
  1418. (taicpu(p).oper[0]^.typ = Top_Const) and
  1419. GetNextInstruction(p, hp1) and
  1420. (tai(hp1).typ = ait_instruction) and
  1421. (taicpu(hp1).opcode = A_PUSH) and
  1422. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1423. (taicpu(hp1).opsize = S_W) then
  1424. begin
  1425. taicpu(p).changeopsize(S_L);
  1426. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1427. asml.remove(hp1);
  1428. hp1.free;
  1429. end;
  1430. end;
  1431. A_SHL, A_SAL:
  1432. begin
  1433. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1434. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1435. (taicpu(p).opsize = S_L) and
  1436. (taicpu(p).oper[0]^.val <= 3) then
  1437. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1438. begin
  1439. TmpBool1 := True; {should we check the next instruction?}
  1440. TmpBool2 := False; {have we found an add/sub which could be
  1441. integrated in the lea?}
  1442. reference_reset(tmpref,2,[]);
  1443. TmpRef.index := taicpu(p).oper[1]^.reg;
  1444. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1445. while TmpBool1 and
  1446. GetNextInstruction(p, hp1) and
  1447. (tai(hp1).typ = ait_instruction) and
  1448. ((((taicpu(hp1).opcode = A_ADD) or
  1449. (taicpu(hp1).opcode = A_SUB)) and
  1450. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1451. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1452. (((taicpu(hp1).opcode = A_INC) or
  1453. (taicpu(hp1).opcode = A_DEC)) and
  1454. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1455. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1456. (not GetNextInstruction(hp1,hp2) or
  1457. not instrReadsFlags(hp2)) Do
  1458. begin
  1459. TmpBool1 := False;
  1460. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1461. begin
  1462. TmpBool1 := True;
  1463. TmpBool2 := True;
  1464. case taicpu(hp1).opcode of
  1465. A_ADD:
  1466. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1467. A_SUB:
  1468. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1469. end;
  1470. asml.remove(hp1);
  1471. hp1.free;
  1472. end
  1473. else
  1474. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1475. (((taicpu(hp1).opcode = A_ADD) and
  1476. (TmpRef.base = NR_NO)) or
  1477. (taicpu(hp1).opcode = A_INC) or
  1478. (taicpu(hp1).opcode = A_DEC)) then
  1479. begin
  1480. TmpBool1 := True;
  1481. TmpBool2 := True;
  1482. case taicpu(hp1).opcode of
  1483. A_ADD:
  1484. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1485. A_INC:
  1486. inc(TmpRef.offset);
  1487. A_DEC:
  1488. dec(TmpRef.offset);
  1489. end;
  1490. asml.remove(hp1);
  1491. hp1.free;
  1492. end;
  1493. end;
  1494. if TmpBool2 or
  1495. ((current_settings.optimizecputype < cpu_Pentium2) and
  1496. (taicpu(p).oper[0]^.val <= 3) and
  1497. not(cs_opt_size in current_settings.optimizerswitches)) then
  1498. begin
  1499. if not(TmpBool2) and
  1500. (taicpu(p).oper[0]^.val = 1) then
  1501. begin
  1502. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1503. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1504. end
  1505. else
  1506. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1507. taicpu(p).oper[1]^.reg);
  1508. InsertLLItem(p.previous, p.next, hp1);
  1509. p.free;
  1510. p := hp1;
  1511. end;
  1512. end
  1513. else
  1514. if (current_settings.optimizecputype < cpu_Pentium2) and
  1515. (taicpu(p).oper[0]^.typ = top_const) and
  1516. (taicpu(p).oper[1]^.typ = top_reg) then
  1517. if (taicpu(p).oper[0]^.val = 1) then
  1518. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1519. but faster on a 486, and pairable in both U and V pipes on the Pentium
  1520. (unlike shl, which is only pairable in the U pipe)}
  1521. begin
  1522. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1523. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1524. InsertLLItem(p.previous, p.next, hp1);
  1525. p.free;
  1526. p := hp1;
  1527. end
  1528. else if (taicpu(p).opsize = S_L) and
  1529. (taicpu(p).oper[0]^.val<= 3) then
  1530. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1531. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1532. begin
  1533. reference_reset(tmpref,2,[]);
  1534. TmpRef.index := taicpu(p).oper[1]^.reg;
  1535. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1536. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1537. InsertLLItem(p.previous, p.next, hp1);
  1538. p.free;
  1539. p := hp1;
  1540. end
  1541. end;
  1542. A_SETcc :
  1543. { changes
  1544. setcc (funcres) setcc reg
  1545. movb (funcres), reg to leave/ret
  1546. leave/ret }
  1547. begin
  1548. if (taicpu(p).oper[0]^.typ = top_ref) and
  1549. GetNextInstruction(p, hp1) and
  1550. GetNextInstruction(hp1, hp2) and
  1551. IsExitCode(hp2) and
  1552. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1553. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1554. not(assigned(current_procinfo.procdef.funcretsym) and
  1555. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1556. (hp1.typ = ait_instruction) and
  1557. (taicpu(hp1).opcode = A_MOV) and
  1558. (taicpu(hp1).opsize = S_B) and
  1559. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1560. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1561. begin
  1562. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1563. DebugMsg('Peephole optimizer SetccMovbLeaveRet2SetccLeaveRet',p);
  1564. asml.remove(hp1);
  1565. hp1.free;
  1566. end
  1567. end;
  1568. A_SUB:
  1569. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1570. { * change "sub/add const1, reg" or "dec reg" followed by
  1571. "sub const2, reg" to one "sub ..., reg" }
  1572. begin
  1573. if (taicpu(p).oper[0]^.typ = top_const) and
  1574. (taicpu(p).oper[1]^.typ = top_reg) then
  1575. if (taicpu(p).oper[0]^.val = 2) and
  1576. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1577. { Don't do the sub/push optimization if the sub }
  1578. { comes from setting up the stack frame (JM) }
  1579. (not getLastInstruction(p,hp1) or
  1580. (hp1.typ <> ait_instruction) or
  1581. (taicpu(hp1).opcode <> A_MOV) or
  1582. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1583. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1584. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1585. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1586. begin
  1587. hp1 := tai(p.next);
  1588. while Assigned(hp1) and
  1589. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1590. not RegReadByInstruction(NR_ESP,hp1) and
  1591. not RegModifiedByInstruction(NR_ESP,hp1) do
  1592. hp1 := tai(hp1.next);
  1593. if Assigned(hp1) and
  1594. (tai(hp1).typ = ait_instruction) and
  1595. (taicpu(hp1).opcode = A_PUSH) and
  1596. (taicpu(hp1).opsize = S_W) then
  1597. begin
  1598. taicpu(hp1).changeopsize(S_L);
  1599. if taicpu(hp1).oper[0]^.typ=top_reg then
  1600. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1601. hp1 := tai(p.next);
  1602. asml.remove(p);
  1603. p.free;
  1604. p := hp1;
  1605. continue
  1606. end;
  1607. if DoSubAddOpt(p) then
  1608. continue;
  1609. end
  1610. else if DoSubAddOpt(p) then
  1611. continue
  1612. end;
  1613. A_VMOVAPS,
  1614. A_VMOVAPD:
  1615. if OptPass1VMOVAP(p) then
  1616. continue;
  1617. A_VDIVSD,
  1618. A_VDIVSS,
  1619. A_VSUBSD,
  1620. A_VSUBSS,
  1621. A_VMULSD,
  1622. A_VMULSS,
  1623. A_VADDSD,
  1624. A_VADDSS:
  1625. if OptPass1VOP(p) then
  1626. continue;
  1627. end;
  1628. end; { if is_jmp }
  1629. end;
  1630. end;
  1631. updateUsedRegs(UsedRegs,p);
  1632. p:=tai(p.next);
  1633. end;
  1634. end;
  { Second peephole pass over the current block.

    Visits every list entry from BlockStart up to, but not including,
    BlockEnd.  Instructions for which InsContainsSegRef returns true are
    stepped over untouched.  Every other instruction is dispatched on its
    opcode to the matching OptPass2* helper (DoFpuLoadStoreOpt for
    fstp/fistp).  When a helper returns true the loop looks at "curr" again
    without advancing, because the helper receives "curr" by reference and
    may have repositioned it. }
  procedure TCPUAsmOptimizer.PeepHoleOptPass2;
    var
      curr : tai;
      revisit : boolean;
    begin
      curr := BlockStart;
      ClearUsedRegs;
      while curr <> BlockEnd do
        begin
          UpdateUsedRegs(UsedRegs, tai(curr.next));
          if curr.typ = ait_instruction then
            begin
              { leave instructions carrying a segment reference alone }
              if InsContainsSegRef(taicpu(curr)) then
                begin
                  curr := tai(curr.next);
                  continue;
                end;
              revisit := false;
              case taicpu(curr).opcode of
                A_Jcc:
                  revisit := OptPass2Jcc(curr);
                A_FSTP,A_FISTP:
                  revisit := DoFpuLoadStoreOpt(curr);
                A_IMUL:
                  revisit := OptPass2Imul(curr);
                A_JMP:
                  revisit := OptPass2Jmp(curr);
                A_MOV:
                  revisit := OptPass2MOV(curr);
              end;
              { a successful helper asked for "curr" to be examined again }
              if revisit then
                continue;
            end;
          curr := tai(curr.next);
        end;
    end;
  { Final peephole pass over the current block (BlockStart up to, but not
    including, BlockEnd).  Optimize only invokes it on its last iteration.

    Rewrites performed, by opcode of the instruction at "p":
      call   - "call X; jmp Y"  -> "push Y; jmp X"   (pre-Pentium2, no PIC)
               "call X; ret"    -> "jmp X"           (-O4 only, no PIC)
      cmp    - "cmp $0,%reg"    -> "test %reg,%reg"
      mov    - delegated to PostPeepholeOptMov
      movzx  - "movzbl src,%reg" -> "xorl %reg,%reg; movb src,%reg8"
               (Pentium only, not when optimizing for size or when register
               variables are enabled)
      test/or- drops a "test/or %y,%y" or "test $-1,%y" whose flags were
               already produced by the preceding arithmetic instruction;
               otherwise turns "test $-1,%reg" into "test %reg,%reg"

    Instructions for which InsContainsSegRef returns true are skipped. }
  procedure TCPUAsmOptimizer.PostPeepHoleOpts;
    var
      p,hp1,hp2: tai;
      IsTestConstX: boolean;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) Do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_CALL:
                    begin
                      { don't do this on modern CPUs, this really hurts them due to
                        broken call/ret pairing }
                      if (current_settings.optimizecputype < cpu_Pentium2) and
                         not(cs_create_pic in current_settings.moduleswitches) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_JMP) and
                         ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
                        begin
                          { "call X; jmp Y" -> "push Y; jmp X": X then returns
                            straight to Y }
                          hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
                          InsertLLItem(p.previous, p, hp2);
                          taicpu(p).opcode := A_JMP;
                          taicpu(p).is_jmp := true;
                          asml.remove(hp1);
                          hp1.free;
                        end
                      { replace
                          call procname
                          ret
                        by
                          jmp procname

                        this should never hurt except when pic is used, not sure
                        how to handle it then

                        but do it only on level 4 because it destroys stack back traces
                      }
                      else if (cs_opt_level4 in current_settings.optimizerswitches) and
                         not(cs_create_pic in current_settings.moduleswitches) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_RET) and
                         (taicpu(hp1).ops=0) then
                        begin
                          taicpu(p).opcode := A_JMP;
                          taicpu(p).is_jmp := true;
                          asml.remove(hp1);
                          hp1.free;
                        end;
                    end;
                  A_CMP:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(p).oper[0]^.val = 0) and
                         (taicpu(p).oper[1]^.typ = top_reg) then
                        {change "cmp $0, %reg" to "test %reg, %reg"}
                        begin
                          taicpu(p).opcode := A_TEST;
                          taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                          { p is not advanced: the new test instruction is
                            examined by the A_TEST branch on the next iteration }
                          continue;
                        end;
                    end;
                  A_MOV:
                    PostPeepholeOptMov(p);
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like }
                    { "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"             }
                    { so we can't safely replace the movzx then with xor/mov,   }
                    { since that would change the flags (JM)                    }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) then
                      begin
                        if (taicpu(p).oper[1]^.typ = top_reg) then
                          if (taicpu(p).oper[0]^.typ = top_reg)
                            then
                              case taicpu(p).opsize of
                                S_BL:
                                  begin
                                    if IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                       not(cs_opt_size in current_settings.optimizerswitches) and
                                       (current_settings.optimizecputype = cpu_Pentium) and
                                       { the inserted xor zeroes the whole destination
                                         before the movb reads its source, so the source
                                         must not live in the same super register
                                         (e.g. "movzbl %al,%eax") }
                                       (getsupreg(taicpu(p).oper[0]^.reg) <> getsupreg(taicpu(p).oper[1]^.reg)) then
                                      {Change "movzbl %reg1, %reg2" to
                                       "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                       PentiumMMX}
                                      begin
                                        hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                                    taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(p.previous, p, hp1);
                                        taicpu(p).opcode := A_MOV;
                                        taicpu(p).changeopsize(S_B);
                                        setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                      end;
                                  end;
                              end
                          else if (taicpu(p).oper[0]^.typ = top_ref) and
                              { the address must not depend on the register that
                                the xor is about to clear }
                              (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                              (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                              not(cs_opt_size in current_settings.optimizerswitches) and
                              IsGP32Reg(taicpu(p).oper[1]^.reg) and
                              (current_settings.optimizecputype = cpu_Pentium) and
                              (taicpu(p).opsize = S_BL) then
                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                             Pentium and PentiumMMX}
                            begin
                              hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                          taicpu(p).oper[1]^.reg);
                              taicpu(p).opcode := A_MOV;
                              taicpu(p).changeopsize(S_B);
                              setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                              InsertLLItem(p.previous, p, hp1);
                            end;
                      end;
                  A_TEST, A_OR:
                    {removes the line marked with (x) from the sequence
                       and/or/xor/add/sub/... $x, %y
                       test/or %y, %y | test $-1, %y    (x)
                       j(n)z _Label
                     as the first instruction already adjusts the ZF
                     %y operand may also be a reference }
                    begin
                      IsTestConstX:=(taicpu(p).opcode=A_TEST) and
                        MatchOperand(taicpu(p).oper[0]^,-1);
                      if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
                         GetLastInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         GetNextInstruction(p,hp2) and
                         MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
                        case taicpu(hp1).opcode Of
                          A_ADD, A_SUB, A_OR, A_XOR, A_AND:
                            begin
                              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                                 { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                 { and in case of carry for A(E)/B(E)/C/NC                  }
                                 ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                                  ((taicpu(hp1).opcode <> A_ADD) and
                                   (taicpu(hp1).opcode <> A_SUB))) then
                                begin
                                  { drop the redundant test/or and resume at its successor }
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end;
                          A_SHL, A_SAL, A_SHR, A_SAR:
                            begin
                              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                                 { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
                                 { therefore, it's only safe to do this optimization for     }
                                 { shifts by a (nonzero) constant                            }
                                 (taicpu(hp1).oper[0]^.typ = top_const) and
                                 (taicpu(hp1).oper[0]^.val <> 0) and
                                 { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                 { and in case of carry for A(E)/B(E)/C/NC                  }
                                 (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                begin
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end;
                          A_DEC, A_INC, A_NEG:
                            begin
                              if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
                                 { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                 { and in case of carry for A(E)/B(E)/C/NC                  }
                                 (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                begin
                                  case taicpu(hp1).opcode Of
                                    A_DEC, A_INC:
                                      {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
                                      begin
                                        case taicpu(hp1).opcode Of
                                          A_DEC: taicpu(hp1).opcode := A_SUB;
                                          A_INC: taicpu(hp1).opcode := A_ADD;
                                        end;
                                        { turn the one-operand form into "op $1, operand" }
                                        taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                                        taicpu(hp1).loadConst(0,1);
                                        taicpu(hp1).ops:=2;
                                      end
                                  end;
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end
                          else
                            { change "test $-1,%reg" into "test %reg,%reg" }
                            if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                              taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                        end { case }
                      else
                        { change "test $-1,%reg" into "test %reg,%reg" }
                        if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                          taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Driver of the peephole optimizer.

    Runs the whole assembler list through the peephole passes one or more
    times.  Without cs_opt_level3 a single iteration is made; with it the
    iterations continue until the termination test below is satisfied.
    Inside an iteration the list is processed block by block: pass_1 is
    called for each block, and regions enclosed by mark_AsmBlockStart /
    mark_AsmBlockEnd markers are stepped over without being optimized.
    PrePeepHoleOpts and a second PeepHoleOptPass1 run only in iteration 0;
    PostPeepHoleOpts runs only in the final iteration.  The dfa object is
    freed at the end. }
  procedure TCpuAsmOptimizer.Optimize;
    var
      passNo: longint;
      multiPass, changed, finalPass: boolean;

    { Continues after the block that was just processed: BlockEnd is either
      nil or the marker opening an assembler block.  Every assembler block
      found at the new BlockStart is skipped. }
    procedure MoveToNextBlock;
      var
        following: tai;
      begin
        BlockStart := BlockEnd;
        while Assigned(BlockStart) and
              (BlockStart.typ = ait_marker) and
              (tai_marker(BlockStart).Kind = mark_AsmBlockStart) do
          begin
            { walk forward to the marker that closes this assembler block }
            repeat
              BlockStart := tai(BlockStart.next);
            until (BlockStart.typ = ait_marker) and
                  (tai_marker(BlockStart).Kind = mark_AsmBlockEnd);
            { BlockStart is now the mark_AsmBlockEnd marker }
            if GetNextInstruction(BlockStart, following) and
               ((following.typ <> ait_marker) or
                (tai_marker(following).Kind <> mark_AsmBlockStart)) then
              { no further assembler block follows directly, so prepare the
                next block of "normal" instructions }
              pass_1
            else
              { otherwise go on with what follows (another assembler block) }
              BlockStart := following;
          end;
      end;

    begin
      multiPass := cs_opt_level3 in current_settings.optimizerswitches;
      passNo := 0;
      changed := false;
      repeat
        if not multiPass then
          finalPass := true
        else
          finalPass := (not changed and (passNo > 2)) or
                       { prevent endless loops }
                       (passNo = 4);
        changed := false;
        { Setup labeltable, always necessary }
        BlockStart := tai(asml.first);
        pass_1;
        { BlockEnd is now either an ait_marker of kind mark_AsmBlockStart
          or nil }
        while Assigned(BlockStart) do
          begin
            if cs_opt_peephole in current_settings.optimizerswitches then
              begin
                { iteration 0 runs: pre-pass, pass 1, pass 1;
                  later iterations run pass 1 once }
                if passNo = 0 then
                  begin
                    PrePeepHoleOpts;
                    PeepHoleOptPass1;
                  end;
                PeepHoleOptPass1;
                { more peephole optimizations }
                PeepHoleOptPass2;
                if finalPass then
                  PostPeepHoleOpts;
              end;
            MoveToNextBlock;
          end;
        inc(passNo);
      until finalPass;
      dfa.free;
    end;
  { Unit initialization: stores the class reference TCpuAsmOptimizer in
    casmoptimizer.  casmoptimizer is declared outside this unit -
    presumably the hook through which the generic optimizer code
    instantiates the CPU-specific optimizer (TODO confirm). }
  1956. begin
  1957. casmoptimizer:=TCpuAsmOptimizer;
  1958. end.