aoptcpu.pas 98 KB


  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aoptobj, aoptcpub, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  25. Type
  { i386 assembler optimizer class. It derives from TX86AsmOptimizer,
    overrides the pass entry points declared below and adds a few
    i386-specific helper queries. }
  26. TCpuAsmOptimizer = class(TX86AsmOptimizer)
  { overridden driver entry point; its implementation is not part of this excerpt }
  27. procedure Optimize; override;
  { rewrites certain imul / shift-pair / "xor reg,reg" patterns before the main passes }
  28. procedure PrePeepHoleOpts; override;
  { first main peephole pass (jump optimizations and per-opcode rewrites) }
  29. procedure PeepHoleOptPass1; override;
  { NOTE(review): second pass and post pass are implemented outside this excerpt }
  30. procedure PeepHoleOptPass2; override;
  31. procedure PostPeepHoleOpts; override;
  { removes a dead "fstp/fistp mem; fld/fild mem" pair right before exit code;
    returns true if the caller should "continue" }
  32. function DoFpuLoadStoreOpt(var p : tai) : boolean;
  { true if instruction hp reads reg, explicitly or implicitly }
  33. function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
  { override that forwards to RegReadByInstruction }
  34. function InstructionLoadsFromReg(const reg : TRegister;const hp : tai) : boolean;override;
  35. end;
  36. Var
  37. AsmOptimizer : TCpuAsmOptimizer;
  38. Implementation
  39. uses
  40. verbose,globtype,globals,
  41. cutils,
  42. aoptbase,
  43. cpuinfo,
  44. aasmcpu,
  45. aoptutils,
  46. procinfo,
  47. cgutils,cgx86,
  48. { units we should get rid of: }
  49. symsym,symconst;
  function TCPUAsmoptimizer.DoFpuLoadStoreOpt(var p: tai): boolean;
  { Looks for an FPU store-and-pop to memory (fstp/fistp) in p that is
    immediately followed by a reload (fld/fild) of exactly the same memory
    operand with the same operand size.

    The pair is deleted only when all of the following hold:
      - the operand size is S_FX (extended),
      - the instruction after the reload is exit code,
      - the memory operand is based on the frame pointer and has no index,
      - the operand does not lie below the function result variable
        (when the current procedure has one).
    In that case both instructions are unlinked and freed, p is moved to the
    exit-code instruction and removeLastDeallocForFuncRes is called on it.

    Returns true if the pair was removed, i.e. the caller should perform a
    "continue"; returns false otherwise, leaving the list untouched.

    Turning "fstp f; fld f" into "fst f" for the narrower sizes is
    deliberately NOT done: the store rounds the value, so the reloaded value
    may differ from what is on the FPU stack (and fst cannot store an
    extended value at all). }
    var
      reload, afterReload: tai;
    begin
      Result := false;

      { p has to store to memory and be followed by a real instruction }
      if taicpu(p).oper[0]^.typ <> top_ref then
        exit;
      if not getNextInstruction(p, reload) then
        exit;
      if reload.typ <> ait_instruction then
        exit;

      { accept only the pairs fstp+fld and fistp+fild }
      if not (((taicpu(reload).opcode = A_FLD) and (taicpu(p).opcode = A_FSTP)) or
              ((taicpu(p).opcode = A_FISTP) and (taicpu(reload).opcode = A_FILD))) then
        exit;

      { the reload must read the very same location with the same size }
      if (taicpu(reload).oper[0]^.typ <> top_ref) or
         (taicpu(reload).opsize <> taicpu(p).opsize) or
         not RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(reload).oper[0]^.ref^) then
        exit;

      { only valid for extended because of rounding }
      if taicpu(p).opsize <> S_FX then
        exit;
      if not getNextInstruction(reload, afterReload) then
        exit;
      if afterReload.typ <> ait_instruction then
        exit;
      if not IsExitCode(afterReload) then
        exit;

      { the slot must be a plain frame-pointer relative location ... }
      if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
        exit;
      { ... that is not located below the function result variable ... }
      if assigned(current_procinfo.procdef.funcretsym) and
         (taicpu(p).oper[0]^.ref^.offset <
          tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
        exit;
      { ... and that uses no index register }
      if taicpu(p).oper[0]^.ref^.index <> NR_NO then
        exit;

      asml.remove(p);
      asml.remove(reload);
      p.free;
      reload.free;
      p := afterReload;
      removeLastDeallocForFuncRes(p);
      Result := true;
    end;
  { Converts a register-related tinschange value to a super register.

    The lookup table is indexed by the read-change values CH_REAX..CH_REDI.
    Values up to CH_WEDI, CH_RWEDI and CH_MEDI are folded back onto that
    range by subtracting the ordinal of the last value of the preceding
    group. Anything beyond CH_MEDI raises InternalError(2016041901). }
  function tch2reg(ch: tinschange): tsuperregister;
    const
      SupRegOfReadChange: array[CH_REAX..CH_REDI] of tsuperregister =
        (RS_EAX,RS_ECX,RS_EDX,RS_EBX,RS_ESP,RS_EBP,RS_ESI,RS_EDI);
    var
      shift: longint;
    begin
      { pick the ordinal distance that maps ch back into the read group }
      shift := 0;
      if (ch <= CH_REDI) then
        shift := 0
      else if (ch <= CH_WEDI) then
        shift := ord(CH_REDI)
      else if (ch <= CH_RWEDI) then
        shift := ord(CH_WEDI)
      else if (ch <= CH_MEDI) then
        shift := ord(CH_RWEDI)
      else
        InternalError(2016041901);

      tch2reg := SupRegOfReadChange[tinschange(ord(ch) - shift)];
    end;
  { Returns true when reg is an integer register whose super register lies
    in the range RS_EAX..RS_EBX. Only register type and super register are
    inspected; the sub-register (size) is not checked here.
    NOTE(review): warnings are switched off around the test, presumably
    because one of the range comparisons is always true - confirm. }
  function isgp32reg(reg: TRegister): boolean;
    begin
  {$push}{$warnings off}
      if getregtype(reg) <> R_INTREGISTER then
        Result := false
      else
        Result := (getsupreg(reg) >= RS_EAX) and (getsupreg(reg) <= RS_EBX);
  {$pop}
    end;
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  { Override that simply forwards the question to RegReadByInstruction:
    returns true if hp reads reg according to that routine. }
    begin
      InstructionLoadsFromReg := RegReadByInstruction(reg, hp);
    end;
  function TCpuAsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  { Returns true if the instruction hp reads register reg.

    - Anything that is not an instruction never reads a register.
    - CALL is treated as reading every register.
    - One-operand IMUL and MUL read their operand plus the accumulator
      (EAX; the high byte sub-register is excluded for byte-sized operations).
      Two- and three-operand IMUL read operands 0 and 1.
    - DIV/IDIV read their operand, EAX, and - unless byte sized - EDX.
    - For all other opcodes: LEA never reads a segment register; a register
      used inside any memory reference is read; SSE MOVSD is handled
      explicitly; everything else is answered from the insprop[] change
      table (implicit integer registers, flags, then the explicit operands). }
    var
      ins: taicpu;
      i: longint;
    begin
      Result := false;
      if hp.typ <> ait_instruction then
        exit;
      ins := taicpu(hp);

      case ins.opcode of
        A_CALL:
          Result := true;

        A_IMUL:
          case ins.ops of
            1:
              Result :=
                RegInOp(reg,ins.oper[0]^) or
                (
                  ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
                  ((getsubreg(reg)<>R_SUBH) or (ins.opsize<>S_B))
                );
            2,3:
              Result :=
                reginop(reg,ins.oper[0]^) or
                reginop(reg,ins.oper[1]^);
          end;

        A_MUL:
          Result :=
            RegInOp(reg,ins.oper[0]^) or
            (
              ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
              ((getsubreg(reg)<>R_SUBH) or (ins.opsize<>S_B))
            );

        A_IDIV,A_DIV:
          Result :=
            RegInOp(reg,ins.oper[0]^) or
            (
              (getregtype(reg)=R_INTREGISTER) and
              (
                (getsupreg(reg)=RS_EAX) or
                ((getsupreg(reg)=RS_EDX) and (ins.opsize<>S_B))
              )
            );

        else
          begin
            { lea only computes an address, so segment registers are not read;
              Result is still false here }
            if (ins.opcode=A_LEA) and is_segment_reg(reg) then
              exit;

            { a register used to form any memory address is always read }
            for i := 0 to ins.ops-1 do
              if (ins.oper[i]^.typ = top_ref) and
                 RegInRef(reg,ins.oper[i]^.ref^) then
                begin
                  Result := true;
                  exit;
                end;

            { special handling for SSE MOVSD }
            if (ins.opcode=A_MOVSD) and (ins.ops>0) then
              begin
                if ins.ops<>2 then
                  internalerror(2017042702);
                Result :=
                  reginop(reg,ins.oper[0]^) or
                  (
                    (ins.oper[1]^.typ=top_reg) and (ins.oper[0]^.typ=top_reg) and
                    reginop(reg, ins.oper[1]^)
                  );
                exit;
              end;

            with insprop[ins.opcode] do
              begin
                { implicit reads of the integer registers }
                if getregtype(reg)=R_INTREGISTER then
                  begin
                    case getsupreg(reg) of
                      RS_EAX: Result := [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[];
                      RS_ECX: Result := [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[];
                      RS_EDX: Result := [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[];
                      RS_EBX: Result := [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[];
                      RS_ESP: Result := [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[];
                      RS_EBP: Result := [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[];
                      RS_ESI: Result := [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[];
                      RS_EDI: Result := [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[];
                    end;
                    if Result then
                      exit;
                  end;

                { flags register, as a whole or as a single flag }
                if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
                  begin
                    { conditional instructions: a single flag is read only if
                      the condition code depends on it }
                    if (Ch_RFLAGScc in Ch) and
                       not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
                      begin
                        case ins.condition of
                          C_A,C_NBE,       { CF=0 and ZF=0  }
                          C_BE,C_NA:       { CF=1 or ZF=1   }
                            Result:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
                          C_AE,C_NB,C_NC,  { CF=0           }
                          C_B,C_NAE,C_C:   { CF=1           }
                            Result:=getsubreg(reg) in [R_SUBFLAGCARRY];
                          C_NE,C_NZ,       { ZF=0           }
                          C_E,C_Z:         { ZF=1           }
                            Result:=getsubreg(reg) in [R_SUBFLAGZERO];
                          C_G,C_NLE,       { ZF=0 and SF=OF }
                          C_LE,C_NG:       { ZF=1 or SF<>OF }
                            Result:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                          C_GE,C_NL,       { SF=OF          }
                          C_L,C_NGE:       { SF<>OF         }
                            Result:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                          C_NO,            { OF=0           }
                          C_O:             { OF=1           }
                            Result:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
                          C_NP,C_PO,       { PF=0           }
                          C_P,C_PE:        { PF=1           }
                            Result:=getsubreg(reg) in [R_SUBFLAGPARITY];
                          C_NS,            { SF=0           }
                          C_S:             { SF=1           }
                            Result:=getsubreg(reg) in [R_SUBFLAGSIGN];
                          else
                            internalerror(2017042701);
                        end;
                        if Result then
                          exit;
                      end;

                    { otherwise consult the explicit flag-change entries }
                    case getsubreg(reg) of
                      R_SUBW,R_SUBD,R_SUBQ:
                        Result :=
                          [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
                           Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                           Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
                      R_SUBFLAGCARRY:
                        Result:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGPARITY:
                        Result:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGAUXILIARY:
                        Result:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGZERO:
                        Result:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGSIGN:
                        Result:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGOVERFLOW:
                        Result:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGINTERRUPT:
                        Result:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGDIRECTION:
                        Result:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      else
                        internalerror(2017042601);
                    end;
                    exit;
                  end;

                { instructions flagged Ch_NoReadIfEqualRegs do not read their
                  operands when both are the same register; Result is false }
                if (Ch_NoReadIfEqualRegs in Ch) and (ins.ops=2) and
                   (ins.oper[0]^.typ=top_reg) and (ins.oper[1]^.typ=top_reg) and
                   (ins.oper[0]^.reg=ins.oper[1]^.reg) then
                  exit;

                { explicit operands that the change table marks as read or
                  modified; evaluated left to right with short-circuiting }
                Result :=
                  (([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,ins.oper[0]^)) or
                  (([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,ins.oper[1]^)) or
                  (([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,ins.oper[2]^));
              end;
          end;
      end;
    end;
  { Reports whether any operand of p is a memory reference whose segment
    register is set (i.e. differs from NR_NO). All p.opercnt operands are
    examined; the scan stops at the first hit. }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      opIdx: longint;
    begin
      Result := false;
      for opIdx := 0 to p.opercnt-1 do
        if (p.oper[opIdx]^.typ=top_ref) and
           (p.oper[opIdx]^.ref^.segment<>NR_NO) then
          begin
            Result := true;
            exit;
          end;
    end;
  { Returns true if list entry p has to be treated as reading the flags.

    - An instruction reads the flags when its insprop[] change set contains
      any of the read / read-write flag entries, Ch_RFLAGScc, or Ch_All.
    - A label is always reported as reading the flags.
    - Every other kind of entry does not read the flags. }
  function InstrReadsFlags(p: tai): boolean;
    begin
      case p.typ of
        ait_instruction:
          InstrReadsFlags :=
            InsProp[taicpu(p).opcode].Ch*
              [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
               Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
               Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[];
        ait_label:
          InstrReadsFlags := true;
        else
          InstrReadsFlags := false;
      end;
    end;
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
  { Pre-pass over the instructions between BlockStart and BlockEnd.
    Instructions that contain a memory operand with a segment override are
    skipped. For the others the following rewrites are performed:

      A_IMUL  (32 bit, constant multiplier, register source)
        - "imul $1,reg" is removed, "imul $1,reg1,reg2" becomes
          "mov reg1,reg2";
        - multipliers 3, 5 and 9 become a single lea;
        - multipliers 6, 10 and 12 become a two-instruction lea/add sequence,
          but only when optimizing for a cpu type <= cpu_386.
        The lea rewrites are skipped when optimizing for size and when the
        next instruction is a jo/jno, because lea does not set the overflow
        flag the way imul does.

      A_SAR / A_SHR followed by A_SHL on the same operand, both with constant
        counts and equal operand size: replaced by a shift plus an "and", or
        by a single "and" when both counts are equal.

      A_XOR reg,reg
        temporarily turned into "mov $0,reg"; per the original note this is
        to make things easier for the CSE and is changed back in pass 2. }
    var
      p,hp1: tai;
      l: aint;
      tmpRef: treference;

    { Stores into operand 0 of target the constant lowBits xor'ed with the
      all-ones mask of p's operand size. Sizes other than S_L/S_B/S_W leave
      target untouched. }
    procedure LoadInvertedMask(target: taicpu; lowBits: aint);
      begin
        case taicpu(p).opsize of
          S_L: target.loadConst(0,lowBits xor aint($ffffffff));
          S_B: target.loadConst(0,lowBits xor $ff);
          S_W: target.loadConst(0,lowBits xor $ffff);
        end;
      end;

    begin
      p := BlockStart;
      while (p <> BlockEnd) do
        begin
          case p.typ of
            ait_instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode of
                  A_IMUL:
                    { changes certain "imul const, %reg"'s to lea sequences }
                    begin
                      if (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(p).oper[1]^.typ = top_reg) and
                         (taicpu(p).opsize = S_L) then
                        begin
                          if (taicpu(p).oper[0]^.val = 1) then
                            begin
                              if (taicpu(p).ops = 2) then
                                { remove "imul $1, reg" }
                                begin
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := hp1;
                                  continue;
                                end
                              else
                                { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
                                begin
                                  hp1 := taicpu.op_reg_reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                                  InsertLLItem(p.previous, p.next, hp1);
                                  p.free;
                                  p := hp1;
                                end;
                            end
                          else if
                             ((taicpu(p).ops <= 2) or
                              (taicpu(p).oper[2]^.typ = top_reg)) and
                             (taicpu(p).oper[0]^.val <= 12) and
                             not(cs_opt_size in current_settings.optimizerswitches) and
                             { lea/add do not reproduce imul's overflow flag }
                             (not(GetNextInstruction(p, hp1)) or
                              not((tai(hp1).typ = ait_instruction) and
                                  ((taicpu(hp1).opcode=A_Jcc) and
                                   (taicpu(hp1).condition in [C_O,C_NO])))) then
                            begin
                              reference_reset(tmpRef,1,[]);
                              case taicpu(p).oper[0]^.val of
                                3,5,9:
                                  begin
                                    { imul N, reg1, reg2 to
                                        lea (reg1,reg1,N-1), reg2
                                      imul N, reg1 to
                                        lea (reg1,reg1,N-1), reg1 }
                                    tmpRef.base := taicpu(p).oper[1]^.reg;
                                    tmpRef.index := taicpu(p).oper[1]^.reg;
                                    tmpRef.ScaleFactor := taicpu(p).oper[0]^.val - 1;
                                    if (taicpu(p).ops = 2) then
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg)
                                    else
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
                                    InsertLLItem(p.previous, p.next, hp1);
                                    p.free;
                                    p := hp1;
                                  end;
                                6:
                                  begin
                                    { imul 6, reg1, reg2 to
                                        lea (,reg1,2), reg2
                                        lea (reg2,reg1,4), reg2
                                      imul 6, reg1 to
                                        lea (reg1,reg1,2), reg1
                                        add reg1, reg1 }
                                    if (current_settings.optimizecputype <= cpu_386) then
                                      begin
                                        { second instruction, linked in right after p }
                                        tmpRef.index := taicpu(p).oper[1]^.reg;
                                        if (taicpu(p).ops = 3) then
                                          begin
                                            tmpRef.base := taicpu(p).oper[2]^.reg;
                                            tmpRef.ScaleFactor := 4;
                                            { the result must end up in reg2: writing
                                              it to reg1 would clobber the source and
                                              leave reg2 = 2*reg1 }
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
                                          end
                                        else
                                          begin
                                            hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                              taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                          end;
                                        InsertLLItem(p, p.next, hp1);
                                        { first instruction, takes the place of p }
                                        reference_reset(tmpRef,2,[]);
                                        tmpRef.index := taicpu(p).oper[1]^.reg;
                                        tmpRef.ScaleFactor := 2;
                                        if (taicpu(p).ops = 3) then
                                          begin
                                            tmpRef.base := NR_NO;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef,
                                              taicpu(p).oper[2]^.reg);
                                          end
                                        else
                                          begin
                                            tmpRef.base := taicpu(p).oper[1]^.reg;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg);
                                          end;
                                        InsertLLItem(p.previous, p.next, hp1);
                                        p.free;
                                        p := tai(hp1.next);
                                      end
                                  end;
                                10:
                                  begin
                                    { imul 10, reg1, reg2 to
                                        lea (reg1,reg1,4), reg2
                                        add reg2, reg2
                                      imul 10, reg1 to
                                        lea (reg1,reg1,4), reg1
                                        add reg1, reg1 }
                                    if (current_settings.optimizecputype <= cpu_386) then
                                      begin
                                        if (taicpu(p).ops = 3) then
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                        else
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                        InsertLLItem(p, p.next, hp1);
                                        tmpRef.base := taicpu(p).oper[1]^.reg;
                                        tmpRef.index := taicpu(p).oper[1]^.reg;
                                        tmpRef.ScaleFactor := 4;
                                        if (taicpu(p).ops = 3) then
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg)
                                        else
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(p.previous, p.next, hp1);
                                        p.free;
                                        p := tai(hp1.next);
                                      end
                                  end;
                                12:
                                  begin
                                    { imul 12, reg1, reg2 to
                                        lea (,reg1,4), reg2
                                        lea (reg2,reg1,8), reg2
                                      imul 12, reg1 to
                                        lea (reg1,reg1,2), reg1
                                        lea (,reg1,4), reg1 }
                                    if (current_settings.optimizecputype <= cpu_386) then
                                      begin
                                        tmpRef.index := taicpu(p).oper[1]^.reg;
                                        if (taicpu(p).ops = 3) then
                                          begin
                                            tmpRef.base := taicpu(p).oper[2]^.reg;
                                            tmpRef.ScaleFactor := 8;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
                                          end
                                        else
                                          begin
                                            tmpRef.base := NR_NO;
                                            tmpRef.ScaleFactor := 4;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg);
                                          end;
                                        InsertLLItem(p, p.next, hp1);
                                        reference_reset(tmpRef,2,[]);
                                        tmpRef.index := taicpu(p).oper[1]^.reg;
                                        if (taicpu(p).ops = 3) then
                                          begin
                                            tmpRef.base := NR_NO;
                                            tmpRef.ScaleFactor := 4;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
                                          end
                                        else
                                          begin
                                            tmpRef.base := taicpu(p).oper[1]^.reg;
                                            tmpRef.ScaleFactor := 2;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg);
                                          end;
                                        InsertLLItem(p.previous, p.next, hp1);
                                        p.free;
                                        p := tai(hp1.next);
                                      end
                                  end
                              end;
                            end;
                        end;
                    end;
                  A_SAR, A_SHR:
                    { changes the code sequence
                        shr/sar const1, x
                        shl const2, x
                      to either "sar/and", "shl/and" or just "and" depending on
                      const1 and const2 }
                    begin
                      if GetNextInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_SHL) and
                         (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(hp1).oper[0]^.typ = top_const) and
                         (taicpu(hp1).opsize = taicpu(p).opsize) and
                         (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                         OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                        begin
                          if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                             not(cs_opt_size in current_settings.optimizerswitches) then
                            { const1 > const2:
                              shift right by the difference, then clear the low
                              const2 bits with an "and" }
                            begin
                              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                              taicpu(hp1).opcode := A_AND;
                              l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
                              LoadInvertedMask(taicpu(hp1),l);
                            end
                          else if (taicpu(p).oper[0]^.val < taicpu(hp1).oper[0]^.val) and
                             not(cs_opt_size in current_settings.optimizerswitches) then
                            { const1 < const2:
                              clear the low const1 bits with an "and", then
                              shift left by the difference }
                            begin
                              taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                              taicpu(p).opcode := A_AND;
                              l := (1 shl (taicpu(p).oper[0]^.val)) - 1;
                              LoadInvertedMask(taicpu(p),l);
                            end
                          else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                            { const1 = const2: a single "and" is enough }
                            begin
                              taicpu(p).opcode := A_AND;
                              l := (1 shl (taicpu(p).oper[0]^.val)) - 1;
                              LoadInvertedMask(taicpu(p),l);
                              asml.remove(hp1);
                              hp1.free;
                            end;
                        end;
                    end;
                  A_XOR:
                    if (taicpu(p).oper[0]^.typ = top_reg) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                      { temporarily change this to 'mov reg,0' to make it easier }
                      { for the CSE. Will be changed back in pass 2              }
                      begin
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).loadConst(0,0);
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  658. { First pass of peephole optimizations }
  659. procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
  660. function WriteOk : Boolean;
  661. begin
  662. writeln('Ok');
  663. Result:=True;
  664. end;
  665. var
  666. l : longint;
  667. p,hp1,hp2 : tai;
  668. hp3,hp4: tai;
  669. v:aint;
  670. TmpRef: TReference;
  671. TmpBool1, TmpBool2: Boolean;
  672. function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
  673. {traces sucessive jumps to their final destination and sets it, e.g.
  674. je l1 je l3
  675. <code> <code>
  676. l1: becomes l1:
  677. je l2 je l3
  678. <code> <code>
  679. l2: l2:
  680. jmp l3 jmp l3
  681. the level parameter denotes how deeep we have already followed the jump,
  682. to avoid endless loops with constructs such as "l5: ; jmp l5" }
  683. var p1, p2: tai;
  684. l: tasmlabel;
  685. function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
  686. begin
  687. FindAnyLabel := false;
  688. while assigned(hp.next) and
  689. (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
  690. hp := tai(hp.next);
  691. if assigned(hp.next) and
  692. (tai(hp.next).typ = ait_label) then
  693. begin
  694. FindAnyLabel := true;
  695. l := tai_label(hp.next).labsym;
  696. end
  697. end;
  698. begin
  699. GetfinalDestination := false;
  700. if level > 20 then
  701. exit;
  702. p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
  703. if assigned(p1) then
  704. begin
  705. SkipLabels(p1,p1);
  706. if (tai(p1).typ = ait_instruction) and
  707. (taicpu(p1).is_jmp) then
  708. if { the next instruction after the label where the jump hp arrives}
  709. { is unconditional or of the same type as hp, so continue }
  710. (taicpu(p1).condition in [C_None,hp.condition]) or
  711. { the next instruction after the label where the jump hp arrives}
  712. { is the opposite of hp (so this one is never taken), but after }
  713. { that one there is a branch that will be taken, so perform a }
  714. { little hack: set p1 equal to this instruction (that's what the}
  715. { last SkipLabels is for, only works with short bool evaluation)}
  716. ((taicpu(p1).condition = inverse_cond(hp.condition)) and
  717. SkipLabels(p1,p2) and
  718. (p2.typ = ait_instruction) and
  719. (taicpu(p2).is_jmp) and
  720. (taicpu(p2).condition in [C_None,hp.condition]) and
  721. SkipLabels(p1,p1)) then
  722. begin
  723. { quick check for loops of the form "l5: ; jmp l5 }
  724. if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
  725. tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
  726. exit;
  727. if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
  728. exit;
  729. tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
  730. hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
  731. tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
  732. end
  733. else
  734. if (taicpu(p1).condition = inverse_cond(hp.condition)) then
  735. if not FindAnyLabel(p1,l) then
  736. begin
  737. {$ifdef finaldestdebug}
  738. insertllitem(asml,p1,p1.next,tai_comment.Create(
  739. strpnew('previous label inserted'))));
  740. {$endif finaldestdebug}
  741. current_asmdata.getjumplabel(l);
  742. insertllitem(p1,p1.next,tai_label.Create(l));
  743. tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
  744. hp.oper[0]^.ref^.symbol := l;
  745. l.increfs;
  746. { this won't work, since the new label isn't in the labeltable }
  747. { so it will fail the rangecheck. Labeltable should become a }
  748. { hashtable to support this: }
  749. { GetFinalDestination(asml, hp); }
  750. end
  751. else
  752. begin
  753. {$ifdef finaldestdebug}
  754. insertllitem(asml,p1,p1.next,tai_comment.Create(
  755. strpnew('next label reused'))));
  756. {$endif finaldestdebug}
  757. l.increfs;
  758. hp.oper[0]^.ref^.symbol := l;
  759. if not GetFinalDestination(asml, hp,succ(level)) then
  760. exit;
  761. end;
  762. end;
  763. GetFinalDestination := true;
  764. end;
  765. function DoSubAddOpt(var p: tai): Boolean;
  766. begin
  767. DoSubAddOpt := False;
  768. if GetLastInstruction(p, hp1) and
  769. (hp1.typ = ait_instruction) and
  770. (taicpu(hp1).opsize = taicpu(p).opsize) then
  771. case taicpu(hp1).opcode Of
  772. A_DEC:
  773. if (taicpu(hp1).oper[0]^.typ = top_reg) and
  774. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  775. begin
  776. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
  777. asml.remove(hp1);
  778. hp1.free;
  779. end;
  780. A_SUB:
  781. if (taicpu(hp1).oper[0]^.typ = top_const) and
  782. (taicpu(hp1).oper[1]^.typ = top_reg) and
  783. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  784. begin
  785. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
  786. asml.remove(hp1);
  787. hp1.free;
  788. end;
  789. A_ADD:
  790. if (taicpu(hp1).oper[0]^.typ = top_const) and
  791. (taicpu(hp1).oper[1]^.typ = top_reg) and
  792. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  793. begin
  794. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
  795. asml.remove(hp1);
  796. hp1.free;
  797. if (taicpu(p).oper[0]^.val = 0) then
  798. begin
  799. hp1 := tai(p.next);
  800. asml.remove(p);
  801. p.free;
  802. if not GetLastInstruction(hp1, p) then
  803. p := hp1;
  804. DoSubAddOpt := True;
  805. end
  806. end;
  807. end;
  808. end;
  809. begin
  810. p := BlockStart;
  811. ClearUsedRegs;
  812. while (p <> BlockEnd) Do
  813. begin
  814. UpDateUsedRegs(UsedRegs, tai(p.next));
  815. case p.Typ Of
  816. ait_instruction:
  817. begin
  818. current_filepos:=taicpu(p).fileinfo;
  819. if InsContainsSegRef(taicpu(p)) then
  820. begin
  821. p := tai(p.next);
  822. continue;
  823. end;
  824. { Handle Jmp Optimizations }
  825. if taicpu(p).is_jmp then
  826. begin
  827. {the following if-block removes all code between a jmp and the next label,
  828. because it can never be executed}
  829. if (taicpu(p).opcode = A_JMP) then
  830. begin
  831. hp2:=p;
  832. while GetNextInstruction(hp2, hp1) and
  833. (hp1.typ <> ait_label) do
  834. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  835. begin
  836. { don't kill start/end of assembler block,
  837. no-line-info-start/end etc }
  838. if hp1.typ<>ait_marker then
  839. begin
  840. asml.remove(hp1);
  841. hp1.free;
  842. end
  843. else
  844. hp2:=hp1;
  845. end
  846. else break;
  847. end;
  848. { remove jumps to a label coming right after them }
  849. if GetNextInstruction(p, hp1) then
  850. begin
  851. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  852. { TODO: FIXME removing the first instruction fails}
  853. (p<>blockstart) then
  854. begin
  855. hp2:=tai(hp1.next);
  856. asml.remove(p);
  857. p.free;
  858. p:=hp2;
  859. continue;
  860. end
  861. else
  862. begin
  863. if hp1.typ = ait_label then
  864. SkipLabels(hp1,hp1);
  865. if (tai(hp1).typ=ait_instruction) and
  866. (taicpu(hp1).opcode=A_JMP) and
  867. GetNextInstruction(hp1, hp2) and
  868. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  869. begin
  870. if taicpu(p).opcode=A_Jcc then
  871. begin
  872. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  873. tai_label(hp2).labsym.decrefs;
  874. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  875. { when free'ing hp1, the ref. isn't decresed, so we don't
  876. increase it (FK)
  877. taicpu(p).oper[0]^.ref^.symbol.increfs;
  878. }
  879. asml.remove(hp1);
  880. hp1.free;
  881. GetFinalDestination(asml, taicpu(p),0);
  882. end
  883. else
  884. begin
  885. GetFinalDestination(asml, taicpu(p),0);
  886. p:=tai(p.next);
  887. continue;
  888. end;
  889. end
  890. else
  891. GetFinalDestination(asml, taicpu(p),0);
  892. end;
  893. end;
  894. end
  895. else
  896. { All other optimizes }
  897. begin
  898. for l := 0 to taicpu(p).ops-1 Do
  899. if (taicpu(p).oper[l]^.typ = top_ref) then
  900. With taicpu(p).oper[l]^.ref^ Do
  901. begin
  902. if (base = NR_NO) and
  903. (index <> NR_NO) and
  904. (scalefactor in [0,1]) then
  905. begin
  906. base := index;
  907. index := NR_NO
  908. end
  909. end;
  910. case taicpu(p).opcode Of
  911. A_AND:
  912. if OptPass1And(p) then
  913. continue;
  914. A_CMP:
  915. begin
  916. { cmp register,$8000 neg register
  917. je target --> jo target
  918. .... only if register is deallocated before jump.}
  919. case Taicpu(p).opsize of
  920. S_B: v:=$80;
  921. S_W: v:=$8000;
  922. S_L: v:=aint($80000000);
  923. else
  924. internalerror(2013112905);
  925. end;
  926. if (taicpu(p).oper[0]^.typ=Top_const) and
  927. (taicpu(p).oper[0]^.val=v) and
  928. (Taicpu(p).oper[1]^.typ=top_reg) and
  929. GetNextInstruction(p, hp1) and
  930. (hp1.typ=ait_instruction) and
  931. (taicpu(hp1).opcode=A_Jcc) and
  932. (Taicpu(hp1).condition in [C_E,C_NE]) and
  933. not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
  934. begin
  935. Taicpu(p).opcode:=A_NEG;
  936. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  937. Taicpu(p).clearop(1);
  938. Taicpu(p).ops:=1;
  939. if Taicpu(hp1).condition=C_E then
  940. Taicpu(hp1).condition:=C_O
  941. else
  942. Taicpu(hp1).condition:=C_NO;
  943. continue;
  944. end;
  945. {
  946. @@2: @@2:
  947. .... ....
  948. cmp operand1,0
  949. jle/jbe @@1
  950. dec operand1 --> sub operand1,1
  951. jmp @@2 jge/jae @@2
  952. @@1: @@1:
  953. ... ....}
  954. if (taicpu(p).oper[0]^.typ = top_const) and
  955. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  956. (taicpu(p).oper[0]^.val = 0) and
  957. GetNextInstruction(p, hp1) and
  958. (hp1.typ = ait_instruction) and
  959. (taicpu(hp1).is_jmp) and
  960. (taicpu(hp1).opcode=A_Jcc) and
  961. (taicpu(hp1).condition in [C_LE,C_BE]) and
  962. GetNextInstruction(hp1,hp2) and
  963. (hp2.typ = ait_instruction) and
  964. (taicpu(hp2).opcode = A_DEC) and
  965. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  966. GetNextInstruction(hp2, hp3) and
  967. (hp3.typ = ait_instruction) and
  968. (taicpu(hp3).is_jmp) and
  969. (taicpu(hp3).opcode = A_JMP) and
  970. GetNextInstruction(hp3, hp4) and
  971. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  972. begin
  973. taicpu(hp2).Opcode := A_SUB;
  974. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  975. taicpu(hp2).loadConst(0,1);
  976. taicpu(hp2).ops:=2;
  977. taicpu(hp3).Opcode := A_Jcc;
  978. case taicpu(hp1).condition of
  979. C_LE: taicpu(hp3).condition := C_GE;
  980. C_BE: taicpu(hp3).condition := C_AE;
  981. end;
  982. asml.remove(p);
  983. asml.remove(hp1);
  984. p.free;
  985. hp1.free;
  986. p := hp2;
  987. continue;
  988. end
  989. end;
  990. A_FLD:
  991. begin
  992. if (taicpu(p).oper[0]^.typ = top_reg) and
  993. GetNextInstruction(p, hp1) and
  994. (hp1.typ = Ait_Instruction) and
  995. (taicpu(hp1).oper[0]^.typ = top_reg) and
  996. (taicpu(hp1).oper[1]^.typ = top_reg) and
  997. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  998. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  999. { change to
  1000. fld reg fxxx reg,st
  1001. fxxxp st, st1 (hp1)
  1002. Remark: non commutative operations must be reversed!
  1003. }
  1004. begin
  1005. case taicpu(hp1).opcode Of
  1006. A_FMULP,A_FADDP,
  1007. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  1008. begin
  1009. case taicpu(hp1).opcode Of
  1010. A_FADDP: taicpu(hp1).opcode := A_FADD;
  1011. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  1012. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  1013. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  1014. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  1015. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  1016. end;
  1017. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  1018. taicpu(hp1).oper[1]^.reg := NR_ST;
  1019. asml.remove(p);
  1020. p.free;
  1021. p := hp1;
  1022. continue;
  1023. end;
  1024. end;
  1025. end
  1026. else
  1027. if (taicpu(p).oper[0]^.typ = top_ref) and
  1028. GetNextInstruction(p, hp2) and
  1029. (hp2.typ = Ait_Instruction) and
  1030. (taicpu(hp2).ops = 2) and
  1031. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1032. (taicpu(hp2).oper[1]^.typ = top_reg) and
  1033. (taicpu(p).opsize in [S_FS, S_FL]) and
  1034. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  1035. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  1036. if GetLastInstruction(p, hp1) and
  1037. (hp1.typ = Ait_Instruction) and
  1038. ((taicpu(hp1).opcode = A_FLD) or
  1039. (taicpu(hp1).opcode = A_FST)) and
  1040. (taicpu(hp1).opsize = taicpu(p).opsize) and
  1041. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1042. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  1043. if ((taicpu(hp2).opcode = A_FMULP) or
  1044. (taicpu(hp2).opcode = A_FADDP)) then
  1045. { change to
  1046. fld/fst mem1 (hp1) fld/fst mem1
  1047. fld mem1 (p) fadd/
  1048. faddp/ fmul st, st
  1049. fmulp st, st1 (hp2) }
  1050. begin
  1051. asml.remove(p);
  1052. p.free;
  1053. p := hp1;
  1054. if (taicpu(hp2).opcode = A_FADDP) then
  1055. taicpu(hp2).opcode := A_FADD
  1056. else
  1057. taicpu(hp2).opcode := A_FMUL;
  1058. taicpu(hp2).oper[1]^.reg := NR_ST;
  1059. end
  1060. else
  1061. { change to
  1062. fld/fst mem1 (hp1) fld/fst mem1
  1063. fld mem1 (p) fld st}
  1064. begin
  1065. taicpu(p).changeopsize(S_FL);
  1066. taicpu(p).loadreg(0,NR_ST);
  1067. end
  1068. else
  1069. begin
  1070. case taicpu(hp2).opcode Of
  1071. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  1072. { change to
  1073. fld/fst mem1 (hp1) fld/fst mem1
  1074. fld mem2 (p) fxxx mem2
  1075. fxxxp st, st1 (hp2) }
  1076. begin
  1077. case taicpu(hp2).opcode Of
  1078. A_FADDP: taicpu(p).opcode := A_FADD;
  1079. A_FMULP: taicpu(p).opcode := A_FMUL;
  1080. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  1081. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  1082. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  1083. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  1084. end;
  1085. asml.remove(hp2);
  1086. hp2.free;
  1087. end
  1088. end
  1089. end
  1090. end;
  1091. A_FSTP,A_FISTP:
  1092. if doFpuLoadStoreOpt(p) then
  1093. continue;
  1094. A_LEA:
  1095. begin
  1096. {removes seg register prefixes from LEA operations, as they
  1097. don't do anything}
  1098. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  1099. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  1100. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1101. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  1102. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1103. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  1104. begin
  1105. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  1106. (taicpu(p).oper[0]^.ref^.offset = 0) then
  1107. begin
  1108. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  1109. taicpu(p).oper[1]^.reg);
  1110. InsertLLItem(p.previous,p.next, hp1);
  1111. p.free;
  1112. p := hp1;
  1113. continue;
  1114. end
  1115. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  1116. begin
  1117. hp1 := tai(p.Next);
  1118. asml.remove(p);
  1119. p.free;
  1120. p := hp1;
  1121. continue;
  1122. end
  1123. { continue to use lea to adjust the stack pointer,
  1124. it is the recommended way, but only if not optimizing for size }
  1125. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  1126. (cs_opt_size in current_settings.optimizerswitches) then
  1127. with taicpu(p).oper[0]^.ref^ do
  1128. if (base = taicpu(p).oper[1]^.reg) then
  1129. begin
  1130. l := offset;
  1131. if (l=1) and UseIncDec then
  1132. begin
  1133. taicpu(p).opcode := A_INC;
  1134. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1135. taicpu(p).ops := 1
  1136. end
  1137. else if (l=-1) and UseIncDec then
  1138. begin
  1139. taicpu(p).opcode := A_DEC;
  1140. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1141. taicpu(p).ops := 1;
  1142. end
  1143. else
  1144. begin
  1145. if (l<0) and (l<>-2147483648) then
  1146. begin
  1147. taicpu(p).opcode := A_SUB;
  1148. taicpu(p).loadConst(0,-l);
  1149. end
  1150. else
  1151. begin
  1152. taicpu(p).opcode := A_ADD;
  1153. taicpu(p).loadConst(0,l);
  1154. end;
  1155. end;
  1156. end;
  1157. end
  1158. (*
  1159. This is unsafe, lea doesn't modify the flags but "add"
  1160. does. This breaks webtbs/tw15694.pp. The above
  1161. transformations are also unsafe, but they don't seem to
  1162. be triggered by code that FPC generators (or that at
  1163. least does not occur in the tests...). This needs to be
  1164. fixed by checking for the liveness of the flags register.
  1165. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1166. begin
  1167. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1168. taicpu(p).oper[0]^.ref^.base);
  1169. InsertLLItem(asml,p.previous,p.next, hp1);
  1170. DebugMsg('Peephole Lea2AddBase done',hp1);
  1171. p.free;
  1172. p:=hp1;
  1173. continue;
  1174. end
  1175. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1176. begin
  1177. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1178. taicpu(p).oper[0]^.ref^.index);
  1179. InsertLLItem(asml,p.previous,p.next,hp1);
  1180. DebugMsg('Peephole Lea2AddIndex done',hp1);
  1181. p.free;
  1182. p:=hp1;
  1183. continue;
  1184. end
  1185. *)
  1186. end;
  1187. A_MOV:
  1188. begin
  1189. If OptPass1MOV(p) then
  1190. Continue;
  1191. end;
  1192. A_MOVSX,
  1193. A_MOVZX :
  1194. begin
  1195. if (taicpu(p).oper[1]^.typ = top_reg) and
  1196. GetNextInstruction(p,hp1) and
  1197. (hp1.typ = ait_instruction) and
  1198. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1199. (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
  1200. GetNextInstruction(hp1,hp2) and
  1201. MatchInstruction(hp2,A_MOV,[]) and
  1202. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1203. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  1204. (((taicpu(hp1).ops=2) and
  1205. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  1206. ((taicpu(hp1).ops=1) and
  1207. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  1208. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  1209. { change movsX/movzX reg/ref, reg2 }
  1210. { add/sub/or/... reg3/$const, reg2 }
  1211. { mov reg2 reg/ref }
  1212. { to add/sub/or/... reg3/$const, reg/ref }
  1213. begin
  1214. { by example:
  1215. movswl %si,%eax movswl %si,%eax p
  1216. decl %eax addl %edx,%eax hp1
  1217. movw %ax,%si movw %ax,%si hp2
  1218. ->
  1219. movswl %si,%eax movswl %si,%eax p
  1220. decw %eax addw %edx,%eax hp1
  1221. movw %ax,%si movw %ax,%si hp2
  1222. }
  1223. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1224. {
  1225. ->
  1226. movswl %si,%eax movswl %si,%eax p
  1227. decw %si addw %dx,%si hp1
  1228. movw %ax,%si movw %ax,%si hp2
  1229. }
  1230. case taicpu(hp1).ops of
  1231. 1:
  1232. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1233. 2:
  1234. begin
  1235. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1236. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1237. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1238. end;
  1239. else
  1240. internalerror(2008042701);
  1241. end;
  1242. {
  1243. ->
  1244. decw %si addw %dx,%si p
  1245. }
  1246. asml.remove(p);
  1247. asml.remove(hp2);
  1248. p.free;
  1249. hp2.free;
  1250. p := hp1
  1251. end
  1252. { removes superfluous And's after movzx's }
  1253. else if taicpu(p).opcode=A_MOVZX then
  1254. begin
  1255. if (taicpu(p).oper[1]^.typ = top_reg) and
  1256. GetNextInstruction(p, hp1) and
  1257. (tai(hp1).typ = ait_instruction) and
  1258. (taicpu(hp1).opcode = A_AND) and
  1259. (taicpu(hp1).oper[0]^.typ = top_const) and
  1260. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1261. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1262. case taicpu(p).opsize Of
  1263. S_BL, S_BW:
  1264. if (taicpu(hp1).oper[0]^.val = $ff) then
  1265. begin
  1266. asml.remove(hp1);
  1267. hp1.free;
  1268. end;
  1269. S_WL:
  1270. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1271. begin
  1272. asml.remove(hp1);
  1273. hp1.free;
  1274. end;
  1275. end;
  1276. {changes some movzx constructs to faster synonims (all examples
  1277. are given with eax/ax, but are also valid for other registers)}
  1278. if (taicpu(p).oper[1]^.typ = top_reg) then
  1279. if (taicpu(p).oper[0]^.typ = top_reg) then
  1280. case taicpu(p).opsize of
  1281. S_BW:
  1282. begin
  1283. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1284. not(cs_opt_size in current_settings.optimizerswitches) then
  1285. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1286. begin
  1287. taicpu(p).opcode := A_AND;
  1288. taicpu(p).changeopsize(S_W);
  1289. taicpu(p).loadConst(0,$ff);
  1290. end
  1291. else if GetNextInstruction(p, hp1) and
  1292. (tai(hp1).typ = ait_instruction) and
  1293. (taicpu(hp1).opcode = A_AND) and
  1294. (taicpu(hp1).oper[0]^.typ = top_const) and
  1295. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1296. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1297. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1298. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1299. begin
  1300. taicpu(p).opcode := A_MOV;
  1301. taicpu(p).changeopsize(S_W);
  1302. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1303. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1304. end;
  1305. end;
  1306. S_BL:
  1307. begin
  1308. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1309. not(cs_opt_size in current_settings.optimizerswitches) then
  1310. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1311. begin
  1312. taicpu(p).opcode := A_AND;
  1313. taicpu(p).changeopsize(S_L);
  1314. taicpu(p).loadConst(0,$ff)
  1315. end
  1316. else if GetNextInstruction(p, hp1) and
  1317. (tai(hp1).typ = ait_instruction) and
  1318. (taicpu(hp1).opcode = A_AND) and
  1319. (taicpu(hp1).oper[0]^.typ = top_const) and
  1320. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1321. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1322. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1323. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1324. begin
  1325. taicpu(p).opcode := A_MOV;
  1326. taicpu(p).changeopsize(S_L);
  1327. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1328. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1329. end
  1330. end;
  1331. S_WL:
  1332. begin
  1333. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1334. not(cs_opt_size in current_settings.optimizerswitches) then
  1335. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1336. begin
  1337. taicpu(p).opcode := A_AND;
  1338. taicpu(p).changeopsize(S_L);
  1339. taicpu(p).loadConst(0,$ffff);
  1340. end
  1341. else if GetNextInstruction(p, hp1) and
  1342. (tai(hp1).typ = ait_instruction) and
  1343. (taicpu(hp1).opcode = A_AND) and
  1344. (taicpu(hp1).oper[0]^.typ = top_const) and
  1345. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1346. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1347. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1348. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1349. begin
  1350. taicpu(p).opcode := A_MOV;
  1351. taicpu(p).changeopsize(S_L);
  1352. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1353. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1354. end;
  1355. end;
  1356. end
  1357. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1358. begin
  1359. if GetNextInstruction(p, hp1) and
  1360. (tai(hp1).typ = ait_instruction) and
  1361. (taicpu(hp1).opcode = A_AND) and
  1362. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1363. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1364. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1365. begin
  1366. taicpu(p).opcode := A_MOV;
  1367. case taicpu(p).opsize Of
  1368. S_BL:
  1369. begin
  1370. taicpu(p).changeopsize(S_L);
  1371. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1372. end;
  1373. S_WL:
  1374. begin
  1375. taicpu(p).changeopsize(S_L);
  1376. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1377. end;
  1378. S_BW:
  1379. begin
  1380. taicpu(p).changeopsize(S_W);
  1381. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1382. end;
  1383. end;
  1384. end;
  1385. end;
  1386. end;
  1387. end;
  1388. (* should not be generated anymore by the current code generator
  1389. A_POP:
  1390. begin
  1391. if target_info.system=system_i386_go32v2 then
  1392. begin
  1393. { Transform a series of pop/pop/pop/push/push/push to }
  1394. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1395. { because I'm not sure whether they can cope with }
  1396. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1397. { such a problem when using esp as frame pointer (JM) }
  1398. if (taicpu(p).oper[0]^.typ = top_reg) then
  1399. begin
  1400. hp1 := p;
  1401. hp2 := p;
  1402. l := 0;
  1403. while getNextInstruction(hp1,hp1) and
  1404. (hp1.typ = ait_instruction) and
  1405. (taicpu(hp1).opcode = A_POP) and
  1406. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1407. begin
  1408. hp2 := hp1;
  1409. inc(l,4);
  1410. end;
  1411. getLastInstruction(p,hp3);
  1412. l1 := 0;
  1413. while (hp2 <> hp3) and
  1414. assigned(hp1) and
  1415. (hp1.typ = ait_instruction) and
  1416. (taicpu(hp1).opcode = A_PUSH) and
  1417. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1418. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1419. begin
  1420. { change it to a two op operation }
  1421. taicpu(hp2).oper[1]^.typ:=top_none;
  1422. taicpu(hp2).ops:=2;
  1423. taicpu(hp2).opcode := A_MOV;
  1424. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1425. reference_reset(tmpref);
  1426. tmpRef.base.enum:=R_INTREGISTER;
  1427. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1428. convert_register_to_enum(tmpref.base);
  1429. tmpRef.offset := l;
  1430. taicpu(hp2).loadRef(0,tmpRef);
  1431. hp4 := hp1;
  1432. getNextInstruction(hp1,hp1);
  1433. asml.remove(hp4);
  1434. hp4.free;
  1435. getLastInstruction(hp2,hp2);
  1436. dec(l,4);
  1437. inc(l1);
  1438. end;
  1439. if l <> -4 then
  1440. begin
  1441. inc(l,4);
  1442. for l1 := l1 downto 1 do
  1443. begin
  1444. getNextInstruction(hp2,hp2);
  1445. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1446. end
  1447. end
  1448. end
  1449. end
  1450. else
  1451. begin
  1452. if (taicpu(p).oper[0]^.typ = top_reg) and
  1453. GetNextInstruction(p, hp1) and
  1454. (tai(hp1).typ=ait_instruction) and
  1455. (taicpu(hp1).opcode=A_PUSH) and
  1456. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1457. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1458. begin
  1459. { change it to a two op operation }
  1460. taicpu(p).oper[1]^.typ:=top_none;
  1461. taicpu(p).ops:=2;
  1462. taicpu(p).opcode := A_MOV;
  1463. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1464. reference_reset(tmpref);
  1465. TmpRef.base.enum := R_ESP;
  1466. taicpu(p).loadRef(0,TmpRef);
  1467. asml.remove(hp1);
  1468. hp1.free;
  1469. end;
  1470. end;
  1471. end;
  1472. *)
  1473. A_PUSH:
  1474. begin
  1475. if (taicpu(p).opsize = S_W) and
  1476. (taicpu(p).oper[0]^.typ = Top_Const) and
  1477. GetNextInstruction(p, hp1) and
  1478. (tai(hp1).typ = ait_instruction) and
  1479. (taicpu(hp1).opcode = A_PUSH) and
  1480. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1481. (taicpu(hp1).opsize = S_W) then
  1482. begin
  1483. taicpu(p).changeopsize(S_L);
  1484. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1485. asml.remove(hp1);
  1486. hp1.free;
  1487. end;
  1488. end;
  1489. A_SHL, A_SAL:
  1490. begin
  1491. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1492. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1493. (taicpu(p).opsize = S_L) and
  1494. (taicpu(p).oper[0]^.val <= 3) then
  1495. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1496. begin
  1497. TmpBool1 := True; {should we check the next instruction?}
  1498. TmpBool2 := False; {have we found an add/sub which could be
  1499. integrated in the lea?}
  1500. reference_reset(tmpref,2,[]);
  1501. TmpRef.index := taicpu(p).oper[1]^.reg;
  1502. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1503. while TmpBool1 and
  1504. GetNextInstruction(p, hp1) and
  1505. (tai(hp1).typ = ait_instruction) and
  1506. ((((taicpu(hp1).opcode = A_ADD) or
  1507. (taicpu(hp1).opcode = A_SUB)) and
  1508. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1509. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1510. (((taicpu(hp1).opcode = A_INC) or
  1511. (taicpu(hp1).opcode = A_DEC)) and
  1512. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1513. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1514. (not GetNextInstruction(hp1,hp2) or
  1515. not instrReadsFlags(hp2)) Do
  1516. begin
  1517. TmpBool1 := False;
  1518. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1519. begin
  1520. TmpBool1 := True;
  1521. TmpBool2 := True;
  1522. case taicpu(hp1).opcode of
  1523. A_ADD:
  1524. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1525. A_SUB:
  1526. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1527. end;
  1528. asml.remove(hp1);
  1529. hp1.free;
  1530. end
  1531. else
  1532. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1533. (((taicpu(hp1).opcode = A_ADD) and
  1534. (TmpRef.base = NR_NO)) or
  1535. (taicpu(hp1).opcode = A_INC) or
  1536. (taicpu(hp1).opcode = A_DEC)) then
  1537. begin
  1538. TmpBool1 := True;
  1539. TmpBool2 := True;
  1540. case taicpu(hp1).opcode of
  1541. A_ADD:
  1542. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1543. A_INC:
  1544. inc(TmpRef.offset);
  1545. A_DEC:
  1546. dec(TmpRef.offset);
  1547. end;
  1548. asml.remove(hp1);
  1549. hp1.free;
  1550. end;
  1551. end;
  1552. if TmpBool2 or
  1553. ((current_settings.optimizecputype < cpu_Pentium2) and
  1554. (taicpu(p).oper[0]^.val <= 3) and
  1555. not(cs_opt_size in current_settings.optimizerswitches)) then
  1556. begin
  1557. if not(TmpBool2) and
  1558. (taicpu(p).oper[0]^.val = 1) then
  1559. begin
  1560. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1561. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1562. end
  1563. else
  1564. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1565. taicpu(p).oper[1]^.reg);
  1566. InsertLLItem(p.previous, p.next, hp1);
  1567. p.free;
  1568. p := hp1;
  1569. end;
  1570. end
  1571. else
  1572. if (current_settings.optimizecputype < cpu_Pentium2) and
  1573. (taicpu(p).oper[0]^.typ = top_const) and
  1574. (taicpu(p).oper[1]^.typ = top_reg) then
  1575. if (taicpu(p).oper[0]^.val = 1) then
  1576. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1577. but faster on a 486, and Tairable in both U and V pipes on the Pentium
  1578. (unlike shl, which is only Tairable in the U pipe)}
  1579. begin
  1580. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1581. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1582. InsertLLItem(p.previous, p.next, hp1);
  1583. p.free;
  1584. p := hp1;
  1585. end
  1586. else if (taicpu(p).opsize = S_L) and
  1587. (taicpu(p).oper[0]^.val<= 3) then
  1588. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1589. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1590. begin
  1591. reference_reset(tmpref,2,[]);
  1592. TmpRef.index := taicpu(p).oper[1]^.reg;
  1593. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1594. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1595. InsertLLItem(p.previous, p.next, hp1);
  1596. p.free;
  1597. p := hp1;
  1598. end
  1599. end;
  1600. A_SETcc :
  1601. { changes
  1602. setcc (funcres) setcc reg
  1603. movb (funcres), reg to leave/ret
  1604. leave/ret }
  1605. begin
  1606. if (taicpu(p).oper[0]^.typ = top_ref) and
  1607. GetNextInstruction(p, hp1) and
  1608. GetNextInstruction(hp1, hp2) and
  1609. IsExitCode(hp2) and
  1610. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1611. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1612. not(assigned(current_procinfo.procdef.funcretsym) and
  1613. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1614. (hp1.typ = ait_instruction) and
  1615. (taicpu(hp1).opcode = A_MOV) and
  1616. (taicpu(hp1).opsize = S_B) and
  1617. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1618. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1619. begin
  1620. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1621. asml.remove(hp1);
  1622. hp1.free;
  1623. end
  1624. end;
  1625. A_SUB:
  1626. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1627. { * change "sub/add const1, reg" or "dec reg" followed by
  1628. "sub const2, reg" to one "sub ..., reg" }
  1629. begin
  1630. if (taicpu(p).oper[0]^.typ = top_const) and
  1631. (taicpu(p).oper[1]^.typ = top_reg) then
  1632. if (taicpu(p).oper[0]^.val = 2) and
  1633. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1634. { Don't do the sub/push optimization if the sub }
  1635. { comes from setting up the stack frame (JM) }
  1636. (not getLastInstruction(p,hp1) or
  1637. (hp1.typ <> ait_instruction) or
  1638. (taicpu(hp1).opcode <> A_MOV) or
  1639. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1640. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1641. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1642. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1643. begin
  1644. hp1 := tai(p.next);
  1645. while Assigned(hp1) and
  1646. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1647. not RegReadByInstruction(NR_ESP,hp1) and
  1648. not RegModifiedByInstruction(NR_ESP,hp1) do
  1649. hp1 := tai(hp1.next);
  1650. if Assigned(hp1) and
  1651. (tai(hp1).typ = ait_instruction) and
  1652. (taicpu(hp1).opcode = A_PUSH) and
  1653. (taicpu(hp1).opsize = S_W) then
  1654. begin
  1655. taicpu(hp1).changeopsize(S_L);
  1656. if taicpu(hp1).oper[0]^.typ=top_reg then
  1657. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1658. hp1 := tai(p.next);
  1659. asml.remove(p);
  1660. p.free;
  1661. p := hp1;
  1662. continue
  1663. end;
  1664. if DoSubAddOpt(p) then
  1665. continue;
  1666. end
  1667. else if DoSubAddOpt(p) then
  1668. continue
  1669. end;
  1670. A_VMOVAPS,
  1671. A_VMOVAPD:
  1672. if OptPass1VMOVAP(p) then
  1673. continue;
  1674. A_VDIVSD,
  1675. A_VDIVSS,
  1676. A_VSUBSD,
  1677. A_VSUBSS,
  1678. A_VMULSD,
  1679. A_VMULSS,
  1680. A_VADDSD,
  1681. A_VADDSS:
  1682. if OptPass1VOP(p) then
  1683. continue;
  1684. end;
  1685. end; { if is_jmp }
  1686. end;
  1687. end;
  1688. updateUsedRegs(UsedRegs,p);
  1689. p:=tai(p.next);
  1690. end;
  1691. end;
  { Second peephole pass over the current block.

    Walks the tai list from BlockStart up to (but not including) BlockEnd.
    Every instruction that does not contain a segment reference is
    dispatched on its opcode to the matching pass-2 handler:

      A_Jcc           -> OptPass2Jcc
      A_FSTP, A_FISTP -> DoFpuLoadStoreOpt
      A_IMUL          -> OptPass2Imul
      A_JMP           -> OptPass2Jmp
      A_MOV           -> OptPass2MOV

    Each handler receives the cursor p itself.  When a handler returns true
    the loop restarts immediately without advancing, so whatever entry p
    refers to at that point is examined next. }
  procedure TCPUAsmOptimizer.PeepHoleOptPass2;

  {$ifdef DEBUG_AOPTCPU}
    { Debug hook: inserts the message s as an assembler comment before p.
      Not called by the code below at the moment; kept so that new
      optimizations can report themselves when DEBUG_AOPTCPU is defined. }
    procedure DebugMsg(const s: string;p : tai);
      begin
        asml.insertbefore(tai_comment.Create(strpnew(s)), p);
      end;
  {$else DEBUG_AOPTCPU}
    { Release variant of the debug hook: does nothing. }
    procedure DebugMsg(const s: string;p : tai);inline;
      begin
      end;
  {$endif DEBUG_AOPTCPU}

  var
    { cursor into the instruction list }
    p : tai;
  begin
    p := BlockStart;
    ClearUsedRegs;
    while (p <> BlockEnd) do
      begin
        { executed on every iteration, including the ones that are entered
          through "continue" with an unchanged p }
        UpdateUsedRegs(UsedRegs, tai(p.next));
        if p.typ = ait_instruction then
          begin
            { instructions with a segment reference are left untouched }
            if InsContainsSegRef(taicpu(p)) then
              begin
                p := tai(p.next);
                continue;
              end;
            case taicpu(p).opcode of
              A_Jcc:
                if OptPass2Jcc(p) then
                  continue;
              A_FSTP,A_FISTP:
                if DoFpuLoadStoreOpt(p) then
                  continue;
              A_IMUL:
                if OptPass2Imul(p) then
                  continue;
              A_JMP:
                if OptPass2Jmp(p) then
                  continue;
              A_MOV:
                if OptPass2MOV(p) then
                  continue;
            end;
          end;
        p := tai(p.next);
      end;
  end;
  { Post-peephole pass over the current block.

    Walks the tai list from BlockStart up to (but not including) BlockEnd
    and, for every instruction without a segment reference, applies these
    rewrites:

      A_CALL         "call X; jmp Y" -> "push Y; jmp X"   (pre-Pentium2, no PIC)
                     "call X; ret"   -> "jmp X"           (level 4 only, no PIC)
      A_CMP          "cmp $0,%reg"   -> "test %reg,%reg"
      A_MOV          delegated to PostPeepholeOptMov
      A_MOVZX        "movzbl src,%reg" -> "xorl %reg,%reg; movb src,%reg8"
                     (Pentium only, not when optimizing for size, not with
                     register variables)
      A_TEST, A_OR   removal of a flag-setting test/or that directly follows
                     an arithmetic instruction which already set ZF;
                     otherwise "test $-1,%reg" -> "test %reg,%reg"

    A branch that ends in "continue" restarts the loop without advancing p,
    so the entry p refers to at that point is examined (again) next. }
  procedure TCPUAsmOptimizer.PostPeepHoleOpts;
  var
    p,hp1,hp2: tai;
    { true when p is "test $-1,<operand>" }
    IsTestConstX: boolean;
  begin
    p := BlockStart;
    ClearUsedRegs;
    while (p <> BlockEnd) Do
      begin
        UpdateUsedRegs(UsedRegs, tai(p.next));
        case p.Typ Of
          Ait_Instruction:
            begin
              { instructions with a segment reference are left untouched }
              if InsContainsSegRef(taicpu(p)) then
                begin
                  p := tai(p.next);
                  continue;
                end;
              case taicpu(p).opcode Of
                A_CALL:
                  begin
                    { don't do this on modern CPUs, this really hurts them due to
                      broken call/ret pairing }
                    if (current_settings.optimizecputype < cpu_Pentium2) and
                       not(cs_create_pic in current_settings.moduleswitches) and
                       GetNextInstruction(p, hp1) and
                       (hp1.typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_JMP) and
                       ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
                      begin
                        { call X          push Y
                          jmp  Y    -->   jmp  X
                          the push is inserted in front of p, p itself
                          becomes the jump and the old jump is dropped }
                        hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
                        InsertLLItem(p.previous, p, hp2);
                        taicpu(p).opcode := A_JMP;
                        taicpu(p).is_jmp := true;
                        asml.remove(hp1);
                        hp1.free;
                      end
                    { replace
                        call   procname
                        ret
                      by
                        jmp    procname
                      this should never hurt except when pic is used, not sure
                      how to handle it then
                      but do it only on level 4 because it destroys stack back traces
                    }
                    else if (cs_opt_level4 in current_settings.optimizerswitches) and
                       not(cs_create_pic in current_settings.moduleswitches) and
                       GetNextInstruction(p, hp1) and
                       (hp1.typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_RET) and
                       (taicpu(hp1).ops=0) then
                      begin
                        taicpu(p).opcode := A_JMP;
                        taicpu(p).is_jmp := true;
                        asml.remove(hp1);
                        hp1.free;
                      end;
                  end;
                A_CMP:
                  begin
                    if (taicpu(p).oper[0]^.typ = top_const) and
                       (taicpu(p).oper[0]^.val = 0) and
                       (taicpu(p).oper[1]^.typ = top_reg) then
                      {change "cmp $0, %reg" to "test %reg, %reg"}
                      begin
                        taicpu(p).opcode := A_TEST;
                        taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                        { p is not advanced: the next iteration dispatches
                          the rewritten instruction to the A_TEST case }
                        continue;
                      end;
                  end;
                A_MOV:
                  PostPeepholeOptMov(p);
                A_MOVZX:
                  { if register vars are on, it's possible there is code like }
                  { "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"             }
                  { so we can't safely replace the movzx then with xor/mov,   }
                  { since that would change the flags (JM)                    }
                  if not(cs_opt_regvar in current_settings.optimizerswitches) then
                    begin
                      if (taicpu(p).oper[1]^.typ = top_reg) then
                        if (taicpu(p).oper[0]^.typ = top_reg)
                          then
                            case taicpu(p).opsize of
                              S_BL:
                                begin
                                  { The source must live in a different super
                                    register than the destination: for
                                    "movzbl %al,%eax" the inserted xor would
                                    clear the source before the movb reads it.
                                    NOTE(review): pass 1 normally turns that
                                    form into an "and", but nothing in this
                                    procedure guarantees it. }
                                  if IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                     (getsupreg(taicpu(p).oper[0]^.reg) <> getsupreg(taicpu(p).oper[1]^.reg)) and
                                     not(cs_opt_size in current_settings.optimizerswitches) and
                                     (current_settings.optimizecputype = cpu_Pentium) then
                                    {Change "movzbl %reg1, %reg2" to
                                     "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                     PentiumMMX}
                                    begin
                                      hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                        taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                      InsertLLItem(p.previous, p, hp1);
                                      taicpu(p).opcode := A_MOV;
                                      taicpu(p).changeopsize(S_B);
                                      setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                    end;
                                end;
                            end
                          { memory source: the destination may be neither base
                            nor index of the reference, because the xor is
                            executed before the load }
                          else if (taicpu(p).oper[0]^.typ = top_ref) and
                              (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                              (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                              not(cs_opt_size in current_settings.optimizerswitches) and
                              IsGP32Reg(taicpu(p).oper[1]^.reg) and
                              (current_settings.optimizecputype = cpu_Pentium) and
                              (taicpu(p).opsize = S_BL) then
                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                             Pentium and PentiumMMX}
                            begin
                              hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                          taicpu(p).oper[1]^.reg);
                              taicpu(p).opcode := A_MOV;
                              taicpu(p).changeopsize(S_B);
                              setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                              InsertLLItem(p.previous, p, hp1);
                            end;
                    end;
                A_TEST, A_OR:
                  {removes the line marked with (x) from the sequence
                   and/or/xor/add/sub/... $x, %y
                   test/or %y, %y  | test $-1, %y    (x)
                   j(n)z _Label
                      as the first instruction already adjusts the ZF
                      %y operand may also be a reference }
                  begin
                    IsTestConstX:=(taicpu(p).opcode=A_TEST) and
                      MatchOperand(taicpu(p).oper[0]^,-1);
                    { hp1 = instruction in front of p, hp2 = consumer of the flags }
                    if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
                       GetLastInstruction(p, hp1) and
                       (tai(hp1).typ = ait_instruction) and
                       GetNextInstruction(p,hp2) and
                       MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
                      case taicpu(hp1).opcode Of
                        A_ADD, A_SUB, A_OR, A_XOR, A_AND:
                          begin
                            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                              { and in case of carry for A(E)/B(E)/C/NC                  }
                               ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                                ((taicpu(hp1).opcode <> A_ADD) and
                                 (taicpu(hp1).opcode <> A_SUB))) then
                              begin
                                { drop p and continue with its successor }
                                hp1 := tai(p.next);
                                asml.remove(p);
                                p.free;
                                p := tai(hp1);
                                continue
                              end;
                          end;
                        A_SHL, A_SAL, A_SHR, A_SAR:
                          begin
                            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                              { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
                              { therefore, it's only safe to do this optimization for     }
                              { shifts by a (nonzero) constant                            }
                               (taicpu(hp1).oper[0]^.typ = top_const) and
                               (taicpu(hp1).oper[0]^.val <> 0) and
                              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                              { and in case of carry for A(E)/B(E)/C/NC                  }
                               (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                              begin
                                { drop p and continue with its successor }
                                hp1 := tai(p.next);
                                asml.remove(p);
                                p.free;
                                p := tai(hp1);
                                continue
                              end;
                          end;
                        A_DEC, A_INC, A_NEG:
                          begin
                            if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
                              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                              { and in case of carry for A(E)/B(E)/C/NC                  }
                              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                              begin
                                case taicpu(hp1).opcode Of
                                  A_DEC, A_INC:
                                    {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
                                    begin
                                      case taicpu(hp1).opcode Of
                                        A_DEC: taicpu(hp1).opcode := A_SUB;
                                        A_INC: taicpu(hp1).opcode := A_ADD;
                                      end;
                                      { the old single operand becomes the
                                        destination, the constant 1 the source }
                                      taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                                      taicpu(hp1).loadConst(0,1);
                                      taicpu(hp1).ops:=2;
                                    end
                                end;
                                { drop p and continue with its successor }
                                hp1 := tai(p.next);
                                asml.remove(p);
                                p.free;
                                p := tai(hp1);
                                continue
                              end;
                          end
                        else
                          { change "test $-1,%reg" into "test %reg,%reg" }
                          if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                            taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                      end { case }
                    else
                      { change "test $-1,%reg" into "test %reg,%reg" }
                      if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                        taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                  end;
              end;
            end;
        end;
        p := tai(p.next)
      end;
  end;
  { Top-level driver of the i386 peephole optimizer.  Walks asml block by
    block, running the peephole passes on each block handed out by pass_1
    and stepping over assembler blocks (the regions between a
    mark_AsmBlockStart and a mark_AsmBlockEnd marker).  The complete walk is
    done once, or several times when cs_opt_level3 is enabled. }
  Procedure TCpuAsmOptimizer.Optimize;
    Var
      nextInstr: Tai;
      passNo: longint;
      multiPass, sawChange, finalRound: boolean;
    Begin
      multiPass := cs_opt_level3 in current_settings.optimizerswitches;
      passNo := 0;
      sawChange := false;
      repeat
        { Decide up front whether this round is going to be the last one }
        if not multiPass then
          finalRound := true
        else if (not sawChange) and (passNo > 2) then
          finalRound := true
        else
          { hard upper bound on the number of rounds: prevents endless loops }
          finalRound := (passNo = 4);
        { NOTE: nothing inside this procedure ever sets sawChange to true }
        sawChange := false;

        { The label table has to be set up again in every round }
        BlockStart := tai(asml.first);
        pass_1;
        { From here on BlockEnd is either nil or an ait_marker whose Kind is
          mark_AsmBlockStart }
        while Assigned(BlockStart) do
          begin
            if cs_opt_peephole in current_settings.optimizerswitches then
              begin
                if passNo = 0 then
                  begin
                    { very first round only: pre-peephole step, and the
                      first peephole pass is performed twice }
                    PrePeepHoleOpts;
                    PeepHoleOptPass1;
                    PeepHoleOptPass1;
                  end
                else
                  PeepHoleOptPass1;
                { second peephole pass; the post-peephole step is reserved
                  for the final round }
                PeepHoleOptPass2;
                if finalRound then
                  PostPeepHoleOpts;
              end;

            { Resume at the point where the current block stopped: either
              the start of an assembler block or nil }
            BlockStart := BlockEnd;
            while Assigned(BlockStart) and
                  (BlockStart.typ = ait_marker) and
                  (tai_marker(BlockStart).Kind = mark_AsmBlockStart) do
              begin
                { Step over the assembler block: advance until its closing
                  mark_AsmBlockEnd marker is reached }
                BlockStart := tai(BlockStart.Next);
                while not ((BlockStart.typ = ait_marker) and
                           (tai_marker(BlockStart).Kind = mark_AsmBlockEnd)) do
                  BlockStart := tai(BlockStart.Next);

                { BlockStart now is the tai_marker(mark_AsmBlockEnd) }
                if not GetNextInstruction(BlockStart, nextInstr) then
                  { nothing follows the assembler block }
                  BlockStart := nextInstr
                else if (nextInstr.typ = ait_marker) and
                        (tai_marker(nextInstr).Kind = mark_AsmBlockStart) then
                  { another assembler block follows directly: skip that one
                    too in the next iteration }
                  BlockStart := nextInstr
                else
                  { "normal" instructions follow: prepare that block for
                    optimization }
                  pass_1;
              end;
          end;
        passNo := passNo + 1;
      until finalRound;
      dfa.free;
    End;
  { Unit initialization: stores the TCpuAsmOptimizer class declared in this
    unit in casmoptimizer.
    NOTE(review): casmoptimizer is not declared in the visible part of this
    file; presumably it is a class-reference variable from one of the used
    units through which the CPU-specific optimizer gets instantiated -
    TODO confirm. }
  2026. begin
  2027. casmoptimizer:=TCpuAsmOptimizer;
  2028. end.