aoptcpu.pas 109 KB


  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aoptobj, aoptcpub, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  25. Type
  { i386 peephole optimizer class; extends the shared x86 optimizer
    TX86AsmOptimizer with the i386-specific passes and helpers below. }
  26. TCpuAsmOptimizer = class(TX86AsmOptimizer)
  27. procedure Optimize; override;
  { Runs before the peephole passes: rewrites some "imul const,reg" into
    lea/add sequences, folds "shr/sar const; shl const" pairs into and/shift
    and temporarily turns "xor reg,reg" into "mov $0,reg". }
  28. procedure PrePeepHoleOpts; override;
  29. procedure PeepHoleOptPass1; override;
  30. procedure PeepHoleOptPass2; override;
  31. procedure PostPeepHoleOpts; override;
  { Removes an "fstp mem; fld mem" pair that directly precedes the exit code;
    returns true when p was replaced and the caller should "continue". }
  32. function DoFpuLoadStoreOpt(var p : tai) : boolean;
  { Returns true if instruction hp reads register reg (always false when hp
    is not an instruction). }
  33. function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
  { Override that simply forwards to RegReadByInstruction. }
  34. function InstructionLoadsFromReg(const reg : TRegister;const hp : tai) : boolean;override;
  35. end;
  36. Var
  { Unit-level optimizer instance variable. }
  37. AsmOptimizer : TCpuAsmOptimizer;
  38. Implementation
  39. uses
  40. verbose,globtype,globals,
  41. cutils,
  42. aoptbase,
  43. cpuinfo,
  44. aasmcpu,
  45. procinfo,
  46. cgutils,cgx86,
  47. { units we should get rid of: }
  48. symsym,symconst;
  function TCPUAsmoptimizer.DoFpuLoadStoreOpt(var p: tai): boolean;
  { Looks for a store/reload pair on the same memory location,
      fstp mem ; fld mem     or     fistp mem ; fild mem
    with p being the store (p is cast to taicpu unconditionally, so the
    caller must pass an instruction).

    The pair is removed only when it is an extended-precision (S_FX) store
    into a frame-pointer based slot without index register, the instruction
    following the pair is exit code, and the slot does not lie below the
    function result's offset.  In that case both instructions are freed, p is
    moved to the exit-code instruction and removeLastDeallocForFuncRes is
    called on it.

    Turning "fstp f; fld f" into "fst f" for the other operand sizes is
    deliberately NOT done: the store rounds the value, so the reload is not
    redundant.

    Returns true if the pair was removed, i.e. the caller should "continue". }
  var
    loadIns, exitIns: tai;
    isFloatPair, isIntPair: boolean;
  begin
    Result := false;

    { the store must go to memory and be followed by another instruction }
    if taicpu(p).oper[0]^.typ <> top_ref then
      exit;
    if not getNextInstruction(p, loadIns) then
      exit;
    if loadIns.typ <> ait_instruction then
      exit;

    { store and load must be a matching float or integer pair }
    isFloatPair := (taicpu(loadIns).opcode = A_FLD) and
                   (taicpu(p).opcode = A_FSTP);
    isIntPair := (taicpu(p).opcode = A_FISTP) and
                 (taicpu(loadIns).opcode = A_FILD);
    if not (isFloatPair or isIntPair) then
      exit;

    { both must access the very same memory location with the same size }
    if taicpu(loadIns).oper[0]^.typ <> top_ref then
      exit;
    if taicpu(loadIns).opsize <> taicpu(p).opsize then
      exit;
    if not RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(loadIns).oper[0]^.ref^) then
      exit;

    { only valid for extended precision, because any other store rounds }
    if taicpu(p).opsize <> S_FX then
      exit;

    { the pair has to be immediately followed by the exit code }
    if not getNextInstruction(loadIns, exitIns) then
      exit;
    if exitIns.typ <> ait_instruction then
      exit;
    if not IsExitCode(exitIns) then
      exit;

    { plain frame-pointer relative slot that is not below the function result }
    if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
      exit;
    if assigned(current_procinfo.procdef.funcretsym) and
       (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
      exit;
    if taicpu(p).oper[0]^.ref^.index <> NR_NO then
      exit;

    asml.remove(p);
    asml.remove(loadIns);
    p.free;
    loadIns.free;
    p := exitIns;
    removeLastDeallocForFuncRes(p);
    Result := true;
  end;
  { Converts a register-specific tinschange value to the super register it
    refers to.

    The function relies on tinschange containing four consecutive groups of
    eight values each (ending in CH_REDI, CH_WEDI, CH_RWEDI and CH_MEDI), all
    in the register order EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI, so that subtracting
    the ordinal of the previous group's last member maps a value back into
    CH_REAX..CH_REDI.

    Any value outside CH_REAX..CH_MEDI does not denote a register and raises
    internal error 2016041901.  Values ordered before CH_REAX are rejected
    explicitly, because they would otherwise index ch2reg out of bounds. }
  function tch2reg(ch: tinschange): tsuperregister;
  const
    ch2reg: array[CH_REAX..CH_REDI] of tsuperregister = (RS_EAX,RS_ECX,RS_EDX,RS_EBX,RS_ESP,RS_EBP,RS_ESI,RS_EDI);
  begin
    if (ch < CH_REAX) or (ch > CH_MEDI) then
      InternalError(2016041901)
    else if (ch <= CH_REDI) then
      tch2reg := ch2reg[ch]
    else if (ch <= CH_WEDI) then
      tch2reg := ch2reg[tinschange(ord(ch) - ord(CH_REDI))]
    else if (ch <= CH_RWEDI) then
      tch2reg := ch2reg[tinschange(ord(ch) - ord(CH_WEDI))]
    else
      tch2reg := ch2reg[tinschange(ord(ch) - ord(CH_RWEDI))]
  end;
  { Returns true if reg is an integer register whose super register lies in
    the range RS_EAX..RS_EBX.  Only register type and super register are
    inspected; the sub-register (size) of reg is not looked at. }
  function isgp32reg(reg: TRegister): boolean;
  var
    sup: tsuperregister;
  begin
    Result := false;
    if getregtype(reg) <> R_INTREGISTER then
      exit;
    sup := getsupreg(reg);
    { warnings are switched off around the range comparison, as in the
      original code }
  {$push}{$warnings off}
    Result := (sup >= RS_EAX) and (sup <= RS_EBX);
  {$pop}
  end;
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  { Override that answers the question by delegating to RegReadByInstruction. }
  begin
    InstructionLoadsFromReg := RegReadByInstruction(reg, hp);
  end;
  function TCpuAsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  { Returns true if the instruction hp reads register reg.

    - anything that is not an instruction never reads reg
    - a call is assumed to read every register
    - imul/mul/div/idiv are handled by hand because of their implicit
      EAX/EDX operands
    - every other opcode: reg is read if it occurs in any memory reference,
      otherwise the answer comes from the instruction property table
      insprop[opcode].Ch (with a special case for the SSE movsd) }
  var
    ins: taicpu;
    opIdx: longint;
    flagPart: TSubRegister;
  begin
    Result := false;
    if hp.typ <> ait_instruction then
      exit;
    ins := taicpu(hp);
    case ins.opcode of
      A_CALL:
        Result := true;
      A_IMUL:
        case ins.ops of
          1:
            { one-operand form also reads the accumulator; AH is not read by
              the byte-sized variant }
            Result :=
              RegInOp(reg,ins.oper[0]^) or
              (
                (getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX) and
                ((getsubreg(reg)<>R_SUBH) or (ins.opsize<>S_B))
              );
          2,3:
            Result :=
              RegInOp(reg,ins.oper[0]^) or
              RegInOp(reg,ins.oper[1]^);
        end;
      A_MUL:
        Result :=
          RegInOp(reg,ins.oper[0]^) or
          (
            (getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX) and
            ((getsubreg(reg)<>R_SUBH) or (ins.opsize<>S_B))
          );
      A_IDIV,A_DIV:
        { the dividend lives in EAX, plus EDX unless the operand is a byte }
        Result :=
          RegInOp(reg,ins.oper[0]^) or
          (
            (getregtype(reg)=R_INTREGISTER) and
            (
              (getsupreg(reg)=RS_EAX) or
              ((getsupreg(reg)=RS_EDX) and (ins.opsize<>S_B))
            )
          );
      else
        begin
          { a register used to form an address is always read }
          for opIdx := 0 to ins.ops-1 do
            if (ins.oper[opIdx]^.typ = top_ref) and
               RegInRef(reg,ins.oper[opIdx]^.ref^) then
              exit(true);

          { special handling for SSE MOVSD: the destination counts as read
            only in the register-to-register form }
          if (ins.opcode=A_MOVSD) and (ins.ops>0) then
            begin
              if ins.ops<>2 then
                internalerror(2017042702);
              Result :=
                RegInOp(reg,ins.oper[0]^) or
                (
                  (ins.oper[1]^.typ=top_reg) and (ins.oper[0]^.typ=top_reg) and
                  RegInOp(reg,ins.oper[1]^)
                );
              exit;
            end;

          with insprop[ins.opcode] do
            begin
              { implicit reads of a fixed integer register }
              if getregtype(reg)=R_INTREGISTER then
                case getsupreg(reg) of
                  RS_EAX:
                    if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
                      exit(true);
                  RS_ECX:
                    if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
                      exit(true);
                  RS_EDX:
                    if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
                      exit(true);
                  RS_EBX:
                    if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
                      exit(true);
                  RS_ESP:
                    if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
                      exit(true);
                  RS_EBP:
                    if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
                      exit(true);
                  RS_ESI:
                    if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
                      exit(true);
                  RS_EDI:
                    if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
                      exit(true);
                end;

              { flags register, either as a whole or one individual flag }
              if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
                begin
                  flagPart := getsubreg(reg);
                  { conditional instruction and a single flag is asked for:
                    check which flags the condition code evaluates }
                  if (Ch_RFLAGScc in Ch) and not(flagPart in [R_SUBW,R_SUBD,R_SUBQ]) then
                    begin
                      case ins.condition of
                        C_A,C_NBE,       { CF=0 and ZF=0  }
                        C_BE,C_NA:       { CF=1 or ZF=1   }
                          Result := flagPart in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
                        C_AE,C_NB,C_NC,  { CF=0           }
                        C_B,C_NAE,C_C:   { CF=1           }
                          Result := flagPart in [R_SUBFLAGCARRY];
                        C_NE,C_NZ,       { ZF=0           }
                        C_E,C_Z:         { ZF=1           }
                          Result := flagPart in [R_SUBFLAGZERO];
                        C_G,C_NLE,       { ZF=0 and SF=OF }
                        C_LE,C_NG:       { ZF=1 or SF<>OF }
                          Result := flagPart in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                        C_GE,C_NL,       { SF=OF          }
                        C_L,C_NGE:       { SF<>OF         }
                          Result := flagPart in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                        C_NO,            { OF=0           }
                        C_O:             { OF=1           }
                          Result := flagPart in [R_SUBFLAGOVERFLOW];
                        C_NP,C_PO,       { PF=0           }
                        C_P,C_PE:        { PF=1           }
                          Result := flagPart in [R_SUBFLAGPARITY];
                        C_NS,            { SF=0           }
                        C_S:             { SF=1           }
                          Result := flagPart in [R_SUBFLAGSIGN];
                        else
                          internalerror(2017042701);
                      end;
                      if Result then
                        exit;
                    end;

                  { otherwise consult the explicit flag read properties }
                  case flagPart of
                    R_SUBW,R_SUBD,R_SUBQ:
                      Result :=
                        [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
                         Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                         Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
                    R_SUBFLAGCARRY:
                      Result := [Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGPARITY:
                      Result := [Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGAUXILIARY:
                      Result := [Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGZERO:
                      Result := [Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGSIGN:
                      Result := [Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGOVERFLOW:
                      Result := [Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGINTERRUPT:
                      Result := [Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGDIRECTION:
                      Result := [Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    else
                      internalerror(2017042601);
                  end;
                  exit;
                end;

              { instructions flagged Ch_NoReadIfEqualRegs do not count as
                reading when both operands are the same register }
              if (Ch_NoReadIfEqualRegs in Ch) and (ins.ops=2) and
                 (ins.oper[0]^.typ=top_reg) and (ins.oper[1]^.typ=top_reg) and
                 (ins.oper[0]^.reg=ins.oper[1]^.reg) then
                exit;

              { explicit operands that the opcode reads or modifies }
              if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and RegInOp(reg,ins.oper[0]^) then
                exit(true);
              if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and RegInOp(reg,ins.oper[1]^) then
                exit(true);
              if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and RegInOp(reg,ins.oper[2]^) then
                exit(true);
            end;
        end;
    end;
  end;
  { Returns true if at least one of the opercnt operands of p is a memory
    reference whose segment register is set (segment <> NR_NO). }
  function InsContainsSegRef(p: taicpu): boolean;
  var
    idx: longint;
  begin
    for idx := 0 to p.opercnt-1 do
      if (p.oper[idx]^.typ = top_ref) and
         (p.oper[idx]^.ref^.segment <> NR_NO) then
        exit(true);
    result := false;
  end;
  function InstrReadsFlags(p: tai): boolean;
  { Returns true if p has to be treated as depending on the flags:
      - an instruction whose property set contains any flag-reading change
        (read or read/write of an individual flag, Ch_RFlags, Ch_RWFlags,
        Ch_RFLAGScc) or Ch_All
      - any label (always reported as reading the flags; presumably a
        conservative choice because other code may jump there)
    Everything else yields false. }
  begin
    case p.typ of
      ait_instruction:
        InstrReadsFlags :=
          InsProp[taicpu(p).opcode].Ch*
            [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
             Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
             Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[];
      ait_label:
        InstrReadsFlags := true;
      else
        InstrReadsFlags := false;
    end;
  end;
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
  { Walks all entries from BlockStart up to (not including) BlockEnd and
    performs a few simple rewrites on instructions that do not contain a
    segment-prefixed memory reference:

      imul  - 32 bit "imul $const,reg[,reg]" with const 1,3,5,9 (and 6,10,12
              when optimizing for a 386 or lower) is removed, turned into a
              mov, or replaced by lea/add sequences.  Except for const = 1
              this is skipped when optimizing for size or when the next
              instruction is a jo/jno.
      shr/sar followed by shl of the same operand with constant counts -
              folded into a shift plus an "and" mask, or a single "and".
      xor reg,reg - temporarily changed to "mov $0,reg". }
  var
    p,hp1: tai;
    l: aint;
    tmpRef: treference;
    srcReg, dstReg: TRegister;
    inPlace: boolean;
  begin
    p := BlockStart;
    while (p <> BlockEnd) Do
      begin
        case p.Typ Of
          Ait_Instruction:
            begin
              if InsContainsSegRef(taicpu(p)) then
                begin
                  p := tai(p.next);
                  continue;
                end;
              case taicpu(p).opcode Of
                A_IMUL:
                  { changes certain "imul const, %reg"'s to lea sequences }
                  begin
                    if (taicpu(p).oper[0]^.typ = Top_Const) and
                       (taicpu(p).oper[1]^.typ = Top_Reg) and
                       (taicpu(p).opsize = S_L) then
                      if (taicpu(p).oper[0]^.val = 1) then
                        if (taicpu(p).ops = 2) then
                          { remove "imul $1, reg" }
                          begin
                            hp1 := tai(p.Next);
                            asml.remove(p);
                            p.free;
                            p := hp1;
                            continue;
                          end
                        else
                          { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
                          begin
                            hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                            InsertLLItem(p.previous, p.next, hp1);
                            p.free;
                            p := hp1;
                          end
                      else if
                       ((taicpu(p).ops <= 2) or
                        (taicpu(p).oper[2]^.typ = Top_Reg)) and
                       (taicpu(p).oper[0]^.val <= 12) and
                       not(cs_opt_size in current_settings.optimizerswitches) and
                       { lea/add do not set the overflow flag like imul does,
                         so keep the imul if a jo/jno follows }
                       (not(GetNextInstruction(p, hp1)) or
                         not((tai(hp1).typ = ait_instruction) and
                             ((taicpu(hp1).opcode=A_Jcc) and
                              (taicpu(hp1).condition in [C_O,C_NO])))) then
                        begin
                          reference_reset(tmpref,1,[]);
                          { reg1 is the multiplicand; the product goes to reg2
                            in the three operand form and back to reg1 in the
                            two operand form }
                          srcReg := taicpu(p).oper[1]^.reg;
                          if (taicpu(p).ops = 3) then
                            dstReg := taicpu(p).oper[2]^.reg
                          else
                            dstReg := srcReg;
                          { the three operand sequences for 6 and 12 read reg1
                            again after reg2 has been written, so they are only
                            valid when the two registers differ }
                          inPlace := (dstReg = srcReg);
                          case taicpu(p).oper[0]^.val Of
                            3,5,9:
                              { imul n, reg1, reg2 to
                                  lea (reg1,reg1,n-1), reg2
                                imul n, reg1 to
                                  lea (reg1,reg1,n-1), reg1 }
                              begin
                                TmpRef.base := srcReg;
                                TmpRef.index := srcReg;
                                TmpRef.ScaleFactor := taicpu(p).oper[0]^.val - 1;
                                hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, dstReg);
                                InsertLLItem(p.previous, p.next, hp1);
                                p.free;
                                p := hp1;
                              end;
                            6:
                              { imul 6, reg1, reg2 to
                                  lea (,reg1,2), reg2
                                  lea (reg2,reg1,4), reg2
                                imul 6, reg1 to
                                  lea (reg1,reg1,2), reg1
                                  add reg1, reg1 }
                              if (current_settings.optimizecputype <= cpu_386) then
                                begin
                                  { build the second instruction first and
                                    insert it behind p }
                                  if inPlace then
                                    hp1 := taicpu.op_reg_reg(A_ADD, S_L, srcReg, srcReg)
                                  else
                                    begin
                                      TmpRef.base := dstReg;
                                      TmpRef.index := srcReg;
                                      TmpRef.ScaleFactor := 4;
                                      { the result must go to reg2, reg1 has to
                                        stay intact }
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, dstReg);
                                    end;
                                  InsertLLItem(p, p.next, hp1);
                                  { now the first instruction, which replaces p }
                                  reference_reset(tmpref,2,[]);
                                  TmpRef.index := srcReg;
                                  TmpRef.ScaleFactor := 2;
                                  if inPlace then
                                    TmpRef.base := srcReg
                                  else
                                    TmpRef.base := NR_NO;
                                  hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, dstReg);
                                  InsertLLItem(p.previous, p.next, hp1);
                                  p.free;
                                  { continue behind the two new instructions }
                                  p := tai(hp1.next);
                                end;
                            10:
                              { imul 10, reg1, reg2 to
                                  lea (reg1,reg1,4), reg2
                                  add reg2, reg2
                                imul 10, reg1 to
                                  lea (reg1,reg1,4), reg1
                                  add reg1, reg1 }
                              if (current_settings.optimizecputype <= cpu_386) then
                                begin
                                  hp1 := taicpu.op_reg_reg(A_ADD, S_L, dstReg, dstReg);
                                  InsertLLItem(p, p.next, hp1);
                                  TmpRef.base := srcReg;
                                  TmpRef.index := srcReg;
                                  TmpRef.ScaleFactor := 4;
                                  hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, dstReg);
                                  InsertLLItem(p.previous, p.next, hp1);
                                  p.free;
                                  p := tai(hp1.next);
                                end;
                            12:
                              { imul 12, reg1, reg2 to
                                  lea (,reg1,4), reg2
                                  lea (reg2,reg1,8), reg2
                                imul 12, reg1 to
                                  lea (reg1,reg1,2), reg1
                                  lea (,reg1,4), reg1 }
                              if (current_settings.optimizecputype <= cpu_386) then
                                begin
                                  { second instruction, inserted behind p }
                                  TmpRef.index := srcReg;
                                  if inPlace then
                                    begin
                                      TmpRef.base := NR_NO;
                                      TmpRef.ScaleFactor := 4;
                                    end
                                  else
                                    begin
                                      TmpRef.base := dstReg;
                                      TmpRef.ScaleFactor := 8;
                                    end;
                                  hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, dstReg);
                                  InsertLLItem(p, p.next, hp1);
                                  { first instruction, replaces p }
                                  reference_reset(tmpref,2,[]);
                                  TmpRef.index := srcReg;
                                  if inPlace then
                                    begin
                                      TmpRef.base := srcReg;
                                      TmpRef.ScaleFactor := 2;
                                    end
                                  else
                                    begin
                                      TmpRef.base := NR_NO;
                                      TmpRef.ScaleFactor := 4;
                                    end;
                                  hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, dstReg);
                                  InsertLLItem(p.previous, p.next, hp1);
                                  p.free;
                                  p := tai(hp1.next);
                                end;
                          end;
                        end;
                  end;
                A_SAR, A_SHR:
                  { changes the code sequence
                      shr/sar const1, x
                      shl const2, x
                    to either "sar/and", "shl/and" or just "and" depending on
                    const1 and const2 }
                  begin
                    if GetNextInstruction(p, hp1) and
                       (tai(hp1).typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_SHL) and
                       (taicpu(p).oper[0]^.typ = top_const) and
                       (taicpu(hp1).oper[0]^.typ = top_const) and
                       (taicpu(hp1).opsize = taicpu(p).opsize) and
                       (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                       OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                      if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                         not(cs_opt_size in current_settings.optimizerswitches) then
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 > const2:
                          shift right by the difference, then clear the low
                          const2 bits }
                        begin
                          taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                          taicpu(hp1).opcode := A_AND;
                          l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
                          case taicpu(p).opsize Of
                            S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
                            S_B: taicpu(hp1).loadConst(0,l Xor $ff);
                            S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
                          end;
                        end
                      else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                              not(cs_opt_size in current_settings.optimizerswitches) then
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 < const2:
                          clear the low const1 bits, then shift left by the
                          difference }
                        begin
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                          taicpu(p).opcode := A_AND;
                          l := (1 shl (taicpu(p).oper[0]^.val))-1;
                          case taicpu(p).opsize Of
                            S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                            S_B: taicpu(p).loadConst(0,l Xor $ff);
                            S_W: taicpu(p).loadConst(0,l Xor $ffff);
                          end;
                        end
                      else
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 = const2:
                          a single "and" clearing the low bits is enough }
                        if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                          begin
                            taicpu(p).opcode := A_AND;
                            l := (1 shl (taicpu(p).oper[0]^.val))-1;
                            case taicpu(p).opsize Of
                              S_B: taicpu(p).loadConst(0,l Xor $ff);
                              S_W: taicpu(p).loadConst(0,l Xor $ffff);
                              S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                            end;
                            asml.remove(hp1);
                            hp1.free;
                          end;
                  end;
                A_XOR:
                  if (taicpu(p).oper[0]^.typ = top_reg) and
                     (taicpu(p).oper[1]^.typ = top_reg) and
                     (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                    { temporarily change this to 'mov reg,0' to make it easier
                      for the CSE. Will be changed back in pass 2 }
                    begin
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).loadConst(0,0);
                    end;
              end;
            end;
        end;
        p := tai(p.next)
      end;
  end;
  { Starting at hp, steps over every following entry whose type is in
    SkipInstr or is a label or an alignment.

    If an entry of another type follows, it is returned in hp2 and the result
    is true.  If the list ends first, hp2 is set to the last entry reached
    and the result is false.  hp itself is passed by value, so the same
    variable may be given for both parameters. }
  function SkipLabels(hp: tai; var hp2: tai): boolean;
  var
    cur: tai;
  begin
    cur := hp;
    while assigned(cur.next) and
          (tai(cur.next).typ in SkipInstr + [ait_label,ait_align]) do
      cur := tai(cur.next);
    Result := assigned(cur.next);
    if Result then
      hp2 := tai(cur.next)
    else
      hp2 := cur;
  end;
  669. { First pass of peephole optimizations }
  670. procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
  671. function WriteOk : Boolean;
  672. begin
  673. writeln('Ok');
  674. Result:=True;
  675. end;
  676. var
  677. l : longint;
  678. p,hp1,hp2 : tai;
  679. hp3,hp4: tai;
  680. v:aint;
  681. TmpRef: TReference;
  682. TmpBool1, TmpBool2: Boolean;
  683. function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
  684. {traces sucessive jumps to their final destination and sets it, e.g.
  685. je l1 je l3
  686. <code> <code>
  687. l1: becomes l1:
  688. je l2 je l3
  689. <code> <code>
  690. l2: l2:
  691. jmp l3 jmp l3
  692. the level parameter denotes how deeep we have already followed the jump,
  693. to avoid endless loops with constructs such as "l5: ; jmp l5" }
  694. var p1, p2: tai;
  695. l: tasmlabel;
  696. function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
  697. begin
  698. FindAnyLabel := false;
  699. while assigned(hp.next) and
  700. (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
  701. hp := tai(hp.next);
  702. if assigned(hp.next) and
  703. (tai(hp.next).typ = ait_label) then
  704. begin
  705. FindAnyLabel := true;
  706. l := tai_label(hp.next).labsym;
  707. end
  708. end;
  709. begin
  710. GetfinalDestination := false;
  711. if level > 20 then
  712. exit;
  713. p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
  714. if assigned(p1) then
  715. begin
  716. SkipLabels(p1,p1);
  717. if (tai(p1).typ = ait_instruction) and
  718. (taicpu(p1).is_jmp) then
  719. if { the next instruction after the label where the jump hp arrives}
  720. { is unconditional or of the same type as hp, so continue }
  721. (taicpu(p1).condition in [C_None,hp.condition]) or
  722. { the next instruction after the label where the jump hp arrives}
  723. { is the opposite of hp (so this one is never taken), but after }
  724. { that one there is a branch that will be taken, so perform a }
  725. { little hack: set p1 equal to this instruction (that's what the}
  726. { last SkipLabels is for, only works with short bool evaluation)}
  727. ((taicpu(p1).condition = inverse_cond(hp.condition)) and
  728. SkipLabels(p1,p2) and
  729. (p2.typ = ait_instruction) and
  730. (taicpu(p2).is_jmp) and
  731. (taicpu(p2).condition in [C_None,hp.condition]) and
  732. SkipLabels(p1,p1)) then
  733. begin
  734. { quick check for loops of the form "l5: ; jmp l5 }
  735. if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
  736. tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
  737. exit;
  738. if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
  739. exit;
  740. tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
  741. hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
  742. tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
  743. end
  744. else
  745. if (taicpu(p1).condition = inverse_cond(hp.condition)) then
  746. if not FindAnyLabel(p1,l) then
  747. begin
  748. {$ifdef finaldestdebug}
  749. insertllitem(asml,p1,p1.next,tai_comment.Create(
  750. strpnew('previous label inserted'))));
  751. {$endif finaldestdebug}
  752. current_asmdata.getjumplabel(l);
  753. insertllitem(p1,p1.next,tai_label.Create(l));
  754. tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
  755. hp.oper[0]^.ref^.symbol := l;
  756. l.increfs;
  757. { this won't work, since the new label isn't in the labeltable }
  758. { so it will fail the rangecheck. Labeltable should become a }
  759. { hashtable to support this: }
  760. { GetFinalDestination(asml, hp); }
  761. end
  762. else
  763. begin
  764. {$ifdef finaldestdebug}
  765. insertllitem(asml,p1,p1.next,tai_comment.Create(
  766. strpnew('next label reused'))));
  767. {$endif finaldestdebug}
  768. l.increfs;
  769. hp.oper[0]^.ref^.symbol := l;
  770. if not GetFinalDestination(asml, hp,succ(level)) then
  771. exit;
  772. end;
  773. end;
  774. GetFinalDestination := true;
  775. end;
  776. function DoSubAddOpt(var p: tai): Boolean;
  777. begin
  778. DoSubAddOpt := False;
  779. if GetLastInstruction(p, hp1) and
  780. (hp1.typ = ait_instruction) and
  781. (taicpu(hp1).opsize = taicpu(p).opsize) then
  782. case taicpu(hp1).opcode Of
  783. A_DEC:
  784. if (taicpu(hp1).oper[0]^.typ = top_reg) and
  785. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  786. begin
  787. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
  788. asml.remove(hp1);
  789. hp1.free;
  790. end;
  791. A_SUB:
  792. if (taicpu(hp1).oper[0]^.typ = top_const) and
  793. (taicpu(hp1).oper[1]^.typ = top_reg) and
  794. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  795. begin
  796. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
  797. asml.remove(hp1);
  798. hp1.free;
  799. end;
  800. A_ADD:
  801. if (taicpu(hp1).oper[0]^.typ = top_const) and
  802. (taicpu(hp1).oper[1]^.typ = top_reg) and
  803. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  804. begin
  805. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
  806. asml.remove(hp1);
  807. hp1.free;
  808. if (taicpu(p).oper[0]^.val = 0) then
  809. begin
  810. hp1 := tai(p.next);
  811. asml.remove(p);
  812. p.free;
  813. if not GetLastInstruction(hp1, p) then
  814. p := hp1;
  815. DoSubAddOpt := True;
  816. end
  817. end;
  818. end;
  819. end;
  820. begin
  821. p := BlockStart;
  822. ClearUsedRegs;
  823. while (p <> BlockEnd) Do
  824. begin
  825. UpDateUsedRegs(UsedRegs, tai(p.next));
  826. case p.Typ Of
  827. ait_instruction:
  828. begin
  829. current_filepos:=taicpu(p).fileinfo;
  830. if InsContainsSegRef(taicpu(p)) then
  831. begin
  832. p := tai(p.next);
  833. continue;
  834. end;
  835. { Handle Jmp Optimizations }
  836. if taicpu(p).is_jmp then
  837. begin
  838. {the following if-block removes all code between a jmp and the next label,
  839. because it can never be executed}
  840. if (taicpu(p).opcode = A_JMP) then
  841. begin
  842. hp2:=p;
  843. while GetNextInstruction(hp2, hp1) and
  844. (hp1.typ <> ait_label) do
  845. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  846. begin
  847. { don't kill start/end of assembler block,
  848. no-line-info-start/end etc }
  849. if hp1.typ<>ait_marker then
  850. begin
  851. asml.remove(hp1);
  852. hp1.free;
  853. end
  854. else
  855. hp2:=hp1;
  856. end
  857. else break;
  858. end;
  859. { remove jumps to a label coming right after them }
  860. if GetNextInstruction(p, hp1) then
  861. begin
  862. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  863. { TODO: FIXME removing the first instruction fails}
  864. (p<>blockstart) then
  865. begin
  866. hp2:=tai(hp1.next);
  867. asml.remove(p);
  868. p.free;
  869. p:=hp2;
  870. continue;
  871. end
  872. else
  873. begin
  874. if hp1.typ = ait_label then
  875. SkipLabels(hp1,hp1);
  876. if (tai(hp1).typ=ait_instruction) and
  877. (taicpu(hp1).opcode=A_JMP) and
  878. GetNextInstruction(hp1, hp2) and
  879. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  880. begin
  881. if taicpu(p).opcode=A_Jcc then
  882. begin
  883. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  884. tai_label(hp2).labsym.decrefs;
  885. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  886. { when free'ing hp1, the ref. isn't decresed, so we don't
  887. increase it (FK)
  888. taicpu(p).oper[0]^.ref^.symbol.increfs;
  889. }
  890. asml.remove(hp1);
  891. hp1.free;
  892. GetFinalDestination(asml, taicpu(p),0);
  893. end
  894. else
  895. begin
  896. GetFinalDestination(asml, taicpu(p),0);
  897. p:=tai(p.next);
  898. continue;
  899. end;
  900. end
  901. else
  902. GetFinalDestination(asml, taicpu(p),0);
  903. end;
  904. end;
  905. end
  906. else
  907. { All other optimizes }
  908. begin
  909. for l := 0 to taicpu(p).ops-1 Do
  910. if (taicpu(p).oper[l]^.typ = top_ref) then
  911. With taicpu(p).oper[l]^.ref^ Do
  912. begin
  913. if (base = NR_NO) and
  914. (index <> NR_NO) and
  915. (scalefactor in [0,1]) then
  916. begin
  917. base := index;
  918. index := NR_NO
  919. end
  920. end;
  921. case taicpu(p).opcode Of
  922. A_AND:
  923. if OptPass1And(p) then
  924. continue;
  925. A_CMP:
  926. begin
  927. { cmp register,$8000 neg register
  928. je target --> jo target
  929. .... only if register is deallocated before jump.}
  930. case Taicpu(p).opsize of
  931. S_B: v:=$80;
  932. S_W: v:=$8000;
  933. S_L: v:=aint($80000000);
  934. else
  935. internalerror(2013112905);
  936. end;
  937. if (taicpu(p).oper[0]^.typ=Top_const) and
  938. (taicpu(p).oper[0]^.val=v) and
  939. (Taicpu(p).oper[1]^.typ=top_reg) and
  940. GetNextInstruction(p, hp1) and
  941. (hp1.typ=ait_instruction) and
  942. (taicpu(hp1).opcode=A_Jcc) and
  943. (Taicpu(hp1).condition in [C_E,C_NE]) and
  944. not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
  945. begin
  946. Taicpu(p).opcode:=A_NEG;
  947. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  948. Taicpu(p).clearop(1);
  949. Taicpu(p).ops:=1;
  950. if Taicpu(hp1).condition=C_E then
  951. Taicpu(hp1).condition:=C_O
  952. else
  953. Taicpu(hp1).condition:=C_NO;
  954. continue;
  955. end;
  956. {
  957. @@2: @@2:
  958. .... ....
  959. cmp operand1,0
  960. jle/jbe @@1
  961. dec operand1 --> sub operand1,1
  962. jmp @@2 jge/jae @@2
  963. @@1: @@1:
  964. ... ....}
  965. if (taicpu(p).oper[0]^.typ = top_const) and
  966. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  967. (taicpu(p).oper[0]^.val = 0) and
  968. GetNextInstruction(p, hp1) and
  969. (hp1.typ = ait_instruction) and
  970. (taicpu(hp1).is_jmp) and
  971. (taicpu(hp1).opcode=A_Jcc) and
  972. (taicpu(hp1).condition in [C_LE,C_BE]) and
  973. GetNextInstruction(hp1,hp2) and
  974. (hp2.typ = ait_instruction) and
  975. (taicpu(hp2).opcode = A_DEC) and
  976. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  977. GetNextInstruction(hp2, hp3) and
  978. (hp3.typ = ait_instruction) and
  979. (taicpu(hp3).is_jmp) and
  980. (taicpu(hp3).opcode = A_JMP) and
  981. GetNextInstruction(hp3, hp4) and
  982. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  983. begin
  984. taicpu(hp2).Opcode := A_SUB;
  985. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  986. taicpu(hp2).loadConst(0,1);
  987. taicpu(hp2).ops:=2;
  988. taicpu(hp3).Opcode := A_Jcc;
  989. case taicpu(hp1).condition of
  990. C_LE: taicpu(hp3).condition := C_GE;
  991. C_BE: taicpu(hp3).condition := C_AE;
  992. end;
  993. asml.remove(p);
  994. asml.remove(hp1);
  995. p.free;
  996. hp1.free;
  997. p := hp2;
  998. continue;
  999. end
  1000. end;
  1001. A_FLD:
  1002. begin
  1003. if (taicpu(p).oper[0]^.typ = top_reg) and
  1004. GetNextInstruction(p, hp1) and
  1005. (hp1.typ = Ait_Instruction) and
  1006. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1007. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1008. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  1009. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  1010. { change to
  1011. fld reg fxxx reg,st
  1012. fxxxp st, st1 (hp1)
  1013. Remark: non commutative operations must be reversed!
  1014. }
  1015. begin
  1016. case taicpu(hp1).opcode Of
  1017. A_FMULP,A_FADDP,
  1018. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  1019. begin
  1020. case taicpu(hp1).opcode Of
  1021. A_FADDP: taicpu(hp1).opcode := A_FADD;
  1022. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  1023. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  1024. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  1025. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  1026. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  1027. end;
  1028. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  1029. taicpu(hp1).oper[1]^.reg := NR_ST;
  1030. asml.remove(p);
  1031. p.free;
  1032. p := hp1;
  1033. continue;
  1034. end;
  1035. end;
  1036. end
  1037. else
  1038. if (taicpu(p).oper[0]^.typ = top_ref) and
  1039. GetNextInstruction(p, hp2) and
  1040. (hp2.typ = Ait_Instruction) and
  1041. (taicpu(hp2).ops = 2) and
  1042. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1043. (taicpu(hp2).oper[1]^.typ = top_reg) and
  1044. (taicpu(p).opsize in [S_FS, S_FL]) and
  1045. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  1046. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  1047. if GetLastInstruction(p, hp1) and
  1048. (hp1.typ = Ait_Instruction) and
  1049. ((taicpu(hp1).opcode = A_FLD) or
  1050. (taicpu(hp1).opcode = A_FST)) and
  1051. (taicpu(hp1).opsize = taicpu(p).opsize) and
  1052. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1053. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  1054. if ((taicpu(hp2).opcode = A_FMULP) or
  1055. (taicpu(hp2).opcode = A_FADDP)) then
  1056. { change to
  1057. fld/fst mem1 (hp1) fld/fst mem1
  1058. fld mem1 (p) fadd/
  1059. faddp/ fmul st, st
  1060. fmulp st, st1 (hp2) }
  1061. begin
  1062. asml.remove(p);
  1063. p.free;
  1064. p := hp1;
  1065. if (taicpu(hp2).opcode = A_FADDP) then
  1066. taicpu(hp2).opcode := A_FADD
  1067. else
  1068. taicpu(hp2).opcode := A_FMUL;
  1069. taicpu(hp2).oper[1]^.reg := NR_ST;
  1070. end
  1071. else
  1072. { change to
  1073. fld/fst mem1 (hp1) fld/fst mem1
  1074. fld mem1 (p) fld st}
  1075. begin
  1076. taicpu(p).changeopsize(S_FL);
  1077. taicpu(p).loadreg(0,NR_ST);
  1078. end
  1079. else
  1080. begin
  1081. case taicpu(hp2).opcode Of
  1082. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  1083. { change to
  1084. fld/fst mem1 (hp1) fld/fst mem1
  1085. fld mem2 (p) fxxx mem2
  1086. fxxxp st, st1 (hp2) }
  1087. begin
  1088. case taicpu(hp2).opcode Of
  1089. A_FADDP: taicpu(p).opcode := A_FADD;
  1090. A_FMULP: taicpu(p).opcode := A_FMUL;
  1091. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  1092. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  1093. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  1094. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  1095. end;
  1096. asml.remove(hp2);
  1097. hp2.free;
  1098. end
  1099. end
  1100. end
  1101. end;
  1102. A_FSTP,A_FISTP:
  1103. if doFpuLoadStoreOpt(p) then
  1104. continue;
  1105. A_LEA:
  1106. begin
  1107. {removes seg register prefixes from LEA operations, as they
  1108. don't do anything}
  1109. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  1110. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  1111. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1112. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  1113. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1114. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  1115. begin
  1116. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  1117. (taicpu(p).oper[0]^.ref^.offset = 0) then
  1118. begin
  1119. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  1120. taicpu(p).oper[1]^.reg);
  1121. InsertLLItem(p.previous,p.next, hp1);
  1122. p.free;
  1123. p := hp1;
  1124. continue;
  1125. end
  1126. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  1127. begin
  1128. hp1 := tai(p.Next);
  1129. asml.remove(p);
  1130. p.free;
  1131. p := hp1;
  1132. continue;
  1133. end
  1134. { continue to use lea to adjust the stack pointer,
  1135. it is the recommended way, but only if not optimizing for size }
  1136. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  1137. (cs_opt_size in current_settings.optimizerswitches) then
  1138. with taicpu(p).oper[0]^.ref^ do
  1139. if (base = taicpu(p).oper[1]^.reg) then
  1140. begin
  1141. l := offset;
  1142. if (l=1) and UseIncDec then
  1143. begin
  1144. taicpu(p).opcode := A_INC;
  1145. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1146. taicpu(p).ops := 1
  1147. end
  1148. else if (l=-1) and UseIncDec then
  1149. begin
  1150. taicpu(p).opcode := A_DEC;
  1151. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1152. taicpu(p).ops := 1;
  1153. end
  1154. else
  1155. begin
  1156. if (l<0) and (l<>-2147483648) then
  1157. begin
  1158. taicpu(p).opcode := A_SUB;
  1159. taicpu(p).loadConst(0,-l);
  1160. end
  1161. else
  1162. begin
  1163. taicpu(p).opcode := A_ADD;
  1164. taicpu(p).loadConst(0,l);
  1165. end;
  1166. end;
  1167. end;
  1168. end
  1169. (*
  1170. This is unsafe, lea doesn't modify the flags but "add"
  1171. does. This breaks webtbs/tw15694.pp. The above
  1172. transformations are also unsafe, but they don't seem to
  1173. be triggered by code that FPC generators (or that at
  1174. least does not occur in the tests...). This needs to be
  1175. fixed by checking for the liveness of the flags register.
  1176. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1177. begin
  1178. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1179. taicpu(p).oper[0]^.ref^.base);
  1180. InsertLLItem(asml,p.previous,p.next, hp1);
  1181. DebugMsg('Peephole Lea2AddBase done',hp1);
  1182. p.free;
  1183. p:=hp1;
  1184. continue;
  1185. end
  1186. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1187. begin
  1188. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1189. taicpu(p).oper[0]^.ref^.index);
  1190. InsertLLItem(asml,p.previous,p.next,hp1);
  1191. DebugMsg('Peephole Lea2AddIndex done',hp1);
  1192. p.free;
  1193. p:=hp1;
  1194. continue;
  1195. end
  1196. *)
  1197. end;
  1198. A_MOV:
  1199. begin
  1200. If OptPass1MOV(p) then
  1201. Continue;
  1202. end;
  1203. A_MOVSX,
  1204. A_MOVZX :
  1205. begin
  1206. if (taicpu(p).oper[1]^.typ = top_reg) and
  1207. GetNextInstruction(p,hp1) and
  1208. (hp1.typ = ait_instruction) and
  1209. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1210. (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
  1211. GetNextInstruction(hp1,hp2) and
  1212. MatchInstruction(hp2,A_MOV,[]) and
  1213. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1214. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  1215. (((taicpu(hp1).ops=2) and
  1216. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  1217. ((taicpu(hp1).ops=1) and
  1218. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  1219. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  1220. { change movsX/movzX reg/ref, reg2 }
  1221. { add/sub/or/... reg3/$const, reg2 }
  1222. { mov reg2 reg/ref }
  1223. { to add/sub/or/... reg3/$const, reg/ref }
  1224. begin
  1225. { by example:
  1226. movswl %si,%eax movswl %si,%eax p
  1227. decl %eax addl %edx,%eax hp1
  1228. movw %ax,%si movw %ax,%si hp2
  1229. ->
  1230. movswl %si,%eax movswl %si,%eax p
  1231. decw %eax addw %edx,%eax hp1
  1232. movw %ax,%si movw %ax,%si hp2
  1233. }
  1234. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1235. {
  1236. ->
  1237. movswl %si,%eax movswl %si,%eax p
  1238. decw %si addw %dx,%si hp1
  1239. movw %ax,%si movw %ax,%si hp2
  1240. }
  1241. case taicpu(hp1).ops of
  1242. 1:
  1243. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1244. 2:
  1245. begin
  1246. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1247. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1248. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1249. end;
  1250. else
  1251. internalerror(2008042701);
  1252. end;
  1253. {
  1254. ->
  1255. decw %si addw %dx,%si p
  1256. }
  1257. asml.remove(p);
  1258. asml.remove(hp2);
  1259. p.free;
  1260. hp2.free;
  1261. p := hp1
  1262. end
  1263. { removes superfluous And's after movzx's }
  1264. else if taicpu(p).opcode=A_MOVZX then
  1265. begin
  1266. if (taicpu(p).oper[1]^.typ = top_reg) and
  1267. GetNextInstruction(p, hp1) and
  1268. (tai(hp1).typ = ait_instruction) and
  1269. (taicpu(hp1).opcode = A_AND) and
  1270. (taicpu(hp1).oper[0]^.typ = top_const) and
  1271. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1272. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1273. case taicpu(p).opsize Of
  1274. S_BL, S_BW:
  1275. if (taicpu(hp1).oper[0]^.val = $ff) then
  1276. begin
  1277. asml.remove(hp1);
  1278. hp1.free;
  1279. end;
  1280. S_WL:
  1281. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1282. begin
  1283. asml.remove(hp1);
  1284. hp1.free;
  1285. end;
  1286. end;
  1287. {changes some movzx constructs to faster synonims (all examples
  1288. are given with eax/ax, but are also valid for other registers)}
  1289. if (taicpu(p).oper[1]^.typ = top_reg) then
  1290. if (taicpu(p).oper[0]^.typ = top_reg) then
  1291. case taicpu(p).opsize of
  1292. S_BW:
  1293. begin
  1294. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1295. not(cs_opt_size in current_settings.optimizerswitches) then
  1296. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1297. begin
  1298. taicpu(p).opcode := A_AND;
  1299. taicpu(p).changeopsize(S_W);
  1300. taicpu(p).loadConst(0,$ff);
  1301. end
  1302. else if GetNextInstruction(p, hp1) and
  1303. (tai(hp1).typ = ait_instruction) and
  1304. (taicpu(hp1).opcode = A_AND) and
  1305. (taicpu(hp1).oper[0]^.typ = top_const) and
  1306. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1307. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1308. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1309. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1310. begin
  1311. taicpu(p).opcode := A_MOV;
  1312. taicpu(p).changeopsize(S_W);
  1313. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1314. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1315. end;
  1316. end;
  1317. S_BL:
  1318. begin
  1319. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1320. not(cs_opt_size in current_settings.optimizerswitches) then
  1321. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1322. begin
  1323. taicpu(p).opcode := A_AND;
  1324. taicpu(p).changeopsize(S_L);
  1325. taicpu(p).loadConst(0,$ff)
  1326. end
  1327. else if GetNextInstruction(p, hp1) and
  1328. (tai(hp1).typ = ait_instruction) and
  1329. (taicpu(hp1).opcode = A_AND) and
  1330. (taicpu(hp1).oper[0]^.typ = top_const) and
  1331. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1332. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1333. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1334. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1335. begin
  1336. taicpu(p).opcode := A_MOV;
  1337. taicpu(p).changeopsize(S_L);
  1338. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1339. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1340. end
  1341. end;
  1342. S_WL:
  1343. begin
  1344. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1345. not(cs_opt_size in current_settings.optimizerswitches) then
  1346. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1347. begin
  1348. taicpu(p).opcode := A_AND;
  1349. taicpu(p).changeopsize(S_L);
  1350. taicpu(p).loadConst(0,$ffff);
  1351. end
  1352. else if GetNextInstruction(p, hp1) and
  1353. (tai(hp1).typ = ait_instruction) and
  1354. (taicpu(hp1).opcode = A_AND) and
  1355. (taicpu(hp1).oper[0]^.typ = top_const) and
  1356. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1357. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1358. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1359. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1360. begin
  1361. taicpu(p).opcode := A_MOV;
  1362. taicpu(p).changeopsize(S_L);
  1363. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1364. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1365. end;
  1366. end;
  1367. end
  1368. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1369. begin
  1370. if GetNextInstruction(p, hp1) and
  1371. (tai(hp1).typ = ait_instruction) and
  1372. (taicpu(hp1).opcode = A_AND) and
  1373. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1374. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1375. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1376. begin
  1377. taicpu(p).opcode := A_MOV;
  1378. case taicpu(p).opsize Of
  1379. S_BL:
  1380. begin
  1381. taicpu(p).changeopsize(S_L);
  1382. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1383. end;
  1384. S_WL:
  1385. begin
  1386. taicpu(p).changeopsize(S_L);
  1387. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1388. end;
  1389. S_BW:
  1390. begin
  1391. taicpu(p).changeopsize(S_W);
  1392. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1393. end;
  1394. end;
  1395. end;
  1396. end;
  1397. end;
  1398. end;
  1399. (* should not be generated anymore by the current code generator
  1400. A_POP:
  1401. begin
  1402. if target_info.system=system_i386_go32v2 then
  1403. begin
  1404. { Transform a series of pop/pop/pop/push/push/push to }
  1405. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1406. { because I'm not sure whether they can cope with }
  1407. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1408. { such a problem when using esp as frame pointer (JM) }
  1409. if (taicpu(p).oper[0]^.typ = top_reg) then
  1410. begin
  1411. hp1 := p;
  1412. hp2 := p;
  1413. l := 0;
  1414. while getNextInstruction(hp1,hp1) and
  1415. (hp1.typ = ait_instruction) and
  1416. (taicpu(hp1).opcode = A_POP) and
  1417. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1418. begin
  1419. hp2 := hp1;
  1420. inc(l,4);
  1421. end;
  1422. getLastInstruction(p,hp3);
  1423. l1 := 0;
  1424. while (hp2 <> hp3) and
  1425. assigned(hp1) and
  1426. (hp1.typ = ait_instruction) and
  1427. (taicpu(hp1).opcode = A_PUSH) and
  1428. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1429. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1430. begin
  1431. { change it to a two op operation }
  1432. taicpu(hp2).oper[1]^.typ:=top_none;
  1433. taicpu(hp2).ops:=2;
  1434. taicpu(hp2).opcode := A_MOV;
  1435. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1436. reference_reset(tmpref);
  1437. tmpRef.base.enum:=R_INTREGISTER;
  1438. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1439. convert_register_to_enum(tmpref.base);
  1440. tmpRef.offset := l;
  1441. taicpu(hp2).loadRef(0,tmpRef);
  1442. hp4 := hp1;
  1443. getNextInstruction(hp1,hp1);
  1444. asml.remove(hp4);
  1445. hp4.free;
  1446. getLastInstruction(hp2,hp2);
  1447. dec(l,4);
  1448. inc(l1);
  1449. end;
  1450. if l <> -4 then
  1451. begin
  1452. inc(l,4);
  1453. for l1 := l1 downto 1 do
  1454. begin
  1455. getNextInstruction(hp2,hp2);
  1456. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1457. end
  1458. end
  1459. end
  1460. end
  1461. else
  1462. begin
  1463. if (taicpu(p).oper[0]^.typ = top_reg) and
  1464. GetNextInstruction(p, hp1) and
  1465. (tai(hp1).typ=ait_instruction) and
  1466. (taicpu(hp1).opcode=A_PUSH) and
  1467. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1468. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1469. begin
  1470. { change it to a two op operation }
  1471. taicpu(p).oper[1]^.typ:=top_none;
  1472. taicpu(p).ops:=2;
  1473. taicpu(p).opcode := A_MOV;
  1474. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1475. reference_reset(tmpref);
  1476. TmpRef.base.enum := R_ESP;
  1477. taicpu(p).loadRef(0,TmpRef);
  1478. asml.remove(hp1);
  1479. hp1.free;
  1480. end;
  1481. end;
  1482. end;
  1483. *)
  1484. A_PUSH:
  1485. begin
  1486. if (taicpu(p).opsize = S_W) and
  1487. (taicpu(p).oper[0]^.typ = Top_Const) and
  1488. GetNextInstruction(p, hp1) and
  1489. (tai(hp1).typ = ait_instruction) and
  1490. (taicpu(hp1).opcode = A_PUSH) and
  1491. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1492. (taicpu(hp1).opsize = S_W) then
  1493. begin
  1494. taicpu(p).changeopsize(S_L);
  1495. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1496. asml.remove(hp1);
  1497. hp1.free;
  1498. end;
  1499. end;
  1500. A_SHL, A_SAL:
  1501. begin
  1502. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1503. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1504. (taicpu(p).opsize = S_L) and
  1505. (taicpu(p).oper[0]^.val <= 3) then
  1506. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1507. begin
  1508. TmpBool1 := True; {should we check the next instruction?}
  1509. TmpBool2 := False; {have we found an add/sub which could be
  1510. integrated in the lea?}
  1511. reference_reset(tmpref,2,[]);
  1512. TmpRef.index := taicpu(p).oper[1]^.reg;
  1513. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1514. while TmpBool1 and
  1515. GetNextInstruction(p, hp1) and
  1516. (tai(hp1).typ = ait_instruction) and
  1517. ((((taicpu(hp1).opcode = A_ADD) or
  1518. (taicpu(hp1).opcode = A_SUB)) and
  1519. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1520. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1521. (((taicpu(hp1).opcode = A_INC) or
  1522. (taicpu(hp1).opcode = A_DEC)) and
  1523. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1524. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1525. (not GetNextInstruction(hp1,hp2) or
  1526. not instrReadsFlags(hp2)) Do
  1527. begin
  1528. TmpBool1 := False;
  1529. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1530. begin
  1531. TmpBool1 := True;
  1532. TmpBool2 := True;
  1533. case taicpu(hp1).opcode of
  1534. A_ADD:
  1535. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1536. A_SUB:
  1537. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1538. end;
  1539. asml.remove(hp1);
  1540. hp1.free;
  1541. end
  1542. else
  1543. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1544. (((taicpu(hp1).opcode = A_ADD) and
  1545. (TmpRef.base = NR_NO)) or
  1546. (taicpu(hp1).opcode = A_INC) or
  1547. (taicpu(hp1).opcode = A_DEC)) then
  1548. begin
  1549. TmpBool1 := True;
  1550. TmpBool2 := True;
  1551. case taicpu(hp1).opcode of
  1552. A_ADD:
  1553. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1554. A_INC:
  1555. inc(TmpRef.offset);
  1556. A_DEC:
  1557. dec(TmpRef.offset);
  1558. end;
  1559. asml.remove(hp1);
  1560. hp1.free;
  1561. end;
  1562. end;
  1563. if TmpBool2 or
  1564. ((current_settings.optimizecputype < cpu_Pentium2) and
  1565. (taicpu(p).oper[0]^.val <= 3) and
  1566. not(cs_opt_size in current_settings.optimizerswitches)) then
  1567. begin
  1568. if not(TmpBool2) and
  1569. (taicpu(p).oper[0]^.val = 1) then
  1570. begin
  1571. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1572. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1573. end
  1574. else
  1575. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1576. taicpu(p).oper[1]^.reg);
  1577. InsertLLItem(p.previous, p.next, hp1);
  1578. p.free;
  1579. p := hp1;
  1580. end;
  1581. end
  1582. else
  1583. if (current_settings.optimizecputype < cpu_Pentium2) and
  1584. (taicpu(p).oper[0]^.typ = top_const) and
  1585. (taicpu(p).oper[1]^.typ = top_reg) then
  1586. if (taicpu(p).oper[0]^.val = 1) then
  1587. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1588. but faster on a 486, and Tairable in both U and V pipes on the Pentium
  1589. (unlike shl, which is only Tairable in the U pipe)}
  1590. begin
  1591. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1592. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1593. InsertLLItem(p.previous, p.next, hp1);
  1594. p.free;
  1595. p := hp1;
  1596. end
  1597. else if (taicpu(p).opsize = S_L) and
  1598. (taicpu(p).oper[0]^.val<= 3) then
  1599. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1600. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1601. begin
  1602. reference_reset(tmpref,2,[]);
  1603. TmpRef.index := taicpu(p).oper[1]^.reg;
  1604. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1605. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1606. InsertLLItem(p.previous, p.next, hp1);
  1607. p.free;
  1608. p := hp1;
  1609. end
  1610. end;
  1611. A_SETcc :
  1612. { changes
  1613. setcc (funcres) setcc reg
  1614. movb (funcres), reg to leave/ret
  1615. leave/ret }
  1616. begin
  1617. if (taicpu(p).oper[0]^.typ = top_ref) and
  1618. GetNextInstruction(p, hp1) and
  1619. GetNextInstruction(hp1, hp2) and
  1620. IsExitCode(hp2) and
  1621. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1622. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1623. not(assigned(current_procinfo.procdef.funcretsym) and
  1624. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1625. (hp1.typ = ait_instruction) and
  1626. (taicpu(hp1).opcode = A_MOV) and
  1627. (taicpu(hp1).opsize = S_B) and
  1628. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1629. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1630. begin
  1631. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1632. asml.remove(hp1);
  1633. hp1.free;
  1634. end
  1635. end;
  1636. A_SUB:
  1637. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1638. { * change "sub/add const1, reg" or "dec reg" followed by
  1639. "sub const2, reg" to one "sub ..., reg" }
  1640. begin
  1641. if (taicpu(p).oper[0]^.typ = top_const) and
  1642. (taicpu(p).oper[1]^.typ = top_reg) then
  1643. if (taicpu(p).oper[0]^.val = 2) and
  1644. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1645. { Don't do the sub/push optimization if the sub }
  1646. { comes from setting up the stack frame (JM) }
  1647. (not getLastInstruction(p,hp1) or
  1648. (hp1.typ <> ait_instruction) or
  1649. (taicpu(hp1).opcode <> A_MOV) or
  1650. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1651. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1652. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1653. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1654. begin
  1655. hp1 := tai(p.next);
  1656. while Assigned(hp1) and
  1657. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1658. not RegReadByInstruction(NR_ESP,hp1) and
  1659. not RegModifiedByInstruction(NR_ESP,hp1) do
  1660. hp1 := tai(hp1.next);
  1661. if Assigned(hp1) and
  1662. (tai(hp1).typ = ait_instruction) and
  1663. (taicpu(hp1).opcode = A_PUSH) and
  1664. (taicpu(hp1).opsize = S_W) then
  1665. begin
  1666. taicpu(hp1).changeopsize(S_L);
  1667. if taicpu(hp1).oper[0]^.typ=top_reg then
  1668. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1669. hp1 := tai(p.next);
  1670. asml.remove(p);
  1671. p.free;
  1672. p := hp1;
  1673. continue
  1674. end;
  1675. if DoSubAddOpt(p) then
  1676. continue;
  1677. end
  1678. else if DoSubAddOpt(p) then
  1679. continue
  1680. end;
  1681. A_VMOVAPS,
  1682. A_VMOVAPD:
  1683. if OptPass1VMOVAP(p) then
  1684. continue;
  1685. A_VDIVSD,
  1686. A_VDIVSS,
  1687. A_VSUBSD,
  1688. A_VSUBSS,
  1689. A_VMULSD,
  1690. A_VMULSS,
  1691. A_VADDSD,
  1692. A_VADDSS:
  1693. if OptPass1VOP(p) then
  1694. continue;
  1695. end;
  1696. end; { if is_jmp }
  1697. end;
  1698. end;
  1699. updateUsedRegs(UsedRegs,p);
  1700. p:=tai(p.next);
  1701. end;
  1702. end;
  { Second peephole pass over the instruction list from BlockStart up to (but
    not including) BlockEnd.

    Instructions for which InsContainsSegRef returns true are skipped. For the
    remaining ones this pass handles:
      * A_Jcc          - "jb/jnb over a single inc/dec" -> (cmc +) adc/sbb, and,
                         when the target CPU has CMOV, short runs of reg->reg
                         movs guarded by a conditional jump -> CMOVcc
      * A_FSTP/A_FISTP - delegated to DoFpuLoadStoreOpt
      * A_IMUL         - delegated to OptPass2Imul
      * A_JMP          - "jmp to a label directly followed by ret" -> ret
      * A_MOV          - delegated to OptPass2MOV

    Whenever a rewrite replaces or re-targets p the loop "continue"s so the
    (possibly new) p is examined again instead of being stepped over. }
  procedure TCPUAsmOptimizer.PeepHoleOptPass2;

{$ifdef DEBUG_AOPTCPU}
    { Debug build: inserts the message s as an assembler comment before p }
    procedure DebugMsg(const s: string;p : tai);
      begin
        asml.insertbefore(tai_comment.Create(strpnew(s)), p);
      end;
{$else DEBUG_AOPTCPU}
    { Release build: empty stub }
    procedure DebugMsg(const s: string;p : tai);inline;
      begin
      end;
{$endif DEBUG_AOPTCPU}

    { Returns true if p is a 16 or 32 bit "mov reg,reg" instruction, i.e. a
      mov this pass is willing to turn into a CMOVcc }
    function CanBeCMOV(p : tai) : boolean;
      begin
         CanBeCMOV:=assigned(p) and (p.typ=ait_instruction) and
           (taicpu(p).opcode=A_MOV) and
           (taicpu(p).opsize in [S_L,S_W]) and
           ((taicpu(p).oper[0]^.typ = top_reg)
            { we can't use cmov ref,reg because
              ref could be nil and cmov still throws an exception
              if ref=nil but the mov isn't done (FK)
             or ((taicpu(p).oper[0]^.typ = top_ref) and
              (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
            }
           ) and
           (taicpu(p).oper[1]^.typ in [top_reg]);
      end;

    var
      p,hp1,hp2,hp3: tai;
      { number of consecutive cmov-able movs found by the scans below }
      l : longint;
      condition : tasmcond;
      carryadd_opcode: Tasmop;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) Do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_Jcc:
                    begin
                      { jb @@1                            cmc
                        inc/dec operand           -->     adc/sbb operand,0
                        @@1:

                        ... and ...

                        jnb @@1
                        inc/dec operand           -->     adc/sbb operand,0
                        @@1: }
                      if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
                         GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
                         (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
                        begin
                          carryadd_opcode:=A_NONE;
                          if Taicpu(p).condition in [C_NAE,C_B] then
                            begin
                              if Taicpu(hp1).opcode=A_INC then
                                carryadd_opcode:=A_ADC;
                              if Taicpu(hp1).opcode=A_DEC then
                                carryadd_opcode:=A_SBB;
                              if carryadd_opcode<>A_NONE then
                                begin
                                  { the jump itself is recycled as the cmc }
                                  Taicpu(p).clearop(0);
                                  Taicpu(p).ops:=0;
                                  Taicpu(p).is_jmp:=false;
                                  Taicpu(p).opcode:=A_CMC;
                                  Taicpu(p).condition:=C_NONE;
                                  { inc/dec operand -> adc/sbb $0,operand }
                                  Taicpu(hp1).ops:=2;
                                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                                  Taicpu(hp1).loadconst(0,0);
                                  Taicpu(hp1).opcode:=carryadd_opcode;
                                  continue;
                                end;
                            end;
                          if Taicpu(p).condition in [C_AE,C_NB] then
                            begin
                              if Taicpu(hp1).opcode=A_INC then
                                carryadd_opcode:=A_ADC;
                              if Taicpu(hp1).opcode=A_DEC then
                                carryadd_opcode:=A_SBB;
                              if carryadd_opcode<>A_NONE then
                                begin
                                  { no cmc needed here: drop the jump and
                                    continue with the former inc/dec }
                                  asml.remove(p);
                                  p.free;
                                  Taicpu(hp1).ops:=2;
                                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                                  Taicpu(hp1).loadconst(0,0);
                                  Taicpu(hp1).opcode:=carryadd_opcode;
                                  p:=hp1;
                                  continue;
                                end;
                            end;
                        end;
                      if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
                        begin
                          { check for
                                 jCC   xxx
                                 <several movs>
                              xxx:
                          }
                          l:=0;
                          GetNextInstruction(p, hp1);
                          while assigned(hp1) and
                            CanBeCMOV(hp1) and
                            { stop on labels }
                            not(hp1.typ=ait_label) do
                            begin
                              inc(l);
                              GetNextInstruction(hp1,hp1);
                            end;
                          if assigned(hp1) then
                            begin
                              if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                begin
                                  if (l<=4) and (l>0) then
                                    begin
                                      { the movs are executed when the jump is
                                        NOT taken, hence the inverted condition }
                                      condition:=inverse_cond(taicpu(p).condition);
                                      hp2:=p;
                                      GetNextInstruction(p,hp1);
                                      p:=hp1;
                                      repeat
                                        taicpu(hp1).opcode:=A_CMOVcc;
                                        taicpu(hp1).condition:=condition;
                                        GetNextInstruction(hp1,hp1);
                                      until not(assigned(hp1)) or
                                        not(CanBeCMOV(hp1));
                                      { wait with removing else GetNextInstruction could
                                        ignore the label if it was the only usage in the
                                        jump moved away }
                                      tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                      asml.remove(hp2);
                                      hp2.free;
                                      continue;
                                    end;
                                end
                              else
                                begin
                                  { check further for
                                         jCC   xxx
                                         <several movs 1>
                                         jmp   yyy
                                     xxx:
                                         <several movs 2>
                                     yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    { at least one mov must precede the jmp:
                                      with l=0 the instruction following jCC is
                                      the jmp itself, and the conversion loop
                                      below (whose body always runs once) would
                                      rewrite that jmp into a CMOVcc and leave p
                                      pointing at it after it has been freed }
                                    (l>0) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                    begin
                                      l:=0;
                                      { skip hp1 to <several moves 2> }
                                      GetNextInstruction(hp1, hp1);
                                      while assigned(hp1) and
                                        CanBeCMOV(hp1) do
                                        begin
                                          inc(l);
                                          GetNextInstruction(hp1, hp1);
                                        end;
                                      { hp1 points to yyy: }
                                      { likewise at least one mov must follow
                                        xxx:, otherwise the second conversion
                                        loop below would treat a non-mov item as
                                        a taicpu and overwrite its fields }
                                      if (l>0) and
                                        assigned(hp1) and
                                        FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                        begin
                                          { <several movs 1> run when jCC is not taken }
                                          condition:=inverse_cond(taicpu(p).condition);
                                          GetNextInstruction(p,hp1);
                                          hp3:=p;
                                          p:=hp1;
                                          repeat
                                            taicpu(hp1).opcode:=A_CMOVcc;
                                            taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCMOV(hp1));
                                          { hp2 is still at jmp yyy }
                                          GetNextInstruction(hp2,hp1);
                                          { hp1 is now at xxx: }
                                          { <several movs 2> run when jCC is taken,
                                            so they get the original condition back }
                                          condition:=inverse_cond(condition);
                                          GetNextInstruction(hp1,hp1);
                                          { hp1 is now at <several movs 2> }
                                          repeat
                                            taicpu(hp1).opcode:=A_CMOVcc;
                                            taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCMOV(hp1));
                                          {
                                          asml.remove(hp1.next)
                                          hp1.next.free;
                                          asml.remove(hp1);
                                          hp1.free;
                                          }
                                          { remove jCC }
                                          tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp3);
                                          hp3.free;
                                          { remove jmp }
                                          tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp2);
                                          hp2.free;
                                          continue;
                                        end;
                                    end;
                                end;
                            end;
                        end;
                    end;
                  A_FSTP,A_FISTP:
                    if DoFpuLoadStoreOpt(p) then
                      continue;
                  A_IMUL:
                    if OptPass2Imul(p) then
                      continue;
                  A_JMP:
                    {
                      change
                             jmp .L1
                             ...
                         .L1:
                             ret
                      into
                             ret
                    }
                    if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) then
                      begin
                        hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
                        if assigned(hp1) and SkipLabels(hp1,hp1) and (hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_RET) and (taicpu(p).condition=C_None) then
                          begin
                            { the jump no longer references its target label }
                            tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
                            taicpu(p).opcode:=A_RET;
                            taicpu(p).is_jmp:=false;
                            { copy the operand (if any) of the ret found at the label }
                            taicpu(p).ops:=taicpu(hp1).ops;
                            case taicpu(hp1).ops of
                              0:
                                taicpu(p).clearop(0);
                              1:
                                taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
                              else
                                internalerror(2016041301);
                            end;
                            continue;
                          end;
                      end;
                  A_MOV:
                    if OptPass2MOV(p) then
                      continue;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  procedure TCPUAsmOptimizer.PostPeepHoleOpts;
  { Last peephole stage. Walks every item from BlockStart up to, but not
    including, BlockEnd and rewrites single instructions or short sequences:

      A_CALL   call X; jmp Y   ->  push Y; jmp X   (cpu older than Pentium II, no PIC)
               call X; ret     ->  jmp X           (cs_opt_level4 set, no PIC)
      A_CMP    cmp $0,%reg     ->  test %reg,%reg
      A_MOV    handed to PostPeepholeOptMov
      A_MOVZX  movzbl src,%reg ->  xorl %reg,%reg; movb src,%reg8   (Pentium only)
      A_TEST,
      A_OR     dropped when the preceding arithmetic instruction already set
               the zero flag that the following Jcc/SETcc/CMOVcc consumes

    Instructions for which InsContainsSegRef returns true are left untouched. }
    var
      p,hp1,hp2: tai;
      IsTestConstX: boolean;
      { SplitMovzx:    the movzx at p may be replaced by xor + byte mov
        RemoveTest:    the test/or at p is redundant and gets deleted
        NormaliseTest: "test $-1,%reg" at p may be turned into "test %reg,%reg" }
      SplitMovzx, RemoveTest, NormaliseTest: boolean;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          if (p.typ = ait_instruction) and
             not InsContainsSegRef(taicpu(p)) then
            case taicpu(p).opcode of
              A_CALL:
                begin
                  { don't do this on modern CPUs, this really hurts them due to
                    broken call/ret pairing }
                  if (current_settings.optimizecputype < cpu_Pentium2) and
                     not(cs_create_pic in current_settings.moduleswitches) and
                     GetNextInstruction(p, hp1) and
                     (hp1.typ = ait_instruction) and
                     (taicpu(hp1).opcode = A_JMP) and
                     ((taicpu(hp1).oper[0]^.typ=top_ref) and
                      (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
                    begin
                      { push the jump target as the return address, then turn
                        the call itself into a jump and drop the old jmp }
                      hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
                      InsertLLItem(p.previous, p, hp2);
                      taicpu(p).opcode := A_JMP;
                      taicpu(p).is_jmp := true;
                      asml.remove(hp1);
                      hp1.free;
                    end
                  { replace
                      call procname
                      ret
                    by
                      jmp procname
                    this should never hurt except when pic is used, not sure
                    how to handle it then
                    but do it only on level 4 because it destroys stack back traces }
                  else if (cs_opt_level4 in current_settings.optimizerswitches) and
                     not(cs_create_pic in current_settings.moduleswitches) and
                     GetNextInstruction(p, hp1) and
                     (hp1.typ = ait_instruction) and
                     (taicpu(hp1).opcode = A_RET) and
                     (taicpu(hp1).ops=0) then
                    begin
                      taicpu(p).opcode := A_JMP;
                      taicpu(p).is_jmp := true;
                      asml.remove(hp1);
                      hp1.free;
                    end;
                end;
              A_CMP:
                begin
                  if (taicpu(p).oper[0]^.typ = top_const) and
                     (taicpu(p).oper[0]^.val = 0) and
                     (taicpu(p).oper[1]^.typ = top_reg) then
                    { change "cmp $0, %reg" to "test %reg, %reg" }
                    begin
                      taicpu(p).opcode := A_TEST;
                      taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                      { p is not advanced: the new test is examined again by
                        the A_TEST branch on the next iteration }
                      continue;
                    end;
                end;
              A_MOV:
                PostPeepholeOptMov(p);
              A_MOVZX:
                { if register vars are on, it's possible there is code like
                    "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"
                  so we can't safely replace the movzx then with xor/mov,
                  since that would change the flags (JM) }
                if not(cs_opt_regvar in current_settings.optimizerswitches) and
                   not(cs_opt_size in current_settings.optimizerswitches) and
                   (current_settings.optimizecputype = cpu_Pentium) and
                   (taicpu(p).opsize = S_BL) and
                   (taicpu(p).oper[1]^.typ = top_reg) and
                   IsGP32Reg(taicpu(p).oper[1]^.reg) then
                  begin
                    { The inserted xor clears the whole destination before the
                      byte move reads its source, so the source must not depend
                      on the destination register in any way. }
                    case taicpu(p).oper[0]^.typ of
                      top_reg:
                        { e.g. "movzbl %al,%eax" must stay as it is }
                        SplitMovzx :=
                          getsupreg(taicpu(p).oper[0]^.reg) <>
                          getsupreg(taicpu(p).oper[1]^.reg);
                      top_ref:
                        SplitMovzx :=
                          (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                          (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg);
                      else
                        SplitMovzx := false;
                    end;
                    if SplitMovzx then
                      { changes "movzbl src, %reg" to
                        "xorl %reg, %reg; movb src, %reg8" for Pentium and
                        PentiumMMX }
                      begin
                        hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                          taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                        InsertLLItem(p.previous, p, hp1);
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).changeopsize(S_B);
                        setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                      end;
                  end;
              A_TEST, A_OR:
                { removes the line marked with (x) from the sequence
                    and/or/xor/add/sub/... $x, %y
                    test/or %y, %y | test $-1, %y    (x)
                    j(n)z _Label
                  as the first instruction already adjusts the ZF
                  %y operand may also be a reference }
                begin
                  IsTestConstX := (taicpu(p).opcode=A_TEST) and
                    MatchOperand(taicpu(p).oper[0]^,-1);
                  RemoveTest := false;
                  { cleared below as soon as the preceding instruction is one
                    of the flag setting opcodes that are examined }
                  NormaliseTest := true;
                  if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
                     GetLastInstruction(p, hp1) and
                     (tai(hp1).typ = ait_instruction) and
                     GetNextInstruction(p,hp2) and
                     MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
                    case taicpu(hp1).opcode of
                      A_ADD, A_SUB, A_OR, A_XOR, A_AND:
                        begin
                          NormaliseTest := false;
                          RemoveTest :=
                            OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                            { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                            { and in case of carry for A(E)/B(E)/C/NC }
                            ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                             ((taicpu(hp1).opcode <> A_ADD) and
                              (taicpu(hp1).opcode <> A_SUB)));
                        end;
                      A_SHL, A_SAL, A_SHR, A_SAR:
                        begin
                          NormaliseTest := false;
                          RemoveTest :=
                            OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                            { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
                            { therefore, it's only safe to do this optimization for }
                            { shifts by a (nonzero) constant }
                            (taicpu(hp1).oper[0]^.typ = top_const) and
                            (taicpu(hp1).oper[0]^.val <> 0) and
                            { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                            { and in case of carry for A(E)/B(E)/C/NC }
                            (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]);
                        end;
                      A_DEC, A_INC, A_NEG:
                        begin
                          NormaliseTest := false;
                          RemoveTest :=
                            OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
                            { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                            { and in case of carry for A(E)/B(E)/C/NC }
                            (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]);
                          if RemoveTest and
                             ((taicpu(hp1).opcode = A_DEC) or
                              (taicpu(hp1).opcode = A_INC)) then
                            { replace inc/dec with add/sub 1, because inc/dec
                              doesn't set the carry flag }
                            begin
                              if taicpu(hp1).opcode = A_DEC then
                                taicpu(hp1).opcode := A_SUB
                              else
                                taicpu(hp1).opcode := A_ADD;
                              taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                              taicpu(hp1).loadConst(0,1);
                              taicpu(hp1).ops:=2;
                            end;
                        end;
                    end;
                  if RemoveTest then
                    begin
                      { unlink and free the test/or, then resume at its
                        successor without the regular advance at the loop end }
                      hp1 := tai(p.next);
                      asml.remove(p);
                      p.free;
                      p := hp1;
                      continue;
                    end;
                  { change "test $-1,%reg" into "test %reg,%reg" }
                  if NormaliseTest and IsTestConstX and
                     (taicpu(p).oper[1]^.typ=top_reg) then
                    taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                end;
            end;
          p := tai(p.next)
        end;
    end;
  Procedure TCpuAsmOptimizer.Optimize;
  { Drives the optimizer over the whole assembler list (asml).

    The list is processed block by block; regions enclosed by
    mark_AsmBlockStart / mark_AsmBlockEnd markers are skipped. The whole
    sweep is repeated until finalPass becomes true: after a single sweep
    when cs_opt_level3 is off, otherwise once the pass counter exceeds 2.
    PrePeepHoleOpts and the second PeepHoleOptPass1 call only happen in the
    first sweep, PostPeepHoleOpts only in the final one. }
    var
      followingInstr: tai;
      passNo: longint;
      thorough, changed, finalPass: boolean;

    { Applies the peephole stages to the block that currently starts at
      BlockStart; does nothing unless cs_opt_peephole is enabled. }
    procedure PeepholeCurrentBlock;
      var
        firstPass: boolean;
      begin
        if not(cs_opt_peephole in current_settings.optimizerswitches) then
          exit;
        firstPass := (passNo = 0);
        if firstPass then
          PrePeepHoleOpts;
        { Peephole optimizations }
        PeepHoleOptPass1;
        { Only perform them twice in the first pass }
        if firstPass then
          PeepHoleOptPass1;
        { More peephole optimizations }
        PeepHoleOptPass2;
        if finalPass then
          PostPeepHoleOpts;
      end;

    begin
      thorough := (cs_opt_level3 in current_settings.optimizerswitches);
      changed := false;
      passNo := 0;
      repeat
        { Nothing in this routine ever sets "changed" to true, so with
          "thorough" on the sweep with passNo = 3 is the final one; the
          passNo = 4 term only guards against endless loops. }
        finalPass :=
          (passNo = 4) or
          (not changed and (passNo > 2)) or
          not(thorough);
        changed := false;
        { Setup labeltable, always necessary }
        BlockStart := tai(asml.first);
        pass_1;
        { BlockEnd now either contains an ait_marker with
          Kind = mark_AsmBlockStart, or nil }
        while assigned(BlockStart) do
          begin
            PeepholeCurrentBlock;
            { Continue where we left off, BlockEnd is either the start of an
              assembler block or nil }
            BlockStart := BlockEnd;
            while assigned(BlockStart) and
                  (BlockStart.typ = ait_marker) and
                  (tai_marker(BlockStart).Kind = mark_AsmBlockStart) do
              begin
                { We stopped at an assembler block, so walk forward to the
                  marker that closes it }
                repeat
                  BlockStart := tai(BlockStart.next);
                until (BlockStart.typ = ait_marker) and
                      (tai_marker(BlockStart).Kind = mark_AsmBlockEnd);
                { BlockStart now is the tai_marker(mark_AsmBlockEnd) }
                if not GetNextInstruction(BlockStart, followingInstr) or
                   ((followingInstr.typ = ait_marker) and
                    (tai_marker(followingInstr).Kind = mark_AsmBlockStart)) then
                  { Either nothing follows or another assembler block does:
                    move on to it (or to nil) }
                  BlockStart := followingInstr
                else
                  { A block of "normal" instructions follows, prepare it for
                    optimization }
                  pass_1;
              end;
          end;
        inc(passNo);
      until finalPass;
      dfa.free;
    end;
  { Unit initialization: stores the TCpuAsmOptimizer class in casmoptimizer.
    NOTE(review): casmoptimizer is declared outside this unit; presumably it is
    the class reference the generic optimizer driver instantiates - confirm. }
  2252. begin
  2253. casmoptimizer:=TCpuAsmOptimizer;
  2254. end.