aoptx86.pas 101 KB


  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  { Peephole optimizer for x86 targets. Extends TAsmOptimizer with register
    read/write analysis helpers and the optimization passes declared below. }
  28. TX86AsmOptimizer = class(TAsmOptimizer)
  { true if hp overwrites reg with a value that does not depend on reg's old contents }
  29. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  { simply forwards to RegReadByInstruction }
  30. function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
  { true if the instruction hp reads reg, either through an operand or implicitly }
  31. function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
  32. protected
  33. { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
  34. function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  35. { checks whether reading the value in reg1 depends on the value of reg2. This
  36. is very similar to SuperRegistersEqual, except it takes into account that
  37. R_SUBH and R_SUBL are independent (e.g. reading from AL does not
  38. depend on the value in AH). }
  39. function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  { inserts s as an assembler comment before p when DEBUG_AOPTCPU is defined,
    otherwise does nothing }
  40. procedure DebugMsg(const s : string; p : tai);inline;
  { rewrites the allocation info so that reg is allocated from p1 up to and including p2 }
  41. procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
  { true if p starts an exit sequence: ret, leave/ret, or a stack pointer
    restore from the frame pointer followed by pop and ret }
  42. class function IsExitCode(p : tai) : boolean;
  { true if hp1 is an arithmetic/logic/shift instruction that writes reg and whose
    source operand, if it has one, is a constant or a register other than reg }
  43. class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
  { removes the last deallocation of the function result register(s) before p }
  44. procedure RemoveLastDeallocForFuncRes(p : tai);
  { rewrites a constant shift followed by a constant shl on the same operand }
  45. function PrePeepholeOptSxx(var p : tai) : boolean;
  { optimization passes, named after the pass number and the instruction they handle }
  46. function OptPass1AND(var p : tai) : boolean;
  47. function OptPass1VMOVAP(var p : tai) : boolean;
  48. function OptPass1VOP(const p : tai) : boolean;
  49. function OptPass1MOV(var p : tai) : boolean;
  50. function OptPass1Movx(var p : tai) : boolean;
  51. function OptPass1MOVAP(var p : tai) : boolean;
  52. function OptPass1MOVXX(var p : tai) : boolean;
  53. function OptPass2MOV(var p : tai) : boolean;
  54. function OptPass2Imul(var p : tai) : boolean;
  55. function OptPass2Jmp(var p : tai) : boolean;
  56. function OptPass2Jcc(var p : tai) : boolean;
  57. procedure PostPeepholeOptMov(const p : tai);
  58. end;
  { MatchInstruction: true if instr is a CPU instruction whose opcode is one of
    the given opcodes; an empty opsize set accepts every operand size, otherwise
    the instruction's opsize has to be a member of the set }
  59. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  60. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  61. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  62. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  { MatchOperand: true if oper is the given register, the given constant, or is
    equal to a second operand of the same kind (constant, register or reference) }
  63. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  64. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  65. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  { true if both references agree in offset, segment, base, index, scale factor,
    symbol, relsymbol and refaddr }
  66. function RefsEqual(const r1, r2: treference): boolean;
  { true if ref has offset 0, scale factor 0 or 1, no segment, symbol or relsymbol,
    and uses the registers passed as base and index (NR_INVALID matches any register) }
  67. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  68. { returns true if ref is a reference using only the registers passed as base and index;
  69. unlike MatchReference the offset is not checked, so any offset (including 0) is accepted }
  70. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  71. implementation
  72. uses
  73. cutils,verbose,
  74. globals,
  75. cpuinfo,
  76. procinfo,
  77. aasmbase,
  78. aoptutils,
  79. symconst,symsym,
  80. itcpugas;
  { Tells whether instr is a CPU instruction with opcode op. If opsize is not
    the empty set, the operand size of the instruction must be in it as well. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      if instr.typ <> ait_instruction then
        exit(false);
      if taicpu(instr).opcode <> op then
        exit(false);
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Tells whether instr is a CPU instruction whose opcode is op1 or op2. If
    opsize is not the empty set, the operand size must be in it as well. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if (taicpu(instr).opcode = op1) or
         (taicpu(instr).opcode = op2) then
        result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Tells whether instr is a CPU instruction whose opcode is op1, op2 or op3.
    If opsize is not the empty set, the operand size must be in it as well. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if (taicpu(instr).opcode = op1) or
         (taicpu(instr).opcode = op2) or
         (taicpu(instr).opcode = op3) then
        result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Tells whether instr is a CPU instruction whose opcode occurs in ops. If
    opsize is not the empty set, the operand size of the instruction must be
    in it as well. Returns false for an empty ops array.

    The instruction-type and operand-size tests do not depend on the candidate
    opcode, so they are evaluated once up front instead of once per array
    element. Like the fixed-arity overloads, instr must be assigned. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      op : TAsmOp;
    begin
      result:=false;
      if instr.typ <> ait_instruction then
        exit;
      if not((opsize = []) or (taicpu(instr).opsize in opsize)) then
        exit;
      { only the opcode comparison is left for the loop }
      for op in ops do
        if taicpu(instr).opcode = op then
          begin
            result:=true;
            exit;
          end;
    end;
  { True if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ = top_reg then
        result := oper.reg = reg
      else
        result := false;
    end;
  { True if oper is a constant operand with value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ = top_const then
        result := oper.val = a
      else
        result := false;
    end;
  { True if both operands are of the same kind and carry the same constant,
    register or reference. Operands of different kinds never match; two
    operands of the same but otherwise unhandled kind raise an internal error. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      result := oper1.typ = oper2.typ;
      if not result then
        exit;
      case oper1.typ of
        top_const:
          result := oper1.val = oper2.val;
        top_reg:
          result := oper1.reg = oper2.reg;
        top_ref:
          result := RefsEqual(oper1.ref^, oper2.ref^);
        else
          internalerror(2013102801);
      end;
    end;
  { Field-wise comparison of two references: offset, segment, base, index,
    scale factor, symbol, address kind and relative symbol must all agree. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      result := false;
      if r1.offset <> r2.offset then
        exit;
      if (r1.segment <> r2.segment) or (r1.base <> r2.base) then
        exit;
      if (r1.index <> r2.index) or (r1.scalefactor <> r2.scalefactor) then
        exit;
      if (r1.symbol <> r2.symbol) or (r1.refaddr <> r2.refaddr) then
        exit;
      result := r1.relsymbol = r2.relsymbol;
    end;
  { True if ref is a plain register reference without displacement: offset 0,
    scale factor 0 or 1, no segment, no symbol, no relsymbol, and base/index
    equal to the registers passed in (NR_INVALID accepts any register).
    Everything except the offset test is the same check that
    MatchReferenceWithOffset performs. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      if ref.offset <> 0 then
        Result := false
      else
        Result := MatchReferenceWithOffset(ref,base,index);
    end;
  { True if ref has scale factor 0 or 1, no segment, no symbol and no
    relsymbol, and its base/index are the registers passed in (NR_INVALID
    accepts any register). The offset of ref is not looked at. }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result := false;
      if not(ref.scalefactor in [0,1]) then
        exit;
      if (ref.segment <> NR_NO) or
         (ref.symbol <> nil) or
         (ref.relsymbol <> nil) then
        exit;
      if (base <> NR_INVALID) and (ref.base <> base) then
        exit;
      Result := (index = NR_INVALID) or (ref.index = index);
    end;
  { Override that answers "does hp load from reg?" by delegating to
    RegReadByInstruction. }
  function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
    begin
      InstructionLoadsFromReg := RegReadByInstruction(reg,hp);
    end;
  { Tells whether the instruction hp reads register reg.
    Returns false if hp is not an instruction. CALL, IMUL, MUL, DIV and IDIV
    are handled explicitly; every other opcode is answered from its memory
    references, a special case for MOVSD, and the change information (Ch) in
    the insprop table. reg may also be the flags register or one of its
    sub-registers. }
  183. function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  184. var
  185. p: taicpu;
  186. opcount: longint;
  187. begin
  188. RegReadByInstruction := false;
  189. if hp.typ <> ait_instruction then
  190. exit;
  191. p := taicpu(hp);
  192. case p.opcode of
  { a call is treated as reading every register }
  193. A_CALL:
  194. regreadbyinstruction := true;
  195. A_IMUL:
  196. case p.ops of
  { one-operand form: besides the explicit operand, every part of EAX counts
    as read, except AH when the operation is byte sized }
  197. 1:
  198. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  199. (
  200. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  201. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  202. );
  { two/three-operand form: only the first two operands are checked }
  203. 2,3:
  204. regReadByInstruction :=
  205. reginop(reg,p.oper[0]^) or
  206. reginop(reg,p.oper[1]^);
  207. end;
  { same rule as the one-operand IMUL }
  208. A_MUL:
  209. begin
  210. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  211. (
  212. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  213. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  214. );
  215. end;
  { division: the operand, EAX, and (unless byte sized) EDX are read }
  216. A_IDIV,A_DIV:
  217. begin
  218. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  219. (
  220. (getregtype(reg)=R_INTREGISTER) and
  221. (
  222. (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
  223. )
  224. );
  225. end;
  226. else
  227. begin
  { LEA is never reported as reading a segment register }
  228. if (p.opcode=A_LEA) and is_segment_reg(reg) then
  229. begin
  230. RegReadByInstruction := false;
  231. exit;
  232. end;
  { a register used inside any memory reference of the instruction is read }
  233. for opcount := 0 to p.ops-1 do
  234. if (p.oper[opCount]^.typ = top_ref) and
  235. RegInRef(reg,p.oper[opcount]^.ref^) then
  236. begin
  237. RegReadByInstruction := true;
  238. exit
  239. end;
  240. { special handling for SSE MOVSD }
  { with operands, MOVSD reads operand 0, and additionally operand 1 when
    both operands are registers; the zero-operand form falls through }
  241. if (p.opcode=A_MOVSD) and (p.ops>0) then
  242. begin
  243. if p.ops<>2 then
  244. internalerror(2017042702);
  245. regReadByInstruction := reginop(reg,p.oper[0]^) or
  246. (
  247. (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
  248. );
  249. exit;
  250. end;
  { from here on the answer comes from the instruction property table }
  251. with insprop[p.opcode] do
  252. begin
  { implicit reads of fixed integer registers (read, read/write or modify) }
  253. if getregtype(reg)=R_INTREGISTER then
  254. begin
  255. case getsupreg(reg) of
  256. RS_EAX:
  257. if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
  258. begin
  259. RegReadByInstruction := true;
  260. exit
  261. end;
  262. RS_ECX:
  263. if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
  264. begin
  265. RegReadByInstruction := true;
  266. exit
  267. end;
  268. RS_EDX:
  269. if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
  270. begin
  271. RegReadByInstruction := true;
  272. exit
  273. end;
  274. RS_EBX:
  275. if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
  276. begin
  277. RegReadByInstruction := true;
  278. exit
  279. end;
  280. RS_ESP:
  281. if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
  282. begin
  283. RegReadByInstruction := true;
  284. exit
  285. end;
  286. RS_EBP:
  287. if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
  288. begin
  289. RegReadByInstruction := true;
  290. exit
  291. end;
  292. RS_ESI:
  293. if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
  294. begin
  295. RegReadByInstruction := true;
  296. exit
  297. end;
  298. RS_EDI:
  299. if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
  300. begin
  301. RegReadByInstruction := true;
  302. exit
  303. end;
  304. end;
  305. end;
  { flags register: this branch always ends with exit, so the operand checks
    further down are only reached for non-flag registers }
  306. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  307. begin
  { conditional instruction queried for a single flag: which flags are read
    depends on the condition code of the instruction }
  308. if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
  309. begin
  310. case p.condition of
  311. C_A,C_NBE, { CF=0 and ZF=0 }
  312. C_BE,C_NA: { CF=1 or ZF=1 }
  313. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
  314. C_AE,C_NB,C_NC, { CF=0 }
  315. C_B,C_NAE,C_C: { CF=1 }
  316. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
  317. C_NE,C_NZ, { ZF=0 }
  318. C_E,C_Z: { ZF=1 }
  319. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
  320. C_G,C_NLE, { ZF=0 and SF=OF }
  321. C_LE,C_NG: { ZF=1 or SF<>OF }
  322. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  323. C_GE,C_NL, { SF=OF }
  324. C_L,C_NGE: { SF<>OF }
  325. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  326. C_NO, { OF=0 }
  327. C_O: { OF=1 }
  328. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
  329. C_NP,C_PO, { PF=0 }
  330. C_P,C_PE: { PF=1 }
  331. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
  332. C_NS, { SF=0 }
  333. C_S: { SF=1 }
  334. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
  335. else
  336. internalerror(2017042701);
  337. end;
  { a negative answer is not final: the per-flag Ch entries below still apply }
  338. if RegReadByInstruction then
  339. exit;
  340. end;
  341. case getsubreg(reg) of
  { whole flags register: any flag read by the instruction counts }
  342. R_SUBW,R_SUBD,R_SUBQ:
  343. RegReadByInstruction :=
  344. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  345. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  346. Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
  347. R_SUBFLAGCARRY:
  348. RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  349. R_SUBFLAGPARITY:
  350. RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  351. R_SUBFLAGAUXILIARY:
  352. RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  353. R_SUBFLAGZERO:
  354. RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  355. R_SUBFLAGSIGN:
  356. RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  357. R_SUBFLAGOVERFLOW:
  358. RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  359. R_SUBFLAGINTERRUPT:
  360. RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
  361. R_SUBFLAGDIRECTION:
  362. RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  363. else
  364. internalerror(2017042601);
  365. end;
  366. exit;
  367. end;
  { instructions flagged Ch_NoReadIfEqualRegs do not read their operands when
    both operands are the same register; the result stays false }
  368. if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
  369. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  370. (p.oper[0]^.reg=p.oper[1]^.reg) then
  371. exit;
  { explicit operands that the table marks as read, read/write or modified }
  372. if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
  373. begin
  374. RegReadByInstruction := true;
  375. exit
  376. end;
  377. if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
  378. begin
  379. RegReadByInstruction := true;
  380. exit
  381. end;
  382. if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
  383. begin
  384. RegReadByInstruction := true;
  385. exit
  386. end;
  387. if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
  388. begin
  389. RegReadByInstruction := true;
  390. exit
  391. end;
  392. end;
  393. end;
  394. end;
  395. end;
  { DebugMsg exists in two variants selected at compile time: with
    DEBUG_AOPTCPU defined it inserts s as a comment into the assembler list
    right before p; otherwise it is an empty inline procedure. }
  396. {$ifdef DEBUG_AOPTCPU}
  397. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  398. begin
  399. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  400. end;
  401. {$else DEBUG_AOPTCPU}
  402. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  403. begin
  { intentionally empty: debug output is disabled }
  404. end;
  405. {$endif DEBUG_AOPTCPU}
  { Tells whether writing reg1 replaces every bit of reg2.
    Registers with different super registers never overlap, so the answer is
    false. For non-integer registers an equal super register is sufficient.
    For integer registers the answer depends on the sub-register written. }
  function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    begin
      result := false;
      if SuperRegistersEqual(reg1,reg2) then
        begin
          if getregtype(reg1)<>R_INTREGISTER then
            result := true
          else
            case getsubreg(reg1) of
              R_SUBL:
                { a low-byte write leaves the high byte and all upper bits of a
                  wider reg2 untouched, i.e.
                    reg2 := (reg2 and $ffffff00) or byte(reg1) }
                result := getsubreg(reg2)=R_SUBL;
              R_SUBH:
                { a high-byte write leaves the low byte and all upper bits of a
                  wider reg2 untouched, i.e.
                    reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00) }
                result := getsubreg(reg2)=R_SUBH;
              R_SUBW:
                { a 16 bit write keeps the upper 16 bits of a 32 bit or wider reg2:
                    reg2 := (reg2 and $ffff0000) or word(reg1) }
                result := getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW];
              R_SUBD,
              R_SUBQ:
                { a 32 bit write also covers R_SUBQ, because it clears the high
                  32 bits on x86_64 }
                result := true;
              else
                internalerror(2017042801);
            end;
        end;
    end;
  { Tells whether the value read from reg1 is influenced by the contents of
    reg2. This requires equal super registers; for integer registers the low
    and high byte sub-registers are additionally independent of each other. }
  function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    begin
      result := false;
      if SuperRegistersEqual(reg1,reg2) then
        begin
          if getregtype(reg1)<>R_INTREGISTER then
            result := true
          else
            case getsubreg(reg1) of
              R_SUBL:
                { the low byte only ignores the high byte }
                result := getsubreg(reg2)<>R_SUBH;
              R_SUBH:
                { the high byte only ignores the low byte }
                result := getsubreg(reg2)<>R_SUBL;
              R_SUBW,
              R_SUBD,
              R_SUBQ:
                result := true;
              else
                internalerror(2017042802);
            end;
        end;
    end;
  { Pre-peephole rewrite of a shift by a constant (p) that is directly followed
    by a SHL by a constant on the same operand with the same operand size.
    The pair is replaced by a shorter shift plus an AND, or by a single AND
    when both counts are equal. The cases with differing counts are skipped
    when optimizing for size.
    NOTE(review): p's own opcode is not checked here - presumably the caller
    only invokes this for shr/sar; confirm.
    NOTE(review): Result stays false on every path, also when the code was
    changed. }
  457. function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
  458. var
  459. hp1 : tai;
  460. l : TCGInt;
  461. begin
  462. result:=false;
  463. { changes the code sequence
  464. shr/sar const1, x
  465. shl const2, x
  466. to
  467. either "sar/and", "shl/and" or just "and" depending on const1 and const2 }
  468. if GetNextInstruction(p, hp1) and
  469. MatchInstruction(hp1,A_SHL,[]) and
  470. (taicpu(p).oper[0]^.typ = top_const) and
  471. (taicpu(hp1).oper[0]^.typ = top_const) and
  472. (taicpu(hp1).opsize = taicpu(p).opsize) and
  473. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
  474. OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
  475. begin
  476. if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
  477. not(cs_opt_size in current_settings.optimizerswitches) then
  478. begin
  479. { shr/sar const1, %reg
  480. shl const2, %reg
  481. with const1 > const2 }
  { p keeps its opcode and shifts by const1-const2; hp1 becomes the AND }
  482. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
  483. taicpu(hp1).opcode := A_AND;
  { l has the low const2 bits set; xor with the all-ones value of the operand
    size turns it into a mask that clears exactly those bits }
  484. l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
  485. case taicpu(p).opsize Of
  486. S_B: taicpu(hp1).loadConst(0,l Xor $ff);
  487. S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
  488. S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
  489. S_Q: taicpu(hp1).loadConst(0,l Xor aint($ffffffffffffffff));
  490. else
  491. Internalerror(2017050703)
  492. end;
  493. end
  494. else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
  495. not(cs_opt_size in current_settings.optimizerswitches) then
  496. begin
  497. { shr/sar const1, %reg
  498. shl const2, %reg
  499. with const1 < const2 }
  { hp1 stays a shl by const2-const1; p becomes the AND clearing the low const1 bits }
  500. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
  501. taicpu(p).opcode := A_AND;
  502. l := (1 shl (taicpu(p).oper[0]^.val))-1;
  503. case taicpu(p).opsize Of
  504. S_B: taicpu(p).loadConst(0,l Xor $ff);
  505. S_W: taicpu(p).loadConst(0,l Xor $ffff);
  506. S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
  507. S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
  508. else
  509. Internalerror(2017050702)
  510. end;
  511. end
  512. else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
  513. begin
  514. { shr/sar const1, %reg
  515. shl const2, %reg
  516. with const1 = const2 }
  { equal counts: p becomes the AND and the shl is no longer needed }
  517. taicpu(p).opcode := A_AND;
  518. l := (1 shl (taicpu(p).oper[0]^.val))-1;
  519. case taicpu(p).opsize Of
  520. S_B: taicpu(p).loadConst(0,l Xor $ff);
  521. S_W: taicpu(p).loadConst(0,l Xor $ffff);
  522. S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
  523. S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
  524. else
  525. Internalerror(2017050701)
  526. end;
  527. asml.remove(hp1);
  528. hp1.free;
  529. end;
  530. end;
  531. end;
  532. { allocates register reg between (and including) instructions p1 and p2
  533. the type of p1 and p2 must not be in SkipInstr
  534. note that this routine is both called from the peephole optimizer
  535. (where optinfo is not yet initialised) and from the cse (where it is) }
  { Nothing is done for ESP, the frame pointer, or when p1 is nil. reg is
    widened to its whole super register. An allocation is inserted before p1
    if reg is not yet in initialusedregs, and every alloc/dealloc marker of
    reg in between is removed. If the first removed marker was an allocation,
    one is re-inserted before the start; if the last removed marker was a
    deallocation, one is re-inserted at the end of the range. }
  536. procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
  537. var
  538. hp, start: tai;
  539. removedsomething,
  540. firstRemovedWasAlloc,
  541. lastRemovedWasDealloc: boolean;
  542. begin
  543. {$ifdef EXTDEBUG}
  544. { if assigned(p1.optinfo) and
  545. (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
  546. internalerror(2004101010); }
  547. {$endif EXTDEBUG}
  548. start := p1;
  549. if (reg = NR_ESP) or
  550. (reg = current_procinfo.framepointer) or
  551. not(assigned(p1)) then
  552. { this happens with registers which are loaded implicitly, outside the }
  553. { current block (e.g. esi with self) }
  554. exit;
  555. { make sure we allocate it for this instruction }
  { p2 is advanced to the instruction after the range, so the loop below
    still processes the original p2 }
  556. getnextinstruction(p2,p2);
  557. lastRemovedWasDealloc := false;
  558. removedSomething := false;
  559. firstRemovedWasAlloc := false;
  { NOTE(review): the allocregdebug code refers to supreg, which is not
    declared in this procedure, and calls insertllitem with asml as an extra
    first argument; it does not look compilable as written - confirm before
    enabling the define }
  560. {$ifdef allocregdebug}
  561. hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  562. ' from here...'));
  563. insertllitem(asml,p1.previous,p1,hp);
  564. hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  565. ' till here...'));
  566. insertllitem(asml,p2,p2.next,hp);
  567. {$endif allocregdebug}
  568. { do it the safe way: always allocate the full super register,
  569. as we do no register re-allocation in the peephole optimizer,
  570. this does not hurt
  571. }
  572. case getregtype(reg) of
  573. R_MMREGISTER:
  574. reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
  575. R_INTREGISTER:
  576. reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
  577. end;
  578. if not(RegInUsedRegs(reg,initialusedregs)) then
  579. begin
  580. hp := tai_regalloc.alloc(reg,nil);
  581. insertllItem(p1.previous,p1,hp);
  582. IncludeRegInUsedRegs(reg,initialusedregs);
  583. end;
  { walk from p1 to p2 and strip the allocation markers of reg on the way }
  584. while assigned(p1) and
  585. (p1 <> p2) do
  586. begin
  { optinfo must not be set here; the commented-out call shows what was done
    with it previously }
  587. if assigned(p1.optinfo) then
  588. internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
  589. p1 := tai(p1.next);
  590. repeat
  { skip everything in SkipInstr except register allocation markers }
  591. while assigned(p1) and
  592. (p1.typ in (SkipInstr-[ait_regalloc])) Do
  593. p1 := tai(p1.next);
  594. { remove all allocation/deallocation info about the register in between }
  595. if assigned(p1) and
  596. (p1.typ = ait_regalloc) then
  597. begin
  598. { same super register, different sub register? }
  599. if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
  600. begin
  { the marker is widened to reg; a marker with a larger sub register than
    reg, or reg being a high byte register, is an internal error }
  601. if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
  602. internalerror(2016101501);
  603. tai_regalloc(p1).reg:=reg;
  604. end;
  605. if tai_regalloc(p1).reg=reg then
  606. begin
  { remember the kind of the first and of the last removed marker }
  607. if not removedSomething then
  608. begin
  609. firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
  610. removedSomething := true;
  611. end;
  612. lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
  613. hp := tai(p1.Next);
  614. asml.Remove(p1);
  615. p1.free;
  616. p1 := hp;
  617. end
  618. else
  619. p1 := tai(p1.next);
  620. end;
  621. until not(assigned(p1)) or
  622. not(p1.typ in SkipInstr);
  623. end;
  { restore the markers at the borders of the range; nothing is re-inserted
    when the end of the list was reached }
  624. if assigned(p1) then
  625. begin
  626. if firstRemovedWasAlloc then
  627. begin
  628. hp := tai_regalloc.Alloc(reg,nil);
  629. insertLLItem(start.previous,start,hp);
  630. end;
  631. if lastRemovedWasDealloc then
  632. begin
  633. hp := tai_regalloc.DeAlloc(reg,nil);
  634. insertLLItem(p1.previous,p1,hp);
  635. end;
  636. end;
  637. end;
  { Tells whether instruction hp loads reg with a completely new value, i.e.
    hp overwrites reg entirely and the value written does not depend on the
    previous contents of reg.

    Returns false if hp is nil or not an instruction. For the flags register
    (or a single flag) the answer is taken from the write entries of the
    instruction property table; asking for the whole flags register requires
    carry, parity, auxiliary, zero, sign and overflow to all be rewritten.

    For other registers a fixed list of instruction shapes is recognised:
    moves/loads into a register, POP into a register, IMUL/MUL, the
    sign-extension instructions, segment loads, AAM, LAHF, LODSx, SETcc,
    FSTSW/FNSTSW, and XOR/SUB/SBB of a register with itself. }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        begin
          Result := false;
          exit;
        end;
      p := taicpu(hp);
      if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
        with insprop[p.opcode] do
          begin
            case getsubreg(reg) of
              R_SUBW,R_SUBD,R_SUBQ:
                Result:=
                  RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
                  RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
                  RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
                  RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
                  RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
                  RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
              R_SUBFLAGCARRY:
                Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGPARITY:
                Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGAUXILIARY:
                Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGZERO:
                Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGSIGN:
                Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGOVERFLOW:
                Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGINTERRUPT:
                Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGDIRECTION:
                Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
              else
                internalerror(2017050501);
            end;
            exit;
          end;
      Result :=
        (((p.opcode = A_MOV) or
          (p.opcode = A_MOVZX) or
          (p.opcode = A_MOVSX) or
          (p.opcode = A_LEA) or
          (p.opcode = A_VMOVSS) or
          (p.opcode = A_VMOVSD) or
          (p.opcode = A_VMOVAPD) or
          (p.opcode = A_VMOVAPS) or
          (p.opcode = A_VMOVQ) or
          (p.opcode = A_MOVSS) or
          (p.opcode = A_MOVSD) or
          (p.opcode = A_MOVQ) or
          (p.opcode = A_MOVAPD) or
          (p.opcode = A_MOVAPS) or
{$ifndef x86_64}
          (p.opcode = A_LDS) or
          (p.opcode = A_LES) or
{$endif not x86_64}
          (p.opcode = A_LFS) or
          (p.opcode = A_LGS) or
          (p.opcode = A_LSS)) and
         (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
         (p.oper[1]^.typ = top_reg) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
         ((p.oper[0]^.typ = top_const) or
          ((p.oper[0]^.typ = top_reg) and
           not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
          ((p.oper[0]^.typ = top_ref) and
           not RegInRef(reg,p.oper[0]^.ref^)))) or
        { POP may also target memory; only look at the register field when the
          operand really is a register, as the SETcc and FSTSW cases do }
        ((p.opcode = A_POP) and
         (p.oper[0]^.typ = top_reg) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
        ((p.opcode = A_IMUL) and
         (p.ops=3) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
         (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
          ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
        ((((p.opcode = A_IMUL) or
           (p.opcode = A_MUL)) and
          (p.ops=1)) and
         (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
          ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
         (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
          ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
          ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
{$ifdef x86_64}
          or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
{$endif x86_64}
         )) or
        ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
        ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
{$ifdef x86_64}
        ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
{$endif x86_64}
        ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
{$ifndef x86_64}
        ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$endif not x86_64}
        ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$ifndef x86_64}
        ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
{$endif not x86_64}
        ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
        ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
        ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
        ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
{$ifdef x86_64}
        ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
{$endif x86_64}
        ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
        (((p.opcode = A_FSTSW) or
          (p.opcode = A_FNSTSW)) and
         (p.oper[0]^.typ=top_reg) and
         Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
        { xor/sub/sbb of a register with itself does not depend on its old value }
        (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
         (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
         (p.oper[0]^.reg=p.oper[1]^.reg) and
         Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
    end;
  { Tells whether p is the start of a procedure exit sequence. After skipping
    one optional leading NOP, the following shapes are accepted:
      - ret
      - leave followed by a ret without operand size
      - mov framepointer,stackpointer or lea with the frame pointer as base
        into the stack pointer, followed by a pop of the frame pointer and a
        ret without operand size
    NOTE(review): p is dereferenced by the NOP test before the assigned(p)
    check, so p itself is expected to be non-nil on entry - confirm at the
    call sites. }
  764. class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
  765. var
  766. hp2,hp3 : tai;
  767. begin
  768. { some x86-64 issue a NOP before the real exit code }
  769. if MatchInstruction(p,A_NOP,[]) then
  770. GetNextInstruction(p,p);
  { p can be nil here if there was no instruction after the NOP }
  771. result:=assigned(p) and (p.typ=ait_instruction) and
  772. ((taicpu(p).opcode = A_RET) or
  773. ((taicpu(p).opcode=A_LEAVE) and
  774. GetNextInstruction(p,hp2) and
  775. MatchInstruction(hp2,A_RET,[S_NO])
  776. ) or
  { manual frame teardown: restore the stack pointer from the frame pointer ... }
  777. ((((taicpu(p).opcode=A_MOV) and
  778. MatchOpType(taicpu(p),top_reg,top_reg) and
  779. (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
  780. (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
  781. ((taicpu(p).opcode=A_LEA) and
  782. MatchOpType(taicpu(p),top_ref,top_reg) and
  783. (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
  784. (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
  785. )
  786. ) and
  { ... then pop the frame pointer and return }
  787. GetNextInstruction(p,hp2) and
  788. MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
  789. MatchOpType(taicpu(hp2),top_reg) and
  790. (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
  791. GetNextInstruction(hp2,hp3) and
  792. MatchInstruction(hp3,A_RET,[S_NO])
  793. )
  794. );
  795. end;
  { Tells whether hp1 is an arithmetic operation on reg of one of these shapes:
      - add/sub/or/xor/and/shl/shr/sar with reg as second (register) operand
        and a constant or a register different from reg as first operand
      - inc/dec/neg/not with reg as its (register) operand
    Every other opcode yields false. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    var
      sourceok : boolean;
    begin
      result := false;
      case hp1.opcode of
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          begin
            { the source must not be reg itself }
            if hp1.oper[0]^.typ = top_const then
              sourceok := true
            else
              sourceok := (hp1.oper[0]^.typ = top_reg) and
                          (hp1.oper[0]^.reg <> reg);
            if sourceok then
              result := (hp1.oper[1]^.typ = top_reg) and
                        (hp1.oper[1]^.reg = reg);
          end;
        A_INC,A_DEC,A_NEG,A_NOT:
          if hp1.oper[0]^.typ = top_reg then
            result := hp1.oper[0]^.reg = reg;
      end;
    end;
  { Removes the last deallocation marker of the function result register(s)
    located before p. Which registers are handled depends on the return type
    of the current procedure: EAX for the listed structured/pointer-like
    types and for non-empty ordinals, plus EDX for 8 byte ordinals. Other
    return types are left alone. }
  813. procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);
  { Walks backwards from p and deletes the first deallocation of the integer
    super register supreg it meets. The search stops at the start of the list
    or at the first item for which regInInstruction reports the whole
    register. }
  814. procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
  815. var
  816. hp2: tai;
  817. begin
  818. hp2 := p;
  819. repeat
  820. hp2 := tai(hp2.previous);
  821. if assigned(hp2) and
  822. (hp2.typ = ait_regalloc) and
  823. (tai_regalloc(hp2).ratype=ra_dealloc) and
  824. (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
  825. (getsupreg(tai_regalloc(hp2).reg) = supreg) then
  826. begin
  827. asml.remove(hp2);
  828. hp2.free;
  { leave right away: hp2 has been freed and must not be used in the until test }
  829. break;
  830. end;
  831. until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
  832. end;
  833. begin
  834. case current_procinfo.procdef.returndef.typ of
  835. arraydef,recorddef,pointerdef,
  836. stringdef,enumdef,procdef,objectdef,errordef,
  837. filedef,setdef,procvardef,
  838. classrefdef,forwarddef:
  839. DoRemoveLastDeallocForFuncRes(RS_EAX);
  840. orddef:
  { ordinals of size 0 have no result register to keep }
  841. if current_procinfo.procdef.returndef.size <> 0 then
  842. begin
  843. DoRemoveLastDeallocForFuncRes(RS_EAX);
  844. { for int64/qword }
  845. if current_procinfo.procdef.returndef.size = 8 then
  846. DoRemoveLastDeallocForFuncRes(RS_EDX);
  847. end;
  848. end;
  849. end;
  { Pass 1 peephole optimization for MOVAPS/MOVAPD (p).

    Folds
        movapX      reg,reg2
        addsX/subsX/mulsX/divsX  reg3,reg2
        movapX      reg2,reg
    into
        addsX/subsX/...  reg3,reg
    provided reg2 is not used after the last move.

    p is a var parameter: on success it is set to the arithmetic
    instruction. Returns True when the instruction list was changed. }
  function TX86AsmOptimizer.OptPass1MOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) and
        GetNextInstruction(p, hp1) and
        (hp1.typ = ait_instruction) and
        GetNextInstruction(hp1, hp2) and
        MatchInstruction(hp2,taicpu(p).opcode,[]) and
        OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
        MatchOpType(taicpu(hp2),top_reg,top_reg) and
        MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
        (((taicpu(p).opcode=A_MOVAPS) and
          ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
           (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
         ((taicpu(p).opcode=A_MOVAPD) and
          ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
           (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
        ) and
        { the arithmetic instruction must really operate on reg2; without
          this check an unrelated "addsX reg3,reg4" between the two moves
          would get its destination redirected to reg }
        MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
        { change
                   movapX    reg,reg2
                   addsX/subsX/...    reg3, reg2
                   movapX    reg2,reg
          to
                   addsX/subsX/...    reg3,reg
        }
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
          If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
            begin
              DebugMsg('Peephole Optimization MovapXOpMovapX2Op ('+
                    std_op2str[taicpu(p).opcode]+' '+
                    std_op2str[taicpu(hp1).opcode]+' '+
                    std_op2str[taicpu(hp2).opcode]+')',p);
              { we cannot eliminate the first move if
                the operations uses the same register for source and dest }
              if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
                begin
                  asml.remove(p);
                  p.Free;
                end;
              { let the operation write directly to reg and drop the move back }
              taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
              asml.remove(hp2);
              hp2.Free;
              p:=hp1;
              result:=true;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end
    end;
  { Pass 1 peephole optimization for VMOVAPS/VMOVAPD (p) with two register
    operands. Handles
      - a move of a register onto itself (removed),
      - two chained moves through a temporary register,
      - a move / fused multiply-add / move back sequence.

    p is a var parameter and is redirected when the instruction it pointed
    to is removed. Returns True when the instruction list was changed.

    Every CopyUsedRegs below is paired with a ReleaseUsedRegs, as in the
    other routines of this unit. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                MatchOpType(taicpu(hp1),top_reg,top_reg) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end;
                  { fix: the register set copy was previously never released }
                  ReleaseUsedRegs(TmpUsedRegs);
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                { we mix single and double opperations here because we assume that the compiler
                  generates vmovapd only after double operations and vmovaps only after single operations }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                { fix: reg2 may only be the accumulator of the fma; if it is
                  also read through operand 0 or 1, removing the load of reg2
                  would leave that read without a value }
                not(MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^)) and
                not(MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^)) and
                GetNextInstruction(hp1,hp2) and
                MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                { fix: the trailing move must copy reg2 (and nothing else)
                  back into reg1 }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  { vmova* reg1,reg2
                    vfma*  x,y,reg2
                    vmova* reg2,reg1
                    =>
                    vfma*  x,y,reg1 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
                   then
                    begin
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                      { fix: report the change like the other branches do }
                      result:=true;
                    end;
                  { fix: the register set copy was previously never released }
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end;
        end;
    end;
  { Pass 1 peephole optimization for a three operand vector instruction p
    that is followed by a vmovaps/vmovapd of its result:
        vop    x,y,reg1
        vmova* reg1,reg2
    =>
        vop    x,y,reg2
    provided reg1 is not used after the move.

    Returns True when the move was folded into p (and removed). }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
        { we mix single and double opperations here because we assume that the compiler
          generates vmovapd only after double operations and vmovaps only after single operations }
        MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
        MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
        (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
             ) then
            begin
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
          { fix: pair CopyUsedRegs with ReleaseUsedRegs, as the other
            routines of this unit do; the copy was never released before }
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  { Pass 1 peephole optimization for a MOV instruction (p).

    The routine tries a series of patterns built from p and the one or two
    instructions following it (hp1, hp2) and rewrites or removes
    instructions of the assembler list (asml) when a pattern matches.

    p is a var parameter: whenever the instruction it pointed to is removed,
    p is redirected to a following instruction.

    Result is set to True only by some of the transformations (Mov2Nop,
    MovAnd2Mov, MovMov2Mov 1/2/3); the other transformations change the list
    but leave Result at False.

    The order of the tests matters: the first if/else chain is followed by a
    second, independent chain ("Next instruction is also a MOV ?") which is
    still evaluated when the first chain did not exit. }
  function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
    var
      hp1, hp2: tai;
      TmpUsedRegs : TAllUsedRegs;
      { result of the single GetNextInstruction(p,hp1) call below; reused by
        most of the pattern tests instead of searching again }
      GetNextIntruction_p : Boolean;
    begin
      Result:=false;
      { remove mov reg1,reg1? }
      if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
        begin
          GetNextInstruction(p, hp1);
          DebugMsg('PeepHole Optimization,Mov2Nop',p);
          asml.remove(p);
          p.free;
          p:=hp1;
          Result:=true;
          exit;
        end;
      GetNextIntruction_p:=GetNextInstruction(p, hp1);
      { mov x,%reg followed by and $const,%reg }
      if GetNextIntruction_p and
        MatchInstruction(hp1,A_AND,[]) and
        (taicpu(p).oper[1]^.typ = top_reg) and
        MatchOpType(taicpu(hp1),top_const,top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
        case taicpu(p).opsize Of
          S_L:
            { a 32 bit "and" with $ffffffff changes nothing: drop it }
            if (taicpu(hp1).oper[0]^.val = $ffffffff) then
              begin
                DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
                asml.remove(hp1);
                hp1.free;
                Result:=true;
                exit;
              end;
        end
      else if GetNextIntruction_p and
        MatchInstruction(hp1,A_MOV,[]) and
        (taicpu(p).oper[1]^.typ = top_reg) and
        (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
          { we have
              mov x, %treg
              mov %treg, y
          }
          if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
             not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
            { we've got
                mov x, %treg
                mov %treg, y
              with %treg is not used after }
            case taicpu(p).oper[0]^.typ Of
              top_reg:
                begin
                  { change
                      mov %reg, %treg
                      mov %treg, y
                    to
                      mov %reg, y
                  }
                  taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
                  DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
                  asml.remove(hp1);
                  hp1.free;
                  ReleaseUsedRegs(TmpUsedRegs);
                  Result:=true;
                  Exit;
                end;
              top_ref:
                if (taicpu(hp1).oper[1]^.typ = top_reg) then
                  begin
                    { change
                         mov mem, %treg
                         mov %treg, %reg
                      to
                         mov mem, %reg
                    }
                    taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                    DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
                    asml.remove(hp1);
                    hp1.free;
                    ReleaseUsedRegs(TmpUsedRegs);
                    Result:=true;
                    Exit;
                  end;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end
      else
        { Change
           mov %reg1, %reg2
           xxx %reg2, ???
          to
           mov %reg1, %reg2
           xxx %reg1, ???
          to avoid a write/read penalty
        }
        if MatchOpType(taicpu(p),top_reg,top_reg) and
           GetNextInstruction(p,hp1) and
           (tai(hp1).typ = ait_instruction) and
           (taicpu(hp1).ops >= 1) and
           MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
          { we have
             mov %reg1, %reg2
             XXX %reg2, ???
          }
          begin
            if ((taicpu(hp1).opcode = A_OR) or
                (taicpu(hp1).opcode = A_TEST)) and
               (taicpu(hp1).oper[1]^.typ = top_reg) and
               (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
              { we have
                 mov %reg1, %reg2
                 test/or %reg2, %reg2
              }
              begin
                CopyUsedRegs(TmpUsedRegs);
                { reg1 will be used after the first instruction,
                  so update the allocation info }
                AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                if GetNextInstruction(hp1, hp2) and
                   (hp2.typ = ait_instruction) and
                   taicpu(hp2).is_jmp and
                   not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
                  { change
                     mov %reg1, %reg2
                     test/or %reg2, %reg2
                     jxx
                    to
                     test %reg1, %reg1
                     jxx
                  }
                  begin
                    taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
                    taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
                    asml.remove(p);
                    p.free;
                    p := hp1;
                    ReleaseUsedRegs(TmpUsedRegs);
                    { note: leaves with Result still False }
                    Exit;
                  end
                else
                  { change
                     mov %reg1, %reg2
                     test/or %reg2, %reg2
                    to
                     mov %reg1, %reg2
                     test/or %reg1, %reg1
                  }
                  begin
                    taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
                    taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
                  end;
                ReleaseUsedRegs(TmpUsedRegs);
              end
          end
        else
          { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
            x >= RetOffset) as it doesn't do anything (it writes either to a
            parameter or to the temporary storage room for the function
            result)
          }
          if GetNextIntruction_p and
            (tai(hp1).typ = ait_instruction) then
            begin
              if IsExitCode(hp1) and
                MatchOpType(taicpu(p),top_reg,top_ref) and
                (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
                not(assigned(current_procinfo.procdef.funcretsym) and
                   (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
                (taicpu(p).oper[1]^.ref^.index = NR_NO) then
                begin
                  asml.remove(p);
                  p.free;
                  { p now refers to the exit code, so the debug message and the
                    dealloc removal below act on a valid instruction }
                  p:=hp1;
                  DebugMsg('Peephole removed deadstore before leave/ret',p);
                  RemoveLastDeallocForFuncRes(p);
                  exit;
                end
              { change
                  mov reg1, mem1
                  test/cmp x, mem1
                to
                  mov reg1, mem1
                  test/cmp x, reg1
              }
              else if MatchOpType(taicpu(p),top_reg,top_ref) and
                  MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
                  (taicpu(hp1).oper[1]^.typ = top_ref) and
                   RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
                  begin
                    taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
                    DebugMsg('Peephole MovTestCmp2MovTestCmp 1',hp1);
                    AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                  end;
            end;
      { Next instruction is also a MOV ? }
      { (second, independent chain of patterns; reached whenever the chain
        above did not exit) }
      if GetNextIntruction_p and
        MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
        begin
          if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
             (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
              {  mov reg1, mem1     or     mov mem1, reg1
                 mov mem2, reg2            mov reg2, mem2}
            begin
              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
                { mov reg1, mem1     or     mov mem1, reg1
                  mov mem2, reg1            mov reg2, mem1}
                begin
                  if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                    { Removes the second statement from
                      mov reg1, mem1/reg2
                      mov mem1/reg2, reg1 }
                    begin
                      if taicpu(p).oper[0]^.typ=top_reg then
                        AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                      DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
                      asml.remove(hp1);
                      hp1.free;
                      Result:=true;
                      exit;
                    end
                  else
                    begin
                      CopyUsedRegs(TmpUsedRegs);
                      UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                      if (taicpu(p).oper[1]^.typ = top_ref) and
                        { mov reg1, mem1
                          mov mem2, reg1 }
                         (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
                         GetNextInstruction(hp1, hp2) and
                         MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
                         OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                         OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
                         not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
                        { change                   to
                          mov reg1, mem1           mov reg1, mem1
                          mov mem2, reg1           cmp reg1, mem2
                          cmp mem1, reg1
                        }
                        begin
                          asml.remove(hp2);
                          hp2.free;
                          { turn the second mov into the compare: its old
                            source reference becomes operand 1, reg1 operand 0 }
                          taicpu(hp1).opcode := A_CMP;
                          taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
                          taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
                          AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
                          DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
                        end;
                      ReleaseUsedRegs(TmpUsedRegs);
                    end;
                end
              else if (taicpu(p).oper[1]^.typ=top_ref) and
                OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                { mov reg1, mem1
                  mov mem1, reg2
                  the second mov reads reg1 instead of mem1 }
                begin
                  AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
                  taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
                  DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
                end
              else
                begin
                  CopyUsedRegs(TmpUsedRegs);
                  if GetNextInstruction(hp1, hp2) and
                    MatchOpType(taicpu(p),top_ref,top_reg) and
                    MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
                    (taicpu(hp1).oper[1]^.typ = top_ref) and
                    MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
                    MatchOpType(taicpu(hp2),top_ref,top_reg) and
                    RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^)  then
                    if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
                       not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
                      {   mov mem1, %reg1
                          mov %reg1, mem2
                          mov mem2, reg2
                       to:
                          mov mem1, reg2
                          mov reg2, mem2}
                      begin
                        AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
                        DebugMsg('PeepHole Optimization,MovMovMov2MovMov 1',p);
                        taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
                        taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
                        asml.remove(hp2);
                        hp2.free;
                      end
{$ifdef i386}
                    { this is enabled for i386 only, as the rules to create the reg sets below
                      are too complicated for x86-64, so this makes this code too error prone
                      on x86-64
                    }
                    else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
                      not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
                      not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
                      {   mov mem1, reg1         mov mem1, reg1
                          mov reg1, mem2         mov reg1, mem2
                          mov mem2, reg2         mov mem2, reg1
                       to:                    to:
                          mov mem1, reg1         mov mem1, reg1
                          mov mem1, reg2         mov reg1, mem2
                          mov reg1, mem2
                       or (if mem1 depends on reg1
                           and/or if mem2 depends on reg2)
                       to:
                          mov mem1, reg1
                          mov reg1, mem2
                          mov reg1, reg2
                      }
                      begin
                        taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                        taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
                        taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
                        taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
                        AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                        { mem1 is now read a second time by hp1, so registers
                          used to address it are marked allocated up to hp2 }
                        if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
                           (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                          AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
                        if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
                           (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                          AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
                      end
                    else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
                      begin
                        taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
                        AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                      end
                    else
                      begin
                        asml.remove(hp2);
                        hp2.free;
                      end
{$endif i386}
                      ;
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end
(*          { movl [mem1],reg1
              movl [mem1],reg2
              to
              movl [mem1],reg1
              movl reg1,reg2
             }
           else if (taicpu(p).oper[0]^.typ = top_ref) and
              (taicpu(p).oper[1]^.typ = top_reg) and
              (taicpu(hp1).oper[0]^.typ = top_ref) and
              (taicpu(hp1).oper[1]^.typ = top_reg) and
              (taicpu(p).opsize = taicpu(hp1).opsize) and
              RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
              (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
              (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
              taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
            else*)
          {   movl const1,[mem1]
              movl [mem1],reg1
              to
              movl const1,reg1
              movl reg1,[mem1]
          }
          else if MatchOpType(Taicpu(p),top_const,top_ref) and
               MatchOpType(Taicpu(hp1),top_ref,top_reg) and
               (taicpu(p).opsize = taicpu(hp1).opsize) and
               RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
               not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
            begin
              AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
              { the statement order matters: hp1's operand 0 is first set to
                reg1, and that register is then copied into p's operand 1 }
              taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
              taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
              taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
              taicpu(hp1).fileinfo := taicpu(p).fileinfo;
              DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
            end
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
        GetNextIntruction_p and
        (hp1.typ = ait_instruction) and
        GetNextInstruction(hp1, hp2) and
        MatchInstruction(hp2,A_MOV,[]) and
        OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
        (taicpu(hp2).oper[0]^.typ=top_reg) and
        (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
        (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
         ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
          IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
        ) then
        { change   movsX/movzX    reg/ref, reg2
                   add/sub/or/... reg3/$const, reg2
                   mov            reg2 reg/ref
          to       add/sub/or/... reg3/$const, reg/ref      }
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
          If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
            begin
              { by example:
                  movswl  %si,%eax        movswl  %si,%eax      p
                  decl    %eax            addl    %edx,%eax     hp1
                  movw    %ax,%si         movw    %ax,%si       hp2
                ->
                  movswl  %si,%eax        movswl  %si,%eax      p
                  decw    %eax            addw    %edx,%eax     hp1
                  movw    %ax,%si         movw    %ax,%si       hp2
              }
              DebugMsg('Peephole Optimization MovOpMov2Op ('+
                    std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
                    std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
                    std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
              taicpu(hp1).changeopsize(taicpu(hp2).opsize);
              {
                ->
                  movswl  %si,%eax        movswl  %si,%eax      p
                  decw    %si             addw    %dx,%si       hp1
                  movw    %ax,%si         movw    %ax,%si       hp2
              }
              case taicpu(hp1).ops of
                1:
                  begin
                    taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
                    if taicpu(hp1).oper[0]^.typ=top_reg then
                      setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                  end;
                2:
                  begin
                    taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                    { the source register of a shift is left at its size }
                    if (taicpu(hp1).oper[0]^.typ=top_reg) and
                      (taicpu(hp1).opcode<>A_SHL) and
                      (taicpu(hp1).opcode<>A_SHR) and
                      (taicpu(hp1).opcode<>A_SAR) then
                      setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                  end;
                else
                  internalerror(2008042701);
              end;
              {
                ->
                  decw    %si             addw    %dx,%si       p
              }
              asml.remove(p);
              asml.remove(hp2);
              p.Free;
              hp2.Free;
              p := hp1;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end
      else if GetNextIntruction_p and
        MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
        GetNextInstruction(hp1, hp2) and
        MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
        MatchOperand(Taicpu(p).oper[0]^,0) and
        (Taicpu(p).oper[1]^.typ = top_reg) and
        MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
        MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
        { mov reg1,0
          bts reg1,operand1             -->      mov reg1,operand2
          or  reg1,operand2                      bts reg1,operand1}
        begin
          { the "or" becomes the mov, the bts/btr is moved behind it and the
            original mov is dropped }
          Taicpu(hp2).opcode:=A_MOV;
          asml.remove(hp1);
          insertllitem(hp2,hp2.next,hp1);
          asml.remove(p);
          p.free;
          p:=hp1;
        end
      else if GetNextIntruction_p and
        MatchInstruction(hp1,A_LEA,[S_L]) and
        MatchOpType(Taicpu(p),top_ref,top_reg) and
        ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
          (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
         ) or
         (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
          (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
         )
        ) then
        { mov reg1,ref
          lea reg2,[reg1,reg2]
          to
          add reg2,ref}
        begin
          CopyUsedRegs(TmpUsedRegs);
          { reg1 may not be used afterwards }
          if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
            begin
              Taicpu(hp1).opcode:=A_ADD;
              Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
              DebugMsg('Peephole MovLea2Add done',hp1);
              asml.remove(p);
              p.free;
              p:=hp1;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  { Pass 1 peephole optimization for a pair of identical "movXX" style
    instructions (same opcode and operand size) that copy a value forth and
    back:
        movXX op1, op2
        movXX op2, op1
    The second instruction is always removed. When op2 is a register that is
    not used afterwards, the first instruction is removed as well.

    p is a var parameter: when the first instruction is removed, p is set to
    the instruction following the pair. Returns True when the list was
    changed. }
  function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;
      if GetNextInstruction(p,hp1) and
        MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) then
        begin
          if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
             (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
              {  movXX reg1, mem1     or     movXX mem1, reg1
                 movXX mem2, reg2            movXX reg2, mem2}
            begin
              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
                { movXX reg1, mem1     or     movXX mem1, reg1
                  movXX mem2, reg1            movXX reg2, mem1}
                begin
                  if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                    begin
                      { Removes the second statement from
                        movXX reg1, mem1/reg2
                        movXX mem1/reg2, reg1
                      }
                      if taicpu(p).oper[0]^.typ=top_reg then
                        AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                      { Removes the second statement from
                        movXX mem1/reg1, reg2
                        movXX reg2, mem1/reg1
                      }
                      if (taicpu(p).oper[1]^.typ=top_reg) and
                        not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
                        begin
                          { fix: emit the debug message while p is still a
                            valid list entry; it used to be issued after p
                            had been removed and freed }
                          DebugMsg('PeepHole Optimization,MovXXMovXX2Nop 1',p);
                          asml.remove(p);
                          p.free;
                          GetNextInstruction(hp1,p);
                        end
                      else
                        DebugMsg('PeepHole Optimization,MovXXMovXX2MoVXX 1',p);
                      asml.remove(hp1);
                      hp1.free;
                      Result:=true;
                      exit;
                    end
                end;
            end;
        end;
    end;
  { Pass 2 peephole optimization for a MOV instruction (p).

    Two patterns are handled:
      1. mov reg1,reg2 followed by mov/movzx/movsx (reg2,..),reg2:
         the address is rewritten to use reg1 and the first mov is removed
         (Result becomes True).
      2. mov (ref),reg followed by a foldable arithmetic instruction or a
         lea on reg and a mov reg,(ref) back to the same reference:
         the operation is performed directly on (ref) and both movs are
         removed (Result is left at False).

    p is a var parameter and is set to the surviving instruction when the
    original one is removed. }
  function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
    var
      SavedRegs : TAllUsedRegs;
      nxt,store : tai;
      srcreg,dstreg : tregister;
      matched : boolean;
    begin
      Result:=false;

      { --- pattern 1 ---------------------------------------------------- }
      if MatchOpType(taicpu(p),top_reg,top_reg) and
        GetNextInstruction(p, nxt) and
        MatchInstruction(nxt,A_MOV,A_MOVZX,A_MOVSX,[]) and
        MatchOpType(taicpu(nxt),top_ref,top_reg) and
        ((taicpu(nxt).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
         or
         (taicpu(nxt).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
        ) and
        (getsupreg(taicpu(nxt).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
        {  mov reg1, reg2
           mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2}
        begin
          srcreg := taicpu(p).oper[0]^.reg;
          dstreg := taicpu(p).oper[1]^.reg;
          if (taicpu(nxt).oper[0]^.ref^.base = dstreg) then
            taicpu(nxt).oper[0]^.ref^.base := srcreg;
          if (taicpu(nxt).oper[0]^.ref^.index = dstreg) then
            taicpu(nxt).oper[0]^.ref^.index := srcreg;
          asml.remove(p);
          p.free;
          p := nxt;
          Result:=true;
          exit;
        end;

      { --- pattern 2 ---------------------------------------------------- }
      if (taicpu(p).oper[0]^.typ <> top_ref) or
        not(GetNextInstruction(p,nxt)) or
        (nxt.typ <> ait_instruction) then
        exit;
      dstreg := taicpu(p).oper[1]^.reg;

      { while the GetNextInstruction(nxt,store) call could be factored out,
        doing it separately in both alternatives allows to do the cheap
        checks with low probability earlier }
      matched :=
        IsFoldableArithOp(taicpu(nxt),dstreg) and
        GetNextInstruction(nxt,store) and
        MatchInstruction(store,A_MOV,[]);
      if not matched then
        matched :=
          (taicpu(nxt).opcode=A_LEA) and
          GetNextInstruction(nxt,store) and
          MatchInstruction(store,A_MOV,[]) and
          ((MatchReference(taicpu(nxt).oper[0]^.ref^,dstreg,NR_INVALID) and
            (taicpu(nxt).oper[0]^.ref^.index<>dstreg)
           ) or
           (MatchReference(taicpu(nxt).oper[0]^.ref^,NR_INVALID,dstreg) and
            (taicpu(nxt).oper[0]^.ref^.base<>dstreg)
           ) or
           (MatchReferenceWithOffset(taicpu(nxt).oper[0]^.ref^,dstreg,NR_NO)) or
           (MatchReferenceWithOffset(taicpu(nxt).oper[0]^.ref^,NR_NO,dstreg))
          ) and
          (MatchOperand(taicpu(p).oper[1]^,taicpu(store).oper[0]^) or
           not(RegUsedAfterInstruction(dstreg,nxt,UsedRegs)));
      if not matched then
        exit;

      { the value written back must be the result of the operation and the
        write must go to memory }
      if not(MatchOperand(taicpu(nxt).oper[taicpu(nxt).ops-1]^,taicpu(store).oper[0]^)) or
        (taicpu(store).oper[1]^.typ <> top_ref) then
        exit;

      CopyUsedRegs(SavedRegs);
      UpdateUsedRegs(SavedRegs,tai(nxt.next));
      if RefsEqual(taicpu(store).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
        not(RegUsedAfterInstruction(taicpu(store).oper[0]^.reg,store, SavedRegs)) then
        { change   mov            (ref), reg
                   add/sub/or/... reg2/$const, reg
                   mov            reg, (ref)
                   # release reg
          to       add/sub/or/... reg2/$const, (ref)    }
        begin
          case taicpu(nxt).opcode of
            A_LEA :
              begin
                { the lea becomes an add of its "other" component to (ref) }
                taicpu(nxt).opcode:=A_ADD;
                if (taicpu(nxt).oper[0]^.ref^.index<>dstreg) and
                  (taicpu(nxt).oper[0]^.ref^.index<>NR_NO) then
                  taicpu(nxt).loadreg(0,taicpu(nxt).oper[0]^.ref^.index)
                else if (taicpu(nxt).oper[0]^.ref^.base<>dstreg) and
                  (taicpu(nxt).oper[0]^.ref^.base<>NR_NO) then
                  taicpu(nxt).loadreg(0,taicpu(nxt).oper[0]^.ref^.base)
                else
                  taicpu(nxt).loadconst(0,taicpu(nxt).oper[0]^.ref^.offset);
                taicpu(nxt).loadRef(1,taicpu(p).oper[0]^.ref^);
                DebugMsg('Peephole FoldLea done',nxt);
              end;
            A_INC,A_DEC,A_NOT,A_NEG :
              taicpu(nxt).loadRef(0,taicpu(p).oper[0]^.ref^);
            else
              taicpu(nxt).loadRef(1,taicpu(p).oper[0]^.ref^);
          end;
          asml.remove(p);
          asml.remove(store);
          p.free;
          store.free;
          p := nxt
        end;
      ReleaseUsedRegs(SavedRegs);
    end;
  { Pass 2 peephole optimization for IMUL (p).

    Changes
        mov  reg1,reg2
        imul y,reg2
    into
        imul y,reg1,reg2
    where y is a constant or a full address reference, provided reg2 is not
    reported as used after the imul.

    The preceding mov must write exactly the register the imul works on.
    An earlier variant also accepted a 32 bit mov in front of a 64 bit imul
    of the same super register; that produced a 64 bit imul with 32 bit
    register operands and ignored that the 32 bit mov zero extends its
    destination while the upper half of reg1 is arbitrary, so it is no
    longer matched.

    Returns True when the mov was folded into the imul. }
  function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      Result:=false;
      if (taicpu(p).ops >= 2) and
         ((taicpu(p).oper[0]^.typ = top_const) or
          ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
         (taicpu(p).oper[1]^.typ = top_reg) and
         { either the two operand form or a three operand form whose source
           and destination registers coincide }
         ((taicpu(p).ops = 2) or
          ((taicpu(p).oper[2]^.typ = top_reg) and
           (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
         GetLastInstruction(p,hp1) and
         MatchInstruction(hp1,A_MOV,[]) and
         MatchOpType(taicpu(hp1),top_reg,top_reg) and
         (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
            { change
                mov reg1,reg2
                imul y,reg2 to imul y,reg1,reg2 }
            begin
              taicpu(p).ops := 3;
              taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
              taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
              DebugMsg('Peephole MovImul2Imul done',p);
              asml.remove(hp1);
              hp1.free;
              result:=true;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  { Pass 2 peephole optimization for JMP (p).

    change
           jmp .L1
           ...
       .L1:
           ret
    into
           ret

    The jump is rewritten in place (p keeps pointing at the same list
    entry); the ret's optional constant operand is copied. Returns True
    when the jump was turned into a ret. }
  function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
    var
      target : tai;
    begin
      result:=false;

      { only plain jumps to a symbol, without base or index register }
      if (taicpu(p).oper[0]^.typ<>top_ref) or
        (taicpu(p).oper[0]^.ref^.refaddr<>addr_full) or
        (taicpu(p).oper[0]^.ref^.base<>NR_NO) or
        (taicpu(p).oper[0]^.ref^.index<>NR_NO) then
        exit;

      target:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));

      { unconditional jump whose target, after skipping labels, is a ret }
      if (taicpu(p).condition<>C_None) or
        not(assigned(target)) or
        not(SkipLabels(target,target)) or
        not(MatchInstruction(target,A_RET,[S_NO])) then
        exit;

      { the label loses one reference before the operand is overwritten }
      tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
      taicpu(p).opcode:=A_RET;
      taicpu(p).is_jmp:=false;
      taicpu(p).ops:=taicpu(target).ops;
      if taicpu(target).ops=0 then
        taicpu(p).clearop(0)
      else if taicpu(target).ops=1 then
        taicpu(p).loadconst(0,taicpu(target).oper[0]^.val)
      else
        internalerror(2016041301);
      result:=true;
    end;
  { Tells whether p is a mov that may be turned into a conditional move:
    an assigned 16, 32 or 64 bit MOV between two registers. }
  function CanBeCMOV(p : tai) : boolean;
    begin
      Result:=false;
      if assigned(p) and
        MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) then
        { we can't use cmov ref,reg because
          ref could be nil and cmov still throws an exception
          if ref=nil but the mov isn't done (FK)
           or ((taicpu(p).oper[0]^.typ = top_ref) and
            (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
        }
        Result:=MatchOpType(taicpu(p),top_reg,top_reg);
    end;
  { Pass 2 peephole optimizations for a conditional jump p.

    Two families of patterns are handled:

    1. A jump on carry / no carry over a single inc/dec is replaced by
       adc/sbb with a zero constant (preceded by cmc for the "jb" variant).

    2. If the selected cpu type has CMOV (not compiled for i8086), a
       conditional jump over a short run of reg,reg moves (see CanBeCMOV)
       is replaced by conditional moves, both for the simple

           jCC xxx / <movs> / xxx:

       and for the if/else shaped

           jCC xxx / <movs 1> / jmp yyy / xxx: / <movs 2> / yyy:

    When the jump is removed, p is set to the instruction that takes its
    place. Returns true if the code was changed. }
  function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
    var
      hp1,hp2,hp3: tai;
      carryadd_opcode : TAsmOp;
      l : Longint;
      condition : TAsmCond;
    begin
      { jb @@1                            cmc
        inc/dec operand           -->     adc/sbb operand,0
        @@1:

        ... and ...

        jnb @@1
        inc/dec operand           -->     adc/sbb operand,0
        @@1: }
      result:=false;
      if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
         GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
         (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
        begin
          carryadd_opcode:=A_NONE;
          if Taicpu(p).condition in [C_NAE,C_B] then
            begin
              if Taicpu(hp1).opcode=A_INC then
                carryadd_opcode:=A_ADC;
              if Taicpu(hp1).opcode=A_DEC then
                carryadd_opcode:=A_SBB;
              if carryadd_opcode<>A_NONE then
                begin
                  { the jump itself becomes the cmc }
                  Taicpu(p).clearop(0);
                  Taicpu(p).ops:=0;
                  Taicpu(p).is_jmp:=false;
                  Taicpu(p).opcode:=A_CMC;
                  Taicpu(p).condition:=C_NONE;
                  { inc/dec operand -> adc/sbb $0,operand }
                  Taicpu(hp1).ops:=2;
                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                  Taicpu(hp1).loadconst(0,0);
                  Taicpu(hp1).opcode:=carryadd_opcode;
                  result:=true;
                  exit;
                end;
            end;
          if Taicpu(p).condition in [C_AE,C_NB] then
            begin
              if Taicpu(hp1).opcode=A_INC then
                carryadd_opcode:=A_ADC;
              if Taicpu(hp1).opcode=A_DEC then
                carryadd_opcode:=A_SBB;
              if carryadd_opcode<>A_NONE then
                begin
                  { no cmc needed here, the jump is simply dropped }
                  asml.remove(p);
                  p.free;
                  Taicpu(hp1).ops:=2;
                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                  Taicpu(hp1).loadconst(0,0);
                  Taicpu(hp1).opcode:=carryadd_opcode;
                  p:=hp1;
                  result:=true;
                  exit;
                end;
            end;
        end;
{$ifndef i8086}
      if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
        begin
          { check for
                 jCC   xxx
                 <several movs>
              xxx:
          }
          { l counts the movs directly following the jump }
          l:=0;
          GetNextInstruction(p, hp1);
          while assigned(hp1) and
            CanBeCMOV(hp1) and
            { stop on labels }
            not(hp1.typ=ait_label) do
            begin
              inc(l);
              GetNextInstruction(hp1,hp1);
            end;
          if assigned(hp1) then
            begin
              if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                begin
                  if (l<=4) and (l>0) then
                    begin
                      { the movs are executed if the jump is NOT taken }
                      condition:=inverse_cond(taicpu(p).condition);
                      hp2:=p;
                      GetNextInstruction(p,hp1);
                      p:=hp1;
                      repeat
                        taicpu(hp1).opcode:=A_CMOVcc;
                        taicpu(hp1).condition:=condition;
                        GetNextInstruction(hp1,hp1);
                      until not(assigned(hp1)) or
                        not(CanBeCMOV(hp1));
                      { wait with removing else GetNextInstruction could
                        ignore the label if it was the only usage in the
                        jump moved away }
                      tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                      { if the label refs. reach zero, remove any alignment before the label }
                      if (hp1.typ=ait_align) and (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).getrefs=0) then
                        begin
                          asml.Remove(hp1);
                          hp1.Free;
                        end;
                      asml.remove(hp2);
                      hp2.free;
                      result:=true;
                      exit;
                    end;
                end
              else
                begin
                  { check further for
                         jCC   xxx
                         <several movs 1>
                         jmp   yyy
                 xxx:
                         <several movs 2>
                 yyy:
                  }
                  { hp2 points to jmp yyy }
                  hp2:=hp1;
                  { skip hp1 to xxx }
                  GetNextInstruction(hp1, hp1);
                  if assigned(hp2) and
                    assigned(hp1) and
                    { <several movs 1> must not be empty: the conversion loop
                      below runs at least once and would otherwise turn the
                      "jmp yyy" itself into a cmov and leave p pointing to the
                      jump which is freed afterwards }
                    (l>0) and
                    (l<=3) and
                    (hp2.typ=ait_instruction) and
                    (taicpu(hp2).is_jmp) and
                    (taicpu(hp2).condition=C_None) and
                    { real label and jump, no further references to the
                      label are allowed }
                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                    begin
                      { now count <several movs 2> }
                      l:=0;
                      { skip hp1 to <several moves 2> }
                      GetNextInstruction(hp1, hp1);
                      while assigned(hp1) and
                        CanBeCMOV(hp1) do
                        begin
                          inc(l);
                          GetNextInstruction(hp1, hp1);
                        end;
                      { hp1 points to yyy: }
                      if assigned(hp1) and
                        { <several movs 2> must not be empty either, else the
                          second conversion loop would treat whatever follows
                          xxx: (e.g. the label yyy:) as an instruction }
                        (l>0) and
                        FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                        begin
                          { <several movs 1> are executed if the jump is not taken }
                          condition:=inverse_cond(taicpu(p).condition);
                          GetNextInstruction(p,hp1);
                          hp3:=p;
                          p:=hp1;
                          repeat
                            taicpu(hp1).opcode:=A_CMOVcc;
                            taicpu(hp1).condition:=condition;
                            GetNextInstruction(hp1,hp1);
                          until not(assigned(hp1)) or
                            not(CanBeCMOV(hp1));
                          { hp2 is still at jmp yyy }
                          GetNextInstruction(hp2,hp1);
                          { hp1 is now at xxx: }
                          { <several movs 2> are executed if the jump is taken,
                            so use the original condition again }
                          condition:=inverse_cond(condition);
                          GetNextInstruction(hp1,hp1);
                          { hp1 is now at <several movs 2> }
                          repeat
                            taicpu(hp1).opcode:=A_CMOVcc;
                            taicpu(hp1).condition:=condition;
                            GetNextInstruction(hp1,hp1);
                          until not(assigned(hp1)) or
                            not(CanBeCMOV(hp1));
                          {
                          asml.remove(hp1.next)
                          hp1.next.free;
                          asml.remove(hp1);
                          hp1.free;
                          }
                          { remove jCC }
                          tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                          asml.remove(hp3);
                          hp3.free;
                          { remove jmp }
                          tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                          asml.remove(hp2);
                          hp2.free;
                          result:=true;
                          exit;
                        end;
                    end;
                end;
            end;
        end;
{$endif i8086}
    end;
  { Pass 1 peephole optimizations for movsx/movzx instructions.

    Handled patterns:
     - movsX/movzX + foldable arithmetic operation + mov back to the source
       is folded into a single arithmetic operation on the source operand;
       p is then set to that arithmetic instruction
     - a superfluous "and" with the full mask directly after a movzx is
       removed
     - several movzx forms are replaced by and/mov+and sequences

    NOTE: Result is initialised to false and never set to true in this
    routine, also not if instructions were changed or removed. }
  function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
    var
      hp1,hp2: tai;
    begin
      result:=false;
      if (taicpu(p).oper[1]^.typ = top_reg) and
         GetNextInstruction(p,hp1) and
         (hp1.typ = ait_instruction) and
         IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
         GetNextInstruction(hp1,hp2) and
         MatchInstruction(hp2,A_MOV,[]) and
         (taicpu(hp2).oper[0]^.typ = top_reg) and
         OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
{$ifdef i386}
         { not all registers have byte size sub registers on i386 }
         ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
{$endif i386}
         (((taicpu(hp1).ops=2) and
           (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
          ((taicpu(hp1).ops=1) and
           (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
         not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
        begin
          { change   movsX/movzX    reg/ref, reg2
                     add/sub/or/... reg3/$const, reg2
                     mov            reg2 reg/ref
            to       add/sub/or/... reg3/$const, reg/ref      }

          { by example:
              movswl  %si,%eax        movswl  %si,%eax      p
              decl    %eax            addl    %edx,%eax     hp1
              movw    %ax,%si         movw    %ax,%si       hp2
            ->
              movswl  %si,%eax        movswl  %si,%eax      p
              decw    %eax            addw    %edx,%eax     hp1
              movw    %ax,%si         movw    %ax,%si       hp2
          }
          taicpu(hp1).changeopsize(taicpu(hp2).opsize);
          {
            ->
              movswl  %si,%eax        movswl  %si,%eax      p
              decw    %si             addw    %dx,%si       hp1
              movw    %ax,%si         movw    %ax,%si       hp2
          }
          case taicpu(hp1).ops of
            1:
              taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
            2:
              begin
                taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
                { a register source has to be shrunk to the new operand size }
                if (taicpu(hp1).oper[0]^.typ = top_reg) then
                  setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
              end;
            else
              internalerror(2008042701);
          end;
          {
            ->
              decw    %si             addw    %dx,%si       p
          }
          DebugMsg('PeepHole Optimization,var3',p);
          asml.remove(p);
          asml.remove(hp2);
          p.free;
          hp2.free;
          p:=hp1;
        end
      { removes superfluous And's after movzx's }
      else if taicpu(p).opcode=A_MOVZX then
        begin
          if (taicpu(p).oper[1]^.typ = top_reg) and
            GetNextInstruction(p, hp1) and
            (tai(hp1).typ = ait_instruction) and
            (taicpu(hp1).opcode = A_AND) and
            (taicpu(hp1).oper[0]^.typ = top_const) and
            (taicpu(hp1).oper[1]^.typ = top_reg) and
            (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              { the "and" is redundant if its mask equals the mask implied by
                the zero extension }
              case taicpu(p).opsize Of
                S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
                  if (taicpu(hp1).oper[0]^.val = $ff) then
                    begin
                      DebugMsg('PeepHole Optimization,var4',p);
                      asml.remove(hp1);
                      hp1.free;
                    end;
                S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
                  if (taicpu(hp1).oper[0]^.val = $ffff) then
                    begin
                      DebugMsg('PeepHole Optimization,var5',p);
                      asml.remove(hp1);
                      hp1.free;
                    end;
{$ifdef x86_64}
                S_LQ:
                  if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                    begin
                      if (cs_asm_source in current_settings.globalswitches) then
                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var6')),p);
                      asml.remove(hp1);
                      hp1.Free;
                    end;
{$endif x86_64}
              end;
            end;
          { changes some movzx constructs to faster synonims (all examples
            are given with eax/ax, but are also valid for other registers)}
          if (taicpu(p).oper[1]^.typ = top_reg) then
            if (taicpu(p).oper[0]^.typ = top_reg) then
              begin
                { All rewrites below access the source through a wider sub
                  register of the same super register. This is only
                  equivalent if the source is the low part of it: for a high
                  byte register (ah, bh, ch, dh), the widened register would
                  deliver al etc. instead, so leave such a movzx alone }
                if getsubreg(taicpu(p).oper[0]^.reg)<>R_SUBH then
                  case taicpu(p).opsize of
                    S_BW:
                      begin
                        if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                           not(cs_opt_size in current_settings.optimizerswitches) then
                          {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
                          begin
                            taicpu(p).opcode := A_AND;
                            taicpu(p).changeopsize(S_W);
                            taicpu(p).loadConst(0,$ff);
                            DebugMsg('PeepHole Optimization,var7',p);
                          end
                        else if GetNextInstruction(p, hp1) and
                          (tai(hp1).typ = ait_instruction) and
                          (taicpu(hp1).opcode = A_AND) and
                          (taicpu(hp1).oper[0]^.typ = top_const) and
                          (taicpu(hp1).oper[1]^.typ = top_reg) and
                          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                          { Change "movzbw %reg1, %reg2; andw $const, %reg2"
                            to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
                          begin
                            DebugMsg('PeepHole Optimization,var8',p);
                            taicpu(p).opcode := A_MOV;
                            taicpu(p).changeopsize(S_W);
                            setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                          end;
                      end;
                    S_BL:
                      begin
                        if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                           not(cs_opt_size in current_settings.optimizerswitches) then
                          { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
                          begin
                            DebugMsg('PeepHole Optimization,var9',p);
                            taicpu(p).opcode := A_AND;
                            taicpu(p).changeopsize(S_L);
                            taicpu(p).loadConst(0,$ff);
                          end
                        else if GetNextInstruction(p, hp1) and
                          (tai(hp1).typ = ait_instruction) and
                          (taicpu(hp1).opcode = A_AND) and
                          (taicpu(hp1).oper[0]^.typ = top_const) and
                          (taicpu(hp1).oper[1]^.typ = top_reg) and
                          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                          { Change "movzbl %reg1, %reg2; andl $const, %reg2"
                            to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
                          begin
                            DebugMsg('PeepHole Optimization,var10',p);
                            taicpu(p).opcode := A_MOV;
                            taicpu(p).changeopsize(S_L);
                            { do not use R_SUBWHOLE
                              as movl %rdx,%eax
                              is invalid in assembler PM }
                            setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                          end
                      end;
{$ifndef i8086}
                    S_WL:
                      begin
                        if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                           not(cs_opt_size in current_settings.optimizerswitches) then
                          { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
                          begin
                            DebugMsg('PeepHole Optimization,var11',p);
                            taicpu(p).opcode := A_AND;
                            taicpu(p).changeopsize(S_L);
                            taicpu(p).loadConst(0,$ffff);
                          end
                        else if GetNextInstruction(p, hp1) and
                          (tai(hp1).typ = ait_instruction) and
                          (taicpu(hp1).opcode = A_AND) and
                          (taicpu(hp1).oper[0]^.typ = top_const) and
                          (taicpu(hp1).oper[1]^.typ = top_reg) and
                          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                          { Change "movzwl %reg1, %reg2; andl $const, %reg2"
                            to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
                          begin
                            DebugMsg('PeepHole Optimization,var12',p);
                            taicpu(p).opcode := A_MOV;
                            taicpu(p).changeopsize(S_L);
                            { do not use R_SUBWHOLE
                              as movl %rdx,%eax
                              is invalid in assembler PM }
                            setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                          end;
                      end;
{$endif i8086}
                  end;
              end
            else if (taicpu(p).oper[0]^.typ = top_ref) then
              begin
                { Change "movzX ref, %reg; and $const, %reg"
                  to "mov ref, %reg; and $(const and mask), %reg", the mask
                  depending on the size of the movzx source }
                if GetNextInstruction(p, hp1) and
                  (tai(hp1).typ = ait_instruction) and
                  (taicpu(hp1).opcode = A_AND) and
                  MatchOpType(taicpu(hp1),top_const,top_reg) and
                  (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                  begin
                    taicpu(p).opcode := A_MOV;
                    case taicpu(p).opsize Of
                      S_BL:
                        begin
                          DebugMsg('PeepHole Optimization,var13',p);
                          taicpu(p).changeopsize(S_L);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                        end;
                      S_WL:
                        begin
                          DebugMsg('PeepHole Optimization,var14',p);
                          taicpu(p).changeopsize(S_L);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                        end;
                      S_BW:
                        begin
                          DebugMsg('PeepHole Optimization,var15',p);
                          taicpu(p).changeopsize(S_W);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                        end;
{$ifdef x86_64}
                      S_BQ:
                        begin
                          DebugMsg('PeepHole Optimization,var16',p);
                          taicpu(p).changeopsize(S_Q);
                          taicpu(hp1).loadConst(
                            0, taicpu(hp1).oper[0]^.val and $ff);
                        end;
                      S_WQ:
                        begin
                          DebugMsg('PeepHole Optimization,var17',p);
                          taicpu(p).changeopsize(S_Q);
                          taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
                        end;
                      S_LQ:
                        begin
                          DebugMsg('PeepHole Optimization,var18',p);
                          taicpu(p).changeopsize(S_Q);
                          taicpu(hp1).loadConst(
                            0, taicpu(hp1).oper[0]^.val and $ffffffff);
                        end;
{$endif x86_64}
                      else
                        Internalerror(2017050704)
                    end;
                  end;
              end;
        end;
    end;
  { Pass 1 peephole optimizations for an "and" instruction p.

    Handled patterns (hp1 being the instruction following p):
     - and const1,reg; and const2,reg      -> and (const1 and const2),reg
     - and const,reg;  movzx subreg,reg    -> movzx removed if const already
                                              clears all bits above subreg
     - and const,reg;  movsx subreg,reg    -> movsx removed if const already
                                              clears the sign bit of subreg
                                              and everything above it
     - and x,reg;      jcc                 -> test x,reg if reg is not in
                                              UsedRegs

    Returns true only for the first pattern, where p is freed and set to the
    remaining "and"; in all other cases Result stays false. }
  function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;

      if not(GetNextInstruction(p, hp1)) then
        exit;

      if MatchOpType(taicpu(p),top_const,top_reg) and
        MatchInstruction(hp1,A_AND,[]) and
        MatchOpType(taicpu(hp1),top_const,top_reg) and
        { Both instructions must operate on exactly the same register.
          Sharing only the super register is not enough: al and ah are
          independent of each other, and a narrower first "and" (e.g. on ax)
          leaves the upper bits untouched, which the merged mask applied to
          the wider register (e.g. eax) would wrongly clear }
        (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
        (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
        { change
            and const1, reg
            and const2, reg
          to
            and (const1 and const2), reg
        }
        begin
          taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
          DebugMsg('Peephole AndAnd2And done',hp1);
          asml.remove(p);
          p.Free;
          p:=hp1;
          Result:=true;
          exit;
        end
      else if MatchOpType(taicpu(p),top_const,top_reg) and
        MatchInstruction(hp1,A_MOVZX,[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        { The movzx must extend a part of its own destination. Source and
          destination necessarily have different sub register types, so the
          super registers have to be compared, as it is done for movsx below }
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { the mask only says something about the low part of the register,
          not about a high byte register (ah, bh, ch, dh) }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
         (((taicpu(p).opsize=S_W) and
           (taicpu(hp1).opsize=S_BW)) or
          ((taicpu(p).opsize=S_L) and
           (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
           or
          ((taicpu(p).opsize=S_Q) and
           (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
          ) then
        begin
          { the zero extension is a no-op if the "and" already cleared
            everything above the extended part }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
               ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('Peephole AndMovzToAnd done',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if MatchOpType(taicpu(p),top_const,top_reg) and
        MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { see above: the mask tells nothing about a high byte register }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
         (((taicpu(p).opsize=S_W) and
           (taicpu(hp1).opsize=S_BW)) or
          ((taicpu(p).opsize=S_L) and
           (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
           or
          ((taicpu(p).opsize=S_Q) and
           (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
          ) then
        begin
          { the sign extension is a no-op if the "and" already cleared the
            sign bit of the extended part and everything above it }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
               ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
        (hp1.typ = ait_instruction) and
        (taicpu(hp1).is_jmp) and
        (taicpu(hp1).opcode<>A_JMP) and
        not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
        { change
            and x, reg
            jxx
          to
            test x, reg
            jxx
          if reg is deallocated before the
          jump, but only if it's a conditional jump (PFV)
        }
        taicpu(p).opcode := A_TEST;
    end;
  { Post peephole optimization for a mov instruction p:
    changes "mov $0, %reg" into "xor %reg, %reg", provided that the default
    flags register is not in UsedRegs. }
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    var
      movinstr : taicpu;
    begin
      movinstr:=taicpu(p);
      { source has to be the constant 0 ... }
      if not MatchOperand(movinstr.oper[0]^,0) then
        exit;
      { ... the destination a register ... }
      if movinstr.oper[1]^.typ<>Top_Reg then
        exit;
      { ... and the flags must not be in use }
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;
      movinstr.opcode:=A_XOR;
      movinstr.loadReg(0,movinstr.oper[1]^.reg);
    end;
  2299. end.