aoptx86.pas 50 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  { x86 peephole optimizer class derived from TAsmOptimizer; it declares the
    helper and OptPass* routines implemented in this unit. }
  28. TX86AsmOptimizer = class(TAsmOptimizer)
  { true if hp is a move-type instruction or a pop that writes reg from a
    source that does not itself involve reg }
  29. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  30. protected
  { per-instruction peephole routines.
    NOTE(review): PostPeepholeOptMov, OptPass1AND and OptPass2MOV are
    implemented outside the part of the file reviewed here. }
  31. procedure PostPeepholeOptMov(const p : tai);
  32. function OptPass1AND(var p : tai) : boolean;
  33. function OptPass1VMOVAP(var p : tai) : boolean;
  34. function OptPass1VOP(const p : tai) : boolean;
  35. function OptPass1MOV(var p : tai) : boolean;
  36. function OptPass2MOV(var p : tai) : boolean;
  { inserts s as an assembler comment before p when DEBUG_AOPTCPU is defined,
    otherwise it has an empty body }
  37. procedure DebugMsg(const s : string; p : tai);inline;
  { allocates reg between (and including) the instructions p1 and p2 }
  38. procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
  { true if p is a ret, a leave followed by ret, or
    mov %ebp,%esp followed by pop %ebp and ret }
  39. class function IsExitCode(p : tai) : boolean;
  { true if hp1 is one of the handled arithmetic/logic instructions whose
    register destination operand is reg }
  40. class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
  { removes the closest dealloc of the integer function result register(s)
    that precedes p; which registers are handled depends on the return type }
  41. procedure RemoveLastDeallocForFuncRes(p : tai);
  42. end;
  { Instruction matchers: true if instr is an instruction having one of the
    given opcodes and, unless opsize is the empty set, an operand size that is
    a member of opsize. }
  43. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  44. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  45. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  46. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  { Operand matchers: compare an operand with a register, with a constant or
    with another operand (same type and same contents). }
  47. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  48. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  49. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  { field-by-field equality of two references }
  50. function RefsEqual(const r1, r2: treference): boolean;
  { true for a reference without offset, segment or symbols and with scale
    factor 0 or 1 whose base/index equal the given registers; passing
    NR_INVALID for base or index accepts any register in that position }
  51. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  { true if instr has exactly two operands, of types ot0 and ot1 }
  52. function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  53. implementation
  54. uses
  55. cutils,
  56. verbose,
  57. procinfo,
  58. symconst,symsym,
  59. itcpugas;
  { Returns true when instr is an instruction with opcode op whose operand
    size is a member of opsize.  An empty opsize set accepts every size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      result:=false;
      { anything that is not an instruction can never match }
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { Two-opcode variant: returns true when instr is an instruction whose opcode
    is op1 or op2 and whose operand size is a member of opsize (an empty
    opsize set accepts every size). }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      if instr.typ=ait_instruction then
        result:=((taicpu(instr).opcode=op1) or (taicpu(instr).opcode=op2)) and
          ((opsize=[]) or (taicpu(instr).opsize in opsize))
      else
        result:=false;
    end;
  { Three-opcode variant: returns true when instr is an instruction whose
    opcode is op1, op2 or op3 and whose operand size is a member of opsize
    (an empty opsize set accepts every size). }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    var
      opcode: TAsmOp;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      opcode:=taicpu(instr).opcode;
      if (opcode=op1) or (opcode=op2) or (opcode=op3) then
        result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { Open-array variant: returns true when instr is an instruction whose opcode
    occurs in ops and whose operand size is a member of opsize (an empty
    opsize set accepts every size).  An empty ops array never matches. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      i : longint;
    begin
      result:=false;
      { these two conditions do not depend on the opcode being tried }
      if instr.typ<>ait_instruction then
        exit;
      if (opsize<>[]) and not(taicpu(instr).opsize in opsize) then
        exit;
      for i:=low(ops) to high(ops) do
        if taicpu(instr).opcode=ops[i] then
          begin
            result:=true;
            exit;
          end;
    end;
  { Returns true when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=oper.reg=reg
      else
        result:=false;
    end;
  { Returns true when oper is a constant operand with value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ=top_const then
        result:=oper.val=a
      else
        result:=false;
    end;
  { Returns true when both operands have the same type and the same contents.
    Constants, registers and references are supported; two operands of any
    other (equal) type raise internal error 2013102801. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      result:=false;
      { operands of different kinds never match }
      if oper1.typ<>oper2.typ then
        exit;
      case oper1.typ of
        top_const:
          result:=oper1.val=oper2.val;
        top_reg:
          result:=oper1.reg=oper2.reg;
        top_ref:
          result:=RefsEqual(oper1.ref^,oper2.ref^);
        else
          internalerror(2013102801);
      end;
    end;
  { Returns true when the two references agree in offset, segment, base,
    index, scale factor, symbol, relative symbol and address type. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      result:=false;
      { displacement and segment }
      if (r1.offset<>r2.offset) or (r1.segment<>r2.segment) then
        exit;
      { register part of the address }
      if (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { symbolic part of the address }
      result:=(r1.symbol=r2.symbol) and
        (r1.refaddr=r2.refaddr) and
        (r1.relsymbol=r2.relsymbol);
    end;
  { Returns true when ref has no offset, no segment, no symbol and no relative
    symbol, a scale factor of 0 or 1, and its base and index registers equal
    the given ones.  Passing NR_INVALID for base or index accepts any
    register in that position. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      if (ref.offset<>0) or
         not(ref.scalefactor in [0,1]) or
         (ref.segment<>NR_NO) or
         (ref.symbol<>nil) or
         (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      Result:=(index=NR_INVALID) or (ref.index=index);
    end;
  { Returns true when instr has exactly two operands, the first of type ot0
    and the second of type ot1.  instr is cast to taicpu without looking at
    its typ field, so the caller has to pass an instruction. }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
    begin
      if taicpu(instr).ops<>2 then
        Result:=false
      else
        Result:=(taicpu(instr).oper[0]^.typ=ot0) and
          (taicpu(instr).oper[1]^.typ=ot1);
    end;
  {$ifndef DEBUG_AOPTCPU}
  { Regular build: DebugMsg has an empty body. }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$else not DEBUG_AOPTCPU}
  { Debug build: inserts s as an assembler comment in front of p. }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
  {$endif not DEBUG_AOPTCPU}
  { Allocates register reg between (and including) instructions p1 and p2.
    The type of p1 and p2 must not be in SkipInstr.

    Note that this routine is called both from the peephole optimizer (where
    optinfo is not yet initialised) and from the cse (where it is).

    reg             - register to keep allocated; widened to its full super
                      register for integer and MM registers.  Nothing is done
                      for NR_ESP and for the current frame pointer.
    p1, p2          - first and last instruction of the range; nothing is
                      done when p1 is nil.
    initialusedregs - registers in use at p1; reg is added to it when an
                      alloc has to be inserted in front of p1.

    All alloc/dealloc entries for reg found inside the range are removed.  If
    the first removed entry was an alloc, an alloc is re-inserted before the
    start; if the last removed entry was a dealloc, a dealloc is re-inserted
    at the end of the range. }
  procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
    var
      hp, start: tai;
      removedsomething,
      firstRemovedWasAlloc,
      lastRemovedWasDealloc: boolean;
    begin
{$ifdef EXTDEBUG}
{      if assigned(p1.optinfo) and
         (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
        internalerror(2004101010); }
{$endif EXTDEBUG}
      start := p1;
      if (reg = NR_ESP) or
         (reg = current_procinfo.framepointer) or
         not(assigned(p1)) then
        { this happens with registers which are loaded implicitly, outside the }
        { current block (e.g. esi with self) }
        exit;
      { make sure we allocate it for this instruction }
      getnextinstruction(p2,p2);
      lastRemovedWasDealloc := false;
      removedSomething := false;
      firstRemovedWasAlloc := false;
      { do it the safe way: always allocate the full super register,
        as we do no register re-allocation in the peephole optimizer,
        this does not hurt
      }
      case getregtype(reg) of
        R_MMREGISTER:
          reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
        R_INTREGISTER:
          reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
      end;
{$ifdef allocregdebug}
      { debug markers around the allocated range; they are emitted after reg
        has been widened so that the register really being allocated is named.
        p2 is nil when no instruction follows the range, hence the guard. }
      hp := tai_comment.Create(strpnew('allocating '+std_regname(reg)+
        ' from here...'));
      insertllitem(p1.previous,p1,hp);
      if assigned(p2) then
        begin
          hp := tai_comment.Create(strpnew('allocated '+std_regname(reg)+
            ' till here...'));
          insertllitem(p2,p2.next,hp);
        end;
{$endif allocregdebug}
      if not(RegInUsedRegs(reg,initialusedregs)) then
        begin
          hp := tai_regalloc.alloc(reg,nil);
          insertllItem(p1.previous,p1,hp);
          IncludeRegInUsedRegs(reg,initialusedregs);
        end;
      while assigned(p1) and
            (p1 <> p2) do
        begin
          if assigned(p1.optinfo) then
            internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
          p1 := tai(p1.next);
          repeat
            { skip everything in SkipInstr except register allocation info }
            while assigned(p1) and
                  (p1.typ in (SkipInstr-[ait_regalloc])) Do
              p1 := tai(p1.next);
            { remove all allocation/deallocation info about the register in between }
            if assigned(p1) and
               (p1.typ = ait_regalloc) then
              begin
                { same super register, different sub register? }
                if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
                  begin
                    if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
                      internalerror(2016101501);
                    tai_regalloc(p1).reg:=reg;
                  end;

                if tai_regalloc(p1).reg=reg then
                  begin
                    { remember the kind of the first and of the last removed
                      entry, they decide what gets re-inserted below }
                    if not removedSomething then
                      begin
                        firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
                        removedSomething := true;
                      end;
                    lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
                    hp := tai(p1.Next);
                    asml.Remove(p1);
                    p1.free;
                    p1 := hp;
                  end
                else
                  p1 := tai(p1.next);
              end;
          until not(assigned(p1)) or
                not(p1.typ in SkipInstr);
        end;
      if assigned(p1) then
        begin
          if firstRemovedWasAlloc then
            begin
              hp := tai_regalloc.Alloc(reg,nil);
              insertLLItem(start.previous,start,hp);
            end;
          if lastRemovedWasDealloc then
            begin
              hp := tai_regalloc.DeAlloc(reg,nil);
              insertLLItem(p1.previous,p1,hp);
            end;
        end;
    end;
  { Returns true when instruction hp overwrites reg with a value that does
    not depend on the previous contents of reg.

    reg - register to check; registers are compared by super register
    hp  - instruction to inspect; nil or a non-instruction yields false

    Recognised are two-operand mov/movzx/movsx/lea and the (v)movs*/(v)movap*/
    (v)movq moves whose destination is reg and whose source is a constant,
    another register or a reference not using reg, plus pop into reg. }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      Result := false;
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        exit;
      p := taicpu(hp);
      case p.opcode of
        A_MOV,A_MOVZX,A_MOVSX,A_LEA,
        A_VMOVSS,A_VMOVSD,A_VMOVAPD,A_VMOVAPS,A_VMOVQ,
        A_MOVSS,A_MOVSD,A_MOVQ,A_MOVAPD,A_MOVAPS:
          Result :=
            { oper[1] is only the destination of the two-operand forms; in a
              three-operand form the destination is oper[2] and oper[1] is a
              source, so such an instruction must not be reported as a load }
            (p.ops = 2) and
            (p.oper[1]^.typ = top_reg) and
            (SuperRegistersEqual(p.oper[1]^.reg,reg)) and
            ((p.oper[0]^.typ = top_const) or
             ((p.oper[0]^.typ = top_reg) and
              not(SuperRegistersEqual(p.oper[0]^.reg,reg))) or
             ((p.oper[0]^.typ = top_ref) and
              not RegInRef(reg,p.oper[0]^.ref^)));
        A_POP:
          { pop can also target memory; the reg field is only meaningful for
            a register operand }
          Result :=
            (p.oper[0]^.typ = top_reg) and
            (SuperRegistersEqual(p.oper[0]^.reg,reg));
      end;
    end;
  { Returns true when p starts a procedure exit sequence, i.e. p is
      ret
    or
      leave
      ret
    or
      mov %ebp,%esp
      pop %ebp
      ret }
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      next1,next2 : tai;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_RET:
          result:=true;
        A_LEAVE:
          result:=GetNextInstruction(p,next1) and
            (next1.typ=ait_instruction) and
            (taicpu(next1).opcode=A_RET);
        A_MOV:
          result:=(taicpu(p).oper[0]^.typ=top_reg) and
            (taicpu(p).oper[0]^.reg=NR_EBP) and
            (taicpu(p).oper[1]^.typ=top_reg) and
            (taicpu(p).oper[1]^.reg=NR_ESP) and
            { ... followed by pop %ebp ... }
            GetNextInstruction(p,next1) and
            (next1.typ=ait_instruction) and
            (taicpu(next1).opcode=A_POP) and
            (taicpu(next1).oper[0]^.typ=top_reg) and
            (taicpu(next1).oper[0]^.reg=NR_EBP) and
            { ... followed by ret }
            GetNextInstruction(next1,next2) and
            (next2.typ=ait_instruction) and
            (taicpu(next2).opcode=A_RET);
      end;
    end;
  { Returns true when hp1 is an arithmetic/logic instruction operating on
    reg:
      - add/sub/or/xor/and/shl/shr/sar with reg as register destination and a
        constant or a register other than reg as source
      - inc/dec/neg/not with reg as its register operand
    Every other opcode yields false. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    begin
      result:=false;
      case hp1.opcode of
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          if (hp1.oper[1]^.typ=top_reg) and
             (hp1.oper[1]^.reg=reg) then
            result:=(hp1.oper[0]^.typ=top_const) or
              ((hp1.oper[0]^.typ=top_reg) and
               (hp1.oper[0]^.reg<>reg));
        A_INC,A_DEC,A_NEG,A_NOT:
          if hp1.oper[0]^.typ=top_reg then
            result:=hp1.oper[0]^.reg=reg;
      end;
    end;
  { Removes the closest register deallocation of the integer function result
    register(s) in front of p.  The return type of the current procedure
    selects the registers: EAX for the listed non-ordinal types, EAX (and EDX
    as well when the size is 8) for ordinals of non-zero size; nothing is
    done for any other return type. }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { Walks backwards from p and deletes the first dealloc entry of integer
      super register supreg.  The walk stops at the start of the list or at
      the first item for which regInInstruction reports a use of the register. }
    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      var
        cur: tai;
      begin
        cur:=tai(p.previous);
        while assigned(cur) do
          begin
            if (cur.typ=ait_regalloc) and
               (tai_regalloc(cur).ratype=ra_dealloc) and
               (getregtype(tai_regalloc(cur).reg)=R_INTREGISTER) and
               (getsupreg(tai_regalloc(cur).reg)=supreg) then
              begin
                asml.remove(cur);
                cur.free;
                exit;
              end;
            if regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),cur) then
              exit;
            cur:=tai(cur.previous);
          end;
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
        orddef:
          begin
            { ordinals of size 0 have no result register to handle }
            if current_procinfo.procdef.returndef.size=0 then
              exit;
            DoRemoveLastDeallocForFuncRes(RS_EAX);
            { for int64/qword }
            if current_procinfo.procdef.returndef.size=8 then
              DoRemoveLastDeallocForFuncRes(RS_EDX);
          end;
      end;
    end;
  { Pass 1 peephole optimizations for a register-to-register vmovaps/vmovapd
    at p.

    p - instruction to optimize; replaced by its successor when p itself is
        removed

    Returns true when the self-move or one of the two move-pair patterns was
    applied.  As in the original code, the FMA pattern updates p without
    setting the result. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                MatchOpType(taicpu(hp1),top_reg,top_reg) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end;
                  { the copy made by CopyUsedRegs has to be released again,
                    as done for every CopyUsedRegs in OptPass1MOV }
                  ReleaseUsedRegs(TmpUsedRegs);
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                { we mix single and double operations here because we assume that the compiler
                  generates vmovapd only after double operations and vmovaps only after single operations }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                GetNextInstruction(hp1,hp2) and
                MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                { the trailing move must really copy reg2 back; without this
                  check a move of some other operand into reg1 would be
                  deleted as well }
                MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
                MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  { vmova*  reg1,reg2
                    vfmadd* x,y,reg2
                    vmova*  reg2,reg1
                    dealloc reg2
                    =>
                    vfmadd* x,y,reg1 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end;
        end;
    end;
  { Pass 1 peephole optimization for a three-operand vector operation at p
    that is followed by a vmovaps/vmovapd copying its result:

      vop     x,y,reg1
      vmova*  reg1,reg2
      dealloc reg1
      =>
      vop     x,y,reg2

    p - instruction to optimize; its operand 2 is taken as the destination

    Returns true when the move was folded into p. }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
        { we mix single and double operations here because we assume that the compiler
          generates vmovapd only after double operations and vmovaps only after single operations }
        MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
        MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
        (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
            begin
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
          { the copy made by CopyUsedRegs has to be released again, as done
            for every CopyUsedRegs in OptPass1MOV }
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  487. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  488. var
  489. hp1, hp2: tai;
  490. TmpUsedRegs : TAllUsedRegs;
  491. GetNextIntruction_p : Boolean;
  492. begin
  493. Result:=false;
  494. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  495. if GetNextIntruction_p and
  496. MatchInstruction(hp1,A_AND,[]) and
  497. (taicpu(p).oper[1]^.typ = top_reg) and
  498. MatchOpType(taicpu(hp1),top_const,top_reg) and
  499. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  500. case taicpu(p).opsize Of
  501. S_L:
  502. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  503. begin
  504. DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
  505. asml.remove(hp1);
  506. hp1.free;
  507. Result:=true;
  508. exit;
  509. end;
  510. end
  511. else if GetNextIntruction_p and
  512. MatchInstruction(hp1,A_MOV,[]) and
  513. (taicpu(p).oper[1]^.typ = top_reg) and
  514. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  515. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  516. begin
  517. CopyUsedRegs(TmpUsedRegs);
  518. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  519. { we have
  520. mov x, %treg
  521. mov %treg, y
  522. }
  523. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  524. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  525. { we've got
  526. mov x, %treg
  527. mov %treg, y
  528. with %treg is not used after }
  529. case taicpu(p).oper[0]^.typ Of
  530. top_reg:
  531. begin
  532. { change
  533. mov %reg, %treg
  534. mov %treg, y
  535. to
  536. mov %reg, y
  537. }
  538. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  539. DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
  540. asml.remove(hp1);
  541. hp1.free;
  542. ReleaseUsedRegs(TmpUsedRegs);
  543. Exit;
  544. end;
  545. top_ref:
  546. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  547. begin
  548. { change
  549. mov mem, %treg
  550. mov %treg, %reg
  551. to
  552. mov mem, %reg"
  553. }
  554. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  555. DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
  556. asml.remove(hp1);
  557. hp1.free;
  558. ReleaseUsedRegs(TmpUsedRegs);
  559. Exit;
  560. end;
  561. end;
  562. ReleaseUsedRegs(TmpUsedRegs);
  563. end
  564. else
  565. { Change
  566. mov %reg1, %reg2
  567. xxx %reg2, ???
  568. to
  569. mov %reg1, %reg2
  570. xxx %reg1, ???
  571. to avoid a write/read penalty
  572. }
  573. if MatchOpType(taicpu(p),top_reg,top_reg) and
  574. GetNextInstruction(p,hp1) and
  575. (tai(hp1).typ = ait_instruction) and
  576. (taicpu(hp1).ops >= 1) and
  577. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  578. { we have
  579. mov %reg1, %reg2
  580. XXX %reg2, ???
  581. }
  582. begin
  583. if ((taicpu(hp1).opcode = A_OR) or
  584. (taicpu(hp1).opcode = A_TEST)) and
  585. (taicpu(hp1).oper[1]^.typ = top_reg) and
  586. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  587. { we have
  588. mov %reg1, %reg2
  589. test/or %reg2, %reg2
  590. }
  591. begin
  592. CopyUsedRegs(TmpUsedRegs);
  593. { reg1 will be used after the first instruction,
  594. so update the allocation info }
  595. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  596. if GetNextInstruction(hp1, hp2) and
  597. (hp2.typ = ait_instruction) and
  598. taicpu(hp2).is_jmp and
  599. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  600. { change
  601. mov %reg1, %reg2
  602. test/or %reg2, %reg2
  603. jxx
  604. to
  605. test %reg1, %reg1
  606. jxx
  607. }
  608. begin
  609. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  610. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  611. asml.remove(p);
  612. p.free;
  613. p := hp1;
  614. ReleaseUsedRegs(TmpUsedRegs);
  615. Exit;
  616. end
  617. else
  618. { change
  619. mov %reg1, %reg2
  620. test/or %reg2, %reg2
  621. to
  622. mov %reg1, %reg2
  623. test/or %reg1, %reg1
  624. }
  625. begin
  626. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  627. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  628. end;
  629. ReleaseUsedRegs(TmpUsedRegs);
  630. end
  631. end
  632. else
  633. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  634. x >= RetOffset) as it doesn't do anything (it writes either to a
  635. parameter or to the temporary storage room for the function
  636. result)
  637. }
  638. if GetNextIntruction_p and
  639. (tai(hp1).typ = ait_instruction) then
  640. begin
  641. if IsExitCode(hp1) and
  642. MatchOpType(p,top_reg,top_ref) and
  643. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  644. not(assigned(current_procinfo.procdef.funcretsym) and
  645. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  646. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  647. begin
  648. asml.remove(p);
  649. p.free;
  650. p:=hp1;
  651. DebugMsg('Peephole removed deadstore before leave/ret',p);
  652. RemoveLastDeallocForFuncRes(p);
  653. exit;
  654. end
  655. { change
  656. mov reg1, mem1
  657. cmp x, mem1
  658. to
  659. mov reg1, mem1
  660. cmp x, reg1
  661. }
  662. else if MatchOpType(p,top_reg,top_ref) and
  663. MatchInstruction(hp1,A_CMP,[taicpu(p).opsize]) and
  664. (taicpu(hp1).oper[1]^.typ = top_ref) and
  665. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  666. begin
  667. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  668. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  669. end;
  670. end;
  671. { Next instruction is also a MOV ? }
  672. if GetNextIntruction_p and
  673. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  674. begin
  675. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  676. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  677. { mov reg1, mem1 or mov mem1, reg1
  678. mov mem2, reg2 mov reg2, mem2}
  679. begin
  680. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  681. { mov reg1, mem1 or mov mem1, reg1
  682. mov mem2, reg1 mov reg2, mem1}
  683. begin
  684. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  685. { Removes the second statement from
  686. mov reg1, mem1/reg2
  687. mov mem1/reg2, reg1 }
  688. begin
  689. if taicpu(p).oper[0]^.typ=top_reg then
  690. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  691. DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
  692. asml.remove(hp1);
  693. hp1.free;
  694. Result:=true;
  695. exit;
  696. end
  697. else
  698. begin
  699. CopyUsedRegs(TmpUsedRegs);
  700. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  701. if (taicpu(p).oper[1]^.typ = top_ref) and
  702. { mov reg1, mem1
  703. mov mem2, reg1 }
  704. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  705. GetNextInstruction(hp1, hp2) and
  706. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  707. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  708. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  709. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  710. { change to
  711. mov reg1, mem1 mov reg1, mem1
  712. mov mem2, reg1 cmp reg1, mem2
  713. cmp mem1, reg1
  714. }
  715. begin
  716. asml.remove(hp2);
  717. hp2.free;
  718. taicpu(hp1).opcode := A_CMP;
  719. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  720. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  721. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  722. DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
  723. end;
  724. ReleaseUsedRegs(TmpUsedRegs);
  725. end;
  726. end
  727. else if (taicpu(p).oper[1]^.typ=top_ref) and
  728. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  729. begin
  730. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  731. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  732. DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
  733. end
  734. else
  735. begin
  736. CopyUsedRegs(TmpUsedRegs);
  737. if GetNextInstruction(hp1, hp2) and
  738. MatchOpType(taicpu(p),top_ref,top_reg) and
  739. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  740. (taicpu(hp1).oper[1]^.typ = top_ref) and
  741. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  742. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  743. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  744. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  745. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  746. { mov mem1, %reg1
  747. mov %reg1, mem2
  748. mov mem2, reg2
  749. to:
  750. mov mem1, reg2
  751. mov reg2, mem2}
  752. begin
  753. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  754. DebugMsg('PeepHole Optimization,MovMovMov2MovMov 1',p);
  755. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  756. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  757. asml.remove(hp2);
  758. hp2.free;
  759. end
  760. {$ifdef i386}
  761. { this is enabled for i386 only, as the rules to create the reg sets below
  762. are too complicated for x86-64, so this makes this code too error prone
  763. on x86-64
  764. }
  765. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  766. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  767. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  768. { mov mem1, reg1 mov mem1, reg1
  769. mov reg1, mem2 mov reg1, mem2
  770. mov mem2, reg2 mov mem2, reg1
  771. to: to:
  772. mov mem1, reg1 mov mem1, reg1
  773. mov mem1, reg2 mov reg1, mem2
  774. mov reg1, mem2
  775. or (if mem1 depends on reg1
  776. and/or if mem2 depends on reg2)
  777. to:
  778. mov mem1, reg1
  779. mov reg1, mem2
  780. mov reg1, reg2
  781. }
  782. begin
  783. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  784. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  785. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  786. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  787. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  788. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  789. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  790. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  791. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  792. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  793. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  794. end
  795. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  796. begin
  797. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  798. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  799. end
  800. else
  801. begin
  802. asml.remove(hp2);
  803. hp2.free;
  804. end
  805. {$endif i386}
  806. ;
  807. ReleaseUsedRegs(TmpUsedRegs);
  808. end;
  809. end
  810. (* { movl [mem1],reg1
  811. movl [mem1],reg2
  812. to
  813. movl [mem1],reg1
  814. movl reg1,reg2
  815. }
  816. else if (taicpu(p).oper[0]^.typ = top_ref) and
  817. (taicpu(p).oper[1]^.typ = top_reg) and
  818. (taicpu(hp1).oper[0]^.typ = top_ref) and
  819. (taicpu(hp1).oper[1]^.typ = top_reg) and
  820. (taicpu(p).opsize = taicpu(hp1).opsize) and
  821. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  822. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  823. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  824. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  825. else*)
  826. { movl const1,[mem1]
  827. movl [mem1],reg1
  828. to
  829. movl const1,reg1
  830. movl reg1,[mem1]
  831. }
  832. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  833. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  834. (taicpu(p).opsize = taicpu(hp1).opsize) and
  835. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  836. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  837. begin
  838. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  839. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  840. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  841. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  842. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  843. DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
  844. end
  845. end
  846. else if (taicpu(p).oper[1]^.typ = top_reg) and
  847. GetNextIntruction_p and
  848. (hp1.typ = ait_instruction) and
  849. GetNextInstruction(hp1, hp2) and
  850. MatchInstruction(hp2,A_MOV,[]) and
  851. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  852. (taicpu(hp2).oper[0]^.typ=top_reg) and
  853. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  854. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  855. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  856. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  857. ) then
  858. { change movsX/movzX reg/ref, reg2
  859. add/sub/or/... reg3/$const, reg2
  860. mov reg2 reg/ref
  861. to add/sub/or/... reg3/$const, reg/ref }
  862. begin
  863. CopyUsedRegs(TmpUsedRegs);
  864. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  865. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  866. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  867. begin
  868. { by example:
  869. movswl %si,%eax movswl %si,%eax p
  870. decl %eax addl %edx,%eax hp1
  871. movw %ax,%si movw %ax,%si hp2
  872. ->
  873. movswl %si,%eax movswl %si,%eax p
  874. decw %eax addw %edx,%eax hp1
  875. movw %ax,%si movw %ax,%si hp2
  876. }
  877. DebugMsg('Peephole Optimization MovOpMov2Op ('+
  878. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  879. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  880. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  881. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  882. {
  883. ->
  884. movswl %si,%eax movswl %si,%eax p
  885. decw %si addw %dx,%si hp1
  886. movw %ax,%si movw %ax,%si hp2
  887. }
  888. case taicpu(hp1).ops of
  889. 1:
  890. begin
  891. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  892. if taicpu(hp1).oper[0]^.typ=top_reg then
  893. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  894. end;
  895. 2:
  896. begin
  897. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  898. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  899. (taicpu(hp1).opcode<>A_SHL) and
  900. (taicpu(hp1).opcode<>A_SHR) and
  901. (taicpu(hp1).opcode<>A_SAR) then
  902. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  903. end;
  904. else
  905. internalerror(2008042701);
  906. end;
  907. {
  908. ->
  909. decw %si addw %dx,%si p
  910. }
  911. asml.remove(p);
  912. asml.remove(hp2);
  913. p.Free;
  914. hp2.Free;
  915. p := hp1;
  916. end;
  917. ReleaseUsedRegs(TmpUsedRegs);
  918. end
  919. else if GetNextIntruction_p and
  920. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  921. GetNextInstruction(hp1, hp2) and
  922. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  923. MatchOperand(Taicpu(p).oper[0]^,0) and
  924. (Taicpu(p).oper[1]^.typ = top_reg) and
  925. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  926. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  927. { mov reg1,0
  928. bts reg1,operand1 --> mov reg1,operand2
  929. or reg1,operand2 bts reg1,operand1}
  930. begin
  931. Taicpu(hp2).opcode:=A_MOV;
  932. asml.remove(hp1);
  933. insertllitem(hp2,hp2.next,hp1);
  934. asml.remove(p);
  935. p.free;
  936. p:=hp1;
  937. end
  938. else if GetNextIntruction_p and
  939. MatchInstruction(hp1,A_LEA,[S_L]) and
  940. MatchOpType(Taicpu(p),top_ref,top_reg) and
  941. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  942. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  943. ) or
  944. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  945. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  946. )
  947. ) then
  948. { mov reg1,ref
  949. lea reg2,[reg1,reg2]
  950. to
  951. add reg2,ref}
  952. begin
  953. CopyUsedRegs(TmpUsedRegs);
  954. { reg1 may not be used afterwards }
  955. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  956. begin
  957. Taicpu(hp1).opcode:=A_ADD;
  958. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  959. DebugMsg('Peephole MovLea2Add done',hp1);
  960. asml.remove(p);
  961. p.free;
  962. p:=hp1;
  963. end;
  964. ReleaseUsedRegs(TmpUsedRegs);
  965. end;
  966. end;
  { Second-pass peephole optimisation for the MOV instruction at p.

    Two rewrites are attempted, in this order:

      1. mov reg1,reg2
         mov/movzx/movsx (reg2,..),reg2
           ->  mov/movzx/movsx (reg1,..),reg2
         The register copy is removed, p is moved to the load and the
         function returns true.

      2. mov (ref),reg
         add/sub/or/.../lea ..,reg
         mov reg,(ref)
           ->  add/sub/or/... ..,(ref)
         Done only if reg is not used after the final store.  p is moved to
         the folded instruction; the result stays false in this case.

    p is a var parameter: when an instruction is removed, it is updated to
    point at the instruction that took its place. }
  function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      nextIns,storeIns : tai;
      candidate : boolean;
    begin
      Result:=false;

      { --- rewrite 1: fold a register copy into the following load --- }
      if MatchOpType(taicpu(p),top_reg,top_reg) and
         GetNextInstruction(p,nextIns) and
         MatchInstruction(nextIns,A_MOV,A_MOVZX,A_MOVSX,[]) and
         MatchOpType(taicpu(nextIns),top_ref,top_reg) and
         ((taicpu(nextIns).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
          (taicpu(nextIns).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
         (getsupreg(taicpu(nextIns).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
        begin
          { the load overwrites reg2 anyway, so address through reg1 instead }
          if taicpu(nextIns).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg then
            taicpu(nextIns).oper[0]^.ref^.base:=taicpu(p).oper[0]^.reg;
          if taicpu(nextIns).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg then
            taicpu(nextIns).oper[0]^.ref^.index:=taicpu(p).oper[0]^.reg;
          asml.remove(p);
          p.free;
          p:=nextIns;
          Result:=true;
          exit;
        end;

      { --- rewrite 2: load / modify / store back  ->  modify in memory --- }
      if taicpu(p).oper[0]^.typ <> top_ref then
        exit;
      if not(GetNextInstruction(p,nextIns)) then
        exit;
      if nextIns.typ <> ait_instruction then
        exit;

      { The lookup of the third instruction is deliberately repeated in both
        alternatives so that the cheap, rarely-true checks run first. }
      candidate:=IsFoldableArithOp(taicpu(nextIns),taicpu(p).oper[1]^.reg) and
                 GetNextInstruction(nextIns,storeIns) and
                 MatchInstruction(storeIns,A_MOV,[]);
      if not(candidate) then
        candidate:=(taicpu(nextIns).opcode=A_LEA) and
                   GetNextInstruction(nextIns,storeIns) and
                   MatchInstruction(storeIns,A_MOV,[]) and
                   (taicpu(nextIns).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
                   ((MatchReference(taicpu(nextIns).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
                     (taicpu(nextIns).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
                    ) or
                    (MatchReference(taicpu(nextIns).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
                     (taicpu(nextIns).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)
                    )
                   ) and
                   (MatchOperand(taicpu(p).oper[1]^,taicpu(storeIns).oper[0]^) or
                    not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,nextIns,UsedRegs)));
      if not(candidate) then
        exit;

      { the third instruction has to store the loaded register to memory }
      if not(MatchOperand(taicpu(p).oper[1]^,taicpu(storeIns).oper[0]^)) then
        exit;
      if taicpu(storeIns).oper[1]^.typ <> top_ref then
        exit;

      CopyUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs,tai(nextIns.next));
      { only fold if the store goes back to the location that was loaded and
        the register is not used after the store }
      if RefsEqual(taicpu(storeIns).oper[1]^.ref^,taicpu(p).oper[0]^.ref^) and
         not(RegUsedAfterInstruction(taicpu(storeIns).oper[0]^.reg,storeIns,TmpUsedRegs)) then
        begin
          case taicpu(nextIns).opcode of
            A_LEA :
              begin
                { lea (reg,other),reg becomes add other,(ref) }
                taicpu(nextIns).opcode:=A_ADD;
                if taicpu(nextIns).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg then
                  taicpu(nextIns).loadreg(0,taicpu(nextIns).oper[0]^.ref^.base)
                else
                  taicpu(nextIns).loadreg(0,taicpu(nextIns).oper[0]^.ref^.index);
                taicpu(nextIns).loadRef(1,taicpu(p).oper[0]^.ref^);
                DebugMsg('Peephole FoldLea done',nextIns);
              end;
            A_INC,A_DEC,A_NOT,A_NEG :
              { for these opcodes the memory reference goes into operand 0 }
              taicpu(nextIns).loadRef(0,taicpu(p).oper[0]^.ref^);
            else
              { all other opcodes: the memory reference goes into operand 1 }
              taicpu(nextIns).loadRef(1,taicpu(p).oper[0]^.ref^);
          end;
          asml.remove(p);
          asml.remove(storeIns);
          p.free;
          storeIns.free;
          p:=nextIns;
        end;
      ReleaseUsedRegs(TmpUsedRegs);
    end;
  { First-pass peephole optimisation for the AND instruction at p.

    Looks at the instruction following p and tries, in this order:

      AndAnd2And     and const1,reg ; and const2,reg
                       -> and (const1 and const2),reg
                     p is replaced by the second AND and the function
                     returns true.
      AndMovzToAnd   and const,reg ; movzx subreg(reg),reg
                       -> and const,reg
                     when the constant already clears every bit the movzx
                     would clear.
      AndMovsxToAnd  and const,reg ; movsx subreg(reg),reg
                       -> and const,reg
                     when the constant also clears the sign bit of the
                     narrow source, so the sign extension is a no-op.
      and -> test    and x,reg ; jcc  ->  test x,reg ; jcc
                     when reg is not in UsedRegs.

    Only AndAnd2And sets the result to true; all other cases (including
    "no following instruction") return false. }
  function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;

      if not(GetNextInstruction(p, hp1)) then
        exit;

      if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_AND,[]) and
        MatchOpType(hp1,top_const,top_reg) and
        (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { the second register must contain the first one, so compare their subreg types }
        (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
        (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
        { change
            and const1, reg
            and const2, reg
          to
            and (const1 and const2), reg
        }
        begin
          taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
          DebugMsg('Peephole AndAnd2And done',hp1);
          asml.remove(p);
          p.Free;
          p:=hp1;
          Result:=true;
          exit;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVZX,[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        { source and destination of the movzx must be parts of the same
          super register.  Their sub register sizes always differ (movzx
          widens), so the super registers have to be compared here, exactly
          as in the movsx case below. }
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        begin
          { the movzx is redundant if the mask has no bits set outside the
            narrow source operand }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
             or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('Peephole AndMovzToAnd done',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        begin
          { the movsx is redundant if the mask also clears the sign bit of
            the narrow source operand }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
             ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
             or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
        (hp1.typ = ait_instruction) and
        (taicpu(hp1).is_jmp) and
        (taicpu(hp1).opcode<>A_JMP) and
        not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
        { change
            and x, reg
            jxx
          to
            test x, reg
            jxx
          if reg is deallocated before the
          jump, but only if it's a conditional jump (PFV)
        }
        taicpu(p).opcode := A_TEST;
    end;
  { Post-peephole optimisation for the MOV instruction at p:
    rewrites "mov $0,%reg" as "xor %reg,%reg".

    The rewrite is applied only when the source operand is the constant 0,
    the destination is a register and NR_DEFAULTFLAGS is not in UsedRegs.
    The instruction is modified in place. }
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    begin
      { source must be the constant zero }
      if not(MatchOperand(taicpu(p).oper[0]^,0)) then
        exit;
      { destination must be a register }
      if taicpu(p).oper[1]^.typ <> Top_Reg then
        exit;
      { leave the mov alone while the flags register is in use }
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;

      { change "mov $0, %reg" into "xor %reg, %reg" }
      taicpu(p).opcode:=A_XOR;
      taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
    end;
  1181. end.