aoptx86.pas 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  type
    { Assembler-level optimizer shared by the x86 targets. It extends
      TAsmOptimizer with the peephole routines implemented in this unit. }
    TX86AsmOptimizer = class(TAsmOptimizer)
      { true if instruction hp loads reg with a value that does not depend on
        the previous contents of reg }
      function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
    protected
      { first peephole pass, one routine per instruction family }
      function OptPass1AND(var p : tai) : boolean;
      function OptPass1MOV(var p : tai) : boolean;
      function OptPass1VMOVAP(var p : tai) : boolean;
      function OptPass1VOP(const p : tai) : boolean;

      { second peephole pass }
      function OptPass2MOV(var p : tai) : boolean;
      function OptPass2Imul(var p : tai) : boolean;

      { post peephole pass }
      procedure PostPeepholeOptMov(const p : tai);

      { inserts s as assembler comment before p when DEBUG_AOPTCPU is defined,
        does nothing otherwise }
      procedure DebugMsg(const s : string; p : tai);inline;

      { register allocation bookkeeping }
      procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
      procedure RemoveLastDeallocForFuncRes(p : tai);

      { pattern tests that need no optimizer instance }
      class function IsExitCode(p : tai) : boolean;
      class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
    end;
  { MatchInstruction: true if instr is an instruction with (one of) the given
    opcode(s) and an operand size contained in opsize; an empty opsize set
    accepts every operand size }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;

  { MatchOperand: true if oper is the given register, the given constant or
    equal to a second operand respectively }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;

  { field-wise comparison of two references }
  function RefsEqual(const r1, r2: treference): boolean;

  { returns true, if ref is a plain reference (no offset, segment, symbol or
    scaling) using only the registers passed as base and index; NR_INVALID
    accepts any register in that position }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;

  { returns true, if ref is a reference using only the registers passed as base and index
    and having an offset }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;

  { true if instr has exactly two operands of the types ot0 and ot1 }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  57. implementation
  58. uses
  59. cutils,
  60. verbose,
  61. procinfo,
  62. symconst,symsym,
  63. itcpugas;
  { Returns true if instr is an instruction whose opcode is op and whose
    operand size is contained in opsize. An empty opsize set matches every
    operand size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      { anything that is not an instruction can never match }
      if instr.typ <> ait_instruction then
        exit;
      if taicpu(instr).opcode <> op then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is an instruction whose opcode is op1 or op2 and
    whose operand size is contained in opsize. An empty opsize set matches
    every operand size. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if (taicpu(instr).opcode <> op1) and
         (taicpu(instr).opcode <> op2) then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is an instruction whose opcode is op1, op2 or op3
    and whose operand size is contained in opsize. An empty opsize set matches
    every operand size. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if (taicpu(instr).opcode <> op1) and
         (taicpu(instr).opcode <> op2) and
         (taicpu(instr).opcode <> op3) then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Returns true if instr is an instruction whose opcode is one of ops and
    whose operand size is contained in opsize. An empty opsize set matches
    every operand size; an empty ops array never matches. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
   const opsize : topsizes) : boolean;
    var
      op : TAsmOp;
    begin
      result:=false;
      { neither the kind of instr nor its operand size depends on the opcode
        being tried, so test both once instead of once per array element }
      if instr.typ <> ait_instruction then
        exit;
      if (opsize <> []) and not(taicpu(instr).opsize in opsize) then
        exit;
      for op in ops do
        if taicpu(instr).opcode = op then
          begin
            result:=true;
            exit;
          end;
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ = top_reg then
        result := oper.reg = reg
      else
        result := false;
    end;
  { Returns true if oper is a constant operand with the value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ = top_const then
        result := oper.val = a
      else
        result := false;
    end;
  { Returns true if both operands have the same type and the same contents.
    Only constant, register and reference operands are supported; two
    operands of any other (equal) type raise internal error 2013102801. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      if oper1.typ <> oper2.typ then
        begin
          result := false;
          exit;
        end;
      { the types agree, now compare the payload }
      result := true;
      case oper1.typ of
        top_reg:
          result := oper1.reg = oper2.reg;
        top_const:
          result := oper1.val = oper2.val;
        top_ref:
          result := RefsEqual(oper1.ref^, oper2.ref^);
        else
          internalerror(2013102801);
      end;
    end;
  { Returns true if r1 and r2 agree in offset, segment, base, index, scale
    factor, symbol, relative symbol and address type. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result := false;
      { registers involved }
      if (r1.segment <> r2.segment) or
         (r1.base <> r2.base) or
         (r1.index <> r2.index) or
         (r1.scalefactor <> r2.scalefactor) then
        exit;
      { symbolic part }
      if (r1.symbol <> r2.symbol) or
         (r1.relsymbol <> r2.relsymbol) or
         (r1.refaddr <> r2.refaddr) then
        exit;
      Result := r1.offset = r2.offset;
    end;
  { Returns true if ref has no offset, no segment, no symbols and a scale
    factor of 0 or 1, and uses base as base register and index as index
    register. Passing NR_INVALID for base or index accepts any register in
    that position. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      { identical to MatchReferenceWithOffset apart from the additional
        requirement that the offset is zero }
      if ref.offset<>0 then
        Result:=false
      else
        Result:=MatchReferenceWithOffset(ref,base,index);
    end;
  { Returns true if ref has no segment, no symbols and a scale factor of 0
    or 1, and uses base as base register and index as index register; the
    offset is not inspected. Passing NR_INVALID for base or index accepts any
    register in that position. }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      { only plain, unscaled references without symbolic parts qualify }
      if not(ref.scalefactor in [0,1]) or
         (ref.segment<>NR_NO) or
         (ref.symbol<>nil) or
         (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      Result:=(index=NR_INVALID) or (ref.index=index);
    end;
  { Returns true if instr has exactly two operands, the first being of type
    ot0 and the second of type ot1. instr is accessed as taicpu without
    checking its kind, so the caller has to pass an instruction. }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
    var
      insn : taicpu;
    begin
      insn:=taicpu(instr);
      if insn.ops<>2 then
        Result:=false
      else
        Result:=(insn.oper[0]^.typ=ot0) and
          (insn.oper[1]^.typ=ot1);
    end;
  { Inserts s as an assembler comment in front of p when the unit is compiled
    with DEBUG_AOPTCPU; otherwise the body is empty. The inline directive is
    taken from the declaration in the class. }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
{$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
{$endif DEBUG_AOPTCPU}
    end;
  { Allocates register reg between (and including) instructions p1 and p2.
    The type of p1 and p2 must not be in SkipInstr.

    All allocation/deallocation entries for reg found between p1 and the
    instruction following p2 are removed. If the first removed entry was an
    allocation, a new allocation is inserted before the original p1; if the
    last removed entry was a deallocation, a new deallocation is inserted
    before the instruction at which the scan stopped.

    Note that this routine is called both from the peephole optimizer (where
    optinfo is not yet initialised) and from the cse (where it is). }
  procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
    var
      item, firstinstr: tai;
      anyremoved,
      firstwasalloc,
      lastwasdealloc: boolean;
    begin
{$ifdef EXTDEBUG}
{     if assigned(p1.optinfo) and
         (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
        internalerror(2004101010); }
{$endif EXTDEBUG}
      firstinstr := p1;
      { the stack pointer and the frame pointer are never tracked; a missing
        p1 happens with registers which are loaded implicitly, outside the
        current block (e.g. esi with self) }
      if (reg = NR_ESP) or
         (reg = current_procinfo.framepointer) or
         not(assigned(p1)) then
        exit;
      { make sure we allocate it for this instruction }
      getnextinstruction(p2,p2);
      anyremoved := false;
      firstwasalloc := false;
      lastwasdealloc := false;
{$ifdef allocregdebug}
      item := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
        ' from here...'));
      insertllitem(asml,p1.previous,p1,item);
      item := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
        ' till here...'));
      insertllitem(asml,p2,p2.next,item);
{$endif allocregdebug}
      { do it the safe way: always allocate the full super register,
        as we do no register re-allocation in the peephole optimizer,
        this does not hurt }
      case getregtype(reg) of
        R_INTREGISTER:
          reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
        R_MMREGISTER:
          reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
      end;
      { not yet marked as used at p1: allocate it there }
      if not(RegInUsedRegs(reg,initialusedregs)) then
        begin
          item := tai_regalloc.alloc(reg,nil);
          insertllItem(p1.previous,p1,item);
          IncludeRegInUsedRegs(reg,initialusedregs);
        end;
      while assigned(p1) and
            (p1 <> p2) do
        begin
          if assigned(p1.optinfo) then
            internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
          p1 := tai(p1.next);
          repeat
            { step over everything skippable except register allocations }
            while assigned(p1) and
                  (p1.typ in (SkipInstr-[ait_regalloc])) do
              p1 := tai(p1.next);
            { remove all allocation/deallocation info about the register in between }
            if assigned(p1) and
               (p1.typ = ait_regalloc) then
              begin
                { same super register, different sub register? }
                if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
                  begin
                    if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
                      internalerror(2016101501);
                    tai_regalloc(p1).reg:=reg;
                  end;
                if tai_regalloc(p1).reg<>reg then
                  p1 := tai(p1.next)
                else
                  begin
                    { remember what kind of entry was removed first and last }
                    if not anyremoved then
                      begin
                        firstwasalloc := tai_regalloc(p1).ratype=ra_alloc;
                        anyremoved := true;
                      end;
                    lastwasdealloc := (tai_regalloc(p1).ratype=ra_dealloc);
                    item := tai(p1.Next);
                    asml.Remove(p1);
                    p1.free;
                    p1 := item;
                  end;
              end;
          until not(assigned(p1)) or
                not(p1.typ in SkipInstr);
        end;
      { restore the allocation info at the borders of the range }
      if assigned(p1) then
        begin
          if firstwasalloc then
            begin
              item := tai_regalloc.Alloc(reg,nil);
              insertLLItem(firstinstr.previous,firstinstr,item);
            end;
          if lastwasdealloc then
            begin
              item := tai_regalloc.DeAlloc(reg,nil);
              insertLLItem(p1.previous,p1,item);
            end;
        end;
    end;
  { Returns true if hp is an instruction which overwrites (the super register
    of) reg with a value that does not depend on the previous contents of reg.

    Recognised are:
      - two operand moves/lea whose destination is reg and whose source is a
        constant, another register or a reference not involving reg
      - pop reg
      - three operand imul whose destination is reg and whose register or
        memory source does not involve reg

    Returns false for a nil hp and for anything that is not an instruction. }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      Result := false;
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        exit;
      p := taicpu(hp);
      case p.opcode of
        A_MOV,A_MOVZX,A_MOVSX,A_LEA,
        A_VMOVSS,A_VMOVSD,A_VMOVAPD,A_VMOVAPS,A_VMOVQ,
        A_MOVSS,A_MOVSD,A_MOVQ,A_MOVAPD,A_MOVAPS:
          Result :=
            { only the two operand forms have their destination in oper[1]:
              the string instruction movsd has no operands at all and the
              three operand vmovss/vmovsd write to oper[2] }
            (p.ops = 2) and
            (p.oper[1]^.typ = top_reg) and
            (SuperRegistersEqual(p.oper[1]^.reg,reg)) and
            ((p.oper[0]^.typ = top_const) or
             ((p.oper[0]^.typ = top_reg) and
              not(SuperRegistersEqual(p.oper[0]^.reg,reg))) or
             ((p.oper[0]^.typ = top_ref) and
              not RegInRef(reg,p.oper[0]^.ref^)));
        A_POP:
          Result :=
            { pop can also have a memory operand, whose reg field is not valid }
            (p.oper[0]^.typ = top_reg) and
            (SuperRegistersEqual(p.oper[0]^.reg,reg));
        A_IMUL:
          Result :=
            (p.ops = 3) and
            (p.oper[2]^.typ = top_reg) and
            (SuperRegistersEqual(p.oper[2]^.reg,reg)) and
            { the multiplicand is either a register or a memory location; in
              the latter case reg must not be used to address it }
            (((p.oper[1]^.typ = top_reg) and
              not(SuperRegistersEqual(p.oper[1]^.reg,reg))) or
             ((p.oper[1]^.typ = top_ref) and
              not RegInRef(reg,p.oper[1]^.ref^)));
      end;
    end;
  { Returns true if p starts one of the following procedure exit sequences:
      ret
      leave; ret
      mov %ebp,%esp; pop %ebp; ret }
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      second,third : tai;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_RET:
          result:=true;
        A_LEAVE:
          result:=
            GetNextInstruction(p,second) and
            (second.typ=ait_instruction) and
            (taicpu(second).opcode=A_RET);
        A_MOV:
          result:=
            { mov %ebp,%esp }
            (taicpu(p).oper[0]^.typ=top_reg) and
            (taicpu(p).oper[0]^.reg=NR_EBP) and
            (taicpu(p).oper[1]^.typ=top_reg) and
            (taicpu(p).oper[1]^.reg=NR_ESP) and
            { pop %ebp }
            GetNextInstruction(p,second) and
            (second.typ=ait_instruction) and
            (taicpu(second).opcode=A_POP) and
            (taicpu(second).oper[0]^.typ=top_reg) and
            (taicpu(second).oper[0]^.reg=NR_EBP) and
            { ret }
            GetNextInstruction(second,third) and
            (third.typ=ait_instruction) and
            (taicpu(third).opcode=A_RET);
      end;
    end;
  { Returns true if hp1 is an arithmetic/logical instruction that modifies reg
    in place:
      - add/sub/or/xor/and/shl/shr/sar with reg as second operand and a
        constant or a register other than reg as first operand
      - inc/dec/neg/not with reg as only inspected operand
    Every other opcode yields false. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    var
      sourceok: boolean;
    begin
      Result := False;
      case hp1.opcode of
        A_INC,A_DEC,A_NEG,A_NOT:
          Result :=
            (hp1.oper[0]^.typ = top_reg) and
            (hp1.oper[0]^.reg = reg);
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          begin
            { the source may be a constant or any register except reg itself }
            sourceok :=
              (hp1.oper[0]^.typ = top_const) or
              ((hp1.oper[0]^.typ = top_reg) and
               (hp1.oper[0]^.reg <> reg));
            if sourceok then
              Result :=
                (hp1.oper[1]^.typ = top_reg) and
                (hp1.oper[1]^.reg = reg);
          end;
      end;
    end;
  { Removes the last deallocation of the function result register(s) located
    before p. Which registers are handled depends on the type of the result
    of the current procedure: EAX for the listed non-ordinal types and for
    ordinals with a non-zero size, additionally EDX for 8 byte ordinals. }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { Walks backwards from p and removes the first deallocation of the integer
      register supreg it meets; the walk ends at the start of the list or at
      the first entry for which regInInstruction reports the register. }
    procedure DoRemoveLastDeallocForFuncRes(supreg: tsuperregister);
      var
        cur: tai;
        wholereg: tregister;
      begin
        wholereg := newreg(R_INTREGISTER,supreg,R_SUBWHOLE);
        cur := tai(p.previous);
        while assigned(cur) do
          begin
            if (cur.typ = ait_regalloc) and
               (tai_regalloc(cur).ratype=ra_dealloc) and
               (getregtype(tai_regalloc(cur).reg) = R_INTREGISTER) and
               (getsupreg(tai_regalloc(cur).reg) = supreg) then
              begin
                asml.remove(cur);
                cur.free;
                break;
              end;
            if regInInstruction(wholereg,cur) then
              break;
            cur := tai(cur.previous);
          end;
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        orddef:
          case current_procinfo.procdef.returndef.size of
            0:
              { no result, nothing to do }
              ;
            8:
              begin
                { for int64/qword }
                DoRemoveLastDeallocForFuncRes(RS_EAX);
                DoRemoveLastDeallocForFuncRes(RS_EDX);
              end;
            else
              DoRemoveLastDeallocForFuncRes(RS_EAX);
          end;
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
      end;
    end;
  { First pass peephole optimizations for vmovapd/vmovaps with two register
    operands. p is the vmova* instruction; on return it may point to a
    different instruction if p itself was removed.

    Handled patterns:
      vmova* reg1,reg1                       => removed
      vmova* reg1,reg2; vmova* reg2,reg3     => vmova* reg1,reg3   (reg2 unused afterwards)
      vmova* reg1,reg2; vmova* reg2,reg1     => vmova* reg1,reg2
      vmova* reg1,reg2; fma ...,reg2; vmova* reg2,reg1
                                             => fma ...,reg1       (reg2 unused afterwards)

    The result is true for the first three patterns only. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                MatchOpType(taicpu(hp1),top_reg,top_reg) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end;
                  { every CopyUsedRegs needs its ReleaseUsedRegs }
                  ReleaseUsedRegs(TmpUsedRegs);
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                { we mix single and double operations here because we assume that the compiler
                  generates vmovapd only after double operations and vmovaps only after single operations }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                { reg2 may only be the destination of the fma: its other
                  operands are not rewritten, so they must not read reg2 }
                not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^)) and
                not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
                GetNextInstruction(hp1,hp2) and
                MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                { the final move has to copy the fma result (reg2) back to
                  reg1, a move from any other source must not be dropped }
                MatchOpType(taicpu(hp2),top_reg,top_reg) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  { vmova* reg1,reg2
                    fma    x,y,reg2
                    vmova* reg2,reg1
                    dealloc reg2
                    =>
                    fma    x,y,reg1 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
                   then
                    begin
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end;
        end;
    end;
  { First pass peephole optimization for three operand vector instructions:

      vop    x,y,reg1
      vmova* reg1,reg2
      dealloc reg1
      =>
      vop    x,y,reg2

    p is the vector instruction and is expected to have three operands.
    Returns true if the following vmova* was folded into p. }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
        { we mix single and double operations here because we assume that the compiler
          generates vmovapd only after double operations and vmovaps only after single operations }
        MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
        MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
        { the register of the move source is read below, so it has to be one }
        (taicpu(hp1).oper[0]^.typ=top_reg) and
        (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
            ) then
            begin
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
          { every CopyUsedRegs needs its ReleaseUsedRegs }
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  506. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  507. var
  508. hp1, hp2: tai;
  509. TmpUsedRegs : TAllUsedRegs;
  510. GetNextIntruction_p : Boolean;
  511. begin
  512. Result:=false;
  513. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  514. if GetNextIntruction_p and
  515. MatchInstruction(hp1,A_AND,[]) and
  516. (taicpu(p).oper[1]^.typ = top_reg) and
  517. MatchOpType(taicpu(hp1),top_const,top_reg) and
  518. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  519. case taicpu(p).opsize Of
  520. S_L:
  521. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  522. begin
  523. DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
  524. asml.remove(hp1);
  525. hp1.free;
  526. Result:=true;
  527. exit;
  528. end;
  529. end
  530. else if GetNextIntruction_p and
  531. MatchInstruction(hp1,A_MOV,[]) and
  532. (taicpu(p).oper[1]^.typ = top_reg) and
  533. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  534. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  535. begin
  536. CopyUsedRegs(TmpUsedRegs);
  537. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  538. { we have
  539. mov x, %treg
  540. mov %treg, y
  541. }
  542. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  543. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  544. { we've got
  545. mov x, %treg
  546. mov %treg, y
  547. with %treg is not used after }
  548. case taicpu(p).oper[0]^.typ Of
  549. top_reg:
  550. begin
  551. { change
  552. mov %reg, %treg
  553. mov %treg, y
  554. to
  555. mov %reg, y
  556. }
  557. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  558. DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
  559. asml.remove(hp1);
  560. hp1.free;
  561. ReleaseUsedRegs(TmpUsedRegs);
  562. Exit;
  563. end;
  564. top_ref:
  565. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  566. begin
  567. { change
  568. mov mem, %treg
  569. mov %treg, %reg
  570. to
  571. mov mem, %reg"
  572. }
  573. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  574. DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
  575. asml.remove(hp1);
  576. hp1.free;
  577. ReleaseUsedRegs(TmpUsedRegs);
  578. Exit;
  579. end;
  580. end;
  581. ReleaseUsedRegs(TmpUsedRegs);
  582. end
  583. else
  584. { Change
  585. mov %reg1, %reg2
  586. xxx %reg2, ???
  587. to
  588. mov %reg1, %reg2
  589. xxx %reg1, ???
  590. to avoid a write/read penalty
  591. }
  592. if MatchOpType(taicpu(p),top_reg,top_reg) and
  593. GetNextInstruction(p,hp1) and
  594. (tai(hp1).typ = ait_instruction) and
  595. (taicpu(hp1).ops >= 1) and
  596. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  597. { we have
  598. mov %reg1, %reg2
  599. XXX %reg2, ???
  600. }
  601. begin
  602. if ((taicpu(hp1).opcode = A_OR) or
  603. (taicpu(hp1).opcode = A_TEST)) and
  604. (taicpu(hp1).oper[1]^.typ = top_reg) and
  605. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  606. { we have
  607. mov %reg1, %reg2
  608. test/or %reg2, %reg2
  609. }
  610. begin
  611. CopyUsedRegs(TmpUsedRegs);
  612. { reg1 will be used after the first instruction,
  613. so update the allocation info }
  614. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  615. if GetNextInstruction(hp1, hp2) and
  616. (hp2.typ = ait_instruction) and
  617. taicpu(hp2).is_jmp and
  618. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  619. { change
  620. mov %reg1, %reg2
  621. test/or %reg2, %reg2
  622. jxx
  623. to
  624. test %reg1, %reg1
  625. jxx
  626. }
  627. begin
  628. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  629. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  630. asml.remove(p);
  631. p.free;
  632. p := hp1;
  633. ReleaseUsedRegs(TmpUsedRegs);
  634. Exit;
  635. end
  636. else
  637. { change
  638. mov %reg1, %reg2
  639. test/or %reg2, %reg2
  640. to
  641. mov %reg1, %reg2
  642. test/or %reg1, %reg1
  643. }
  644. begin
  645. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  646. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  647. end;
  648. ReleaseUsedRegs(TmpUsedRegs);
  649. end
  650. end
  651. else
  652. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  653. x >= RetOffset) as it doesn't do anything (it writes either to a
  654. parameter or to the temporary storage room for the function
  655. result)
  656. }
  657. if GetNextIntruction_p and
  658. (tai(hp1).typ = ait_instruction) then
  659. begin
  660. if IsExitCode(hp1) and
  661. MatchOpType(p,top_reg,top_ref) and
  662. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  663. not(assigned(current_procinfo.procdef.funcretsym) and
  664. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  665. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  666. begin
  667. asml.remove(p);
  668. p.free;
  669. p:=hp1;
  670. DebugMsg('Peephole removed deadstore before leave/ret',p);
  671. RemoveLastDeallocForFuncRes(p);
  672. exit;
  673. end
  674. { change
  675. mov reg1, mem1
  676. cmp x, mem1
  677. to
  678. mov reg1, mem1
  679. cmp x, reg1
  680. }
  681. else if MatchOpType(p,top_reg,top_ref) and
  682. MatchInstruction(hp1,A_CMP,[taicpu(p).opsize]) and
  683. (taicpu(hp1).oper[1]^.typ = top_ref) and
  684. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  685. begin
  686. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  687. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  688. end;
  689. end;
  690. { Next instruction is also a MOV ? }
  691. if GetNextIntruction_p and
  692. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  693. begin
  694. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  695. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  696. { mov reg1, mem1 or mov mem1, reg1
  697. mov mem2, reg2 mov reg2, mem2}
  698. begin
  699. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  700. { mov reg1, mem1 or mov mem1, reg1
  701. mov mem2, reg1 mov reg2, mem1}
  702. begin
  703. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  704. { Removes the second statement from
  705. mov reg1, mem1/reg2
  706. mov mem1/reg2, reg1 }
  707. begin
  708. if taicpu(p).oper[0]^.typ=top_reg then
  709. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  710. DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
  711. asml.remove(hp1);
  712. hp1.free;
  713. Result:=true;
  714. exit;
  715. end
  716. else
  717. begin
  718. CopyUsedRegs(TmpUsedRegs);
  719. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  720. if (taicpu(p).oper[1]^.typ = top_ref) and
  721. { mov reg1, mem1
  722. mov mem2, reg1 }
  723. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  724. GetNextInstruction(hp1, hp2) and
  725. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  726. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  727. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  728. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  729. { change to
  730. mov reg1, mem1 mov reg1, mem1
  731. mov mem2, reg1 cmp reg1, mem2
  732. cmp mem1, reg1
  733. }
  734. begin
  735. asml.remove(hp2);
  736. hp2.free;
  737. taicpu(hp1).opcode := A_CMP;
  738. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  739. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  740. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  741. DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
  742. end;
  743. ReleaseUsedRegs(TmpUsedRegs);
  744. end;
  745. end
  746. else if (taicpu(p).oper[1]^.typ=top_ref) and
  747. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  748. begin
  749. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  750. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  751. DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
  752. end
  753. else
  754. begin
  755. CopyUsedRegs(TmpUsedRegs);
  756. if GetNextInstruction(hp1, hp2) and
  757. MatchOpType(taicpu(p),top_ref,top_reg) and
  758. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  759. (taicpu(hp1).oper[1]^.typ = top_ref) and
  760. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  761. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  762. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  763. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  764. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  765. { mov mem1, %reg1
  766. mov %reg1, mem2
  767. mov mem2, reg2
  768. to:
  769. mov mem1, reg2
  770. mov reg2, mem2}
  771. begin
  772. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  773. DebugMsg('PeepHole Optimization,MovMovMov2MovMov 1',p);
  774. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  775. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  776. asml.remove(hp2);
  777. hp2.free;
  778. end
  779. {$ifdef i386}
  780. { this is enabled for i386 only, as the rules to create the reg sets below
  781. are too complicated for x86-64, so this makes this code too error prone
  782. on x86-64
  783. }
  784. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  785. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  786. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  787. { mov mem1, reg1 mov mem1, reg1
  788. mov reg1, mem2 mov reg1, mem2
  789. mov mem2, reg2 mov mem2, reg1
  790. to: to:
  791. mov mem1, reg1 mov mem1, reg1
  792. mov mem1, reg2 mov reg1, mem2
  793. mov reg1, mem2
  794. or (if mem1 depends on reg1
  795. and/or if mem2 depends on reg2)
  796. to:
  797. mov mem1, reg1
  798. mov reg1, mem2
  799. mov reg1, reg2
  800. }
  801. begin
  802. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  803. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  804. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  805. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  806. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  807. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  808. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  809. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  810. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  811. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  812. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  813. end
  814. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  815. begin
  816. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  817. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  818. end
  819. else
  820. begin
  821. asml.remove(hp2);
  822. hp2.free;
  823. end
  824. {$endif i386}
  825. ;
  826. ReleaseUsedRegs(TmpUsedRegs);
  827. end;
  828. end
  829. (* { movl [mem1],reg1
  830. movl [mem1],reg2
  831. to
  832. movl [mem1],reg1
  833. movl reg1,reg2
  834. }
  835. else if (taicpu(p).oper[0]^.typ = top_ref) and
  836. (taicpu(p).oper[1]^.typ = top_reg) and
  837. (taicpu(hp1).oper[0]^.typ = top_ref) and
  838. (taicpu(hp1).oper[1]^.typ = top_reg) and
  839. (taicpu(p).opsize = taicpu(hp1).opsize) and
  840. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  841. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  842. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  843. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  844. else*)
  845. { movl const1,[mem1]
  846. movl [mem1],reg1
  847. to
  848. movl const1,reg1
  849. movl reg1,[mem1]
  850. }
  851. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  852. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  853. (taicpu(p).opsize = taicpu(hp1).opsize) and
  854. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  855. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  856. begin
  857. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  858. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  859. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  860. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  861. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  862. DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
  863. end
  864. end
  865. else if (taicpu(p).oper[1]^.typ = top_reg) and
  866. GetNextIntruction_p and
  867. (hp1.typ = ait_instruction) and
  868. GetNextInstruction(hp1, hp2) and
  869. MatchInstruction(hp2,A_MOV,[]) and
  870. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  871. (taicpu(hp2).oper[0]^.typ=top_reg) and
  872. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  873. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  874. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  875. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  876. ) then
  877. { change movsX/movzX reg/ref, reg2
  878. add/sub/or/... reg3/$const, reg2
  879. mov reg2 reg/ref
  880. to add/sub/or/... reg3/$const, reg/ref }
  881. begin
  882. CopyUsedRegs(TmpUsedRegs);
  883. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  884. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  885. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  886. begin
  887. { by example:
  888. movswl %si,%eax movswl %si,%eax p
  889. decl %eax addl %edx,%eax hp1
  890. movw %ax,%si movw %ax,%si hp2
  891. ->
  892. movswl %si,%eax movswl %si,%eax p
  893. decw %eax addw %edx,%eax hp1
  894. movw %ax,%si movw %ax,%si hp2
  895. }
  896. DebugMsg('Peephole Optimization MovOpMov2Op ('+
  897. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  898. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  899. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  900. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  901. {
  902. ->
  903. movswl %si,%eax movswl %si,%eax p
  904. decw %si addw %dx,%si hp1
  905. movw %ax,%si movw %ax,%si hp2
  906. }
  907. case taicpu(hp1).ops of
  908. 1:
  909. begin
  910. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  911. if taicpu(hp1).oper[0]^.typ=top_reg then
  912. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  913. end;
  914. 2:
  915. begin
  916. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  917. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  918. (taicpu(hp1).opcode<>A_SHL) and
  919. (taicpu(hp1).opcode<>A_SHR) and
  920. (taicpu(hp1).opcode<>A_SAR) then
  921. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  922. end;
  923. else
  924. internalerror(2008042701);
  925. end;
  926. {
  927. ->
  928. decw %si addw %dx,%si p
  929. }
  930. asml.remove(p);
  931. asml.remove(hp2);
  932. p.Free;
  933. hp2.Free;
  934. p := hp1;
  935. end;
  936. ReleaseUsedRegs(TmpUsedRegs);
  937. end
  938. else if GetNextIntruction_p and
  939. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  940. GetNextInstruction(hp1, hp2) and
  941. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  942. MatchOperand(Taicpu(p).oper[0]^,0) and
  943. (Taicpu(p).oper[1]^.typ = top_reg) and
  944. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  945. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  946. { mov reg1,0
  947. bts reg1,operand1 --> mov reg1,operand2
  948. or reg1,operand2 bts reg1,operand1}
  949. begin
  950. Taicpu(hp2).opcode:=A_MOV;
  951. asml.remove(hp1);
  952. insertllitem(hp2,hp2.next,hp1);
  953. asml.remove(p);
  954. p.free;
  955. p:=hp1;
  956. end
  957. else if GetNextIntruction_p and
  958. MatchInstruction(hp1,A_LEA,[S_L]) and
  959. MatchOpType(Taicpu(p),top_ref,top_reg) and
  960. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  961. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  962. ) or
  963. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  964. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  965. )
  966. ) then
  967. { mov reg1,ref
  968. lea reg2,[reg1,reg2]
  969. to
  970. add reg2,ref}
  971. begin
  972. CopyUsedRegs(TmpUsedRegs);
  973. { reg1 may not be used afterwards }
  974. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  975. begin
  976. Taicpu(hp1).opcode:=A_ADD;
  977. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  978. DebugMsg('Peephole MovLea2Add done',hp1);
  979. asml.remove(p);
  980. p.free;
  981. p:=hp1;
  982. end;
  983. ReleaseUsedRegs(TmpUsedRegs);
  984. end;
  985. end;
  function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
    { Second-pass peephole rewrites anchored on the MOV instruction p.

      Two patterns are tried, in this order:

        1. mov reg1,reg2
           mov/movzx/movsx (reg2,..),reg2
             becomes
           mov/movzx/movsx (reg1,..),reg2
           The leading mov is removed and freed, p is moved to the following
           instruction and the function returns true.

        2. mov (ref),reg
           foldable arithmetic op or lea on reg
           mov reg,(ref)
             becomes
           arithmetic op applied directly to (ref)
           The two movs are removed and freed and p is moved to the
           arithmetic instruction.  The function result is left at false
           for this pattern.

      The order of the sub-conditions below is significant: the calls to
      GetNextInstruction fill in the look-ahead instructions that the later
      sub-conditions dereference. }
    var
      TmpUsedRegs : TAllUsedRegs;
      next_ins,third_ins: tai;
    begin
      Result:=false;

      { pattern 1: the register copied by p is only used to address the
        load which overwrites that very register again }
      if MatchOpType(taicpu(p),top_reg,top_reg) and
         GetNextInstruction(p, next_ins) and
         MatchInstruction(next_ins,A_MOV,A_MOVZX,A_MOVSX,[]) and
         MatchOpType(taicpu(next_ins),top_ref,top_reg) and
         ((taicpu(next_ins).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
          or
          (taicpu(next_ins).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
         ) and
         (getsupreg(taicpu(next_ins).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
        begin
          { address through the source register of p instead of its copy }
          if (taicpu(next_ins).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
            taicpu(next_ins).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
          if (taicpu(next_ins).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
            taicpu(next_ins).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
          asml.remove(p);
          p.free;
          p := next_ins;
          Result:=true;
          exit;
        end;

      { pattern 2: load / modify / store back to the same reference }
      if (taicpu(p).oper[0]^.typ = top_ref) and
         GetNextInstruction(p,next_ins) and
         (next_ins.typ = ait_instruction) and
         { the GetNextInstruction(next_ins,third_ins) call is deliberately
           repeated in both alternatives so that the cheap checks, which
           fail most of the time, are evaluated first }
         ((IsFoldableArithOp(taicpu(next_ins),taicpu(p).oper[1]^.reg) and
           GetNextInstruction(next_ins,third_ins) and
           MatchInstruction(third_ins,A_MOV,[])
          ) or
          ((taicpu(next_ins).opcode=A_LEA) and
           GetNextInstruction(next_ins,third_ins) and
           MatchInstruction(third_ins,A_MOV,[]) and
           ((MatchReference(taicpu(next_ins).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
             (taicpu(next_ins).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
            ) or
            (MatchReference(taicpu(next_ins).oper[0]^.ref^,NR_INVALID,
              taicpu(p).oper[1]^.reg) and
             (taicpu(next_ins).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
            (MatchReferenceWithOffset(taicpu(next_ins).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
            (MatchReferenceWithOffset(taicpu(next_ins).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
           ) and
           ((MatchOperand(taicpu(p).oper[1]^,taicpu(third_ins).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,next_ins,UsedRegs)))
          )
         ) and
         { the last operand of the middle instruction must be the register
           that the trailing mov stores }
         MatchOperand(taicpu(next_ins).oper[taicpu(next_ins).ops-1]^,taicpu(third_ins).oper[0]^) and
         (taicpu(third_ins).oper[1]^.typ = top_ref) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs,tai(next_ins.next));
          { only fold when the store goes back to the location p loaded from
            and the stored register is not used after the store }
          if (RefsEqual(taicpu(third_ins).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
              not(RegUsedAfterInstruction(taicpu(third_ins).oper[0]^.reg,third_ins, TmpUsedRegs))) then
            begin
              case taicpu(next_ins).opcode of
                A_LEA :
                  begin
                    { the lea becomes an add to memory; its source operand is
                      whichever part of the lea reference is not the loaded
                      register: the index, else the base, else the offset }
                    taicpu(next_ins).opcode:=A_ADD;
                    if (taicpu(next_ins).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(next_ins).oper[0]^.ref^.index<>NR_NO) then
                      taicpu(next_ins).loadreg(0,taicpu(next_ins).oper[0]^.ref^.index)
                    else if (taicpu(next_ins).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(next_ins).oper[0]^.ref^.base<>NR_NO) then
                      taicpu(next_ins).loadreg(0,taicpu(next_ins).oper[0]^.ref^.base)
                    else
                      taicpu(next_ins).loadconst(0,taicpu(next_ins).oper[0]^.ref^.offset);
                    taicpu(next_ins).loadRef(1,taicpu(p).oper[0]^.ref^);
                    DebugMsg('Peephole FoldLea done',next_ins);
                  end;
                { one-operand instructions: the reference is operand 0 }
                A_INC,A_DEC,A_NOT,A_NEG :
                  taicpu(next_ins).loadRef(0,taicpu(p).oper[0]^.ref^);
                else
                  { two-operand instructions: the reference is the destination }
                  taicpu(next_ins).loadRef(1,taicpu(p).oper[0]^.ref^);
              end;
              asml.remove(p);
              asml.remove(third_ins);
              p.free;
              third_ins.free;
              p := next_ins
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
    { Second-pass peephole rewrite anchored on the IMUL instruction p.

      Changes
          mov  reg1,reg2
          imul y,reg2
      into
          imul y,reg1,reg2
      where y is a constant or an addr_full reference, and p is either the
      two-operand form or a three-operand form whose last two operands are
      the same register.  The preceding mov is removed and freed; p itself
      stays in place.

      The rewrite is only performed when RegUsedAfterInstruction reports
      that the destination register of p is not used after p.

      Returns true when the rewrite was performed, false otherwise.

      The destination of the mov has to be exactly the register multiplied
      by p.  An earlier variant also accepted a 32 bit mov followed by a
      64 bit imul on the same super register.  That case cannot be folded:
      the mov operands are 32 bit registers and would be loaded into an
      instruction with a 64 bit operand size, and a 64 bit multiply would
      read the full source register while the original sequence multiplied
      the zero extended lower half only. }
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      Result:=false;
      if (taicpu(p).ops >= 2) and
         ((taicpu(p).oper[0]^.typ = top_const) or
          ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
         (taicpu(p).oper[1]^.typ = top_reg) and
         ((taicpu(p).ops = 2) or
          ((taicpu(p).oper[2]^.typ = top_reg) and
           (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
         GetLastInstruction(p,hp1) and
         MatchInstruction(hp1,A_MOV,[]) and
         MatchOpType(hp1,top_reg,top_reg) and
         { same register, same size: see the note in the header comment }
         (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
            { change
                mov reg1,reg2
                imul y,reg2 to imul y,reg1,reg2 }
            begin
              taicpu(p).ops := 3;
              { operand 1 becomes the source of the removed mov,
                operand 2 its destination }
              taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
              taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
              DebugMsg('Peephole MovImul2Imul done',p);
              asml.remove(hp1);
              hp1.free;
              result:=true;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
    { First-pass peephole rewrites anchored on the AND instruction p.

      Nothing is done when p has no following instruction.  Otherwise the
      first matching pattern of the following list is applied:

        1. and const1,reg ; and const2,reg
             becomes  and (const1 and const2),reg
           p is removed and freed, p is moved to the second and, and the
           function returns true.
        2. and const,reg ; movzx subreg-of-reg,reg
             the movzx is removed when const already clears every bit above
             the width of the movzx source.
        3. and const,reg ; movsx subreg-of-reg,reg
             the movsx is removed when const additionally clears the sign
             bit of the movsx source.
        4. and x,reg ; conditional jump
             the and becomes a test when reg is not marked as used in
             UsedRegs.

      Patterns 2 to 4 leave the function result at false.

      Changes compared with the previous revision of this routine:
        - pattern 2 compared the sub register kinds of the movzx source and
          destination, which differ for every movzx, so the pattern never
          matched; it now compares the super registers like pattern 3.
        - patterns 2 and 3 skip a high byte source register: after the and
          such a register does not hold the low bits that the and result
          keeps, so the extension is not redundant. }
    var
      hp1 : tai;
    begin
      Result:=false;

      if not(GetNextInstruction(p, hp1)) then
        exit;

      if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_AND,[]) and
        MatchOpType(hp1,top_const,top_reg) and
        (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { the second register must contain the first one, so compare their subreg types }
        (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
        (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
        { change
            and const1, reg
            and const2, reg
          to
            and (const1 and const2), reg

          NOTE(review): when the first and works on a narrower sub register
          than the second one, const1 says nothing about the bits outside
          that sub register, so the merged constant looks too strict for
          this case - confirm whether such a sequence can reach this point.
        }
        begin
          taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
          DebugMsg('Peephole AndAnd2And done',hp1);
          asml.remove(p);
          p.Free;
          p:=hp1;
          Result:=true;
          exit;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVZX,[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        { source and destination of the movzx must be parts of the same
          register; their sub register kinds always differ, so the super
          registers are compared }
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { a high byte source is not made up of the low bits kept by the and }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        begin
          { the zero extension is redundant when the and constant has no bit
            set above the width of the movzx source }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
               ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('Peephole AndMovzToAnd done',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if MatchOpType(p,top_const,top_reg) and
        MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
        (taicpu(hp1).oper[0]^.typ = top_reg) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
        { a high byte source is not made up of the low bits kept by the and }
        (getsubreg(taicpu(hp1).oper[0]^.reg)<>R_SUBH) and
        (((taicpu(p).opsize=S_W) and
          (taicpu(hp1).opsize=S_BW)) or
         ((taicpu(p).opsize=S_L) and
          (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
          or
         ((taicpu(p).opsize=S_Q) and
          (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
        ) then
        begin
          { the sign extension is redundant when the and constant clears the
            sign bit of the movsx source and every bit above it }
          if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
               ) or
             (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
              ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
              or
             (((taicpu(hp1).opsize)=S_LQ) and
              ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
             )
{$endif x86_64}
            then
            begin
              DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
              asml.remove(hp1);
              hp1.free;
            end;
        end
      else if (taicpu(p).oper[1]^.typ = top_reg) and
        (hp1.typ = ait_instruction) and
        (taicpu(hp1).is_jmp) and
        (taicpu(hp1).opcode<>A_JMP) and
        not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
        { change
            and x, reg
            jxx
          to
            test x, reg
            jxx
          if reg is deallocated before the
          jump, but only if it's a conditional jump (PFV)
        }
        taicpu(p).opcode := A_TEST;
    end;
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    { Post-peephole rewrite of the MOV instruction p:

          mov $0,%reg   becomes   xor %reg,%reg

      The instruction is modified in place.  Nothing happens unless the
      source operand matches the constant 0, the destination is a register
      and the default flags register is not marked as used in UsedRegs. }
    begin
      { source must be the constant zero }
      if not MatchOperand(taicpu(p).oper[0]^,0) then
        exit;
      { destination must be a register }
      if taicpu(p).oper[1]^.typ <> Top_Reg then
        exit;
      { leave the mov alone while the flags are in use }
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;

      { both xor operands are the former destination register }
      taicpu(p).opcode := A_XOR;
      taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
    end;
  1239. end.