{ aoptx86.pas }
{
    Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe

    This unit contains the peephole optimizer.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. {$define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  28. TX86AsmOptimizer = class(TAsmOptimizer)
  29. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  30. protected
  31. { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
  32. function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  33. { checks whether reading the value in reg1 depends on the value of reg2. This
  34. is very similar to SuperRegisterEquals, except it takes into account that
  35. R_SUBH and R_SUBL are independendent (e.g. reading from AL does not
  36. depend on the value in AH). }
  37. function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  38. procedure DebugMsg(const s : string; p : tai);inline;
  39. procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
  40. class function IsExitCode(p : tai) : boolean;
  41. class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
  42. procedure RemoveLastDeallocForFuncRes(p : tai);
  43. function PrePeepholeOptSxx(var p : tai) : boolean;
  44. function OptPass1AND(var p : tai) : boolean;
  45. function OptPass1VMOVAP(var p : tai) : boolean;
  46. function OptPass1VOP(const p : tai) : boolean;
  47. function OptPass1MOV(var p : tai) : boolean;
  48. function OptPass1Movx(var p : tai) : boolean;
  49. function OptPass2MOV(var p : tai) : boolean;
  50. function OptPass2Imul(var p : tai) : boolean;
  51. function OptPass2Jmp(var p : tai) : boolean;
  52. function OptPass2Jcc(var p : tai) : boolean;
  53. procedure PostPeepholeOptMov(const p : tai);
  54. end;
  55. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  56. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  57. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  58. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  59. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  60. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  61. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  62. function RefsEqual(const r1, r2: treference): boolean;
  63. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  64. { returns true, if ref is a reference using only the registers passed as base and index
  65. and having an offset }
  66. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  67. implementation
  68. uses
  69. cutils,verbose,
  70. globals,
  71. cpuinfo,
  72. procinfo,
  73. aasmbase,
  74. aoptutils,
  75. symconst,symsym,
  76. itcpugas;
  { True if instr is an instruction with the given opcode and (when opsize
    is non-empty) an operand size contained in opsize. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      if instr.typ<>ait_instruction then
        exit(false);
      result:=(taicpu(instr).opcode=op) and
        ((opsize=[]) or (taicpu(instr).opsize in opsize));
    end;
  { True if instr is an instruction whose opcode is op1 or op2 and (when
    opsize is non-empty) whose operand size is contained in opsize. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if (taicpu(instr).opcode<>op1) and
         (taicpu(instr).opcode<>op2) then
        exit;
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { True if instr is an instruction whose opcode is one of op1/op2/op3 and
    (when opsize is non-empty) whose operand size is contained in opsize. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if (taicpu(instr).opcode<>op1) and
         (taicpu(instr).opcode<>op2) and
         (taicpu(instr).opcode<>op3) then
        exit;
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { True if instr is an instruction whose opcode appears in ops and (when
    opsize is non-empty) whose operand size is contained in opsize. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      op : TAsmOp;
    begin
      result:=false;
      { the type and size tests do not depend on the candidate opcode, so
        evaluate them once instead of on every loop iteration }
      if (instr.typ<>ait_instruction) or
         not((opsize=[]) or (taicpu(instr).opsize in opsize)) then
        exit;
      for op in ops do
        if taicpu(instr).opcode=op then
          begin
            result:=true;
            exit;
          end;
    end;
  { True if oper is a register operand referring exactly to reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      result:=(oper.typ=top_reg) and
              (oper.reg=reg);
    end;
  { True if oper is a constant operand with exactly the value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      result:=(oper.typ=top_const) and
              (oper.val=a);
    end;
  { True if both operands have the same type and the same value
    (constant value, register, or structurally equal reference). }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      if oper1.typ<>oper2.typ then
        exit(false);
      case oper1.typ of
        top_const:
          result:=oper1.val=oper2.val;
        top_reg:
          result:=oper1.reg=oper2.reg;
        top_ref:
          result:=RefsEqual(oper1.ref^,oper2.ref^);
        else
          internalerror(2013102801);
      end;
    end;
  { Field-by-field comparison of two references: offset, segment, base,
    index, scale factor, symbol, address kind and relative symbol must
    all match. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      RefsEqual:=
        (r1.offset=r2.offset) and
        (r1.base=r2.base) and
        (r1.index=r2.index) and
        (r1.segment=r2.segment) and
        (r1.scalefactor=r2.scalefactor) and
        (r1.symbol=r2.symbol) and
        (r1.relsymbol=r2.relsymbol) and
        (r1.refaddr=r2.refaddr);
    end;
  { True if ref is a plain (offset-free, unscaled, symbol-free) reference
    using only the given base and index registers; NR_INVALID acts as a
    wildcard for either register. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      { identical to MatchReferenceWithOffset except that the offset must
        additionally be zero, so delegate the common checks }
      Result:=(ref.offset=0) and
        MatchReferenceWithOffset(ref,base,index);
    end;
  { True if ref uses only the given base and index registers (NR_INVALID
    acts as a wildcard), has no segment override, no symbol and no scale;
    an arbitrary offset is allowed. }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=
        (ref.segment=NR_NO) and
        (ref.scalefactor in [0,1]) and
        (ref.symbol=nil) and
        (ref.relsymbol=nil) and
        ((base=NR_INVALID) or (ref.base=base)) and
        ((index=NR_INVALID) or (ref.index=index));
    end;
  175. {$ifdef DEBUG_AOPTCPU}
  176. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  177. begin
  178. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  179. end;
  180. {$else DEBUG_AOPTCPU}
  181. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  182. begin
  183. end;
  184. {$endif DEBUG_AOPTCPU}
  { Returns whether a write to reg1 replaces every bit of reg2.  For
    non-integer registers this is simply super-register equality; for
    integer registers the sub-register sizes decide. }
  function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    begin
      if not SuperRegistersEqual(reg1,reg2) then
        exit(false);
      { non-integer registers have no partial writes }
      if getregtype(reg1)<>R_INTREGISTER then
        exit(true); { because SuperRegisterEqual is true }
      case getsubreg(reg1) of
        { a write to R_SUBL doesn't change R_SUBH and, if reg2 is R_SUBW or
          higher, it preserves the high bits, so the new value depends on
          reg2's previous value, i.e.
            reg2 := (reg2 and $ffffff00) or byte(reg1) }
        R_SUBL:
          exit(getsubreg(reg2)=R_SUBL);
        { a write to R_SUBH doesn't change R_SUBL and, if reg2 is R_SUBW or
          higher, it effectively does
            reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00) }
        R_SUBH:
          exit(getsubreg(reg2)=R_SUBH);
        { if reg2 is R_SUBD or larger, a write to R_SUBW preserves the high
          16 bits of reg2:
            reg2 := (reg2 and $ffff0000) or word(reg1) }
        R_SUBW:
          exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
        { a write to R_SUBD always overwrites every other subregister,
          because it clears the high 32 bits of R_SUBQ on x86_64 }
        R_SUBD,
        R_SUBQ:
          exit(true);
        else
          internalerror(2017042801);
      end;
    end;
  { Returns whether reading reg1 observes (part of) the value held in reg2.
    Like SuperRegistersEqual, except that the independent low/high byte
    registers (R_SUBL vs R_SUBH) are taken into account. }
  function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    begin
      if not SuperRegistersEqual(reg1,reg2) then
        exit(false);
      { non-integer registers: same super register implies a dependency }
      if getregtype(reg1)<>R_INTREGISTER then
        exit(true); { because SuperRegisterEqual is true }
      case getsubreg(reg1) of
        { reading the low byte does not see the high byte and vice versa }
        R_SUBL:
          exit(getsubreg(reg2)<>R_SUBH);
        R_SUBH:
          exit(getsubreg(reg2)<>R_SUBL);
        { word-sized or larger reads cover both byte halves }
        R_SUBW,
        R_SUBD,
        R_SUBQ:
          exit(true);
        else
          internalerror(2017042802);
      end;
    end;
  { Pre-pass optimization of shift pairs: rewrites
        shr/sar const1, x
        shl      const2, x
    into "sar/and", "shl/and" or a single "and", depending on how
    const1 and const2 compare. }
  function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
    var
      hp1 : tai;
      maskval : TCGInt;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
         MatchInstruction(hp1,A_SHL,[]) and
         (taicpu(p).oper[0]^.typ=top_const) and
         (taicpu(hp1).oper[0]^.typ=top_const) and
         (taicpu(hp1).opsize=taicpu(p).opsize) and
         (taicpu(hp1).oper[1]^.typ=taicpu(p).oper[1]^.typ) and
         OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
        begin
          if (taicpu(p).oper[0]^.val>taicpu(hp1).oper[0]^.val) and
             not(cs_opt_size in current_settings.optimizerswitches) then
            begin
              { shr/sar const1, %reg
                shl     const2, %reg
                with const1 > const2: shift right by the difference, then
                mask away the low const2 bits }
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
              taicpu(hp1).opcode:=A_AND;
              maskval:=(1 shl (taicpu(hp1).oper[0]^.val))-1;
              case taicpu(p).opsize of
                S_B: taicpu(hp1).loadConst(0,maskval xor $ff);
                S_W: taicpu(hp1).loadConst(0,maskval xor $ffff);
                S_L: taicpu(hp1).loadConst(0,maskval xor aint($ffffffff));
                S_Q: taicpu(hp1).loadConst(0,maskval xor aint($ffffffffffffffff));
                else
                  Internalerror(2017050703)
              end;
            end
          else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
             not(cs_opt_size in current_settings.optimizerswitches) then
            begin
              { shr/sar const1, %reg
                shl     const2, %reg
                with const1 < const2: mask first, then shift left by the
                difference }
              taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
              taicpu(p).opcode:=A_AND;
              maskval:=(1 shl (taicpu(p).oper[0]^.val))-1;
              case taicpu(p).opsize of
                S_B: taicpu(p).loadConst(0,maskval xor $ff);
                S_W: taicpu(p).loadConst(0,maskval xor $ffff);
                S_L: taicpu(p).loadConst(0,maskval xor aint($ffffffff));
                S_Q: taicpu(p).loadConst(0,maskval xor aint($ffffffffffffffff));
                else
                  Internalerror(2017050702)
              end;
            end
          else if taicpu(p).oper[0]^.val=taicpu(hp1).oper[0]^.val then
            begin
              { shr/sar const1, %reg
                shl     const1, %reg
                with equal constants: the pair only clears the low const1
                bits, so a single AND suffices }
              taicpu(p).opcode:=A_AND;
              maskval:=(1 shl (taicpu(p).oper[0]^.val))-1;
              case taicpu(p).opsize of
                S_B: taicpu(p).loadConst(0,maskval xor $ff);
                S_W: taicpu(p).loadConst(0,maskval xor $ffff);
                S_L: taicpu(p).loadConst(0,maskval xor aint($ffffffff));
                S_Q: taicpu(p).loadConst(0,maskval xor aint($ffffffffffffffff));
                else
                  Internalerror(2017050701)
              end;
              asml.remove(hp1);
              hp1.free;
            end;
        end;
    end;
  { allocates register reg between (and including) instructions p1 and p2;
    the type of p1 and p2 must not be in SkipInstr.
    Note that this routine is called both from the peephole optimizer
    (where optinfo is not yet initialised) and from the cse (where it is). }
  procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
    var
      hp, start: tai;
      removedsomething,
      firstRemovedWasAlloc,
      lastRemovedWasDealloc: boolean;
    begin
{$ifdef EXTDEBUG}
      { if assigned(p1.optinfo) and
          (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
          internalerror(2004101010); }
{$endif EXTDEBUG}
      start:=p1;
      if (reg=NR_ESP) or
         (reg=current_procinfo.framepointer) or
         not(assigned(p1)) then
        { this happens with registers which are loaded implicitly, outside
          the current block (e.g. esi with self) }
        exit;
      { make sure we allocate it for this instruction }
      getnextinstruction(p2,p2);
      lastRemovedWasDealloc:=false;
      removedSomething:=false;
      firstRemovedWasAlloc:=false;
{$ifdef allocregdebug}
      hp:=tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
        ' from here...'));
      insertllitem(asml,p1.previous,p1,hp);
      hp:=tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
        ' till here...'));
      insertllitem(asml,p2,p2.next,hp);
{$endif allocregdebug}
      { do it the safe way: always allocate the full super register, as we
        do no register re-allocation in the peephole optimizer, this does
        not hurt }
      case getregtype(reg) of
        R_MMREGISTER:
          reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
        R_INTREGISTER:
          reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
      end;
      if not(RegInUsedRegs(reg,initialusedregs)) then
        begin
          hp:=tai_regalloc.alloc(reg,nil);
          insertllItem(p1.previous,p1,hp);
          IncludeRegInUsedRegs(reg,initialusedregs);
        end;
      while assigned(p1) and
            (p1<>p2) do
        begin
          if assigned(p1.optinfo) then
            internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
          p1:=tai(p1.next);
          repeat
            while assigned(p1) and
                  (p1.typ in (SkipInstr-[ait_regalloc])) do
              p1:=tai(p1.next);
            { remove all allocation/deallocation info about the register in
              between }
            if assigned(p1) and
               (p1.typ=ait_regalloc) then
              begin
                { same super register, different sub register? }
                if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
                  begin
                    if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
                      internalerror(2016101501);
                    tai_regalloc(p1).reg:=reg;
                  end;
                if tai_regalloc(p1).reg=reg then
                  begin
                    if not removedSomething then
                      begin
                        firstRemovedWasAlloc:=tai_regalloc(p1).ratype=ra_alloc;
                        removedSomething:=true;
                      end;
                    lastRemovedWasDealloc:=(tai_regalloc(p1).ratype=ra_dealloc);
                    hp:=tai(p1.Next);
                    asml.Remove(p1);
                    p1.free;
                    p1:=hp;
                  end
                else
                  p1:=tai(p1.next);
              end;
          until not(assigned(p1)) or
                not(p1.typ in SkipInstr);
        end;
      if assigned(p1) then
        begin
          { re-establish the alloc/dealloc boundaries that were removed }
          if firstRemovedWasAlloc then
            begin
              hp:=tai_regalloc.Alloc(reg,nil);
              insertLLItem(start.previous,start,hp);
            end;
          if lastRemovedWasDealloc then
            begin
              hp:=tai_regalloc.DeAlloc(reg,nil);
              insertLLItem(p1.previous,p1,hp);
            end;
        end;
    end;
  { Returns whether hp loads a completely new value into reg, i.e. the
    value written does not depend on reg's previous contents.  Handles the
    flags pseudo-register via the instruction property table, and a list
    of data-moving / result-producing opcodes for ordinary registers. }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      if not assigned(hp) or
         (hp.typ<>ait_instruction) then
        begin
          Result:=false;
          exit;
        end;
      p:=taicpu(hp);
      { flags register: decide per individual flag from the instruction's
        write properties }
      if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
        with insprop[p.opcode] do
          begin
            case getsubreg(reg) of
              { the whole flags register is only fully rewritten if every
                single arithmetic flag is }
              R_SUBW,R_SUBD,R_SUBQ:
                Result:=
                  RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
                  RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
                  RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
                  RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
                  RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
                  RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
              R_SUBFLAGCARRY:
                Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGPARITY:
                Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGAUXILIARY:
                Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGZERO:
                Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGSIGN:
                Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGOVERFLOW:
                Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGINTERRUPT:
                Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGDIRECTION:
                Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
              else
                internalerror(2017050501);
            end;
            exit;
          end;
      { ordinary registers: move-like instructions whose destination fully
        overwrites reg and whose source does not read reg, plus the fixed
        result registers of mul/div-style and string/flag instructions }
      Result :=
        (((p.opcode = A_MOV) or
          (p.opcode = A_MOVZX) or
          (p.opcode = A_MOVSX) or
          (p.opcode = A_LEA) or
          (p.opcode = A_VMOVSS) or
          (p.opcode = A_VMOVSD) or
          (p.opcode = A_VMOVAPD) or
          (p.opcode = A_VMOVAPS) or
          (p.opcode = A_VMOVQ) or
          (p.opcode = A_MOVSS) or
          (p.opcode = A_MOVSD) or
          (p.opcode = A_MOVQ) or
          (p.opcode = A_MOVAPD) or
          (p.opcode = A_MOVAPS) or
{$ifndef x86_64}
          (p.opcode = A_LDS) or
          (p.opcode = A_LES) or
{$endif not x86_64}
          (p.opcode = A_LFS) or
          (p.opcode = A_LGS) or
          (p.opcode = A_LSS)) and
         (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
         (p.oper[1]^.typ = top_reg) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
         ((p.oper[0]^.typ = top_const) or
          ((p.oper[0]^.typ = top_reg) and
           not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
          ((p.oper[0]^.typ = top_ref) and
           not RegInRef(reg,p.oper[0]^.ref^)))) or
        ((p.opcode = A_POP) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
        ((p.opcode = A_IMUL) and
         (p.ops=3) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
         (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
          ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
        ((((p.opcode = A_IMUL) or
           (p.opcode = A_MUL)) and
          (p.ops=1)) and
         (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
          ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
         (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
          ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
          ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
{$ifdef x86_64}
          or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
{$endif x86_64}
         )) or
        ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
        ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
{$ifdef x86_64}
        ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
{$endif x86_64}
        ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
{$ifndef x86_64}
        ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$endif not x86_64}
        ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$ifndef x86_64}
        ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
{$endif not x86_64}
        ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
        ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
        ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
        ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
{$ifdef x86_64}
        ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
{$endif x86_64}
        ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
        (((p.opcode = A_FSTSW) or
          (p.opcode = A_FNSTSW)) and
         (p.oper[0]^.typ=top_reg) and
         Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
        { xor/sub/sbb of a register with itself yields a value independent
          of the register's previous contents }
        (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
         (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
         (p.oper[0]^.reg=p.oper[1]^.reg) and
         Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
    end;
  { Returns whether p starts a function epilogue: a plain RET, a
    LEAVE/RET pair, or a frame-pointer restore (MOV fp,sp or LEA via fp)
    followed by POP fp and RET. }
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      hp2,hp3 : tai;
    begin
      { some x86-64 targets issue a NOP before the real exit code }
      if MatchInstruction(p,A_NOP,[]) then
        GetNextInstruction(p,p);
      result:=assigned(p) and (p.typ=ait_instruction) and
        ((taicpu(p).opcode=A_RET) or
         ((taicpu(p).opcode=A_LEAVE) and
          GetNextInstruction(p,hp2) and
          MatchInstruction(hp2,A_RET,[S_NO])
         ) or
         ((((taicpu(p).opcode=A_MOV) and
            MatchOpType(taicpu(p),top_reg,top_reg) and
            (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
            (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
           ((taicpu(p).opcode=A_LEA) and
            MatchOpType(taicpu(p),top_ref,top_reg) and
            (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
            (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
           )
          ) and
          GetNextInstruction(p,hp2) and
          MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
          MatchOpType(taicpu(hp2),top_reg) and
          (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
          GetNextInstruction(hp2,hp3) and
          MatchInstruction(hp3,A_RET,[S_NO])
         )
        );
    end;
  { Returns whether hp1 is an arithmetic operation whose destination is
    reg and whose other operand (if any) does not read reg, so the
    operation can be folded into a preceding load of reg. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    begin
      isFoldableArithOp:=False;
      case hp1.opcode of
        { two-operand forms: source must be a constant or a register
          different from reg, destination must be reg itself }
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          isFoldableArithOp:=
            ((hp1.oper[0]^.typ=top_const) or
             ((hp1.oper[0]^.typ=top_reg) and
              (hp1.oper[0]^.reg<>reg))) and
            (hp1.oper[1]^.typ=top_reg) and
            (hp1.oper[1]^.reg=reg);
        { one-operand forms: the single operand must be reg }
        A_INC,A_DEC,A_NEG,A_NOT:
          isFoldableArithOp:=
            (hp1.oper[0]^.typ=top_reg) and
            (hp1.oper[0]^.reg=reg);
      end;
    end;
  { Removes the last deallocation of the function-result register(s)
    preceding p, so the result register stays allocated through the
    function exit. }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { walks backwards from p and deletes the first matching 'dealloc' of
      the given integer super register, stopping once an instruction that
      uses the register is reached }
    procedure DoRemoveLastDeallocForFuncRes(supreg: tsuperregister);
      var
        hp2: tai;
      begin
        hp2:=p;
        repeat
          hp2:=tai(hp2.previous);
          if assigned(hp2) and
             (hp2.typ=ait_regalloc) and
             (tai_regalloc(hp2).ratype=ra_dealloc) and
             (getregtype(tai_regalloc(hp2).reg)=R_INTREGISTER) and
             (getsupreg(tai_regalloc(hp2).reg)=supreg) then
            begin
              asml.remove(hp2);
              hp2.free;
              break;
            end;
        until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
        orddef:
          if current_procinfo.procdef.returndef.size<>0 then
            begin
              DoRemoveLastDeallocForFuncRes(RS_EAX);
              { for int64/qword the result also occupies EDX }
              if current_procinfo.procdef.returndef.size=8 then
                DoRemoveLastDeallocForFuncRes(RS_EDX);
            end;
      end;
    end;
  629. function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
  630. var
  631. TmpUsedRegs : TAllUsedRegs;
  632. hp1,hp2 : tai;
  633. begin
  634. result:=false;
  635. if MatchOpType(taicpu(p),top_reg,top_reg) then
  636. begin
  637. { vmova* reg1,reg1
  638. =>
  639. <nop> }
  640. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  641. begin
  642. GetNextInstruction(p,hp1);
  643. asml.Remove(p);
  644. p.Free;
  645. p:=hp1;
  646. result:=true;
  647. end
  648. else if GetNextInstruction(p,hp1) then
  649. begin
  650. if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
  651. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  652. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  653. begin
  654. { vmova* reg1,reg2
  655. vmova* reg2,reg3
  656. dealloc reg2
  657. =>
  658. vmova* reg1,reg3 }
  659. CopyUsedRegs(TmpUsedRegs);
  660. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  661. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  662. begin
  663. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  664. asml.Remove(hp1);
  665. hp1.Free;
  666. result:=true;
  667. end
  668. { special case:
  669. vmova* reg1,reg2
  670. vmova* reg2,reg1
  671. =>
  672. vmova* reg1,reg2 }
  673. else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
  674. begin
  675. asml.Remove(hp1);
  676. hp1.Free;
  677. result:=true;
  678. end
  679. end
  680. else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
  681. { we mix single and double opperations here because we assume that the compiler
  682. generates vmovapd only after double operations and vmovaps only after single operations }
  683. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
  684. GetNextInstruction(hp1,hp2) and
  685. MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  686. MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
  687. begin
  688. CopyUsedRegs(TmpUsedRegs);
  689. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  690. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  691. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
  692. then
  693. begin
  694. taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
  695. asml.Remove(p);
  696. p.Free;
  697. asml.Remove(hp2);
  698. hp2.Free;
  699. p:=hp1;
  700. end;
  701. end;
  702. end;
  703. end;
  704. end;
  705. function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
  706. var
  707. TmpUsedRegs : TAllUsedRegs;
  708. hp1 : tai;
  709. begin
  710. result:=false;
  711. if GetNextInstruction(p,hp1) and
  712. { we mix single and double opperations here because we assume that the compiler
  713. generates vmovapd only after double operations and vmovaps only after single operations }
  714. MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  715. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  716. (taicpu(hp1).oper[1]^.typ=top_reg) then
  717. begin
  718. CopyUsedRegs(TmpUsedRegs);
  719. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  720. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
  721. ) then
  722. begin
  723. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  724. asml.Remove(hp1);
  725. hp1.Free;
  726. result:=true;
  727. end;
  728. end;
  729. end;
{ Pass-1 peephole optimizations applied when p is a MOV instruction.
  The method is one large if/else-if cascade; each branch handles one
  pattern, documented inline: mov+and, mov through a dead temp register,
  write/read-penalty avoidance for mov+test/or, dead stores before
  leave/ret, mov+cmp/test on the same memory, mov+mov pairs and triples,
  mov+arith+mov folding (MovOpMov2Op), mov 0 + bts/btr + or, and mov+lea.
  p is advanced past removed instructions; Result=true signals that the
  list changed (note: not every transforming branch sets it - some exit
  after an in-place rewrite instead).
  NOTE(review): the local is spelled "GetNextIntruction_p" (missing 's')
  throughout; kept as-is since this is a doc-only pass. }
function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  var
    hp1, hp2: tai;
    TmpUsedRegs : TAllUsedRegs;
    { true when hp1 holds the instruction following p }
    GetNextIntruction_p : Boolean;
  begin
    Result:=false;
    GetNextIntruction_p:=GetNextInstruction(p, hp1);
    { mov reg,x followed by and $ffffffff,reg (32 bit): the and is a no-op }
    if GetNextIntruction_p and
      MatchInstruction(hp1,A_AND,[]) and
      (taicpu(p).oper[1]^.typ = top_reg) and
      MatchOpType(taicpu(hp1),top_const,top_reg) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
      case taicpu(p).opsize Of
        S_L:
          if (taicpu(hp1).oper[0]^.val = $ffffffff) then
            begin
              DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
              asml.remove(hp1);
              hp1.free;
              Result:=true;
              exit;
            end;
      end
    else if GetNextIntruction_p and
      MatchInstruction(hp1,A_MOV,[]) and
      (taicpu(p).oper[1]^.typ = top_reg) and
      (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
      begin
        CopyUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
        { we have
            mov x, %treg
            mov %treg, y
        }
        if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
          not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
          { we've got
              mov x, %treg
              mov %treg, y
            with %treg is not used after }
          case taicpu(p).oper[0]^.typ Of
            top_reg:
              begin
                { change
                    mov %reg, %treg
                    mov %treg, y
                  to
                    mov %reg, y
                }
                taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
                DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
                asml.remove(hp1);
                hp1.free;
                ReleaseUsedRegs(TmpUsedRegs);
                Exit;
              end;
            top_ref:
              if (taicpu(hp1).oper[1]^.typ = top_reg) then
                begin
                  { change
                      mov mem, %treg
                      mov %treg, %reg
                    to
                      mov mem, %reg"
                  }
                  taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                  DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
                  asml.remove(hp1);
                  hp1.free;
                  ReleaseUsedRegs(TmpUsedRegs);
                  Exit;
                end;
          end;
        ReleaseUsedRegs(TmpUsedRegs);
      end
    else
      { Change
         mov %reg1, %reg2
         xxx %reg2, ???
        to
         mov %reg1, %reg2
         xxx %reg1, ???
        to avoid a write/read penalty
      }
      if MatchOpType(taicpu(p),top_reg,top_reg) and
        GetNextInstruction(p,hp1) and
        (tai(hp1).typ = ait_instruction) and
        (taicpu(hp1).ops >= 1) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
        { we have
            mov %reg1, %reg2
            XXX %reg2, ???
        }
        begin
          if ((taicpu(hp1).opcode = A_OR) or
            (taicpu(hp1).opcode = A_TEST)) and
            (taicpu(hp1).oper[1]^.typ = top_reg) and
            (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
            { we have
                mov %reg1, %reg2
                test/or %reg2, %reg2
            }
            begin
              CopyUsedRegs(TmpUsedRegs);
              { reg1 will be used after the first instruction,
                so update the allocation info }
              AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
              if GetNextInstruction(hp1, hp2) and
                (hp2.typ = ait_instruction) and
                taicpu(hp2).is_jmp and
                not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
                { change
                    mov %reg1, %reg2
                    test/or %reg2, %reg2
                    jxx
                  to
                    test %reg1, %reg1
                    jxx
                }
                begin
                  taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
                  taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
                  asml.remove(p);
                  p.free;
                  p := hp1;
                  ReleaseUsedRegs(TmpUsedRegs);
                  Exit;
                end
              else
                { change
                    mov %reg1, %reg2
                    test/or %reg2, %reg2
                  to
                    mov %reg1, %reg2
                    test/or %reg1, %reg1
                }
                begin
                  taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
                  taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
                end;
              ReleaseUsedRegs(TmpUsedRegs);
            end
        end
    else
      { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
        x >= RetOffset) as it doesn't do anything (it writes either to a
        parameter or to the temporary storage room for the function
        result)
      }
      if GetNextIntruction_p and
        (tai(hp1).typ = ait_instruction) then
        begin
          if IsExitCode(hp1) and
            MatchOpType(taicpu(p),top_reg,top_ref) and
            (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
            not(assigned(current_procinfo.procdef.funcretsym) and
               (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
            (taicpu(p).oper[1]^.ref^.index = NR_NO) then
            begin
              asml.remove(p);
              p.free;
              p:=hp1;
              DebugMsg('Peephole removed deadstore before leave/ret',p);
              RemoveLastDeallocForFuncRes(p);
              exit;
            end
          { change
              mov reg1, mem1
              test/cmp x, mem1
            to
              mov reg1, mem1
              test/cmp x, reg1
          }
          else if MatchOpType(taicpu(p),top_reg,top_ref) and
            MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
            (taicpu(hp1).oper[1]^.typ = top_ref) and
            RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
            begin
              taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
              DebugMsg('Peephole MovTestCmp2MovTestCmp 1',hp1);
              AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
            end;
        end;
    { Next instruction is also a MOV ? }
    if GetNextIntruction_p and
      MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
      begin
        if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
          (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
          { mov reg1, mem1 or mov mem1, reg1
            mov mem2, reg2 mov reg2, mem2}
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
              { mov reg1, mem1 or mov mem1, reg1
                mov mem2, reg1 mov reg2, mem1}
              begin
                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                  { Removes the second statement from
                      mov reg1, mem1/reg2
                      mov mem1/reg2, reg1 }
                  begin
                    if taicpu(p).oper[0]^.typ=top_reg then
                      AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                    DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
                    asml.remove(hp1);
                    hp1.free;
                    Result:=true;
                    exit;
                  end
                else
                  begin
                    CopyUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                    if (taicpu(p).oper[1]^.typ = top_ref) and
                      { mov reg1, mem1
                        mov mem2, reg1 }
                      (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
                      GetNextInstruction(hp1, hp2) and
                      MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
                      OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                      OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
                      not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
                      { change to
                          mov reg1, mem1 mov reg1, mem1
                          mov mem2, reg1 cmp reg1, mem2
                          cmp mem1, reg1
                      }
                      begin
                        asml.remove(hp2);
                        hp2.free;
                        taicpu(hp1).opcode := A_CMP;
                        taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
                        taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
                        AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
                        DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
                      end;
                    ReleaseUsedRegs(TmpUsedRegs);
                  end;
              end
            else if (taicpu(p).oper[1]^.typ=top_ref) and
              OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
              { second mov reads the memory just written: read the source
                register instead of the memory operand }
              begin
                AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
                taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
                DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
              end
            else
              begin
                CopyUsedRegs(TmpUsedRegs);
                if GetNextInstruction(hp1, hp2) and
                  MatchOpType(taicpu(p),top_ref,top_reg) and
                  MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
                  (taicpu(hp1).oper[1]^.typ = top_ref) and
                  MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
                  MatchOpType(taicpu(hp2),top_ref,top_reg) and
                  RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
                  if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
                    not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
                    { mov mem1, %reg1
                      mov %reg1, mem2
                      mov mem2, reg2
                      to:
                      mov mem1, reg2
                      mov reg2, mem2}
                    begin
                      AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
                      DebugMsg('PeepHole Optimization,MovMovMov2MovMov 1',p);
                      taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
                      taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
                      asml.remove(hp2);
                      hp2.free;
                    end
{$ifdef i386}
                  { this is enabled for i386 only, as the rules to create the reg sets below
                    are too complicated for x86-64, so this makes this code too error prone
                    on x86-64
                  }
                  else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
                    not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
                    not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
                    { mov mem1, reg1 mov mem1, reg1
                      mov reg1, mem2 mov reg1, mem2
                      mov mem2, reg2 mov mem2, reg1
                      to: to:
                      mov mem1, reg1 mov mem1, reg1
                      mov mem1, reg2 mov reg1, mem2
                      mov reg1, mem2
                      or (if mem1 depends on reg1
                      and/or if mem2 depends on reg2)
                      to:
                      mov mem1, reg1
                      mov reg1, mem2
                      mov reg1, reg2
                    }
                    begin
                      taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                      taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
                      taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
                      taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
                      AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                      { extend allocation of any registers used inside mem1 }
                      if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
                        (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                        AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
                      if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
                        (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                        AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
                    end
                  else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
                    begin
                      taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
                      AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                    end
                  else
                    begin
                      asml.remove(hp2);
                      hp2.free;
                    end
{$endif i386}
                  ;
                ReleaseUsedRegs(TmpUsedRegs);
              end;
          end
        (* { movl [mem1],reg1
             movl [mem1],reg2
             to
             movl [mem1],reg1
             movl reg1,reg2
           }
           else if (taicpu(p).oper[0]^.typ = top_ref) and
             (taicpu(p).oper[1]^.typ = top_reg) and
             (taicpu(hp1).oper[0]^.typ = top_ref) and
             (taicpu(hp1).oper[1]^.typ = top_reg) and
             (taicpu(p).opsize = taicpu(hp1).opsize) and
             RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
             (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
             (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
             taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
           else*)
        { movl const1,[mem1]
          movl [mem1],reg1
          to
          movl const1,reg1
          movl reg1,[mem1]
        }
        else if MatchOpType(Taicpu(p),top_const,top_ref) and
          MatchOpType(Taicpu(hp1),top_ref,top_reg) and
          (taicpu(p).opsize = taicpu(hp1).opsize) and
          RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
          not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
          begin
            AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
            taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
            taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
            taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
            taicpu(hp1).fileinfo := taicpu(p).fileinfo;
            DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
          end
      end
    else if (taicpu(p).oper[1]^.typ = top_reg) and
      GetNextIntruction_p and
      (hp1.typ = ait_instruction) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2,A_MOV,[]) and
      OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
      (taicpu(hp2).oper[0]^.typ=top_reg) and
      (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
      (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
       ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
        IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
      ) then
      { change movsX/movzX reg/ref, reg2
        add/sub/or/... reg3/$const, reg2
        mov reg2 reg/ref
        to add/sub/or/... reg3/$const, reg/ref }
      begin
        CopyUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
        If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
          begin
            { by example:
                movswl %si,%eax movswl %si,%eax p
                decl %eax addl %edx,%eax hp1
                movw %ax,%si movw %ax,%si hp2
              ->
                movswl %si,%eax movswl %si,%eax p
                decw %eax addw %edx,%eax hp1
                movw %ax,%si movw %ax,%si hp2
            }
            DebugMsg('Peephole Optimization MovOpMov2Op ('+
              std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
              std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
              std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
            taicpu(hp1).changeopsize(taicpu(hp2).opsize);
            {
              ->
                movswl %si,%eax movswl %si,%eax p
                decw %si addw %dx,%si hp1
                movw %ax,%si movw %ax,%si hp2
            }
            case taicpu(hp1).ops of
              1:
                begin
                  taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
                  if taicpu(hp1).oper[0]^.typ=top_reg then
                    setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                end;
              2:
                begin
                  taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                  { shift counts keep their (byte) size, hence the
                    exclusion of the shift/rotate-by-CL forms }
                  if (taicpu(hp1).oper[0]^.typ=top_reg) and
                    (taicpu(hp1).opcode<>A_SHL) and
                    (taicpu(hp1).opcode<>A_SHR) and
                    (taicpu(hp1).opcode<>A_SAR) then
                    setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                end;
              else
                internalerror(2008042701);
            end;
            {
              ->
                decw %si addw %dx,%si p
            }
            asml.remove(p);
            asml.remove(hp2);
            p.Free;
            hp2.Free;
            p := hp1;
          end;
        ReleaseUsedRegs(TmpUsedRegs);
      end
    else if GetNextIntruction_p and
      MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
      MatchOperand(Taicpu(p).oper[0]^,0) and
      (Taicpu(p).oper[1]^.typ = top_reg) and
      MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
      MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
      { mov reg1,0
        bts reg1,operand1 --> mov reg1,operand2
        or reg1,operand2 bts reg1,operand1}
      begin
        Taicpu(hp2).opcode:=A_MOV;
        asml.remove(hp1);
        insertllitem(hp2,hp2.next,hp1);
        asml.remove(p);
        p.free;
        p:=hp1;
      end
    else if GetNextIntruction_p and
      MatchInstruction(hp1,A_LEA,[S_L]) and
      MatchOpType(Taicpu(p),top_ref,top_reg) and
      ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
        (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
       ) or
       (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
        (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
       )
      ) then
      { mov reg1,ref
        lea reg2,[reg1,reg2]
        to
        add reg2,ref}
      begin
        CopyUsedRegs(TmpUsedRegs);
        { reg1 may not be used afterwards }
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
          begin
            Taicpu(hp1).opcode:=A_ADD;
            Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
            DebugMsg('Peephole MovLea2Add done',hp1);
            asml.remove(p);
            p.free;
            p:=hp1;
          end;
        ReleaseUsedRegs(TmpUsedRegs);
      end;
  end;
{ Pass-2 peephole optimizations when p is a MOV instruction.  Two
  transformations, documented inline: substituting the source register
  into the memory operand of a following mov/movzx/movsx that overwrites
  the copy anyway, and folding a mov/arith-or-lea/mov read-modify-write
  triple into a single memory-destination operation. }
function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
  var
    TmpUsedRegs : TAllUsedRegs;
    hp1,hp2: tai;
  begin
    Result:=false;
    if MatchOpType(taicpu(p),top_reg,top_reg) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
      MatchOpType(taicpu(hp1),top_ref,top_reg) and
      ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
       or
       (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
      ) and
      (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
      { mov reg1, reg2
        mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
      begin
        { rewrite the reference to use reg1 directly and drop the copy }
        if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
          taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
        if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
          taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
        asml.remove(p);
        p.free;
        p := hp1;
        Result:=true;
        exit;
      end
    else if (taicpu(p).oper[0]^.typ = top_ref) and
      GetNextInstruction(p,hp1) and
      (hp1.typ = ait_instruction) and
      { while the GetNextInstruction(hp1,hp2) call could be factored out,
        doing it separately in both branches allows to do the cheap checks
        with low probability earlier }
      ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
        GetNextInstruction(hp1,hp2) and
        MatchInstruction(hp2,A_MOV,[])
       ) or
       ((taicpu(hp1).opcode=A_LEA) and
        GetNextInstruction(hp1,hp2) and
        MatchInstruction(hp2,A_MOV,[]) and
        ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
          (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
         ) or
         (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
           taicpu(p).oper[1]^.reg) and
          (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
         (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
         (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
        ) and
        ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
       )
      ) and
      { the arith/lea result must be what the final mov stores back }
      MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
      (taicpu(hp2).oper[1]^.typ = top_ref) then
      begin
        CopyUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
        if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
          not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2, TmpUsedRegs))) then
          { change mov (ref), reg
            add/sub/or/... reg2/$const, reg
            mov reg, (ref)
            # release reg
            to add/sub/or/... reg2/$const, (ref) }
          begin
            case taicpu(hp1).opcode of
              A_INC,A_DEC,A_NOT,A_NEG :
                taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
              A_LEA :
                begin
                  { lea becomes an add of whichever component (index, base
                    or offset) is not the loaded register itself }
                  taicpu(hp1).opcode:=A_ADD;
                  if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
                    taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                  else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
                    taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
                  else
                    taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
                  taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                  DebugMsg('Peephole FoldLea done',hp1);
                end
              else
                taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
            end;
            asml.remove(p);
            asml.remove(hp2);
            p.free;
            hp2.free;
            p := hp1
          end;
        ReleaseUsedRegs(TmpUsedRegs);
      end;
  end;
{ Pass-2 optimization for IMUL: when an imul whose destination equals its
  second source is directly preceded by a reg,reg mov into that
  destination, and the destination dies afterwards, the pair is merged
  into the three-operand imul form reading the mov's source directly:
      mov  reg1,reg2
      imul y,reg2       ->  imul y,reg1,reg2
  The 32->64 bit case (movl feeding imulq on the same super-register) is
  accepted as well. }
function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
  var
    TmpUsedRegs : TAllUsedRegs;
    hp1 : tai;
  begin
    Result:=false;
    if (taicpu(p).ops >= 2) and
      ((taicpu(p).oper[0]^.typ = top_const) or
       ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
      (taicpu(p).oper[1]^.typ = top_reg) and
      ((taicpu(p).ops = 2) or
       ((taicpu(p).oper[2]^.typ = top_reg) and
        (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
      GetLastInstruction(p,hp1) and
      MatchInstruction(hp1,A_MOV,[]) and
      MatchOpType(taicpu(hp1),top_reg,top_reg) and
      ((taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) or
       ((taicpu(hp1).opsize=S_L) and (taicpu(p).opsize=S_Q) and SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(p).oper[1]^.reg))) then
      begin
        CopyUsedRegs(TmpUsedRegs);
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
          { change
              mov reg1,reg2
              imul y,reg2 to imul y,reg1,reg2 }
          begin
            taicpu(p).ops := 3;
            taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
            taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
            DebugMsg('Peephole MovImul2Imul done',p);
            asml.remove(hp1);
            hp1.free;
            result:=true;
          end;
        ReleaseUsedRegs(TmpUsedRegs);
      end;
  end;
{ Pass-2 optimization for unconditional JMP: when the jump target label is
  (after skipping intervening labels) immediately followed by a RET, the
  jmp itself is turned into that ret (copying an eventual pop-count
  operand), saving the branch.  Returns true on change. }
function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
  var
    hp1 : tai;
  begin
    {
      change
             jmp .L1
             ...
         .L1:
             ret
      into
             ret
    }
    result:=false;
    { only plain direct jumps to a symbol, no register/indexed targets }
    if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) then
      begin
        hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
        if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and
          MatchInstruction(hp1,A_RET,[S_NO]) then
          begin
            { the label loses one reference; the jmp is rewritten in place }
            tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
            taicpu(p).opcode:=A_RET;
            taicpu(p).is_jmp:=false;
            taicpu(p).ops:=taicpu(hp1).ops;
            case taicpu(hp1).ops of
              0:
                taicpu(p).clearop(0);
              1:
                { ret $n: copy the stack-adjustment constant }
                taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
              else
                internalerror(2016041301);
            end;
            result:=true;
          end;
      end;
  end;
  1377. function CanBeCMOV(p : tai) : boolean;
  1378. begin
  1379. CanBeCMOV:=assigned(p) and
  1380. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
  1381. { we can't use cmov ref,reg because
  1382. ref could be nil and cmov still throws an exception
  1383. if ref=nil but the mov isn't done (FK)
  1384. or ((taicpu(p).oper[0]^.typ = top_ref) and
  1385. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  1386. }
  1387. MatchOpType(taicpu(p),top_reg,top_reg);
  1388. end;
{ Pass-2 optimizations for conditional jumps:
  - jb/jnb around a single inc/dec is rewritten into (cmc+)adc/sbb with a
    zero constant, eliminating the branch;
  - on CPUs with CMOV, short runs of reg,reg movs guarded by a jCC (and
    optionally an if/else shape with a jmp between two mov runs) are
    converted into cmovcc instructions with the inverted condition(s).
  Returns true when the instruction stream changed. }
function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
  var
    hp1,hp2,hp3: tai;
    carryadd_opcode : TAsmOp;
    l : Longint;
    condition : TAsmCond;
  begin
    { jb @@1 cmc
      inc/dec operand --> adc/sbb operand,0
      @@1:
      ... and ...
      jnb @@1
      inc/dec operand --> adc/sbb operand,0
      @@1: }
    result:=false;
    if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
      GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
      (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
      begin
        carryadd_opcode:=A_NONE;
        if Taicpu(p).condition in [C_NAE,C_B] then
          begin
            if Taicpu(hp1).opcode=A_INC then
              carryadd_opcode:=A_ADC;
            if Taicpu(hp1).opcode=A_DEC then
              carryadd_opcode:=A_SBB;
            if carryadd_opcode<>A_NONE then
              begin
                { the branch taken on carry becomes a cmc, so the following
                  adc/sbb 0 only has an effect when carry was clear }
                Taicpu(p).clearop(0);
                Taicpu(p).ops:=0;
                Taicpu(p).is_jmp:=false;
                Taicpu(p).opcode:=A_CMC;
                Taicpu(p).condition:=C_NONE;
                Taicpu(hp1).ops:=2;
                Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                Taicpu(hp1).loadconst(0,0);
                Taicpu(hp1).opcode:=carryadd_opcode;
                result:=true;
                exit;
              end;
          end;
        if Taicpu(p).condition in [C_AE,C_NB] then
          begin
            if Taicpu(hp1).opcode=A_INC then
              carryadd_opcode:=A_ADC;
            if Taicpu(hp1).opcode=A_DEC then
              carryadd_opcode:=A_SBB;
            if carryadd_opcode<>A_NONE then
              begin
                { jump taken when carry clear: drop the jump entirely and
                  make the inc/dec carry-dependent }
                asml.remove(p);
                p.free;
                Taicpu(hp1).ops:=2;
                Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                Taicpu(hp1).loadconst(0,0);
                Taicpu(hp1).opcode:=carryadd_opcode;
                p:=hp1;
                result:=true;
                exit;
              end;
          end;
      end;
{$ifndef i8086}
    if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
      begin
        { check for
            jCC xxx
            <several movs>
            xxx:
        }
        l:=0;
        GetNextInstruction(p, hp1);
        while assigned(hp1) and
          CanBeCMOV(hp1) and
          { stop on labels }
          not(hp1.typ=ait_label) do
          begin
            inc(l);
            GetNextInstruction(hp1,hp1);
          end;
        if assigned(hp1) then
          begin
            if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
              begin
                if (l<=4) and (l>0) then
                  begin
                    { convert the skipped movs to cmovs with the inverted
                      condition, then drop the jCC }
                    condition:=inverse_cond(taicpu(p).condition);
                    hp2:=p;
                    GetNextInstruction(p,hp1);
                    p:=hp1;
                    repeat
                      taicpu(hp1).opcode:=A_CMOVcc;
                      taicpu(hp1).condition:=condition;
                      GetNextInstruction(hp1,hp1);
                    until not(assigned(hp1)) or
                      not(CanBeCMOV(hp1));
                    { wait with removing else GetNextInstruction could
                      ignore the label if it was the only usage in the
                      jump moved away }
                    tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                    { if the label refs. reach zero, remove any alignment before the label }
                    if (hp1.typ=ait_align) and (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).getrefs=0) then
                      begin
                        asml.Remove(hp1);
                        hp1.Free;
                      end;
                    asml.remove(hp2);
                    hp2.free;
                    result:=true;
                    exit;
                  end;
              end
            else
              begin
                { check further for
                    jCC xxx
                    <several movs 1>
                    jmp yyy
                xxx:
                    <several movs 2>
                yyy:
                }
                { hp2 points to jmp yyy }
                hp2:=hp1;
                { skip hp1 to xxx }
                GetNextInstruction(hp1, hp1);
                if assigned(hp2) and
                  assigned(hp1) and
                  (l<=3) and
                  (hp2.typ=ait_instruction) and
                  (taicpu(hp2).is_jmp) and
                  (taicpu(hp2).condition=C_None) and
                  { real label and jump, no further references to the
                    label are allowed }
                  (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
                  FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                  begin
                    l:=0;
                    { skip hp1 to <several moves 2> }
                    GetNextInstruction(hp1, hp1);
                    while assigned(hp1) and
                      CanBeCMOV(hp1) do
                      begin
                        inc(l);
                        GetNextInstruction(hp1, hp1);
                      end;
                    { hp1 points to yyy: }
                    if assigned(hp1) and
                      FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                      begin
                        { first mov run: cmov on the inverted jCC condition }
                        condition:=inverse_cond(taicpu(p).condition);
                        GetNextInstruction(p,hp1);
                        hp3:=p;
                        p:=hp1;
                        repeat
                          taicpu(hp1).opcode:=A_CMOVcc;
                          taicpu(hp1).condition:=condition;
                          GetNextInstruction(hp1,hp1);
                        until not(assigned(hp1)) or
                          not(CanBeCMOV(hp1));
                        { hp2 is still at jmp yyy }
                        GetNextInstruction(hp2,hp1);
                        { hp2 is now at xxx: }
                        { NOTE(review): the comment above seems to refer to
                          hp1, which is what was just advanced to the xxx:
                          label - hp2 itself is not modified here }
                        condition:=inverse_cond(condition);
                        GetNextInstruction(hp1,hp1);
                        { hp1 is now at <several movs 2> }
                        { second mov run: cmov on the original condition
                          (the condition was inverted twice) }
                        repeat
                          taicpu(hp1).opcode:=A_CMOVcc;
                          taicpu(hp1).condition:=condition;
                          GetNextInstruction(hp1,hp1);
                        until not(assigned(hp1)) or
                          not(CanBeCMOV(hp1));
                        {
                        asml.remove(hp1.next)
                        hp1.next.free;
                        asml.remove(hp1);
                        hp1.free;
                        }
                        { remove jCC }
                        tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                        asml.remove(hp3);
                        hp3.free;
                        { remove jmp }
                        tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                        asml.remove(hp2);
                        hp2.free;
                        result:=true;
                        exit;
                      end;
                  end;
              end;
          end;
      end;
{$endif i8086}
  end;
  1583. function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
  1584. var
  1585. hp1,hp2: tai;
  1586. begin
  1587. result:=false;
  1588. if (taicpu(p).oper[1]^.typ = top_reg) and
  1589. GetNextInstruction(p,hp1) and
  1590. (hp1.typ = ait_instruction) and
  1591. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1592. GetNextInstruction(hp1,hp2) and
  1593. MatchInstruction(hp2,A_MOV,[]) and
  1594. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1595. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  1596. {$ifdef i386}
  1597. { not all registers have byte size sub registers on i386 }
  1598. ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
  1599. {$endif i386}
  1600. (((taicpu(hp1).ops=2) and
  1601. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  1602. ((taicpu(hp1).ops=1) and
  1603. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  1604. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  1605. begin
  1606. { change movsX/movzX reg/ref, reg2
  1607. add/sub/or/... reg3/$const, reg2
  1608. mov reg2 reg/ref
  1609. to add/sub/or/... reg3/$const, reg/ref }
  1610. { by example:
  1611. movswl %si,%eax movswl %si,%eax p
  1612. decl %eax addl %edx,%eax hp1
  1613. movw %ax,%si movw %ax,%si hp2
  1614. ->
  1615. movswl %si,%eax movswl %si,%eax p
  1616. decw %eax addw %edx,%eax hp1
  1617. movw %ax,%si movw %ax,%si hp2
  1618. }
  1619. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1620. {
  1621. ->
  1622. movswl %si,%eax movswl %si,%eax p
  1623. decw %si addw %dx,%si hp1
  1624. movw %ax,%si movw %ax,%si hp2
  1625. }
  1626. case taicpu(hp1).ops of
  1627. 1:
  1628. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1629. 2:
  1630. begin
  1631. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1632. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1633. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1634. end;
  1635. else
  1636. internalerror(2008042701);
  1637. end;
  1638. {
  1639. ->
  1640. decw %si addw %dx,%si p
  1641. }
  1642. DebugMsg('PeepHole Optimization,var3',p);
  1643. asml.remove(p);
  1644. asml.remove(hp2);
  1645. p.free;
  1646. hp2.free;
  1647. p:=hp1;
  1648. end
  1649. { removes superfluous And's after movzx's }
  1650. else if taicpu(p).opcode=A_MOVZX then
  1651. begin
  1652. if (taicpu(p).oper[1]^.typ = top_reg) and
  1653. GetNextInstruction(p, hp1) and
  1654. (tai(hp1).typ = ait_instruction) and
  1655. (taicpu(hp1).opcode = A_AND) and
  1656. (taicpu(hp1).oper[0]^.typ = top_const) and
  1657. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1658. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1659. begin
  1660. case taicpu(p).opsize Of
  1661. S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
  1662. if (taicpu(hp1).oper[0]^.val = $ff) then
  1663. begin
  1664. DebugMsg('PeepHole Optimization,var4',p);
  1665. asml.remove(hp1);
  1666. hp1.free;
  1667. end;
  1668. S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
  1669. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1670. begin
  1671. DebugMsg('PeepHole Optimization,var5',p);
  1672. asml.remove(hp1);
  1673. hp1.free;
  1674. end;
  1675. {$ifdef x86_64}
  1676. S_LQ:
  1677. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  1678. begin
  1679. if (cs_asm_source in current_settings.globalswitches) then
  1680. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var6')),p);
  1681. asml.remove(hp1);
  1682. hp1.Free;
  1683. end;
  1684. {$endif x86_64}
  1685. end;
  1686. end;
  1687. { changes some movzx constructs to faster synonims (all examples
  1688. are given with eax/ax, but are also valid for other registers)}
  1689. if (taicpu(p).oper[1]^.typ = top_reg) then
  1690. if (taicpu(p).oper[0]^.typ = top_reg) then
  1691. case taicpu(p).opsize of
  1692. S_BW:
  1693. begin
  1694. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1695. not(cs_opt_size in current_settings.optimizerswitches) then
  1696. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1697. begin
  1698. taicpu(p).opcode := A_AND;
  1699. taicpu(p).changeopsize(S_W);
  1700. taicpu(p).loadConst(0,$ff);
  1701. DebugMsg('PeepHole Optimization,var7',p);
  1702. end
  1703. else if GetNextInstruction(p, hp1) and
  1704. (tai(hp1).typ = ait_instruction) and
  1705. (taicpu(hp1).opcode = A_AND) and
  1706. (taicpu(hp1).oper[0]^.typ = top_const) and
  1707. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1708. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1709. { Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1710. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1711. begin
  1712. DebugMsg('PeepHole Optimization,var8',p);
  1713. taicpu(p).opcode := A_MOV;
  1714. taicpu(p).changeopsize(S_W);
  1715. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1716. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1717. end;
  1718. end;
  1719. S_BL:
  1720. begin
  1721. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1722. not(cs_opt_size in current_settings.optimizerswitches) then
  1723. { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
  1724. begin
  1725. taicpu(p).opcode := A_AND;
  1726. taicpu(p).changeopsize(S_L);
  1727. taicpu(p).loadConst(0,$ff)
  1728. end
  1729. else if GetNextInstruction(p, hp1) and
  1730. (tai(hp1).typ = ait_instruction) and
  1731. (taicpu(hp1).opcode = A_AND) and
  1732. (taicpu(hp1).oper[0]^.typ = top_const) and
  1733. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1734. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1735. { Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1736. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1737. begin
  1738. DebugMsg('PeepHole Optimization,var10',p);
  1739. taicpu(p).opcode := A_MOV;
  1740. taicpu(p).changeopsize(S_L);
  1741. { do not use R_SUBWHOLE
  1742. as movl %rdx,%eax
  1743. is invalid in assembler PM }
  1744. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  1745. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1746. end
  1747. end;
  1748. {$ifndef i8086}
  1749. S_WL:
  1750. begin
  1751. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1752. not(cs_opt_size in current_settings.optimizerswitches) then
  1753. { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
  1754. begin
  1755. DebugMsg('PeepHole Optimization,var11',p);
  1756. taicpu(p).opcode := A_AND;
  1757. taicpu(p).changeopsize(S_L);
  1758. taicpu(p).loadConst(0,$ffff);
  1759. end
  1760. else if GetNextInstruction(p, hp1) and
  1761. (tai(hp1).typ = ait_instruction) and
  1762. (taicpu(hp1).opcode = A_AND) and
  1763. (taicpu(hp1).oper[0]^.typ = top_const) and
  1764. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1765. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1766. { Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1767. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1768. begin
  1769. DebugMsg('PeepHole Optimization,var12',p);
  1770. taicpu(p).opcode := A_MOV;
  1771. taicpu(p).changeopsize(S_L);
  1772. { do not use R_SUBWHOLE
  1773. as movl %rdx,%eax
  1774. is invalid in assembler PM }
  1775. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  1776. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1777. end;
  1778. end;
  1779. {$endif i8086}
  1780. end
  1781. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1782. begin
  1783. if GetNextInstruction(p, hp1) and
  1784. (tai(hp1).typ = ait_instruction) and
  1785. (taicpu(hp1).opcode = A_AND) and
  1786. MatchOpType(taicpu(hp1),top_const,top_reg) and
  1787. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1788. begin
  1789. taicpu(p).opcode := A_MOV;
  1790. case taicpu(p).opsize Of
  1791. S_BL:
  1792. begin
  1793. DebugMsg('PeepHole Optimization,var13',p);
  1794. taicpu(p).changeopsize(S_L);
  1795. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1796. end;
  1797. S_WL:
  1798. begin
  1799. DebugMsg('PeepHole Optimization,var14',p);
  1800. taicpu(p).changeopsize(S_L);
  1801. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1802. end;
  1803. S_BW:
  1804. begin
  1805. DebugMsg('PeepHole Optimization,var15',p);
  1806. taicpu(p).changeopsize(S_W);
  1807. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1808. end;
  1809. {$ifdef x86_64}
  1810. S_BQ:
  1811. begin
  1812. DebugMsg('PeepHole Optimization,var16',p);
  1813. taicpu(p).changeopsize(S_Q);
  1814. taicpu(hp1).loadConst(
  1815. 0, taicpu(hp1).oper[0]^.val and $ff);
  1816. end;
  1817. S_WQ:
  1818. begin
  1819. DebugMsg('PeepHole Optimization,var17',p);
  1820. taicpu(p).changeopsize(S_Q);
  1821. taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
  1822. end;
  1823. S_LQ:
  1824. begin
  1825. DebugMsg('PeepHole Optimization,var18',p);
  1826. taicpu(p).changeopsize(S_Q);
  1827. taicpu(hp1).loadConst(
  1828. 0, taicpu(hp1).oper[0]^.val and $ffffffff);
  1829. end;
  1830. {$endif x86_64}
  1831. else
  1832. Internalerror(2017050704)
  1833. end;
  1834. end;
  1835. end;
  1836. end;
  1837. end;
  1838. function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  1839. var
  1840. hp1 : tai;
  1841. begin
  1842. Result:=false;
  1843. if not(GetNextInstruction(p, hp1)) then
  1844. exit;
  1845. if MatchOpType(taicpu(p),top_const,top_reg) and
  1846. MatchInstruction(hp1,A_AND,[]) and
  1847. MatchOpType(taicpu(hp1),top_const,top_reg) and
  1848. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  1849. { the second register must contain the first one, so compare their subreg types }
  1850. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  1851. (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
  1852. { change
  1853. and const1, reg
  1854. and const2, reg
  1855. to
  1856. and (const1 and const2), reg
  1857. }
  1858. begin
  1859. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  1860. DebugMsg('Peephole AndAnd2And done',hp1);
  1861. asml.remove(p);
  1862. p.Free;
  1863. p:=hp1;
  1864. Result:=true;
  1865. exit;
  1866. end
  1867. else if MatchOpType(taicpu(p),top_const,top_reg) and
  1868. MatchInstruction(hp1,A_MOVZX,[]) and
  1869. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1870. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  1871. (getsubreg(taicpu(hp1).oper[0]^.reg)=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  1872. (((taicpu(p).opsize=S_W) and
  1873. (taicpu(hp1).opsize=S_BW)) or
  1874. ((taicpu(p).opsize=S_L) and
  1875. (taicpu(hp1).opsize in [S_WL,S_BL]))
  1876. {$ifdef x86_64}
  1877. or
  1878. ((taicpu(p).opsize=S_Q) and
  1879. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  1880. {$endif x86_64}
  1881. ) then
  1882. begin
  1883. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  1884. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
  1885. ) or
  1886. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  1887. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
  1888. {$ifdef x86_64}
  1889. or
  1890. (((taicpu(hp1).opsize)=S_LQ) and
  1891. ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
  1892. )
  1893. {$endif x86_64}
  1894. then
  1895. begin
  1896. DebugMsg('Peephole AndMovzToAnd done',p);
  1897. asml.remove(hp1);
  1898. hp1.free;
  1899. end;
  1900. end
  1901. else if MatchOpType(taicpu(p),top_const,top_reg) and
  1902. MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
  1903. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1904. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  1905. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  1906. (((taicpu(p).opsize=S_W) and
  1907. (taicpu(hp1).opsize=S_BW)) or
  1908. ((taicpu(p).opsize=S_L) and
  1909. (taicpu(hp1).opsize in [S_WL,S_BL]))
  1910. {$ifdef x86_64}
  1911. or
  1912. ((taicpu(p).opsize=S_Q) and
  1913. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  1914. {$endif x86_64}
  1915. ) then
  1916. begin
  1917. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  1918. ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
  1919. ) or
  1920. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  1921. ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
  1922. {$ifdef x86_64}
  1923. or
  1924. (((taicpu(hp1).opsize)=S_LQ) and
  1925. ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
  1926. )
  1927. {$endif x86_64}
  1928. then
  1929. begin
  1930. DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
  1931. asml.remove(hp1);
  1932. hp1.free;
  1933. end;
  1934. end
  1935. else if (taicpu(p).oper[1]^.typ = top_reg) and
  1936. (hp1.typ = ait_instruction) and
  1937. (taicpu(hp1).is_jmp) and
  1938. (taicpu(hp1).opcode<>A_JMP) and
  1939. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  1940. { change
  1941. and x, reg
  1942. jxx
  1943. to
  1944. test x, reg
  1945. jxx
  1946. if reg is deallocated before the
  1947. jump, but only if it's a conditional jump (PFV)
  1948. }
  1949. taicpu(p).opcode := A_TEST;
  1950. end;
  1951. procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
  1952. begin
  1953. if MatchOperand(taicpu(p).oper[0]^,0) and
  1954. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1955. not(RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  1956. { change "mov $0, %reg" into "xor %reg, %reg" }
  1957. begin
  1958. taicpu(p).opcode := A_XOR;
  1959. taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
  1960. end;
  1961. end;
  1962. end.