aoptx86.pas

  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  28. TX86AsmOptimizer = class(TAsmOptimizer)
  29. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  30. function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
  31. function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
  32. protected
  33. { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
  34. function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  35. { checks whether reading the value in reg1 depends on the value of reg2. This
  36. is very similar to SuperRegistersEqual, except it takes into account that
  37. R_SUBH and R_SUBL are independent (e.g. reading from AL does not
  38. depend on the value in AH). }
  39. function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  40. procedure DebugMsg(const s : string; p : tai);inline;
  41. procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
  42. class function IsExitCode(p : tai) : boolean;
  43. class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
  44. procedure RemoveLastDeallocForFuncRes(p : tai);
  45. function PrePeepholeOptSxx(var p : tai) : boolean;
  46. function OptPass1AND(var p : tai) : boolean;
  47. function OptPass1VMOVAP(var p : tai) : boolean;
  48. function OptPass1VOP(const p : tai) : boolean;
  49. function OptPass1MOV(var p : tai) : boolean;
  50. function OptPass1Movx(var p : tai) : boolean;
  51. function OptPass1MOVAP(var p : tai) : boolean;
  52. function OptPass1MOVXX(var p : tai) : boolean;
  53. function OptPass1OP(const p : tai) : boolean;
  54. function OptPass1LEA(var p : tai) : boolean;
  55. function OptPass2MOV(var p : tai) : boolean;
  56. function OptPass2Imul(var p : tai) : boolean;
  57. function OptPass2Jmp(var p : tai) : boolean;
  58. function OptPass2Jcc(var p : tai) : boolean;
  59. procedure PostPeepholeOptMov(const p : tai);
  60. procedure OptReferences;
  61. end;
  62. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  63. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  64. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  65. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  66. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  67. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  68. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  69. function RefsEqual(const r1, r2: treference): boolean;
  70. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  71. { returns true, if ref is a reference using only the registers passed as base and index
  72. and having an offset }
  73. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
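  { Usage note (illustrative, based on the implementations below): passing
    NR_INVALID for base or index acts as a wildcard, e.g.
      MatchReference(taicpu(p).oper[0]^.ref^,NR_EBP,NR_INVALID)
    matches any unscaled (%ebp) or (%ebp,%reg) reference with zero offset and
    no symbol or segment override, while MatchReferenceWithOffset performs the
    same check without the zero-offset requirement. }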
  74. implementation
  75. uses
  76. cutils,verbose,
  77. globals,
  78. cpuinfo,
  79. procinfo,
  80. aasmbase,
  81. aoptutils,
  82. symconst,symsym,
  83. cgx86,
  84. itcpugas;
  85. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  86. begin
  87. result :=
  88. (instr.typ = ait_instruction) and
  89. (taicpu(instr).opcode = op) and
  90. ((opsize = []) or (taicpu(instr).opsize in opsize));
  91. end;
  92. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  93. begin
  94. result :=
  95. (instr.typ = ait_instruction) and
  96. ((taicpu(instr).opcode = op1) or
  97. (taicpu(instr).opcode = op2)
  98. ) and
  99. ((opsize = []) or (taicpu(instr).opsize in opsize));
  100. end;
  101. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  102. begin
  103. result :=
  104. (instr.typ = ait_instruction) and
  105. ((taicpu(instr).opcode = op1) or
  106. (taicpu(instr).opcode = op2) or
  107. (taicpu(instr).opcode = op3)
  108. ) and
  109. ((opsize = []) or (taicpu(instr).opsize in opsize));
  110. end;
  111. function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
  112. const opsize : topsizes) : boolean;
  113. var
  114. op : TAsmOp;
  115. begin
  116. result:=false;
  117. for op in ops do
  118. begin
  119. if (instr.typ = ait_instruction) and
  120. (taicpu(instr).opcode = op) and
  121. ((opsize = []) or (taicpu(instr).opsize in opsize)) then
  122. begin
  123. result:=true;
  124. exit;
  125. end;
  126. end;
  127. end;
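  { Illustrative usage (register/instruction names chosen for the example):
    the opsize parameter is a set and the empty set acts as a size wildcard,
    e.g.
      MatchInstruction(hp1,A_SHL,[])             - SHL of any operand size
      MatchInstruction(hp1,A_CMP,A_TEST,[S_L])   - 32 bit CMP or TEST only }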
  128. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  129. begin
  130. result := (oper.typ = top_reg) and (oper.reg = reg);
  131. end;
  132. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  133. begin
  134. result := (oper.typ = top_const) and (oper.val = a);
  135. end;
  136. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  137. begin
  138. result := oper1.typ = oper2.typ;
  139. if result then
  140. case oper1.typ of
  141. top_const:
  142. Result:=oper1.val = oper2.val;
  143. top_reg:
  144. Result:=oper1.reg = oper2.reg;
  145. top_ref:
  146. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  147. else
  148. internalerror(2013102801);
  149. end
  150. end;
  151. function RefsEqual(const r1, r2: treference): boolean;
  152. begin
  153. RefsEqual :=
  154. (r1.offset = r2.offset) and
  155. (r1.segment = r2.segment) and (r1.base = r2.base) and
  156. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  157. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  158. (r1.relsymbol = r2.relsymbol);
  159. end;
  160. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  161. begin
  162. Result:=(ref.offset=0) and
  163. (ref.scalefactor in [0,1]) and
  164. (ref.segment=NR_NO) and
  165. (ref.symbol=nil) and
  166. (ref.relsymbol=nil) and
  167. ((base=NR_INVALID) or
  168. (ref.base=base)) and
  169. ((index=NR_INVALID) or
  170. (ref.index=index));
  171. end;
  172. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  173. begin
  174. Result:=(ref.scalefactor in [0,1]) and
  175. (ref.segment=NR_NO) and
  176. (ref.symbol=nil) and
  177. (ref.relsymbol=nil) and
  178. ((base=NR_INVALID) or
  179. (ref.base=base)) and
  180. ((index=NR_INVALID) or
  181. (ref.index=index));
  182. end;
  183. function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  184. begin
  185. Result:=RegReadByInstruction(reg,hp);
  186. end;
  187. function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  188. var
  189. p: taicpu;
  190. opcount: longint;
  191. begin
  192. RegReadByInstruction := false;
  193. if hp.typ <> ait_instruction then
  194. exit;
  195. p := taicpu(hp);
  196. case p.opcode of
  197. A_CALL:
  198. regreadbyinstruction := true;
  199. A_IMUL:
  200. case p.ops of
  201. 1:
  202. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  203. (
  204. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  205. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  206. );
  207. 2,3:
  208. regReadByInstruction :=
  209. reginop(reg,p.oper[0]^) or
  210. reginop(reg,p.oper[1]^);
  211. end;
  212. A_MUL:
  213. begin
  214. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  215. (
  216. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  217. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  218. );
  219. end;
  220. A_IDIV,A_DIV:
  221. begin
  222. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  223. (
  224. (getregtype(reg)=R_INTREGISTER) and
  225. (
  226. (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
  227. )
  228. );
  229. end;
  230. else
  231. begin
  232. if (p.opcode=A_LEA) and is_segment_reg(reg) then
  233. begin
  234. RegReadByInstruction := false;
  235. exit;
  236. end;
  237. for opcount := 0 to p.ops-1 do
  238. if (p.oper[opCount]^.typ = top_ref) and
  239. RegInRef(reg,p.oper[opcount]^.ref^) then
  240. begin
  241. RegReadByInstruction := true;
  242. exit
  243. end;
  244. { special handling for SSE MOVSD }
  245. if (p.opcode=A_MOVSD) and (p.ops>0) then
  246. begin
  247. if p.ops<>2 then
  248. internalerror(2017042702);
  249. regReadByInstruction := reginop(reg,p.oper[0]^) or
  250. (
  251. (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
  252. );
  253. exit;
  254. end;
  255. with insprop[p.opcode] do
  256. begin
  257. if getregtype(reg)=R_INTREGISTER then
  258. begin
  259. case getsupreg(reg) of
  260. RS_EAX:
  261. if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
  262. begin
  263. RegReadByInstruction := true;
  264. exit
  265. end;
  266. RS_ECX:
  267. if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
  268. begin
  269. RegReadByInstruction := true;
  270. exit
  271. end;
  272. RS_EDX:
  273. if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
  274. begin
  275. RegReadByInstruction := true;
  276. exit
  277. end;
  278. RS_EBX:
  279. if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
  280. begin
  281. RegReadByInstruction := true;
  282. exit
  283. end;
  284. RS_ESP:
  285. if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
  286. begin
  287. RegReadByInstruction := true;
  288. exit
  289. end;
  290. RS_EBP:
  291. if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
  292. begin
  293. RegReadByInstruction := true;
  294. exit
  295. end;
  296. RS_ESI:
  297. if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
  298. begin
  299. RegReadByInstruction := true;
  300. exit
  301. end;
  302. RS_EDI:
  303. if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
  304. begin
  305. RegReadByInstruction := true;
  306. exit
  307. end;
  308. end;
  309. end;
  310. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  311. begin
  312. if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
  313. begin
  314. case p.condition of
  315. C_A,C_NBE, { CF=0 and ZF=0 }
  316. C_BE,C_NA: { CF=1 or ZF=1 }
  317. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
  318. C_AE,C_NB,C_NC, { CF=0 }
  319. C_B,C_NAE,C_C: { CF=1 }
  320. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
  321. C_NE,C_NZ, { ZF=0 }
  322. C_E,C_Z: { ZF=1 }
  323. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
  324. C_G,C_NLE, { ZF=0 and SF=OF }
  325. C_LE,C_NG: { ZF=1 or SF<>OF }
  326. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  327. C_GE,C_NL, { SF=OF }
  328. C_L,C_NGE: { SF<>OF }
  329. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  330. C_NO, { OF=0 }
  331. C_O: { OF=1 }
  332. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
  333. C_NP,C_PO, { PF=0 }
  334. C_P,C_PE: { PF=1 }
  335. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
  336. C_NS, { SF=0 }
  337. C_S: { SF=1 }
  338. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
  339. else
  340. internalerror(2017042701);
  341. end;
  342. if RegReadByInstruction then
  343. exit;
  344. end;
  345. case getsubreg(reg) of
  346. R_SUBW,R_SUBD,R_SUBQ:
  347. RegReadByInstruction :=
  348. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  349. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  350. Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
  351. R_SUBFLAGCARRY:
  352. RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  353. R_SUBFLAGPARITY:
  354. RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  355. R_SUBFLAGAUXILIARY:
  356. RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  357. R_SUBFLAGZERO:
  358. RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  359. R_SUBFLAGSIGN:
  360. RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  361. R_SUBFLAGOVERFLOW:
  362. RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  363. R_SUBFLAGINTERRUPT:
  364. RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
  365. R_SUBFLAGDIRECTION:
  366. RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  367. else
  368. internalerror(2017042601);
  369. end;
  370. exit;
  371. end;
  372. if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
  373. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  374. (p.oper[0]^.reg=p.oper[1]^.reg) then
  375. exit;
  376. if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
  377. begin
  378. RegReadByInstruction := true;
  379. exit
  380. end;
  381. if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
  382. begin
  383. RegReadByInstruction := true;
  384. exit
  385. end;
  386. if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
  387. begin
  388. RegReadByInstruction := true;
  389. exit
  390. end;
  391. if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
  392. begin
  393. RegReadByInstruction := true;
  394. exit
  395. end;
  396. end;
  397. end;
  398. end;
  399. end;
  400. {$ifdef DEBUG_AOPTCPU}
  401. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  402. begin
  403. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  404. end;
  405. {$else DEBUG_AOPTCPU}
  406. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  407. begin
  408. end;
  409. {$endif DEBUG_AOPTCPU}
  410. function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  411. begin
  412. if not SuperRegistersEqual(reg1,reg2) then
  413. exit(false);
  414. if getregtype(reg1)<>R_INTREGISTER then
  415. exit(true); {because SuperRegistersEqual is true}
  416. case getsubreg(reg1) of
  417. { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
  418. higher, it preserves the high bits, so the new value depends on
  419. reg2's previous value. In other words, it is equivalent to doing:
  420. reg2 := (reg2 and $ffffff00) or byte(reg1); }
  421. R_SUBL:
  422. exit(getsubreg(reg2)=R_SUBL);
  423. { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
  424. higher, it actually does a:
  425. reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
  426. R_SUBH:
  427. exit(getsubreg(reg2)=R_SUBH);
  428. { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
  429. bits of reg2:
  430. reg2 := (reg2 and $ffff0000) or word(reg1); }
  431. R_SUBW:
  432. exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
  433. { a write to R_SUBD always overwrites every other subregister,
  434. because it clears the high 32 bits of R_SUBQ on x86_64 }
  435. R_SUBD,
  436. R_SUBQ:
  437. exit(true);
  438. else
  439. internalerror(2017042801);
  440. end;
  441. end;
  442. function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  443. begin
  444. if not SuperRegistersEqual(reg1,reg2) then
  445. exit(false);
  446. if getregtype(reg1)<>R_INTREGISTER then
  447. exit(true); {because SuperRegistersEqual is true}
  448. case getsubreg(reg1) of
  449. R_SUBL:
  450. exit(getsubreg(reg2)<>R_SUBH);
  451. R_SUBH:
  452. exit(getsubreg(reg2)<>R_SUBL);
  453. R_SUBW,
  454. R_SUBD,
  455. R_SUBQ:
  456. exit(true);
  457. else
  458. internalerror(2017042802);
  459. end;
  460. end;
  461. function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
  462. var
  463. hp1 : tai;
  464. l : TCGInt;
  465. begin
  466. result:=false;
  467. { changes the code sequence
  468. shr/sar const1, x
  469. shl const2, x
  470. to
  471. either "sar/and", "shl/and" or just "and" depending on const1 and const2 }
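  { Worked example (illustrative, 32 bit operands assumed), covering the three
    cases handled below:
      shrl $5,%eax               shrl $2,%eax
      shll $3,%eax        =>     andl $0xfffffff8,%eax     (const1 > const2)

      shrl $2,%eax               andl $0xfffffffc,%eax
      shll $5,%eax        =>     shll $3,%eax              (const1 < const2)

      shrl $3,%eax        =>     andl $0xfffffff8,%eax     (const1 = const2)
      shll $3,%eax
    since shifting right and then left only clears low bits, plus a net shift
    in the unequal cases. }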
  472. if GetNextInstruction(p, hp1) and
  473. MatchInstruction(hp1,A_SHL,[]) and
  474. (taicpu(p).oper[0]^.typ = top_const) and
  475. (taicpu(hp1).oper[0]^.typ = top_const) and
  476. (taicpu(hp1).opsize = taicpu(p).opsize) and
  477. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
  478. OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
  479. begin
  480. if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
  481. not(cs_opt_size in current_settings.optimizerswitches) then
  482. begin
  483. { shr/sar const1, %reg
  484. shl const2, %reg
  485. with const1 > const2 }
  486. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
  487. taicpu(hp1).opcode := A_AND;
  488. l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
  489. case taicpu(p).opsize Of
  490. S_B: taicpu(hp1).loadConst(0,l Xor $ff);
  491. S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
  492. S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
  493. S_Q: taicpu(hp1).loadConst(0,l Xor aint($ffffffffffffffff));
  494. else
  495. Internalerror(2017050703)
  496. end;
  497. end
  498. else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
  499. not(cs_opt_size in current_settings.optimizerswitches) then
  500. begin
  501. { shr/sar const1, %reg
  502. shl const2, %reg
  503. with const1 < const2 }
  504. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
  505. taicpu(p).opcode := A_AND;
  506. l := (1 shl (taicpu(p).oper[0]^.val))-1;
  507. case taicpu(p).opsize Of
  508. S_B: taicpu(p).loadConst(0,l Xor $ff);
  509. S_W: taicpu(p).loadConst(0,l Xor $ffff);
  510. S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
  511. S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
  512. else
  513. Internalerror(2017050702)
  514. end;
  515. end
  516. else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
  517. begin
  518. { shr/sar const1, %reg
  519. shl const2, %reg
  520. with const1 = const2 }
  521. taicpu(p).opcode := A_AND;
  522. l := (1 shl (taicpu(p).oper[0]^.val))-1;
  523. case taicpu(p).opsize Of
  524. S_B: taicpu(p).loadConst(0,l Xor $ff);
  525. S_W: taicpu(p).loadConst(0,l Xor $ffff);
  526. S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
  527. S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
  528. else
  529. Internalerror(2017050701)
  530. end;
  531. asml.remove(hp1);
  532. hp1.free;
  533. end;
  534. end;
  535. end;
  536. { allocates register reg between (and including) instructions p1 and p2
  537. the type of p1 and p2 must not be in SkipInstr
  538. note that this routine is both called from the peephole optimizer
  539. (where optinfo is not yet initialised) and from the cse (where it is) }
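{ Illustrative effect (a sketch, register names chosen for the example): given
      # alloc %eax        <- p1
      movl $1,%eax
      # dealloc %eax
      # alloc %eax
      movl %eax,%ebx      <- p2
  the intermediate dealloc/alloc markers for %eax between p1 and p2 are
  removed, so the register stays allocated over the whole range. }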
  540. procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
  541. var
  542. hp, start: tai;
  543. removedsomething,
  544. firstRemovedWasAlloc,
  545. lastRemovedWasDealloc: boolean;
  546. begin
  547. {$ifdef EXTDEBUG}
  548. { if assigned(p1.optinfo) and
  549. (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
  550. internalerror(2004101010); }
  551. {$endif EXTDEBUG}
  552. start := p1;
  553. if (reg = NR_ESP) or
  554. (reg = current_procinfo.framepointer) or
  555. not(assigned(p1)) then
  556. { this happens with registers which are loaded implicitly, outside the }
  557. { current block (e.g. esi with self) }
  558. exit;
  559. { make sure we allocate it for this instruction }
  560. getnextinstruction(p2,p2);
  561. lastRemovedWasDealloc := false;
  562. removedSomething := false;
  563. firstRemovedWasAlloc := false;
  564. {$ifdef allocregdebug}
  565. hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  566. ' from here...'));
  567. insertllitem(asml,p1.previous,p1,hp);
  568. hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  569. ' till here...'));
  570. insertllitem(asml,p2,p2.next,hp);
  571. {$endif allocregdebug}
  572. { do it the safe way: always allocate the full super register,
  573. as we do no register re-allocation in the peephole optimizer,
  574. so this does not hurt
  575. }
  576. case getregtype(reg) of
  577. R_MMREGISTER:
  578. reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
  579. R_INTREGISTER:
  580. reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
  581. end;
  582. if not(RegInUsedRegs(reg,initialusedregs)) then
  583. begin
  584. hp := tai_regalloc.alloc(reg,nil);
  585. insertllItem(p1.previous,p1,hp);
  586. IncludeRegInUsedRegs(reg,initialusedregs);
  587. end;
  588. while assigned(p1) and
  589. (p1 <> p2) do
  590. begin
  591. if assigned(p1.optinfo) then
  592. internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
  593. p1 := tai(p1.next);
  594. repeat
  595. while assigned(p1) and
  596. (p1.typ in (SkipInstr-[ait_regalloc])) Do
  597. p1 := tai(p1.next);
  598. { remove all allocation/deallocation info about the register in between }
  599. if assigned(p1) and
  600. (p1.typ = ait_regalloc) then
  601. begin
  602. { same super register, different sub register? }
  603. if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
  604. begin
  605. if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
  606. internalerror(2016101501);
  607. tai_regalloc(p1).reg:=reg;
  608. end;
  609. if tai_regalloc(p1).reg=reg then
  610. begin
  611. if not removedSomething then
  612. begin
  613. firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
  614. removedSomething := true;
  615. end;
  616. lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
  617. hp := tai(p1.Next);
  618. asml.Remove(p1);
  619. p1.free;
  620. p1 := hp;
  621. end
  622. else
  623. p1 := tai(p1.next);
  624. end;
  625. until not(assigned(p1)) or
  626. not(p1.typ in SkipInstr);
  627. end;
  628. if assigned(p1) then
  629. begin
  630. if firstRemovedWasAlloc then
  631. begin
  632. hp := tai_regalloc.Alloc(reg,nil);
  633. insertLLItem(start.previous,start,hp);
  634. end;
  635. if lastRemovedWasDealloc then
  636. begin
  637. hp := tai_regalloc.DeAlloc(reg,nil);
  638. insertLLItem(p1.previous,p1,hp);
  639. end;
  640. end;
  641. end;
  642. function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  643. var
  644. p: taicpu;
  645. begin
  646. if not assigned(hp) or
  647. (hp.typ <> ait_instruction) then
  648. begin
  649. Result := false;
  650. exit;
  651. end;
  652. p := taicpu(hp);
  653. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  654. with insprop[p.opcode] do
  655. begin
  656. case getsubreg(reg) of
  657. R_SUBW,R_SUBD,R_SUBQ:
  658. Result:=
  659. RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
  660. RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
  661. RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
  662. RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
  663. RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
  664. RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
  665. R_SUBFLAGCARRY:
  666. Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
  667. R_SUBFLAGPARITY:
  668. Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
  669. R_SUBFLAGAUXILIARY:
  670. Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
  671. R_SUBFLAGZERO:
  672. Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
  673. R_SUBFLAGSIGN:
  674. Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
  675. R_SUBFLAGOVERFLOW:
  676. Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
  677. R_SUBFLAGINTERRUPT:
  678. Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
  679. R_SUBFLAGDIRECTION:
  680. Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
  681. else
  682. internalerror(2017050501);
  683. end;
  684. exit;
  685. end;
  686. Result :=
  687. (((p.opcode = A_MOV) or
  688. (p.opcode = A_MOVZX) or
  689. (p.opcode = A_MOVSX) or
  690. (p.opcode = A_LEA) or
  691. (p.opcode = A_VMOVSS) or
  692. (p.opcode = A_VMOVSD) or
  693. (p.opcode = A_VMOVAPD) or
  694. (p.opcode = A_VMOVAPS) or
  695. (p.opcode = A_VMOVQ) or
  696. (p.opcode = A_MOVSS) or
  697. (p.opcode = A_MOVSD) or
  698. (p.opcode = A_MOVQ) or
  699. (p.opcode = A_MOVAPD) or
  700. (p.opcode = A_MOVAPS) or
  701. {$ifndef x86_64}
  702. (p.opcode = A_LDS) or
  703. (p.opcode = A_LES) or
  704. {$endif not x86_64}
  705. (p.opcode = A_LFS) or
  706. (p.opcode = A_LGS) or
  707. (p.opcode = A_LSS)) and
  708. (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
  709. (p.oper[1]^.typ = top_reg) and
  710. (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
  711. ((p.oper[0]^.typ = top_const) or
  712. ((p.oper[0]^.typ = top_reg) and
  713. not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  714. ((p.oper[0]^.typ = top_ref) and
  715. not RegInRef(reg,p.oper[0]^.ref^)))) or
  716. ((p.opcode = A_POP) and
  717. (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
  718. ((p.opcode = A_IMUL) and
  719. (p.ops=3) and
  720. (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
  721. (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
  722. ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
  723. ((((p.opcode = A_IMUL) or
  724. (p.opcode = A_MUL)) and
  725. (p.ops=1)) and
  726. (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  727. ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
  728. (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  729. ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  730. ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
  731. {$ifdef x86_64}
  732. or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
  733. {$endif x86_64}
  734. )) or
  735. ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  736. ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
  737. {$ifdef x86_64}
  738. ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
  739. {$endif x86_64}
  740. ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  741. {$ifndef x86_64}
  742. ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  743. ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  744. {$endif not x86_64}
  745. ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  746. ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  747. ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  748. {$ifndef x86_64}
  749. ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  750. {$endif not x86_64}
  751. ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  752. ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
  753. ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
  754. ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
  755. {$ifdef x86_64}
  756. ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
  757. {$endif x86_64}
  758. ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  759. (((p.opcode = A_FSTSW) or
  760. (p.opcode = A_FNSTSW)) and
  761. (p.oper[0]^.typ=top_reg) and
  762. Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  763. (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
  764. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  765. (p.oper[0]^.reg=p.oper[1]^.reg) and
  766. Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
  767. end;
  768. class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
  769. var
  770. hp2,hp3 : tai;
  771. begin
  772. { some x86-64 targets issue a NOP before the real exit code }
  773. if MatchInstruction(p,A_NOP,[]) then
  774. GetNextInstruction(p,p);
  775. result:=assigned(p) and (p.typ=ait_instruction) and
  776. ((taicpu(p).opcode = A_RET) or
  777. ((taicpu(p).opcode=A_LEAVE) and
  778. GetNextInstruction(p,hp2) and
  779. MatchInstruction(hp2,A_RET,[S_NO])
  780. ) or
  781. ((((taicpu(p).opcode=A_MOV) and
  782. MatchOpType(taicpu(p),top_reg,top_reg) and
  783. (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
  784. (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
  785. ((taicpu(p).opcode=A_LEA) and
  786. MatchOpType(taicpu(p),top_ref,top_reg) and
  787. (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
  788. (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
  789. )
  790. ) and
  791. GetNextInstruction(p,hp2) and
  792. MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
  793. MatchOpType(taicpu(hp2),top_reg) and
  794. (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
  795. GetNextInstruction(hp2,hp3) and
  796. MatchInstruction(hp3,A_RET,[S_NO])
  797. )
  798. );
  799. end;
  800. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  801. begin
  802. isFoldableArithOp := False;
  803. case hp1.opcode of
  804. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  805. isFoldableArithOp :=
  806. ((taicpu(hp1).oper[0]^.typ = top_const) or
  807. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  808. (taicpu(hp1).oper[0]^.reg <> reg))) and
  809. (taicpu(hp1).oper[1]^.typ = top_reg) and
  810. (taicpu(hp1).oper[1]^.reg = reg);
  811. A_INC,A_DEC,A_NEG,A_NOT:
  812. isFoldableArithOp :=
  813. (taicpu(hp1).oper[0]^.typ = top_reg) and
  814. (taicpu(hp1).oper[0]^.reg = reg);
  815. end;
  816. end;
  817. procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);
  818. procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
  819. var
  820. hp2: tai;
  821. begin
  822. hp2 := p;
  823. repeat
  824. hp2 := tai(hp2.previous);
  825. if assigned(hp2) and
  826. (hp2.typ = ait_regalloc) and
  827. (tai_regalloc(hp2).ratype=ra_dealloc) and
  828. (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
  829. (getsupreg(tai_regalloc(hp2).reg) = supreg) then
  830. begin
  831. asml.remove(hp2);
  832. hp2.free;
  833. break;
  834. end;
  835. until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
  836. end;
  837. begin
  838. case current_procinfo.procdef.returndef.typ of
  839. arraydef,recorddef,pointerdef,
  840. stringdef,enumdef,procdef,objectdef,errordef,
  841. filedef,setdef,procvardef,
  842. classrefdef,forwarddef:
  843. DoRemoveLastDeallocForFuncRes(RS_EAX);
  844. orddef:
  845. if current_procinfo.procdef.returndef.size <> 0 then
  846. begin
  847. DoRemoveLastDeallocForFuncRes(RS_EAX);
  848. { for int64/qword }
  849. if current_procinfo.procdef.returndef.size = 8 then
  850. DoRemoveLastDeallocForFuncRes(RS_EDX);
  851. end;
  852. end;
  853. end;
  854. function TX86AsmOptimizer.OptPass1MOVAP(var p : tai) : boolean;
  855. var
  856. TmpUsedRegs : TAllUsedRegs;
  857. hp1,hp2 : tai;
  858. alloc,dealloc : tai_regalloc;
  859. begin
  860. result:=false;
  861. if MatchOpType(taicpu(p),top_reg,top_reg) and
  862. GetNextInstruction(p, hp1) and
  863. (hp1.typ = ait_instruction) and
  864. GetNextInstruction(hp1, hp2) and
  865. MatchInstruction(hp2,taicpu(p).opcode,[]) and
  866. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  867. MatchOpType(taicpu(hp2),top_reg,top_reg) and
  868. MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  869. (((taicpu(p).opcode=A_MOVAPS) and
  870. ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
  871. (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
  872. ((taicpu(p).opcode=A_MOVAPD) and
  873. ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
  874. (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
  875. ) then
  876. { change
  877. movapX reg,reg2
  878. addsX/subsX/... reg3, reg2
  879. movapX reg2,reg
  880. to
  881. addsX/subsX/... reg3,reg
  882. }
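  { e.g. (illustrative instance of the pattern above; register names assumed):
      movaps %xmm0,%xmm1
      addss  %xmm2,%xmm1      =>     addss %xmm2,%xmm0
      movaps %xmm1,%xmm0
    provided %xmm1 is not used after the second movaps. }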
  883. begin
  884. CopyUsedRegs(TmpUsedRegs);
  885. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  886. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  887. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  888. begin
  889. DebugMsg('Peephole Optimization MovapXOpMovapX2Op ('+
  890. std_op2str[taicpu(p).opcode]+' '+
  891. std_op2str[taicpu(hp1).opcode]+' '+
  892. std_op2str[taicpu(hp2).opcode]+') done',p);
  893. { we cannot eliminate the first move if
  894. the operation uses the same register for source and dest }
  895. if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
  896. begin
  897. asml.remove(p);
  898. p.Free;
  899. end;
  900. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  901. asml.remove(hp2);
  902. hp2.Free;
  903. p:=hp1;
  904. result:=true;
  905. end;
  906. ReleaseUsedRegs(TmpUsedRegs);
  907. end
  908. end;
  909. function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
  910. var
  911. TmpUsedRegs : TAllUsedRegs;
  912. hp1,hp2 : tai;
  913. begin
  914. result:=false;
  915. if MatchOpType(taicpu(p),top_reg,top_reg) then
  916. begin
  917. { vmova* reg1,reg1
  918. =>
  919. <nop> }
  920. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  921. begin
  922. GetNextInstruction(p,hp1);
  923. asml.Remove(p);
  924. p.Free;
  925. p:=hp1;
  926. result:=true;
  927. end
  928. else if GetNextInstruction(p,hp1) then
  929. begin
  930. if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
  931. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  932. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  933. begin
  934. { vmova* reg1,reg2
  935. vmova* reg2,reg3
  936. dealloc reg2
  937. =>
  938. vmova* reg1,reg3 }
  939. CopyUsedRegs(TmpUsedRegs);
  940. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  941. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  942. begin
  943. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  944. asml.Remove(hp1);
  945. hp1.Free;
  946. result:=true;
  947. end
  948. { special case:
  949. vmova* reg1,reg2
  950. vmova* reg2,reg1
  951. =>
  952. vmova* reg1,reg2 }
  953. else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
  954. begin
  955. asml.Remove(hp1);
  956. hp1.Free;
  957. result:=true;
  958. end
  959. end
  960. else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
  961. { we mix single and double operations here because we assume that the compiler
  962. generates vmovapd only after double operations and vmovaps only after single operations }
  963. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
  964. GetNextInstruction(hp1,hp2) and
  965. MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  966. MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
  967. begin
  968. CopyUsedRegs(TmpUsedRegs);
  969. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  970. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  971. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
  972. then
  973. begin
  974. taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
  975. asml.Remove(p);
  976. p.Free;
  977. asml.Remove(hp2);
  978. hp2.Free;
  979. p:=hp1;
  980. end;
  981. end;
  982. end;
  983. end;
  984. end;
  985. function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
  986. var
  987. TmpUsedRegs : TAllUsedRegs;
  988. hp1 : tai;
  989. begin
  990. result:=false;
  991. { replace
  992. V<Op>X %mreg1,%mreg2,%mreg3
  993. VMovX %mreg3,%mreg4
  994. dealloc %mreg3
  995. by
  996. V<Op>X %mreg1,%mreg2,%mreg4
  997. ?
  998. }
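  { e.g. (illustrative instance of the pattern above; register names assumed):
      vaddsd %xmm1,%xmm0,%xmm2
      vmovapd %xmm2,%xmm3          =>     vaddsd %xmm1,%xmm0,%xmm3
    when %xmm2 is deallocated after the vmovapd. }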
  999. if GetNextInstruction(p,hp1) and
  1000. { we mix single and double operations here because we assume that the compiler
  1001. generates vmovapd only after double operations and vmovaps only after single operations }
  1002. MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  1003. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  1004. (taicpu(hp1).oper[1]^.typ=top_reg) then
  1005. begin
  1006. CopyUsedRegs(TmpUsedRegs);
  1007. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1008. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
  1009. ) then
  1010. begin
  1011. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  1012. DebugMsg('PeepHole Optimization VOpVmov2VOp done',p);
  1013. asml.Remove(hp1);
  1014. hp1.Free;
  1015. result:=true;
  1016. end;
  1017. end;
  1018. end;
  1019. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  1020. var
  1021. hp1, hp2: tai;
  1022. TmpUsedRegs : TAllUsedRegs;
  1023. GetNextInstruction_p : Boolean;
  1024. begin
  1025. Result:=false;
  1026. { remove mov reg1,reg1? }
  1027. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  1028. begin
  1029. GetNextInstruction(p, hp1);
  1030. DebugMsg('PeepHole Optimization Mov2Nop done',p);
  1031. asml.remove(p);
  1032. p.free;
  1033. p:=hp1;
  1034. Result:=true;
  1035. exit;
  1036. end;
  1037. GetNextInstruction_p:=GetNextInstruction(p, hp1);
  1038. if GetNextInstruction_p and
  1039. MatchInstruction(hp1,A_AND,[]) and
  1040. (taicpu(p).oper[1]^.typ = top_reg) and
  1041. MatchOpType(taicpu(hp1),top_const,top_reg) and
  1042. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  1043. case taicpu(p).opsize Of
  1044. S_L:
  1045. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  1046. begin
  1047. { Optimize out:
  1048. mov x, %reg
  1049. and ffffffffh, %reg
  1050. }
  1051. DebugMsg('PeepHole Optimization MovAnd2Mov 1 done',p);
  1052. asml.remove(hp1);
  1053. hp1.free;
  1054. Result:=true;
  1055. exit;
  1056. end;
  1057. S_Q: { TODO: Confirm if this is even possible }
  1058. if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then
  1059. begin
  1060. { Optimize out:
  1061. mov x, %reg
  1062. and ffffffffffffffffh, %reg
  1063. }
  1064. DebugMsg('PeepHole Optimization MovAnd2Mov 2 done',p);
  1065. asml.remove(hp1);
  1066. hp1.free;
  1067. Result:=true;
  1068. exit;
  1069. end;
  1070. end
  1071. else if GetNextInstruction_p and
  1072. MatchInstruction(hp1,A_MOV,[]) and
  1073. (taicpu(p).oper[1]^.typ = top_reg) and
  1074. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  1075. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1076. begin
  1077. CopyUsedRegs(TmpUsedRegs);
  1078. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  1079. { we have
  1080. mov x, %treg
  1081. mov %treg, y
  1082. }
  1083. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  1084. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1085. { we've got
  1086. mov x, %treg
  1087. mov %treg, y
  1088. where %treg is not used afterwards }
  1089. case taicpu(p).oper[0]^.typ Of
  1090. top_reg:
  1091. begin
  1092. { change
  1093. mov %reg, %treg
  1094. mov %treg, y
  1095. to
  1096. mov %reg, y
  1097. }
  1098. if taicpu(hp1).oper[1]^.typ=top_reg then
  1099. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1100. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1101. DebugMsg('PeepHole Optimization MovMov2Mov 2 done',p);
  1102. asml.remove(hp1);
  1103. hp1.free;
  1104. ReleaseUsedRegs(TmpUsedRegs);
  1105. Result:=true;
  1106. Exit;
  1107. end;
  1108. top_ref:
  1109. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  1110. begin
  1111. { change
  1112. mov mem, %treg
  1113. mov %treg, %reg
  1114. to
  1115. mov mem, %reg
  1116. }
  1117. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1118. DebugMsg('PeepHole Optimization MovMov2Mov 3 done',p);
  1119. asml.remove(hp1);
  1120. hp1.free;
  1121. ReleaseUsedRegs(TmpUsedRegs);
  1122. Result:=true;
  1123. Exit;
  1124. end;
  1125. end;
  1126. ReleaseUsedRegs(TmpUsedRegs);
  1127. end
  1128. else
  1129. { Change
  1130. mov %reg1, %reg2
  1131. xxx %reg2, ???
  1132. to
  1133. mov %reg1, %reg2
  1134. xxx %reg1, ???
  1135. to avoid a write/read penalty
  1136. }
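  { e.g. (illustrative, register names assumed): when %reg2 is only tested,
      movl %eax,%edx                    movl %eax,%edx
      testl %edx,%edx          =>       testl %eax,%eax
    so the test no longer has to wait for the result of the mov; this is the
    write/read penalty mentioned above. }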
  1137. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1138. GetNextInstruction(p,hp1) and
  1139. (tai(hp1).typ = ait_instruction) and
  1140. (taicpu(hp1).ops >= 1) and
  1141. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1142. { we have
  1143. mov %reg1, %reg2
  1144. XXX %reg2, ???
  1145. }
  1146. begin
  1147. if ((taicpu(hp1).opcode = A_OR) or
  1148. (taicpu(hp1).opcode = A_AND) or
  1149. (taicpu(hp1).opcode = A_TEST)) and
  1150. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1151. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  1152. { we have
  1153. mov %reg1, %reg2
  1154. test/or/and %reg2, %reg2
  1155. }
  1156. begin
  1157. CopyUsedRegs(TmpUsedRegs);
  1158. { reg1 will be used after the first instruction,
  1159. so update the allocation info }
  1160. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1161. if GetNextInstruction(hp1, hp2) and
  1162. (hp2.typ = ait_instruction) and
  1163. taicpu(hp2).is_jmp and
  1164. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  1165. { change
  1166. mov %reg1, %reg2
  1167. test/or/and %reg2, %reg2
  1168. jxx
  1169. to
  1170. test %reg1, %reg1
  1171. jxx
  1172. }
  1173. begin
  1174. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1175. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1176. DebugMsg('PeepHole Optimization MovTestJxx2TestMov done',p);
  1177. asml.remove(p);
  1178. p.free;
  1179. p := hp1;
  1180. ReleaseUsedRegs(TmpUsedRegs);
  1181. Exit;
  1182. end
  1183. else
  1184. { change
  1185. mov %reg1, %reg2
  1186. test/or/and %reg2, %reg2
  1187. to
  1188. mov %reg1, %reg2
  1189. test/or/and %reg1, %reg1
  1190. }
  1191. begin
  1192. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1193. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1194. DebugMsg('PeepHole Optimization MovTestJxx2MovTestJxx done',p);
  1195. end;
  1196. ReleaseUsedRegs(TmpUsedRegs);
  1197. end
  1198. end
  1199. else
  1200. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1201. x >= RetOffset) as it doesn't do anything (it writes either to a
  1202. parameter or to the temporary storage room for the function
  1203. result)
  1204. }
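  { e.g. (illustrative, i386 frame assumed): in
      movl %eax,8(%ebp)
      leave
      ret
    the store targets a parameter slot (or the result temp) of the frame that
    is being torn down, so per the reasoning above the mov can be dropped. }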
  1205. if GetNextInstruction_p and
  1206. (tai(hp1).typ = ait_instruction) then
  1207. begin
  1208. if IsExitCode(hp1) and
  1209. MatchOpType(taicpu(p),top_reg,top_ref) and
  1210. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1211. not(assigned(current_procinfo.procdef.funcretsym) and
  1212. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1213. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  1214. begin
  1215. asml.remove(p);
  1216. p.free;
  1217. p:=hp1;
  1218. DebugMsg('Peephole removed deadstore before leave/ret',p);
  1219. RemoveLastDeallocForFuncRes(p);
  1220. exit;
  1221. end
  1222. { change
  1223. mov reg1, mem1
  1224. test/cmp x, mem1
  1225. to
  1226. mov reg1, mem1
  1227. test/cmp x, reg1
  1228. }
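  { e.g. (illustrative instance of the pattern above):
      movl %eax,-4(%ebp)               movl %eax,-4(%ebp)
      cmpl $0,-4(%ebp)         =>      cmpl $0,%eax
    which avoids re-reading the value that was just stored. }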
  1229. else if MatchOpType(taicpu(p),top_reg,top_ref) and
  1230. MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
  1231. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1232. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1233. begin
  1234. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1235. DebugMsg('Peephole MovTestCmp2MovTestCmp 1',hp1);
  1236. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1237. end;
  1238. end;
  1239. { Next instruction is also a MOV ? }
  1240. if GetNextInstruction_p and
  1241. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  1242. begin
  1243. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1244. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1245. { mov reg1, mem1 or mov mem1, reg1
  1246. mov mem2, reg2 mov reg2, mem2}
  1247. begin
  1248. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1249. { mov reg1, mem1 or mov mem1, reg1
  1250. mov mem2, reg1 mov reg2, mem1}
  1251. begin
  1252. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1253. { Removes the second statement from
  1254. mov reg1, mem1/reg2
  1255. mov mem1/reg2, reg1 }
  1256. begin
  1257. if taicpu(p).oper[0]^.typ=top_reg then
  1258. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1259. DebugMsg('PeepHole Optimization MovMov2Mov 1',p);
  1260. asml.remove(hp1);
  1261. hp1.free;
  1262. Result:=true;
  1263. exit;
  1264. end
  1265. else
  1266. begin
  1267. CopyUsedRegs(TmpUsedRegs);
  1268. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1269. if (taicpu(p).oper[1]^.typ = top_ref) and
  1270. { mov reg1, mem1
  1271. mov mem2, reg1 }
  1272. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1273. GetNextInstruction(hp1, hp2) and
  1274. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  1275. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1276. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  1277. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1278. { change to
  1279. mov reg1, mem1 mov reg1, mem1
  1280. mov mem2, reg1 cmp reg1, mem2
  1281. cmp mem1, reg1
  1282. }
  1283. begin
  1284. asml.remove(hp2);
  1285. hp2.free;
  1286. taicpu(hp1).opcode := A_CMP;
  1287. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1288. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1289. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1290. DebugMsg('Peephole Optimization MovMovCmp2MovCmp done',hp1);
  1291. end;
  1292. ReleaseUsedRegs(TmpUsedRegs);
  1293. end;
  1294. end
  1295. else if (taicpu(p).oper[1]^.typ=top_ref) and
  1296. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1297. begin
  1298. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1299. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1300. DebugMsg('PeepHole Optimization MovMov2MovMov1 done',p);
  1301. end
  1302. else
  1303. begin
  1304. CopyUsedRegs(TmpUsedRegs);
  1305. if GetNextInstruction(hp1, hp2) and
  1306. MatchOpType(taicpu(p),top_ref,top_reg) and
  1307. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1308. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1309. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  1310. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  1311. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1312. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  1313. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1314. { mov mem1, %reg1
  1315. mov %reg1, mem2
  1316. mov mem2, reg2
  1317. to:
  1318. mov mem1, reg2
  1319. mov reg2, mem2}
  1320. begin
  1321. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1322. DebugMsg('PeepHole Optimization MovMovMov2MovMov 1 done',p);
  1323. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1324. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1325. asml.remove(hp2);
  1326. hp2.free;
  1327. end
  1328. {$ifdef i386}
  1329. { this is enabled for i386 only, as the rules to create the reg sets below
1330. are too complicated for x86-64, which would make this code too error-prone
  1331. on x86-64
  1332. }
  1333. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1334. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  1335. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  1336. { mov mem1, reg1 mov mem1, reg1
  1337. mov reg1, mem2 mov reg1, mem2
  1338. mov mem2, reg2 mov mem2, reg1
  1339. to: to:
  1340. mov mem1, reg1 mov mem1, reg1
  1341. mov mem1, reg2 mov reg1, mem2
  1342. mov reg1, mem2
  1343. or (if mem1 depends on reg1
  1344. and/or if mem2 depends on reg2)
  1345. to:
  1346. mov mem1, reg1
  1347. mov reg1, mem2
  1348. mov reg1, reg2
  1349. }
  1350. begin
  1351. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1352. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1353. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1354. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1355. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1356. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1357. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1358. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1359. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1360. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1361. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1362. end
  1363. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1364. begin
  1365. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1366. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1367. end
  1368. else
  1369. begin
  1370. asml.remove(hp2);
  1371. hp2.free;
  1372. end
  1373. {$endif i386}
  1374. ;
  1375. ReleaseUsedRegs(TmpUsedRegs);
  1376. end;
  1377. end
  1378. (* { movl [mem1],reg1
  1379. movl [mem1],reg2
  1380. to
  1381. movl [mem1],reg1
  1382. movl reg1,reg2
  1383. }
  1384. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1385. (taicpu(p).oper[1]^.typ = top_reg) and
  1386. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1387. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1388. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1389. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1390. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1391. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1392. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1393. else*)
  1394. { movl const1,[mem1]
  1395. movl [mem1],reg1
  1396. to
  1397. movl const1,reg1
  1398. movl reg1,[mem1]
  1399. }
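{ illustrative example of the change above (hypothetical operands):
      movl $5,4(%esp)                movl $5,%eax
      movl 4(%esp),%eax        -->   movl %eax,4(%esp)                       }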
  1400. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  1401. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  1402. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1403. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1404. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  1405. begin
  1406. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1407. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1408. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1409. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1410. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  1411. DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
  1412. end
  1413. end
  1414. else if (taicpu(p).oper[1]^.typ = top_reg) and
  1415. GetNextInstruction_p and
  1416. (hp1.typ = ait_instruction) and
  1417. GetNextInstruction(hp1, hp2) and
  1418. MatchInstruction(hp2,A_MOV,[]) and
  1419. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  1420. (taicpu(hp2).oper[0]^.typ=top_reg) and
  1421. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  1422. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  1423. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  1424. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  1425. ) then
  1426. { change movsX/movzX reg/ref, reg2
  1427. add/sub/or/... reg3/$const, reg2
1428. mov reg2, reg/ref
  1429. to add/sub/or/... reg3/$const, reg/ref }
  1430. begin
  1431. CopyUsedRegs(TmpUsedRegs);
  1432. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1433. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1434. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  1435. begin
  1436. { by example:
  1437. movswl %si,%eax movswl %si,%eax p
  1438. decl %eax addl %edx,%eax hp1
  1439. movw %ax,%si movw %ax,%si hp2
  1440. ->
  1441. movswl %si,%eax movswl %si,%eax p
  1442. decw %eax addw %edx,%eax hp1
  1443. movw %ax,%si movw %ax,%si hp2
  1444. }
  1445. DebugMsg('Peephole Optimization MovOpMov2Op ('+
  1446. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  1447. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  1448. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  1449. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1450. {
  1451. ->
  1452. movswl %si,%eax movswl %si,%eax p
  1453. decw %si addw %dx,%si hp1
  1454. movw %ax,%si movw %ax,%si hp2
  1455. }
  1456. case taicpu(hp1).ops of
  1457. 1:
  1458. begin
  1459. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  1460. if taicpu(hp1).oper[0]^.typ=top_reg then
  1461. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1462. end;
  1463. 2:
  1464. begin
  1465. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  1466. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  1467. (taicpu(hp1).opcode<>A_SHL) and
  1468. (taicpu(hp1).opcode<>A_SHR) and
  1469. (taicpu(hp1).opcode<>A_SAR) then
  1470. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1471. end;
  1472. else
  1473. internalerror(2008042701);
  1474. end;
  1475. {
  1476. ->
  1477. decw %si addw %dx,%si p
  1478. }
  1479. asml.remove(p);
  1480. asml.remove(hp2);
  1481. p.Free;
  1482. hp2.Free;
  1483. p := hp1;
  1484. end;
  1485. ReleaseUsedRegs(TmpUsedRegs);
  1486. end
  1487. else if GetNextInstruction_p and
  1488. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1489. GetNextInstruction(hp1, hp2) and
  1490. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1491. MatchOperand(Taicpu(p).oper[0]^,0) and
  1492. (Taicpu(p).oper[1]^.typ = top_reg) and
  1493. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1494. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1495. { mov reg1,0
  1496. bts reg1,operand1 --> mov reg1,operand2
  1497. or reg1,operand2 bts reg1,operand1}
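{ illustrative example of the change above (hypothetical registers):
      movl $0,%eax
      btsl %ecx,%eax                 movl %edx,%eax
      orl  %edx,%eax           -->   btsl %ecx,%eax                          }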
  1498. begin
  1499. Taicpu(hp2).opcode:=A_MOV;
  1500. asml.remove(hp1);
  1501. insertllitem(hp2,hp2.next,hp1);
  1502. asml.remove(p);
  1503. p.free;
  1504. p:=hp1;
  1505. end
  1506. else if GetNextInstruction_p and
  1507. MatchInstruction(hp1,A_LEA,[S_L]) and
  1508. MatchOpType(Taicpu(p),top_ref,top_reg) and
  1509. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1510. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1511. ) or
  1512. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1513. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1514. )
  1515. ) then
  1516. { mov reg1,ref
  1517. lea reg2,[reg1,reg2]
  1518. to
  1519. add reg2,ref}
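{ illustrative example of the change above (hypothetical operands, assuming
  %eax is not used afterwards):
      movl 8(%ebp),%eax
      leal (%eax,%edx),%edx    -->   addl 8(%ebp),%edx                       }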
  1520. begin
  1521. CopyUsedRegs(TmpUsedRegs);
  1522. { reg1 may not be used afterwards }
  1523. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1524. begin
  1525. Taicpu(hp1).opcode:=A_ADD;
  1526. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1527. DebugMsg('Peephole MovLea2Add done',hp1);
  1528. asml.remove(p);
  1529. p.free;
  1530. p:=hp1;
  1531. end;
  1532. ReleaseUsedRegs(TmpUsedRegs);
  1533. end;
  1534. end;
  1535. function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
  1536. var
  1537. hp1 : tai;
  1538. begin
  1539. Result:=false;
  1540. if taicpu(p).ops <> 2 then
  1541. exit;
  1542. if GetNextInstruction(p,hp1) and
  1543. MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
  1544. (taicpu(hp1).ops = 2) then
  1545. begin
  1546. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1547. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1548. { movXX reg1, mem1 or movXX mem1, reg1
  1549. movXX mem2, reg2 movXX reg2, mem2}
  1550. begin
  1551. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1552. { movXX reg1, mem1 or movXX mem1, reg1
  1553. movXX mem2, reg1 movXX reg2, mem1}
  1554. begin
  1555. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1556. begin
  1557. { Removes the second statement from
  1558. movXX reg1, mem1/reg2
  1559. movXX mem1/reg2, reg1
  1560. }
  1561. if taicpu(p).oper[0]^.typ=top_reg then
  1562. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1563. { Removes the second statement from
  1564. movXX mem1/reg1, reg2
  1565. movXX reg2, mem1/reg1
  1566. }
  1567. if (taicpu(p).oper[1]^.typ=top_reg) and
  1568. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
  1569. begin
  1570. asml.remove(p);
  1571. p.free;
  1572. GetNextInstruction(hp1,p);
  1573. DebugMsg('PeepHole Optimization MovXXMovXX2Nop 1 done',p);
  1574. end
  1575. else
  1576. DebugMsg('PeepHole Optimization MovXXMovXX2MoVXX 1 done',p);
  1577. asml.remove(hp1);
  1578. hp1.free;
  1579. Result:=true;
  1580. exit;
  1581. end
  1582. end;
  1583. end;
  1584. end;
  1585. end;
  1586. function TX86AsmOptimizer.OptPass1OP(const p : tai) : boolean;
  1587. var
  1588. TmpUsedRegs : TAllUsedRegs;
  1589. hp1 : tai;
  1590. begin
  1591. result:=false;
  1592. { replace
  1593. <Op>X %mreg1,%mreg2 // Op in [ADD,MUL]
  1594. MovX %mreg2,%mreg1
  1595. dealloc %mreg2
  1596. by
  1597. <Op>X %mreg2,%mreg1
  1598. ?
  1599. }
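{ illustrative example of the replacement above (hypothetical registers,
  assuming %xmm0 is deallocated after the movapd):
      addsd  %xmm1,%xmm0
      movapd %xmm0,%xmm1       -->   addsd %xmm0,%xmm1                       }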
  1600. if GetNextInstruction(p,hp1) and
1601. { we mix single and double operations here because we assume that the compiler
  1602. generates vmovapd only after double operations and vmovaps only after single operations }
  1603. MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
  1604. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1605. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
  1606. (taicpu(p).oper[0]^.typ=top_reg) then
  1607. begin
  1608. CopyUsedRegs(TmpUsedRegs);
  1609. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1610. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  1611. begin
  1612. taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
  1613. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1614. DebugMsg('PeepHole Optimization OpMov2Op done',p);
  1615. asml.Remove(hp1);
  1616. hp1.Free;
  1617. result:=true;
  1618. end;
  1619. ReleaseUsedRegs(TmpUsedRegs);
  1620. end;
  1621. end;
  1622. function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
  1623. var
  1624. hp1 : tai;
  1625. l : ASizeInt;
  1626. TmpUsedRegs : TAllUsedRegs;
  1627. begin
  1628. Result:=false;
  1629. { removes seg register prefixes from LEA operations, as they
  1630. don't do anything}
  1631. taicpu(p).oper[0]^.ref^.Segment:=NR_NO;
  1632. { changes "lea (%reg1), %reg2" into "mov %reg1, %reg2" }
  1633. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1634. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
1635. { do not mess with leas accessing the stack pointer }
  1636. (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
  1637. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  1638. begin
  1639. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  1640. (taicpu(p).oper[0]^.ref^.offset = 0) then
  1641. begin
  1642. hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
  1643. taicpu(p).oper[1]^.reg);
  1644. InsertLLItem(p.previous,p.next, hp1);
  1645. DebugMsg('PeepHole Optimization Lea2Mov done',hp1);
  1646. p.free;
  1647. p:=hp1;
  1648. Result:=true;
  1649. exit;
  1650. end
  1651. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  1652. begin
  1653. hp1:=taicpu(p.Next);
  1654. DebugMsg('PeepHole Optimization Lea2Nop done',p);
  1655. asml.remove(p);
  1656. p.free;
  1657. p:=hp1;
  1658. Result:=true;
  1659. exit;
  1660. end
1661. { continue to use lea to adjust the stack pointer, as it is the
1662. recommended way, but only when not optimizing for size }
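{ illustrative examples for the branch below (hypothetical registers,
  assuming UseIncDec is true for the first two):
      leal 1(%eax),%eax        -->   incl %eax
      leal -1(%eax),%eax       -->   decl %eax
      leal -12(%ebx),%ebx      -->   subl $12,%ebx
      leal 8(%ebx),%ebx        -->   addl $8,%ebx                            }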
  1663. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  1664. (cs_opt_size in current_settings.optimizerswitches) then
  1665. with taicpu(p).oper[0]^.ref^ do
  1666. if (base = taicpu(p).oper[1]^.reg) then
  1667. begin
  1668. l:=offset;
  1669. if (l=1) and UseIncDec then
  1670. begin
  1671. taicpu(p).opcode:=A_INC;
  1672. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1673. taicpu(p).ops:=1;
  1674. DebugMsg('PeepHole Optimization Lea2Inc done',p);
  1675. end
  1676. else if (l=-1) and UseIncDec then
  1677. begin
  1678. taicpu(p).opcode:=A_DEC;
  1679. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1680. taicpu(p).ops:=1;
  1681. DebugMsg('PeepHole Optimization Lea2Dec done',p);
  1682. end
  1683. else
  1684. begin
  1685. if (l<0) and (l<>-2147483648) then
  1686. begin
  1687. taicpu(p).opcode:=A_SUB;
  1688. taicpu(p).loadConst(0,-l);
  1689. DebugMsg('PeepHole Optimization Lea2Sub done',p);
  1690. end
  1691. else
  1692. begin
  1693. taicpu(p).opcode:=A_ADD;
  1694. taicpu(p).loadConst(0,l);
  1695. DebugMsg('PeepHole Optimization Lea2Add done',p);
  1696. end;
  1697. end;
  1698. Result:=true;
  1699. exit;
  1700. end;
  1701. end;
  1702. if GetNextInstruction(p,hp1) and
  1703. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
  1704. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1705. MatchOpType(Taicpu(hp1),top_reg,top_reg) and
  1706. (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
  1707. begin
  1708. CopyUsedRegs(TmpUsedRegs);
  1709. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1710. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  1711. begin
  1712. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1713. DebugMsg('PeepHole Optimization LeaMov2Lea done',p);
  1714. asml.Remove(hp1);
  1715. hp1.Free;
  1716. result:=true;
  1717. end;
  1718. ReleaseUsedRegs(TmpUsedRegs);
  1719. end;
  1720. (*
1721. This is unsafe: lea doesn't modify the flags, but "add"
1722. does. This breaks webtbs/tw15694.pp. The above
1723. transformations are also unsafe, but they don't seem to
1724. be triggered by code that FPC generates (or at least
1725. they do not occur in the tests...). This needs to be
1726. fixed by checking for the liveness of the flags register.
  1727. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1728. begin
  1729. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1730. taicpu(p).oper[0]^.ref^.base);
  1731. InsertLLItem(asml,p.previous,p.next, hp1);
  1732. DebugMsg('Peephole Lea2AddBase done',hp1);
  1733. p.free;
  1734. p:=hp1;
  1735. continue;
  1736. end
  1737. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1738. begin
  1739. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1740. taicpu(p).oper[0]^.ref^.index);
  1741. InsertLLItem(asml,p.previous,p.next,hp1);
  1742. DebugMsg('Peephole Lea2AddIndex done',hp1);
  1743. p.free;
  1744. p:=hp1;
  1745. continue;
  1746. end
  1747. *)
  1748. end;
  1749. function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
  1750. var
  1751. TmpUsedRegs : TAllUsedRegs;
  1752. hp1,hp2: tai;
  1753. begin
  1754. Result:=false;
  1755. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1756. GetNextInstruction(p, hp1) and
  1757. MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
  1758. MatchOpType(taicpu(hp1),top_ref,top_reg) and
  1759. ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
  1760. or
  1761. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
  1762. ) and
  1763. (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
  1764. { mov reg1, reg2
  1765. mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
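{ illustrative example of the change above (hypothetical registers):
      movl %esi,%eax
      movzbl (%eax),%eax       -->   movzbl (%esi),%eax                      }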
  1766. begin
  1767. if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
  1768. taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
  1769. if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
  1770. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
  1771. DebugMsg('PeepHole Optimization MovMovXX2MoVXX 1 done',p);
  1772. asml.remove(p);
  1773. p.free;
  1774. p := hp1;
  1775. Result:=true;
  1776. exit;
  1777. end
  1778. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1779. GetNextInstruction(p,hp1) and
  1780. (hp1.typ = ait_instruction) and
  1781. { while the GetNextInstruction(hp1,hp2) call could be factored out,
1782. doing it separately in both branches allows the cheap checks
1783. with a low match probability to be done earlier }
  1784. ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1785. GetNextInstruction(hp1,hp2) and
  1786. MatchInstruction(hp2,A_MOV,[])
  1787. ) or
  1788. ((taicpu(hp1).opcode=A_LEA) and
  1789. GetNextInstruction(hp1,hp2) and
  1790. MatchInstruction(hp2,A_MOV,[]) and
  1791. ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  1792. (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
  1793. ) or
  1794. (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
  1795. taicpu(p).oper[1]^.reg) and
  1796. (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
  1797. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
  1798. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
  1799. ) and
  1800. ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
  1801. )
  1802. ) and
  1803. MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
  1804. (taicpu(hp2).oper[1]^.typ = top_ref) then
  1805. begin
  1806. CopyUsedRegs(TmpUsedRegs);
  1807. UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
  1808. if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
  1809. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2, TmpUsedRegs))) then
  1810. { change mov (ref), reg
  1811. add/sub/or/... reg2/$const, reg
  1812. mov reg, (ref)
  1813. # release reg
  1814. to add/sub/or/... reg2/$const, (ref) }
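{ illustrative example of the change above (hypothetical operands, assuming
  %eax is released after the last mov):
      movl 4(%esi),%eax
      addl $3,%eax
      movl %eax,4(%esi)        -->   addl $3,4(%esi)                         }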
  1815. begin
  1816. case taicpu(hp1).opcode of
  1817. A_INC,A_DEC,A_NOT,A_NEG :
  1818. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1819. A_LEA :
  1820. begin
  1821. taicpu(hp1).opcode:=A_ADD;
  1822. if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
  1823. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
  1824. else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
  1825. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
  1826. else
  1827. taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
  1828. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1829. DebugMsg('Peephole FoldLea done',hp1);
  1830. end
  1831. else
  1832. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1833. end;
  1834. asml.remove(p);
  1835. asml.remove(hp2);
  1836. p.free;
  1837. hp2.free;
  1838. p := hp1
  1839. end;
  1840. ReleaseUsedRegs(TmpUsedRegs);
  1841. end;
  1842. end;
  1843. function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
  1844. var
  1845. TmpUsedRegs : TAllUsedRegs;
  1846. hp1 : tai;
  1847. begin
  1848. Result:=false;
  1849. if (taicpu(p).ops >= 2) and
  1850. ((taicpu(p).oper[0]^.typ = top_const) or
  1851. ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
  1852. (taicpu(p).oper[1]^.typ = top_reg) and
  1853. ((taicpu(p).ops = 2) or
  1854. ((taicpu(p).oper[2]^.typ = top_reg) and
  1855. (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
  1856. GetLastInstruction(p,hp1) and
  1857. MatchInstruction(hp1,A_MOV,[]) and
  1858. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  1859. ((taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) or
  1860. ((taicpu(hp1).opsize=S_L) and (taicpu(p).opsize=S_Q) and SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(p).oper[1]^.reg))) then
  1861. begin
  1862. CopyUsedRegs(TmpUsedRegs);
  1863. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
  1864. { change
  1865. mov reg1,reg2
  1866. imul y,reg2 to imul y,reg1,reg2 }
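{ illustrative example of the change above (hypothetical operands):
      movl %ecx,%eax
      imull $3,%eax            -->   imull $3,%ecx,%eax                      }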
  1867. begin
  1868. taicpu(p).ops := 3;
  1869. taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
  1870. taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
  1871. DebugMsg('Peephole MovImul2Imul done',p);
  1872. asml.remove(hp1);
  1873. hp1.free;
  1874. result:=true;
  1875. end;
  1876. ReleaseUsedRegs(TmpUsedRegs);
  1877. end;
  1878. end;
  1879. function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
  1880. var
  1881. hp1 : tai;
  1882. begin
  1883. {
  1884. change
  1885. jmp .L1
  1886. ...
  1887. .L1:
  1888. ret
  1889. into
  1890. ret
  1891. }
  1892. result:=false;
  1893. if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
  1894. (taicpu(p).oper[0]^.ref^.index=NR_NO) then
  1895. begin
  1896. hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
  1897. if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and
  1898. MatchInstruction(hp1,A_RET,[S_NO]) then
  1899. begin
  1900. tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
  1901. taicpu(p).opcode:=A_RET;
  1902. taicpu(p).is_jmp:=false;
  1903. taicpu(p).ops:=taicpu(hp1).ops;
  1904. case taicpu(hp1).ops of
  1905. 0:
  1906. taicpu(p).clearop(0);
  1907. 1:
  1908. taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
  1909. else
  1910. internalerror(2016041301);
  1911. end;
  1912. result:=true;
  1913. end;
  1914. end;
  1915. end;
  1916. function CanBeCMOV(p : tai) : boolean;
  1917. begin
  1918. CanBeCMOV:=assigned(p) and
  1919. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
1920. { we can't use cmov ref,reg because
1921. ref could be nil and cmov would still raise an exception
1922. even though the mov would not be executed if ref=nil (FK)
  1923. or ((taicpu(p).oper[0]^.typ = top_ref) and
  1924. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  1925. }
  1926. MatchOpType(taicpu(p),top_reg,top_reg);
  1927. end;
  1928. function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
  1929. var
  1930. hp1,hp2,hp3: tai;
  1931. carryadd_opcode : TAsmOp;
  1932. l : Longint;
  1933. condition : TAsmCond;
  1934. begin
  1935. { jb @@1 cmc
  1936. inc/dec operand --> adc/sbb operand,0
  1937. @@1:
  1938. ... and ...
  1939. jnb @@1
  1940. inc/dec operand --> adc/sbb operand,0
  1941. @@1: }
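{ illustrative example of the first case above (hypothetical operand):
      jb  @@1
      incl %eax                      cmc
  @@1:                         -->   adcl $0,%eax                            }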
  1942. result:=false;
  1943. if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
  1944. GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
  1945. (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
  1946. begin
  1947. carryadd_opcode:=A_NONE;
  1948. if Taicpu(p).condition in [C_NAE,C_B] then
  1949. begin
  1950. if Taicpu(hp1).opcode=A_INC then
  1951. carryadd_opcode:=A_ADC;
  1952. if Taicpu(hp1).opcode=A_DEC then
  1953. carryadd_opcode:=A_SBB;
  1954. if carryadd_opcode<>A_NONE then
  1955. begin
  1956. Taicpu(p).clearop(0);
  1957. Taicpu(p).ops:=0;
  1958. Taicpu(p).is_jmp:=false;
  1959. Taicpu(p).opcode:=A_CMC;
  1960. Taicpu(p).condition:=C_NONE;
  1961. Taicpu(hp1).ops:=2;
  1962. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1963. Taicpu(hp1).loadconst(0,0);
  1964. Taicpu(hp1).opcode:=carryadd_opcode;
  1965. result:=true;
  1966. exit;
  1967. end;
  1968. end;
  1969. if Taicpu(p).condition in [C_AE,C_NB] then
  1970. begin
  1971. if Taicpu(hp1).opcode=A_INC then
  1972. carryadd_opcode:=A_ADC;
  1973. if Taicpu(hp1).opcode=A_DEC then
  1974. carryadd_opcode:=A_SBB;
  1975. if carryadd_opcode<>A_NONE then
  1976. begin
  1977. asml.remove(p);
  1978. p.free;
  1979. Taicpu(hp1).ops:=2;
  1980. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1981. Taicpu(hp1).loadconst(0,0);
  1982. Taicpu(hp1).opcode:=carryadd_opcode;
  1983. p:=hp1;
  1984. result:=true;
  1985. exit;
  1986. end;
  1987. end;
  1988. end;
  1989. {$ifndef i8086}
  1990. if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
  1991. begin
  1992. { check for
  1993. jCC xxx
  1994. <several movs>
  1995. xxx:
  1996. }
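{ illustrative example of this case (hypothetical operands; the condition of
  the cmov is the inverse of the jump's condition):
      jne xxx
      movl %edx,%eax
  xxx:                         -->   cmove %edx,%eax                         }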
  1997. l:=0;
  1998. GetNextInstruction(p, hp1);
  1999. while assigned(hp1) and
  2000. CanBeCMOV(hp1) and
  2001. { stop on labels }
  2002. not(hp1.typ=ait_label) do
  2003. begin
  2004. inc(l);
  2005. GetNextInstruction(hp1,hp1);
  2006. end;
  2007. if assigned(hp1) then
  2008. begin
  2009. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  2010. begin
  2011. if (l<=4) and (l>0) then
  2012. begin
  2013. condition:=inverse_cond(taicpu(p).condition);
  2014. hp2:=p;
  2015. GetNextInstruction(p,hp1);
  2016. p:=hp1;
  2017. repeat
  2018. taicpu(hp1).opcode:=A_CMOVcc;
  2019. taicpu(hp1).condition:=condition;
  2020. GetNextInstruction(hp1,hp1);
  2021. until not(assigned(hp1)) or
  2022. not(CanBeCMOV(hp1));
2023. { wait with removing the jump, otherwise GetNextInstruction
2024. could ignore the label if this jump was its only
2025. reference }
  2026. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  2027. { if the label refs. reach zero, remove any alignment before the label }
  2028. if (hp1.typ=ait_align) and (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).getrefs=0) then
  2029. begin
  2030. asml.Remove(hp1);
  2031. hp1.Free;
  2032. end;
  2033. asml.remove(hp2);
  2034. hp2.free;
  2035. result:=true;
  2036. exit;
  2037. end;
  2038. end
  2039. else
  2040. begin
  2041. { check further for
  2042. jCC xxx
  2043. <several movs 1>
  2044. jmp yyy
  2045. xxx:
  2046. <several movs 2>
  2047. yyy:
  2048. }
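{ illustrative example of this case (hypothetical operands; the first block
  gets the inverted condition, the second the original one):
      je  xxx
      movl %edx,%eax
      jmp yyy                        cmovne %edx,%eax
  xxx: movl %ecx,%eax          -->   cmove  %ecx,%eax
  yyy:                                                                       }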
  2049. { hp2 points to jmp yyy }
  2050. hp2:=hp1;
  2051. { skip hp1 to xxx }
  2052. GetNextInstruction(hp1, hp1);
  2053. if assigned(hp2) and
  2054. assigned(hp1) and
  2055. (l<=3) and
  2056. (hp2.typ=ait_instruction) and
  2057. (taicpu(hp2).is_jmp) and
  2058. (taicpu(hp2).condition=C_None) and
  2059. { real label and jump, no further references to the
  2060. label are allowed }
  2061. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
  2062. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  2063. begin
  2064. l:=0;
2065. { skip hp1 to <several movs 2> }
  2066. GetNextInstruction(hp1, hp1);
  2067. while assigned(hp1) and
  2068. CanBeCMOV(hp1) do
  2069. begin
  2070. inc(l);
  2071. GetNextInstruction(hp1, hp1);
  2072. end;
  2073. { hp1 points to yyy: }
  2074. if assigned(hp1) and
  2075. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  2076. begin
  2077. condition:=inverse_cond(taicpu(p).condition);
  2078. GetNextInstruction(p,hp1);
  2079. hp3:=p;
  2080. p:=hp1;
  2081. repeat
  2082. taicpu(hp1).opcode:=A_CMOVcc;
  2083. taicpu(hp1).condition:=condition;
  2084. GetNextInstruction(hp1,hp1);
  2085. until not(assigned(hp1)) or
  2086. not(CanBeCMOV(hp1));
  2087. { hp2 is still at jmp yyy }
  2088. GetNextInstruction(hp2,hp1);
  2089. { hp2 is now at xxx: }
  2090. condition:=inverse_cond(condition);
  2091. GetNextInstruction(hp1,hp1);
  2092. { hp1 is now at <several movs 2> }
  2093. repeat
  2094. taicpu(hp1).opcode:=A_CMOVcc;
  2095. taicpu(hp1).condition:=condition;
  2096. GetNextInstruction(hp1,hp1);
  2097. until not(assigned(hp1)) or
  2098. not(CanBeCMOV(hp1));
  2099. {
  2100. asml.remove(hp1.next)
  2101. hp1.next.free;
  2102. asml.remove(hp1);
  2103. hp1.free;
  2104. }
  2105. { remove jCC }
  2106. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  2107. asml.remove(hp3);
  2108. hp3.free;
  2109. { remove jmp }
  2110. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  2111. asml.remove(hp2);
  2112. hp2.free;
  2113. result:=true;
  2114. exit;
  2115. end;
  2116. end;
  2117. end;
  2118. end;
  2119. end;
  2120. {$endif i8086}
  2121. end;
  2122. function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
  2123. var
  2124. hp1,hp2: tai;
  2125. begin
  2126. result:=false;
  2127. if (taicpu(p).oper[1]^.typ = top_reg) and
  2128. GetNextInstruction(p,hp1) and
  2129. (hp1.typ = ait_instruction) and
  2130. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  2131. GetNextInstruction(hp1,hp2) and
  2132. MatchInstruction(hp2,A_MOV,[]) and
  2133. (taicpu(hp2).oper[0]^.typ = top_reg) and
  2134. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  2135. {$ifdef i386}
  2136. { not all registers have byte size sub registers on i386 }
  2137. ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
  2138. {$endif i386}
  2139. (((taicpu(hp1).ops=2) and
  2140. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  2141. ((taicpu(hp1).ops=1) and
  2142. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  2143. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  2144. begin
  2145. { change movsX/movzX reg/ref, reg2
  2146. add/sub/or/... reg3/$const, reg2
2147. mov reg2, reg/ref
  2148. to add/sub/or/... reg3/$const, reg/ref }
  2149. { by example:
  2150. movswl %si,%eax movswl %si,%eax p
  2151. decl %eax addl %edx,%eax hp1
  2152. movw %ax,%si movw %ax,%si hp2
  2153. ->
  2154. movswl %si,%eax movswl %si,%eax p
  2155. decw %eax addw %edx,%eax hp1
  2156. movw %ax,%si movw %ax,%si hp2
  2157. }
  2158. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  2159. {
  2160. ->
  2161. movswl %si,%eax movswl %si,%eax p
  2162. decw %si addw %dx,%si hp1
  2163. movw %ax,%si movw %ax,%si hp2
  2164. }
  2165. case taicpu(hp1).ops of
  2166. 1:
  2167. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  2168. 2:
  2169. begin
  2170. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  2171. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  2172. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2173. end;
  2174. else
  2175. internalerror(2008042701);
  2176. end;
  2177. {
  2178. ->
  2179. decw %si addw %dx,%si p
  2180. }
  2181. DebugMsg('PeepHole Optimization,var3',p);
  2182. asml.remove(p);
  2183. asml.remove(hp2);
  2184. p.free;
  2185. hp2.free;
  2186. p:=hp1;
  2187. end
2188. { removes superfluous and instructions after movzx }
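{ illustrative example (hypothetical operands; the andl below is removed):
      movzbl (%edx),%eax
      andl $0xff,%eax          -->   movzbl (%edx),%eax                      }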
  2189. else if taicpu(p).opcode=A_MOVZX then
  2190. begin
  2191. if (taicpu(p).oper[1]^.typ = top_reg) and
  2192. GetNextInstruction(p, hp1) and
  2193. (tai(hp1).typ = ait_instruction) and
  2194. (taicpu(hp1).opcode = A_AND) and
  2195. (taicpu(hp1).oper[0]^.typ = top_const) and
  2196. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2197. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2198. begin
  2199. case taicpu(p).opsize Of
  2200. S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
  2201. if (taicpu(hp1).oper[0]^.val = $ff) then
  2202. begin
  2203. DebugMsg('PeepHole Optimization,var4',p);
  2204. asml.remove(hp1);
  2205. hp1.free;
  2206. end;
  2207. S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
  2208. if (taicpu(hp1).oper[0]^.val = $ffff) then
  2209. begin
  2210. DebugMsg('PeepHole Optimization,var5',p);
  2211. asml.remove(hp1);
  2212. hp1.free;
  2213. end;
  2214. {$ifdef x86_64}
  2215. S_LQ:
  2216. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  2217. begin
  2218. if (cs_asm_source in current_settings.globalswitches) then
  2219. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var6')),p);
  2220. asml.remove(hp1);
  2221. hp1.Free;
  2222. end;
  2223. {$endif x86_64}
  2224. end;
  2225. end;
2226. { changes some movzx constructs to faster synonyms (all examples
  2227. are given with eax/ax, but are also valid for other registers)}
  2228. if (taicpu(p).oper[1]^.typ = top_reg) then
  2229. if (taicpu(p).oper[0]^.typ = top_reg) then
  2230. case taicpu(p).opsize of
  2231. S_BW:
  2232. begin
  2233. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2234. not(cs_opt_size in current_settings.optimizerswitches) then
  2235. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  2236. begin
  2237. taicpu(p).opcode := A_AND;
  2238. taicpu(p).changeopsize(S_W);
  2239. taicpu(p).loadConst(0,$ff);
  2240. DebugMsg('PeepHole Optimization,var7',p);
  2241. end
  2242. else if GetNextInstruction(p, hp1) and
  2243. (tai(hp1).typ = ait_instruction) and
  2244. (taicpu(hp1).opcode = A_AND) and
  2245. (taicpu(hp1).oper[0]^.typ = top_const) and
  2246. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2247. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2248. { Change "movzbw %reg1, %reg2; andw $const, %reg2"
  2249. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  2250. begin
  2251. DebugMsg('PeepHole Optimization,var8',p);
  2252. taicpu(p).opcode := A_MOV;
  2253. taicpu(p).changeopsize(S_W);
  2254. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  2255. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2256. end;
  2257. end;
  2258. S_BL:
  2259. begin
  2260. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2261. not(cs_opt_size in current_settings.optimizerswitches) then
  2262. { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
  2263. begin
  2264. taicpu(p).opcode := A_AND;
  2265. taicpu(p).changeopsize(S_L);
  2266. taicpu(p).loadConst(0,$ff)
  2267. end
  2268. else if GetNextInstruction(p, hp1) and
  2269. (tai(hp1).typ = ait_instruction) and
  2270. (taicpu(hp1).opcode = A_AND) and
  2271. (taicpu(hp1).oper[0]^.typ = top_const) and
  2272. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2273. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2274. { Change "movzbl %reg1, %reg2; andl $const, %reg2"
  2275. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  2276. begin
  2277. DebugMsg('PeepHole Optimization,var10',p);
  2278. taicpu(p).opcode := A_MOV;
  2279. taicpu(p).changeopsize(S_L);
  2280. { do not use R_SUBWHOLE
  2281. as movl %rdx,%eax
  2282. is invalid in assembler PM }
  2283. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  2284. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2285. end
  2286. end;
  2287. {$ifndef i8086}
  2288. S_WL:
  2289. begin
  2290. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2291. not(cs_opt_size in current_settings.optimizerswitches) then
  2292. { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
  2293. begin
  2294. DebugMsg('PeepHole Optimization,var11',p);
  2295. taicpu(p).opcode := A_AND;
  2296. taicpu(p).changeopsize(S_L);
  2297. taicpu(p).loadConst(0,$ffff);
  2298. end
  2299. else if GetNextInstruction(p, hp1) and
  2300. (tai(hp1).typ = ait_instruction) and
  2301. (taicpu(hp1).opcode = A_AND) and
  2302. (taicpu(hp1).oper[0]^.typ = top_const) and
  2303. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2304. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2305. { Change "movzwl %reg1, %reg2; andl $const, %reg2"
  2306. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  2307. begin
  2308. DebugMsg('PeepHole Optimization,var12',p);
  2309. taicpu(p).opcode := A_MOV;
  2310. taicpu(p).changeopsize(S_L);
  2311. { do not use R_SUBWHOLE
  2312. as movl %rdx,%eax
  2313. is invalid in assembler PM }
  2314. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  2315. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  2316. end;
  2317. end;
  2318. {$endif i8086}
  2319. end
  2320. else if (taicpu(p).oper[0]^.typ = top_ref) then
  2321. begin
  2322. if GetNextInstruction(p, hp1) and
  2323. (tai(hp1).typ = ait_instruction) and
  2324. (taicpu(hp1).opcode = A_AND) and
  2325. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2326. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2327. begin
  2328. taicpu(p).opcode := A_MOV;
  2329. case taicpu(p).opsize Of
  2330. S_BL:
  2331. begin
  2332. DebugMsg('PeepHole Optimization,var13',p);
  2333. taicpu(p).changeopsize(S_L);
  2334. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2335. end;
  2336. S_WL:
  2337. begin
  2338. DebugMsg('PeepHole Optimization,var14',p);
  2339. taicpu(p).changeopsize(S_L);
  2340. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  2341. end;
  2342. S_BW:
  2343. begin
  2344. DebugMsg('PeepHole Optimization,var15',p);
  2345. taicpu(p).changeopsize(S_W);
  2346. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2347. end;
  2348. {$ifdef x86_64}
  2349. S_BQ:
  2350. begin
  2351. DebugMsg('PeepHole Optimization,var16',p);
  2352. taicpu(p).changeopsize(S_Q);
  2353. taicpu(hp1).loadConst(
  2354. 0, taicpu(hp1).oper[0]^.val and $ff);
  2355. end;
  2356. S_WQ:
  2357. begin
  2358. DebugMsg('PeepHole Optimization,var17',p);
  2359. taicpu(p).changeopsize(S_Q);
  2360. taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
  2361. end;
  2362. S_LQ:
  2363. begin
  2364. DebugMsg('PeepHole Optimization,var18',p);
  2365. taicpu(p).changeopsize(S_Q);
  2366. taicpu(hp1).loadConst(
  2367. 0, taicpu(hp1).oper[0]^.val and $ffffffff);
  2368. end;
  2369. {$endif x86_64}
  2370. else
  2371. Internalerror(2017050704)
  2372. end;
  2373. end;
  2374. end;
  2375. end;
  2376. end;
  2377. function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  2378. var
  2379. hp1 : tai;
  2380. begin
  2381. Result:=false;
  2382. if not(GetNextInstruction(p, hp1)) then
  2383. exit;
  2384. if MatchOpType(taicpu(p),top_const,top_reg) and
  2385. MatchInstruction(hp1,A_AND,[]) and
  2386. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2387. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2388. { the second register must contain the first one, so compare their subreg types }
  2389. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  2390. (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
  2391. { change
  2392. and const1, reg
  2393. and const2, reg
  2394. to
  2395. and (const1 and const2), reg
  2396. }
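{ illustrative example of the change above (hypothetical operands):
      andl $0xff00,%eax
      andl $0x0ff0,%eax        -->   andl $0x0f00,%eax                       }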
  2397. begin
  2398. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  2399. DebugMsg('Peephole AndAnd2And done',hp1);
  2400. asml.remove(p);
  2401. p.Free;
  2402. p:=hp1;
  2403. Result:=true;
  2404. exit;
  2405. end
  2406. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2407. MatchInstruction(hp1,A_MOVZX,[]) and
  2408. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2409. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2410. (getsubreg(taicpu(hp1).oper[0]^.reg)=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  2411. (((taicpu(p).opsize=S_W) and
  2412. (taicpu(hp1).opsize=S_BW)) or
  2413. ((taicpu(p).opsize=S_L) and
  2414. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2415. {$ifdef x86_64}
  2416. or
  2417. ((taicpu(p).opsize=S_Q) and
  2418. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  2419. {$endif x86_64}
  2420. ) then
  2421. begin
  2422. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2423. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
  2424. ) or
  2425. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2426. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
  2427. {$ifdef x86_64}
  2428. or
  2429. (((taicpu(hp1).opsize)=S_LQ) and
  2430. ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
  2431. )
  2432. {$endif x86_64}
  2433. then
  2434. begin
  2435. DebugMsg('Peephole AndMovzToAnd done',p);
  2436. asml.remove(hp1);
  2437. hp1.free;
  2438. end;
  2439. end
  2440. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2441. MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
  2442. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2443. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2444. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2445. (((taicpu(p).opsize=S_W) and
  2446. (taicpu(hp1).opsize=S_BW)) or
  2447. ((taicpu(p).opsize=S_L) and
  2448. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2449. {$ifdef x86_64}
  2450. or
  2451. ((taicpu(p).opsize=S_Q) and
  2452. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  2453. {$endif x86_64}
  2454. ) then
  2455. begin
  2456. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2457. ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
  2458. ) or
  2459. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2460. ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
  2461. {$ifdef x86_64}
  2462. or
  2463. (((taicpu(hp1).opsize)=S_LQ) and
  2464. ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
  2465. )
  2466. {$endif x86_64}
  2467. then
  2468. begin
  2469. DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
  2470. asml.remove(hp1);
  2471. hp1.free;
  2472. end;
  2473. end
  2474. else if (taicpu(p).oper[1]^.typ = top_reg) and
  2475. (hp1.typ = ait_instruction) and
  2476. (taicpu(hp1).is_jmp) and
  2477. (taicpu(hp1).opcode<>A_JMP) and
  2478. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  2479. { change
  2480. and x, reg
  2481. jxx
  2482. to
  2483. test x, reg
  2484. jxx
  2485. if reg is deallocated before the
  2486. jump, but only if it's a conditional jump (PFV)
  2487. }
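{ illustrative example of the change above (hypothetical operands, assuming
  %eax is deallocated before the jump):
      andl $4,%eax                   testl $4,%eax
      jz  @@1                  -->   jz  @@1                                 }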
  2488. taicpu(p).opcode := A_TEST;
  2489. end;
  2490. procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
  2491. begin
  2492. if (taicpu(p).oper[1]^.typ = Top_Reg) and
  2493. not(RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  2494. begin
  2495. if (taicpu(p).oper[0]^.typ = top_const) then
  2496. begin
  2497. case taicpu(p).oper[0]^.val of
  2498. 0:
  2499. begin
  2500. { change "mov $0,%reg" into "xor %reg,%reg" }
  2501. taicpu(p).opcode := A_XOR;
  2502. taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
  2503. end;
  2504. $1..$FFFFFFFF:
  2505. begin
  2506. { Code size reduction by J. Gareth "Kit" Moreton }
  2507. { change 64-bit register to 32-bit register to reduce code size (upper 32 bits will be set to zero) }
  2508. case taicpu(p).opsize of
  2509. S_Q:
  2510. begin
  2511. DebugMsg('Peephole Optimization: movq x,%reg -> movd x,%reg (x is a 32-bit constant)', p);
  2512. TRegisterRec(taicpu(p).oper[1]^.reg).subreg := R_SUBD;
  2513. taicpu(p).opsize := S_L;
  2514. end;
  2515. end;
  2516. end;
  2517. end;
  2518. end;
  2519. end;
  2520. end;
  2521. procedure TX86AsmOptimizer.OptReferences;
  2522. var
  2523. p: tai;
  2524. i: Integer;
  2525. begin
  2526. p := BlockStart;
  2527. while (p <> BlockEnd) Do
  2528. begin
  2529. if p.typ=ait_instruction then
  2530. begin
  2531. for i:=0 to taicpu(p).ops-1 do
  2532. if taicpu(p).oper[i]^.typ=top_ref then
  2533. optimize_ref(taicpu(p).oper[i]^.ref^,false);
  2534. end;
  2535. p:=tai(p.next);
  2536. end;
  2537. end;
  2538. end.