aoptx86.pas

  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  28. TX86AsmOptimizer = class(TAsmOptimizer)
  29. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  30. function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
  31. function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
  32. protected
  33. { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
  34. function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  35. { checks whether reading the value in reg1 depends on the value of reg2. This
  36. is very similar to SuperRegistersEqual, except it takes into account that
  37. R_SUBH and R_SUBL are independent (e.g. reading from AL does not
  38. depend on the value in AH). }
  39. function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  40. procedure DebugMsg(const s : string; p : tai);inline;
  41. procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
  42. class function IsExitCode(p : tai) : boolean;
  43. class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
  44. procedure RemoveLastDeallocForFuncRes(p : tai);
  45. function PrePeepholeOptSxx(var p : tai) : boolean;
  46. function OptPass1AND(var p : tai) : boolean;
  47. function OptPass1VMOVAP(var p : tai) : boolean;
  48. function OptPass1VOP(const p : tai) : boolean;
  49. function OptPass1MOV(var p : tai) : boolean;
  50. function OptPass1Movx(var p : tai) : boolean;
  51. function OptPass1MOVAP(var p : tai) : boolean;
  52. function OptPass1MOVXX(var p : tai) : boolean;
  53. function OptPass1OP(const p : tai) : boolean;
  54. function OptPass1LEA(var p : tai) : boolean;
  55. function OptPass2MOV(var p : tai) : boolean;
  56. function OptPass2Imul(var p : tai) : boolean;
  57. function OptPass2Jmp(var p : tai) : boolean;
  58. function OptPass2Jcc(var p : tai) : boolean;
  59. procedure PostPeepholeOptMov(const p : tai);
  60. end;
  61. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  62. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  63. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  64. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  65. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  66. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  67. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  68. function RefsEqual(const r1, r2: treference): boolean;
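  { returns true, if ref is a reference using only the registers passed as base and index
    (NR_INVALID acts as a wildcard) and having neither an offset nor a symbol }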
  69. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  70. { returns true, if ref is a reference using only the registers passed as base and index
  71. and having an offset }
  72. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  73. implementation
  74. uses
  75. cutils,verbose,
  76. globals,
  77. cpuinfo,
  78. procinfo,
  79. aasmbase,
  80. aoptutils,
  81. symconst,symsym,
  82. cgx86,
  83. itcpugas;
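  { the MatchInstruction overloads return true if instr is an instruction whose opcode equals
    one of the given opcodes and whose operand size is contained in opsize (an empty opsize
    set matches any size) }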
  84. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  85. begin
  86. result :=
  87. (instr.typ = ait_instruction) and
  88. (taicpu(instr).opcode = op) and
  89. ((opsize = []) or (taicpu(instr).opsize in opsize));
  90. end;
  91. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  92. begin
  93. result :=
  94. (instr.typ = ait_instruction) and
  95. ((taicpu(instr).opcode = op1) or
  96. (taicpu(instr).opcode = op2)
  97. ) and
  98. ((opsize = []) or (taicpu(instr).opsize in opsize));
  99. end;
  100. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  101. begin
  102. result :=
  103. (instr.typ = ait_instruction) and
  104. ((taicpu(instr).opcode = op1) or
  105. (taicpu(instr).opcode = op2) or
  106. (taicpu(instr).opcode = op3)
  107. ) and
  108. ((opsize = []) or (taicpu(instr).opsize in opsize));
  109. end;
  110. function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
  111. const opsize : topsizes) : boolean;
  112. var
  113. op : TAsmOp;
  114. begin
  115. result:=false;
  116. for op in ops do
  117. begin
  118. if (instr.typ = ait_instruction) and
  119. (taicpu(instr).opcode = op) and
  120. ((opsize = []) or (taicpu(instr).opsize in opsize)) then
  121. begin
  122. result:=true;
  123. exit;
  124. end;
  125. end;
  126. end;
  127. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  128. begin
  129. result := (oper.typ = top_reg) and (oper.reg = reg);
  130. end;
  131. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  132. begin
  133. result := (oper.typ = top_const) and (oper.val = a);
  134. end;
  135. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  136. begin
  137. result := oper1.typ = oper2.typ;
  138. if result then
  139. case oper1.typ of
  140. top_const:
  141. Result:=oper1.val = oper2.val;
  142. top_reg:
  143. Result:=oper1.reg = oper2.reg;
  144. top_ref:
  145. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  146. else
  147. internalerror(2013102801);
  148. end
  149. end;
  150. function RefsEqual(const r1, r2: treference): boolean;
  151. begin
  152. RefsEqual :=
  153. (r1.offset = r2.offset) and
  154. (r1.segment = r2.segment) and (r1.base = r2.base) and
  155. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  156. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  157. (r1.relsymbol = r2.relsymbol);
  158. end;
  159. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  160. begin
  161. Result:=(ref.offset=0) and
  162. (ref.scalefactor in [0,1]) and
  163. (ref.segment=NR_NO) and
  164. (ref.symbol=nil) and
  165. (ref.relsymbol=nil) and
  166. ((base=NR_INVALID) or
  167. (ref.base=base)) and
  168. ((index=NR_INVALID) or
  169. (ref.index=index));
  170. end;
  171. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  172. begin
  173. Result:=(ref.scalefactor in [0,1]) and
  174. (ref.segment=NR_NO) and
  175. (ref.symbol=nil) and
  176. (ref.relsymbol=nil) and
  177. ((base=NR_INVALID) or
  178. (ref.base=base)) and
  179. ((index=NR_INVALID) or
  180. (ref.index=index));
  181. end;
  182. function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  183. begin
  184. Result:=RegReadByInstruction(reg,hp);
  185. end;
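  { returns true if the instruction hp reads register reg, taking implicit operands
    (e.g. EAX/EDX for MUL/IMUL/DIV/IDIV) and the flags tested by conditional instructions
    into account }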
  186. function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  187. var
  188. p: taicpu;
  189. opcount: longint;
  190. begin
  191. RegReadByInstruction := false;
  192. if hp.typ <> ait_instruction then
  193. exit;
  194. p := taicpu(hp);
  195. case p.opcode of
  196. A_CALL:
  197. regreadbyinstruction := true;
  198. A_IMUL:
  199. case p.ops of
  200. 1:
  201. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  202. (
  203. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  204. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  205. );
  206. 2,3:
  207. regReadByInstruction :=
  208. reginop(reg,p.oper[0]^) or
  209. reginop(reg,p.oper[1]^);
  210. end;
  211. A_MUL:
  212. begin
  213. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  214. (
  215. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  216. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  217. );
  218. end;
  219. A_IDIV,A_DIV:
  220. begin
  221. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  222. (
  223. (getregtype(reg)=R_INTREGISTER) and
  224. (
  225. (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
  226. )
  227. );
  228. end;
  229. else
  230. begin
  231. if (p.opcode=A_LEA) and is_segment_reg(reg) then
  232. begin
  233. RegReadByInstruction := false;
  234. exit;
  235. end;
  236. for opcount := 0 to p.ops-1 do
  237. if (p.oper[opCount]^.typ = top_ref) and
  238. RegInRef(reg,p.oper[opcount]^.ref^) then
  239. begin
  240. RegReadByInstruction := true;
  241. exit
  242. end;
  243. { special handling for SSE MOVSD }
  244. if (p.opcode=A_MOVSD) and (p.ops>0) then
  245. begin
  246. if p.ops<>2 then
  247. internalerror(2017042702);
  248. regReadByInstruction := reginop(reg,p.oper[0]^) or
  249. (
  250. (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
  251. );
  252. exit;
  253. end;
  254. with insprop[p.opcode] do
  255. begin
  256. if getregtype(reg)=R_INTREGISTER then
  257. begin
  258. case getsupreg(reg) of
  259. RS_EAX:
  260. if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
  261. begin
  262. RegReadByInstruction := true;
  263. exit
  264. end;
  265. RS_ECX:
  266. if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
  267. begin
  268. RegReadByInstruction := true;
  269. exit
  270. end;
  271. RS_EDX:
  272. if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
  273. begin
  274. RegReadByInstruction := true;
  275. exit
  276. end;
  277. RS_EBX:
  278. if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
  279. begin
  280. RegReadByInstruction := true;
  281. exit
  282. end;
  283. RS_ESP:
  284. if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
  285. begin
  286. RegReadByInstruction := true;
  287. exit
  288. end;
  289. RS_EBP:
  290. if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
  291. begin
  292. RegReadByInstruction := true;
  293. exit
  294. end;
  295. RS_ESI:
  296. if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
  297. begin
  298. RegReadByInstruction := true;
  299. exit
  300. end;
  301. RS_EDI:
  302. if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
  303. begin
  304. RegReadByInstruction := true;
  305. exit
  306. end;
  307. end;
  308. end;
  309. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  310. begin
  311. if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
  312. begin
  313. case p.condition of
  314. C_A,C_NBE, { CF=0 and ZF=0 }
  315. C_BE,C_NA: { CF=1 or ZF=1 }
  316. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
  317. C_AE,C_NB,C_NC, { CF=0 }
  318. C_B,C_NAE,C_C: { CF=1 }
  319. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
  320. C_NE,C_NZ, { ZF=0 }
  321. C_E,C_Z: { ZF=1 }
  322. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
  323. C_G,C_NLE, { ZF=0 and SF=OF }
  324. C_LE,C_NG: { ZF=1 or SF<>OF }
  325. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  326. C_GE,C_NL, { SF=OF }
  327. C_L,C_NGE: { SF<>OF }
  328. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  329. C_NO, { OF=0 }
  330. C_O: { OF=1 }
  331. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
  332. C_NP,C_PO, { PF=0 }
  333. C_P,C_PE: { PF=1 }
  334. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
  335. C_NS, { SF=0 }
  336. C_S: { SF=1 }
  337. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
  338. else
  339. internalerror(2017042701);
  340. end;
  341. if RegReadByInstruction then
  342. exit;
  343. end;
  344. case getsubreg(reg) of
  345. R_SUBW,R_SUBD,R_SUBQ:
  346. RegReadByInstruction :=
  347. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  348. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  349. Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
  350. R_SUBFLAGCARRY:
  351. RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  352. R_SUBFLAGPARITY:
  353. RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  354. R_SUBFLAGAUXILIARY:
  355. RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  356. R_SUBFLAGZERO:
  357. RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  358. R_SUBFLAGSIGN:
  359. RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  360. R_SUBFLAGOVERFLOW:
  361. RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  362. R_SUBFLAGINTERRUPT:
  363. RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
  364. R_SUBFLAGDIRECTION:
  365. RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  366. else
  367. internalerror(2017042601);
  368. end;
  369. exit;
  370. end;
  371. if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
  372. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  373. (p.oper[0]^.reg=p.oper[1]^.reg) then
  374. exit;
  375. if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
  376. begin
  377. RegReadByInstruction := true;
  378. exit
  379. end;
  380. if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
  381. begin
  382. RegReadByInstruction := true;
  383. exit
  384. end;
  385. if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
  386. begin
  387. RegReadByInstruction := true;
  388. exit
  389. end;
  390. if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
  391. begin
  392. RegReadByInstruction := true;
  393. exit
  394. end;
  395. end;
  396. end;
  397. end;
  398. end;
  399. {$ifdef DEBUG_AOPTCPU}
  400. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  401. begin
  402. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  403. end;
  404. {$else DEBUG_AOPTCPU}
  405. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  406. begin
  407. end;
  408. {$endif DEBUG_AOPTCPU}
  409. function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  410. begin
  411. if not SuperRegistersEqual(reg1,reg2) then
  412. exit(false);
  413. if getregtype(reg1)<>R_INTREGISTER then
  414. exit(true); {because SuperRegistersEqual is true}
  415. case getsubreg(reg1) of
  416. { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
  417. higher, it preserves the high bits, so the new value depends on
  418. reg2's previous value. In other words, it is equivalent to doing:
  419. reg2 := (reg2 and $ffffff00) or byte(reg1); }
  420. R_SUBL:
  421. exit(getsubreg(reg2)=R_SUBL);
  422. { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
  423. higher, it actually does a:
  424. reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
  425. R_SUBH:
  426. exit(getsubreg(reg2)=R_SUBH);
  427. { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
  428. bits of reg2:
  429. reg2 := (reg2 and $ffff0000) or word(reg1); }
  430. R_SUBW:
  431. exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
  432. { a write to R_SUBD always overwrites every other subregister,
  433. because it clears the high 32 bits of R_SUBQ on x86_64 }
  434. R_SUBD,
  435. R_SUBQ:
  436. exit(true);
  437. else
  438. internalerror(2017042801);
  439. end;
  440. end;
  441. function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  442. begin
  443. if not SuperRegistersEqual(reg1,reg2) then
  444. exit(false);
  445. if getregtype(reg1)<>R_INTREGISTER then
  446. exit(true); {because SuperRegistersEqual is true}
  447. case getsubreg(reg1) of
  448. R_SUBL:
  449. exit(getsubreg(reg2)<>R_SUBH);
  450. R_SUBH:
  451. exit(getsubreg(reg2)<>R_SUBL);
  452. R_SUBW,
  453. R_SUBD,
  454. R_SUBQ:
  455. exit(true);
  456. else
  457. internalerror(2017042802);
  458. end;
  459. end;
  460. function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
  461. var
  462. hp1 : tai;
  463. l : TCGInt;
  464. begin
  465. result:=false;
  466. { changes the code sequence
  467. shr/sar const1, x
  468. shl const2, x
  469. to
  470. either "sar/and", "shl/and" or just "and" depending on const1 and const2 }
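  { e.g. for const1 = const2 = 3 and a 32 bit operand,
      shrl $3,%eax
      shll $3,%eax
    is turned into
      andl $0xfffffff8,%eax }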
  471. if GetNextInstruction(p, hp1) and
  472. MatchInstruction(hp1,A_SHL,[]) and
  473. (taicpu(p).oper[0]^.typ = top_const) and
  474. (taicpu(hp1).oper[0]^.typ = top_const) and
  475. (taicpu(hp1).opsize = taicpu(p).opsize) and
  476. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
  477. OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
  478. begin
  479. if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
  480. not(cs_opt_size in current_settings.optimizerswitches) then
  481. begin
  482. { shr/sar const1, %reg
  483. shl const2, %reg
  484. with const1 > const2 }
  485. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
  486. taicpu(hp1).opcode := A_AND;
  487. l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
  488. case taicpu(p).opsize Of
  489. S_B: taicpu(hp1).loadConst(0,l Xor $ff);
  490. S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
  491. S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
  492. S_Q: taicpu(hp1).loadConst(0,l Xor aint($ffffffffffffffff));
  493. else
  494. Internalerror(2017050703)
  495. end;
  496. end
  497. else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
  498. not(cs_opt_size in current_settings.optimizerswitches) then
  499. begin
  500. { shr/sar const1, %reg
  501. shl const2, %reg
  502. with const1 < const2 }
  503. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
  504. taicpu(p).opcode := A_AND;
  505. l := (1 shl (taicpu(p).oper[0]^.val))-1;
  506. case taicpu(p).opsize Of
  507. S_B: taicpu(p).loadConst(0,l Xor $ff);
  508. S_W: taicpu(p).loadConst(0,l Xor $ffff);
  509. S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
  510. S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
  511. else
  512. Internalerror(2017050702)
  513. end;
  514. end
  515. else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
  516. begin
  517. { shr/sar const1, %reg
  518. shl const2, %reg
  519. with const1 = const2 }
  520. taicpu(p).opcode := A_AND;
  521. l := (1 shl (taicpu(p).oper[0]^.val))-1;
  522. case taicpu(p).opsize Of
  523. S_B: taicpu(p).loadConst(0,l Xor $ff);
  524. S_W: taicpu(p).loadConst(0,l Xor $ffff);
  525. S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
  526. S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
  527. else
  528. Internalerror(2017050701)
  529. end;
  530. asml.remove(hp1);
  531. hp1.free;
  532. end;
  533. end;
  534. end;
  535. { allocates register reg between (and including) instructions p1 and p2
  536. the type of p1 and p2 must not be in SkipInstr
  537. note that this routine is both called from the peephole optimizer
  538. (where optinfo is not yet initialised) and from the cse (where it is) }
  539. procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
  540. var
  541. hp, start: tai;
  542. removedsomething,
  543. firstRemovedWasAlloc,
  544. lastRemovedWasDealloc: boolean;
  545. begin
  546. {$ifdef EXTDEBUG}
  547. { if assigned(p1.optinfo) and
  548. (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
  549. internalerror(2004101010); }
  550. {$endif EXTDEBUG}
  551. start := p1;
  552. if (reg = NR_ESP) or
  553. (reg = current_procinfo.framepointer) or
  554. not(assigned(p1)) then
  555. { this happens with registers which are loaded implicitly, outside the }
  556. { current block (e.g. esi with self) }
  557. exit;
  558. { make sure we allocate it for this instruction }
  559. getnextinstruction(p2,p2);
  560. lastRemovedWasDealloc := false;
  561. removedSomething := false;
  562. firstRemovedWasAlloc := false;
  563. {$ifdef allocregdebug}
  564. hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  565. ' from here...'));
  566. insertllitem(asml,p1.previous,p1,hp);
  567. hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  568. ' till here...'));
  569. insertllitem(asml,p2,p2.next,hp);
  570. {$endif allocregdebug}
  571. { do it the safe way: always allocate the full super register,
  572. as we do no register re-allocation in the peephole optimizer,
  573. this does not hurt
  574. }
  575. case getregtype(reg) of
  576. R_MMREGISTER:
  577. reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
  578. R_INTREGISTER:
  579. reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
  580. end;
  581. if not(RegInUsedRegs(reg,initialusedregs)) then
  582. begin
  583. hp := tai_regalloc.alloc(reg,nil);
  584. insertllItem(p1.previous,p1,hp);
  585. IncludeRegInUsedRegs(reg,initialusedregs);
  586. end;
  587. while assigned(p1) and
  588. (p1 <> p2) do
  589. begin
  590. if assigned(p1.optinfo) then
  591. internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
  592. p1 := tai(p1.next);
  593. repeat
  594. while assigned(p1) and
  595. (p1.typ in (SkipInstr-[ait_regalloc])) Do
  596. p1 := tai(p1.next);
  597. { remove all allocation/deallocation info about the register in between }
  598. if assigned(p1) and
  599. (p1.typ = ait_regalloc) then
  600. begin
  601. { same super register, different sub register? }
  602. if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
  603. begin
  604. if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
  605. internalerror(2016101501);
  606. tai_regalloc(p1).reg:=reg;
  607. end;
  608. if tai_regalloc(p1).reg=reg then
  609. begin
  610. if not removedSomething then
  611. begin
  612. firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
  613. removedSomething := true;
  614. end;
  615. lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
  616. hp := tai(p1.Next);
  617. asml.Remove(p1);
  618. p1.free;
  619. p1 := hp;
  620. end
  621. else
  622. p1 := tai(p1.next);
  623. end;
  624. until not(assigned(p1)) or
  625. not(p1.typ in SkipInstr);
  626. end;
  627. if assigned(p1) then
  628. begin
  629. if firstRemovedWasAlloc then
  630. begin
  631. hp := tai_regalloc.Alloc(reg,nil);
  632. insertLLItem(start.previous,start,hp);
  633. end;
  634. if lastRemovedWasDealloc then
  635. begin
  636. hp := tai_regalloc.DeAlloc(reg,nil);
  637. insertLLItem(p1.previous,p1,hp);
  638. end;
  639. end;
  640. end;
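  { returns true if hp overwrites reg entirely with a value that does not depend on
    reg's previous contents, i.e. reg is loaded with a new value }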
  641. function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  642. var
  643. p: taicpu;
  644. begin
  645. if not assigned(hp) or
  646. (hp.typ <> ait_instruction) then
  647. begin
  648. Result := false;
  649. exit;
  650. end;
  651. p := taicpu(hp);
  652. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  653. with insprop[p.opcode] do
  654. begin
  655. case getsubreg(reg) of
  656. R_SUBW,R_SUBD,R_SUBQ:
  657. Result:=
  658. RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
  659. RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
  660. RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
  661. RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
  662. RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
  663. RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
  664. R_SUBFLAGCARRY:
  665. Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
  666. R_SUBFLAGPARITY:
  667. Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
  668. R_SUBFLAGAUXILIARY:
  669. Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
  670. R_SUBFLAGZERO:
  671. Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
  672. R_SUBFLAGSIGN:
  673. Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
  674. R_SUBFLAGOVERFLOW:
  675. Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
  676. R_SUBFLAGINTERRUPT:
  677. Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
  678. R_SUBFLAGDIRECTION:
  679. Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
  680. else
  681. internalerror(2017050501);
  682. end;
  683. exit;
  684. end;
  685. Result :=
  686. (((p.opcode = A_MOV) or
  687. (p.opcode = A_MOVZX) or
  688. (p.opcode = A_MOVSX) or
  689. (p.opcode = A_LEA) or
  690. (p.opcode = A_VMOVSS) or
  691. (p.opcode = A_VMOVSD) or
  692. (p.opcode = A_VMOVAPD) or
  693. (p.opcode = A_VMOVAPS) or
  694. (p.opcode = A_VMOVQ) or
  695. (p.opcode = A_MOVSS) or
  696. (p.opcode = A_MOVSD) or
  697. (p.opcode = A_MOVQ) or
  698. (p.opcode = A_MOVAPD) or
  699. (p.opcode = A_MOVAPS) or
  700. {$ifndef x86_64}
  701. (p.opcode = A_LDS) or
  702. (p.opcode = A_LES) or
  703. {$endif not x86_64}
  704. (p.opcode = A_LFS) or
  705. (p.opcode = A_LGS) or
  706. (p.opcode = A_LSS)) and
  707. (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
  708. (p.oper[1]^.typ = top_reg) and
  709. (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
  710. ((p.oper[0]^.typ = top_const) or
  711. ((p.oper[0]^.typ = top_reg) and
  712. not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  713. ((p.oper[0]^.typ = top_ref) and
  714. not RegInRef(reg,p.oper[0]^.ref^)))) or
  715. ((p.opcode = A_POP) and
  716. (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
  717. ((p.opcode = A_IMUL) and
  718. (p.ops=3) and
  719. (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
  720. (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
  721. ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
  722. ((((p.opcode = A_IMUL) or
  723. (p.opcode = A_MUL)) and
  724. (p.ops=1)) and
  725. (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  726. ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
  727. (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  728. ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  729. ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
  730. {$ifdef x86_64}
  731. or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
  732. {$endif x86_64}
  733. )) or
  734. ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  735. ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
  736. {$ifdef x86_64}
  737. ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
  738. {$endif x86_64}
  739. ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  740. {$ifndef x86_64}
  741. ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  742. ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  743. {$endif not x86_64}
  744. ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  745. ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  746. ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  747. {$ifndef x86_64}
  748. ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  749. {$endif not x86_64}
  750. ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  751. ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
  752. ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
  753. ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
  754. {$ifdef x86_64}
  755. ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
  756. {$endif x86_64}
  757. ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  758. (((p.opcode = A_FSTSW) or
  759. (p.opcode = A_FNSTSW)) and
  760. (p.oper[0]^.typ=top_reg) and
  761. Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  762. (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
  763. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  764. (p.oper[0]^.reg=p.oper[1]^.reg) and
  765. Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
  766. end;
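  { returns true if p is (part of) the routine's exit code: a ret, a leave followed by a ret,
    or a restore of the stack pointer from the frame pointer followed by a pop of the frame
    pointer and a ret; a leading nop is skipped }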
  767. class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
  768. var
  769. hp2,hp3 : tai;
  770. begin
  771. { some x86-64 targets issue a NOP before the real exit code }
  772. if MatchInstruction(p,A_NOP,[]) then
  773. GetNextInstruction(p,p);
  774. result:=assigned(p) and (p.typ=ait_instruction) and
  775. ((taicpu(p).opcode = A_RET) or
  776. ((taicpu(p).opcode=A_LEAVE) and
  777. GetNextInstruction(p,hp2) and
  778. MatchInstruction(hp2,A_RET,[S_NO])
  779. ) or
  780. ((((taicpu(p).opcode=A_MOV) and
  781. MatchOpType(taicpu(p),top_reg,top_reg) and
  782. (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
  783. (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
  784. ((taicpu(p).opcode=A_LEA) and
  785. MatchOpType(taicpu(p),top_ref,top_reg) and
  786. (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
  787. (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
  788. )
  789. ) and
  790. GetNextInstruction(p,hp2) and
  791. MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
  792. MatchOpType(taicpu(hp2),top_reg) and
  793. (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
  794. GetNextInstruction(hp2,hp3) and
  795. MatchInstruction(hp3,A_RET,[S_NO])
  796. )
  797. );
  798. end;
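  { returns true if hp1 is an arithmetic operation whose destination is reg and whose
    source operand (if any) is a constant or a register different from reg
    (add/sub/or/xor/and/shl/shr/sar, or inc/dec/neg/not on reg) }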
  799. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  800. begin
  801. isFoldableArithOp := False;
  802. case hp1.opcode of
  803. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  804. isFoldableArithOp :=
  805. ((taicpu(hp1).oper[0]^.typ = top_const) or
  806. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  807. (taicpu(hp1).oper[0]^.reg <> reg))) and
  808. (taicpu(hp1).oper[1]^.typ = top_reg) and
  809. (taicpu(hp1).oper[1]^.reg = reg);
  810. A_INC,A_DEC,A_NEG,A_NOT:
  811. isFoldableArithOp :=
  812. (taicpu(hp1).oper[0]^.typ = top_reg) and
  813. (taicpu(hp1).oper[0]^.reg = reg);
  814. end;
  815. end;
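  { removes the last deallocation of the function result register(s) (EAX and, for 64 bit
    ordinal results, also EDX) before p, so the result register stays allocated up to the
    exit code }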
  816. procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);
  817. procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
  818. var
  819. hp2: tai;
  820. begin
  821. hp2 := p;
  822. repeat
  823. hp2 := tai(hp2.previous);
  824. if assigned(hp2) and
  825. (hp2.typ = ait_regalloc) and
  826. (tai_regalloc(hp2).ratype=ra_dealloc) and
  827. (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
  828. (getsupreg(tai_regalloc(hp2).reg) = supreg) then
  829. begin
  830. asml.remove(hp2);
  831. hp2.free;
  832. break;
  833. end;
  834. until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
  835. end;
  836. begin
  837. case current_procinfo.procdef.returndef.typ of
  838. arraydef,recorddef,pointerdef,
  839. stringdef,enumdef,procdef,objectdef,errordef,
  840. filedef,setdef,procvardef,
  841. classrefdef,forwarddef:
  842. DoRemoveLastDeallocForFuncRes(RS_EAX);
  843. orddef:
  844. if current_procinfo.procdef.returndef.size <> 0 then
  845. begin
  846. DoRemoveLastDeallocForFuncRes(RS_EAX);
  847. { for int64/qword }
  848. if current_procinfo.procdef.returndef.size = 8 then
  849. DoRemoveLastDeallocForFuncRes(RS_EDX);
  850. end;
  851. end;
  852. end;
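  { MovapX op MovapX -> op: removes a movaps/movapd pair surrounding a scalar SSE
    add/sub/mul/div when the copied register is not used afterwards }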
  853. function TX86AsmOptimizer.OptPass1MOVAP(var p : tai) : boolean;
  854. var
  855. TmpUsedRegs : TAllUsedRegs;
  856. hp1,hp2 : tai;
  857. alloc ,dealloc: tai_regalloc;
  858. begin
  859. result:=false;
  860. if MatchOpType(taicpu(p),top_reg,top_reg) and
  861. GetNextInstruction(p, hp1) and
  862. (hp1.typ = ait_instruction) and
  863. GetNextInstruction(hp1, hp2) and
  864. MatchInstruction(hp2,taicpu(p).opcode,[]) and
  865. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  866. MatchOpType(taicpu(hp2),top_reg,top_reg) and
  867. MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  868. (((taicpu(p).opcode=A_MOVAPS) and
  869. ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
  870. (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
  871. ((taicpu(p).opcode=A_MOVAPD) and
  872. ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
  873. (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
  874. ) then
  875. { change
  876. movapX reg,reg2
  877. addsX/subsX/... reg3, reg2
  878. movapX reg2,reg
  879. to
  880. addsX/subsX/... reg3,reg
  881. }
  882. begin
  883. CopyUsedRegs(TmpUsedRegs);
  884. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  885. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  886. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  887. begin
  888. DebugMsg('Peephole Optimization MovapXOpMovapX2Op ('+
  889. std_op2str[taicpu(p).opcode]+' '+
  890. std_op2str[taicpu(hp1).opcode]+' '+
  891. std_op2str[taicpu(hp2).opcode]+') done',p);
  892. { we cannot eliminate the first move if
  893. the operation uses the same register for source and dest }
  894. if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
  895. begin
  896. asml.remove(p);
  897. p.Free;
  898. end;
  899. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  900. asml.remove(hp2);
  901. hp2.Free;
  902. p:=hp1;
  903. result:=true;
  904. end;
  905. ReleaseUsedRegs(TmpUsedRegs);
  906. end
  907. end;
  908. function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
  909. var
  910. TmpUsedRegs : TAllUsedRegs;
  911. hp1,hp2 : tai;
  912. begin
  913. result:=false;
  914. if MatchOpType(taicpu(p),top_reg,top_reg) then
  915. begin
  916. { vmova* reg1,reg1
  917. =>
  918. <nop> }
  919. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  920. begin
  921. GetNextInstruction(p,hp1);
  922. asml.Remove(p);
  923. p.Free;
  924. p:=hp1;
  925. result:=true;
  926. end
  927. else if GetNextInstruction(p,hp1) then
  928. begin
  929. if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
  930. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  931. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  932. begin
  933. { vmova* reg1,reg2
  934. vmova* reg2,reg3
  935. dealloc reg2
  936. =>
  937. vmova* reg1,reg3 }
  938. CopyUsedRegs(TmpUsedRegs);
  939. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  940. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  941. begin
  942. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  943. asml.Remove(hp1);
  944. hp1.Free;
  945. result:=true;
  946. end
  947. { special case:
  948. vmova* reg1,reg2
  949. vmova* reg2,reg1
  950. =>
  951. vmova* reg1,reg2 }
  952. else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
  953. begin
  954. asml.Remove(hp1);
  955. hp1.Free;
  956. result:=true;
  957. end
  958. end
  959. else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
  960. { we mix single and double operations here because we assume that the compiler
  961. generates vmovapd only after double operations and vmovaps only after single operations }
  962. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
  963. GetNextInstruction(hp1,hp2) and
  964. MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  965. MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
  966. begin
  967. CopyUsedRegs(TmpUsedRegs);
  968. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  969. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  970. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
  971. then
  972. begin
  973. taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
  974. asml.Remove(p);
  975. p.Free;
  976. asml.Remove(hp2);
  977. hp2.Free;
  978. p:=hp1;
  979. end;
  980. end;
  981. end;
  982. end;
  983. end;
  984. function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
  985. var
  986. TmpUsedRegs : TAllUsedRegs;
  987. hp1 : tai;
  988. begin
  989. result:=false;
  990. { replace
  991. V<Op>X %mreg1,%mreg2,%mreg3
  992. VMovX %mreg3,%mreg4
  993. dealloc %mreg3
  994. by
  995. V<Op>X %mreg1,%mreg2,%mreg4
  996. ?
  997. }
  998. if GetNextInstruction(p,hp1) and
  999. { we mix single and double operations here because we assume that the compiler
  1000. generates vmovapd only after double operations and vmovaps only after single operations }
  1001. MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  1002. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  1003. (taicpu(hp1).oper[1]^.typ=top_reg) then
  1004. begin
  1005. CopyUsedRegs(TmpUsedRegs);
  1006. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1007. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
  1008. ) then
  1009. begin
  1010. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  1011. DebugMsg('PeepHole Optimization VOpVmov2VOp done',p);
  1012. asml.Remove(hp1);
  1013. hp1.Free;
  1014. result:=true;
  1015. end;
  1016. end;
  1017. end;
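  { collection of mov optimisations: removal of mov reg,reg, folding of mov/and and mov/mov
    sequences, removal of dead stores before the exit code, etc. }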
  1018. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  1019. var
  1020. hp1, hp2: tai;
  1021. TmpUsedRegs : TAllUsedRegs;
  1022. GetNextInstruction_p : Boolean;
  1023. begin
  1024. Result:=false;
  1025. { remove mov reg1,reg1? }
  1026. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  1027. begin
  1028. GetNextInstruction(p, hp1);
  1029. DebugMsg('PeepHole Optimization Mov2Nop done',p);
  1030. asml.remove(p);
  1031. p.free;
  1032. p:=hp1;
  1033. Result:=true;
  1034. exit;
  1035. end;
  1036. GetNextInstruction_p:=GetNextInstruction(p, hp1);
  1037. if GetNextInstruction_p and
  1038. MatchInstruction(hp1,A_AND,[]) and
  1039. (taicpu(p).oper[1]^.typ = top_reg) and
  1040. MatchOpType(taicpu(hp1),top_const,top_reg) and
  1041. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  1042. case taicpu(p).opsize Of
  1043. S_L:
  1044. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  1045. begin
  1046. { Optimize out:
  1047. mov x, %reg
  1048. and ffffffffh, %reg
  1049. }
  1050. DebugMsg('PeepHole Optimization MovAnd2Mov 1 done',p);
  1051. asml.remove(hp1);
  1052. hp1.free;
  1053. Result:=true;
  1054. exit;
  1055. end;
  1056. S_Q: { TODO: Confirm if this is even possible }
  1057. if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then
  1058. begin
  1059. { Optimize out:
  1060. mov x, %reg
  1061. and ffffffffffffffffh, %reg
  1062. }
  1063. DebugMsg('PeepHole Optimization MovAnd2Mov 2 done',p);
  1064. asml.remove(hp1);
  1065. hp1.free;
  1066. Result:=true;
  1067. exit;
  1068. end;
  1069. end
  1070. else if GetNextInstruction_p and
  1071. MatchInstruction(hp1,A_MOV,[]) and
  1072. (taicpu(p).oper[1]^.typ = top_reg) and
  1073. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  1074. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1075. begin
  1076. CopyUsedRegs(TmpUsedRegs);
  1077. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  1078. { we have
  1079. mov x, %treg
  1080. mov %treg, y
  1081. }
  1082. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  1083. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1084. { we've got
  1085. mov x, %treg
  1086. mov %treg, y
  1087. with %treg is not used after }
  1088. case taicpu(p).oper[0]^.typ Of
  1089. top_reg:
  1090. begin
  1091. { change
  1092. mov %reg, %treg
  1093. mov %treg, y
  1094. to
  1095. mov %reg, y
  1096. }
  1097. if taicpu(hp1).oper[1]^.typ=top_reg then
  1098. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1099. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1100. DebugMsg('PeepHole Optimization MovMov2Mov 2 done',p);
  1101. asml.remove(hp1);
  1102. hp1.free;
  1103. ReleaseUsedRegs(TmpUsedRegs);
  1104. Result:=true;
  1105. Exit;
  1106. end;
  1107. top_ref:
  1108. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  1109. begin
  1110. { change
  1111. mov mem, %treg
  1112. mov %treg, %reg
  1113. to
  1114. mov mem, %reg
  1115. }
  1116. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1117. DebugMsg('PeepHole Optimization MovMov2Mov 3 done',p);
  1118. asml.remove(hp1);
  1119. hp1.free;
  1120. ReleaseUsedRegs(TmpUsedRegs);
  1121. Result:=true;
  1122. Exit;
  1123. end;
  1124. end;
  1125. ReleaseUsedRegs(TmpUsedRegs);
  1126. end
  1127. else
  1128. { Change
  1129. mov %reg1, %reg2
  1130. xxx %reg2, ???
  1131. to
  1132. mov %reg1, %reg2
  1133. xxx %reg1, ???
  1134. to avoid a write/read penalty
  1135. }
  1136. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1137. GetNextInstruction(p,hp1) and
  1138. (tai(hp1).typ = ait_instruction) and
  1139. (taicpu(hp1).ops >= 1) and
  1140. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1141. { we have
  1142. mov %reg1, %reg2
  1143. XXX %reg2, ???
  1144. }
  1145. begin
  1146. if ((taicpu(hp1).opcode = A_OR) or
  1147. (taicpu(hp1).opcode = A_AND) or
  1148. (taicpu(hp1).opcode = A_TEST)) and
  1149. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1150. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  1151. { we have
  1152. mov %reg1, %reg2
  1153. test/or/and %reg2, %reg2
  1154. }
  1155. begin
  1156. CopyUsedRegs(TmpUsedRegs);
  1157. { reg1 will be used after the first instruction,
  1158. so update the allocation info }
  1159. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1160. if GetNextInstruction(hp1, hp2) and
  1161. (hp2.typ = ait_instruction) and
  1162. taicpu(hp2).is_jmp and
  1163. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  1164. { change
  1165. mov %reg1, %reg2
  1166. test/or/and %reg2, %reg2
  1167. jxx
  1168. to
  1169. test %reg1, %reg1
  1170. jxx
  1171. }
  1172. begin
  1173. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1174. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1175. DebugMsg('PeepHole Optimization MovTestJxx2TestMov done',p);
  1176. asml.remove(p);
  1177. p.free;
  1178. p := hp1;
  1179. ReleaseUsedRegs(TmpUsedRegs);
  1180. Exit;
  1181. end
  1182. else
  1183. { change
  1184. mov %reg1, %reg2
  1185. test/or/and %reg2, %reg2
  1186. to
  1187. mov %reg1, %reg2
  1188. test/or/and %reg1, %reg1
  1189. }
  1190. begin
  1191. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1192. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1193. DebugMsg('PeepHole Optimization MovTestJxx2MovTestJxx done',p);
  1194. end;
  1195. ReleaseUsedRegs(TmpUsedRegs);
  1196. end
  1197. end
  1198. else
  1199. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1200. x >= RetOffset) as it doesn't do anything (it writes either to a
  1201. parameter or to the temporary storage room for the function
  1202. result)
  1203. }
  1204. if GetNextInstruction_p and
  1205. (tai(hp1).typ = ait_instruction) then
  1206. begin
  1207. if IsExitCode(hp1) and
  1208. MatchOpType(taicpu(p),top_reg,top_ref) and
  1209. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1210. not(assigned(current_procinfo.procdef.funcretsym) and
  1211. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1212. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  1213. begin
  1214. asml.remove(p);
  1215. p.free;
  1216. p:=hp1;
  1217. DebugMsg('Peephole removed deadstore before leave/ret',p);
  1218. RemoveLastDeallocForFuncRes(p);
  1219. exit;
  1220. end
  1221. { change
  1222. mov reg1, mem1
  1223. test/cmp x, mem1
  1224. to
  1225. mov reg1, mem1
  1226. test/cmp x, reg1
  1227. }
  1228. else if MatchOpType(taicpu(p),top_reg,top_ref) and
  1229. MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
  1230. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1231. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1232. begin
  1233. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1234. DebugMsg('Peephole MovTestCmp2MovTestCmp 1',hp1);
  1235. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1236. end;
  1237. end;
  1238. { Next instruction is also a MOV ? }
  1239. if GetNextInstruction_p and
  1240. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  1241. begin
  1242. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1243. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1244. { mov reg1, mem1 or mov mem1, reg1
  1245. mov mem2, reg2 mov reg2, mem2}
  1246. begin
  1247. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1248. { mov reg1, mem1 or mov mem1, reg1
  1249. mov mem2, reg1 mov reg2, mem1}
  1250. begin
  1251. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1252. { Removes the second statement from
  1253. mov reg1, mem1/reg2
  1254. mov mem1/reg2, reg1 }
  1255. begin
  1256. if taicpu(p).oper[0]^.typ=top_reg then
  1257. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1258. DebugMsg('PeepHole Optimization MovMov2Mov 1',p);
  1259. asml.remove(hp1);
  1260. hp1.free;
  1261. Result:=true;
  1262. exit;
  1263. end
  1264. else
  1265. begin
  1266. CopyUsedRegs(TmpUsedRegs);
  1267. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1268. if (taicpu(p).oper[1]^.typ = top_ref) and
  1269. { mov reg1, mem1
  1270. mov mem2, reg1 }
  1271. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1272. GetNextInstruction(hp1, hp2) and
  1273. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  1274. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1275. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  1276. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1277. { change to
  1278. mov reg1, mem1 mov reg1, mem1
  1279. mov mem2, reg1 cmp reg1, mem2
  1280. cmp mem1, reg1
  1281. }
  1282. begin
  1283. asml.remove(hp2);
  1284. hp2.free;
  1285. taicpu(hp1).opcode := A_CMP;
  1286. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1287. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1288. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1289. DebugMsg('Peephole Optimization MovMovCmp2MovCmp done',hp1);
  1290. end;
  1291. ReleaseUsedRegs(TmpUsedRegs);
  1292. end;
  1293. end
  1294. else if (taicpu(p).oper[1]^.typ=top_ref) and
  1295. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1296. begin
  1297. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1298. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1299. DebugMsg('PeepHole Optimization MovMov2MovMov1 done',p);
  1300. end
  1301. else
  1302. begin
  1303. CopyUsedRegs(TmpUsedRegs);
  1304. if GetNextInstruction(hp1, hp2) and
  1305. MatchOpType(taicpu(p),top_ref,top_reg) and
  1306. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1307. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1308. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  1309. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  1310. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1311. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  1312. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1313. { mov mem1, %reg1
  1314. mov %reg1, mem2
  1315. mov mem2, reg2
  1316. to:
  1317. mov mem1, reg2
  1318. mov reg2, mem2}
  1319. begin
  1320. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1321. DebugMsg('PeepHole Optimization MovMovMov2MovMov 1 done',p);
  1322. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1323. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1324. asml.remove(hp2);
  1325. hp2.free;
  1326. end
  1327. {$ifdef i386}
{ this is enabled for i386 only, as the rules to create the reg sets below
  are too complicated for x86-64, which would make this code too error-prone
  on x86-64 }
  1332. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1333. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  1334. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
{   mov mem1, reg1             mov mem1, reg1
    mov reg1, mem2             mov reg1, mem2
    mov mem2, reg2             mov mem2, reg1
  to:                        to:
    mov mem1, reg1             mov mem1, reg1
    mov mem1, reg2             mov reg1, mem2
    mov reg1, mem2
  or (if mem1 depends on reg1
      and/or if mem2 depends on reg2)
  to:
    mov mem1, reg1
    mov reg1, mem2
    mov reg1, reg2
}
  1349. begin
  1350. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1351. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1352. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1353. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1354. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1355. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1356. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1357. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1358. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1359. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1360. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1361. end
  1362. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1363. begin
  1364. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1365. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1366. end
  1367. else
  1368. begin
  1369. asml.remove(hp2);
  1370. hp2.free;
  1371. end
  1372. {$endif i386}
  1373. ;
  1374. ReleaseUsedRegs(TmpUsedRegs);
  1375. end;
  1376. end
  1377. (* { movl [mem1],reg1
  1378. movl [mem1],reg2
  1379. to
  1380. movl [mem1],reg1
  1381. movl reg1,reg2
  1382. }
  1383. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1384. (taicpu(p).oper[1]^.typ = top_reg) and
  1385. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1386. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1387. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1388. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1389. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1390. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1391. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1392. else*)
  1393. { movl const1,[mem1]
  1394. movl [mem1],reg1
  1395. to
  1396. movl const1,reg1
  1397. movl reg1,[mem1]
  1398. }
  1399. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  1400. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  1401. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1402. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1403. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  1404. begin
  1405. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1406. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1407. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1408. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1409. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  1410. DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
  1411. end
  1412. end
  1413. else if (taicpu(p).oper[1]^.typ = top_reg) and
  1414. GetNextInstruction_p and
  1415. (hp1.typ = ait_instruction) and
  1416. GetNextInstruction(hp1, hp2) and
  1417. MatchInstruction(hp2,A_MOV,[]) and
  1418. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  1419. (taicpu(hp2).oper[0]^.typ=top_reg) and
  1420. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  1421. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  1422. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  1423. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  1424. ) then
  1425. { change movsX/movzX reg/ref, reg2
  1426. add/sub/or/... reg3/$const, reg2
  1427. mov reg2 reg/ref
  1428. to add/sub/or/... reg3/$const, reg/ref }
  1429. begin
  1430. CopyUsedRegs(TmpUsedRegs);
  1431. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1432. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1433. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  1434. begin
  1435. { by example:
  1436. movswl %si,%eax movswl %si,%eax p
  1437. decl %eax addl %edx,%eax hp1
  1438. movw %ax,%si movw %ax,%si hp2
  1439. ->
  1440. movswl %si,%eax movswl %si,%eax p
  1441. decw %eax addw %edx,%eax hp1
  1442. movw %ax,%si movw %ax,%si hp2
  1443. }
  1444. DebugMsg('Peephole Optimization MovOpMov2Op ('+
  1445. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  1446. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  1447. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  1448. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1449. {
  1450. ->
  1451. movswl %si,%eax movswl %si,%eax p
  1452. decw %si addw %dx,%si hp1
  1453. movw %ax,%si movw %ax,%si hp2
  1454. }
  1455. case taicpu(hp1).ops of
  1456. 1:
  1457. begin
  1458. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  1459. if taicpu(hp1).oper[0]^.typ=top_reg then
  1460. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1461. end;
  1462. 2:
  1463. begin
  1464. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  1465. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  1466. (taicpu(hp1).opcode<>A_SHL) and
  1467. (taicpu(hp1).opcode<>A_SHR) and
  1468. (taicpu(hp1).opcode<>A_SAR) then
  1469. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1470. end;
  1471. else
  1472. internalerror(2008042701);
  1473. end;
  1474. {
  1475. ->
  1476. decw %si addw %dx,%si p
  1477. }
  1478. asml.remove(p);
  1479. asml.remove(hp2);
  1480. p.Free;
  1481. hp2.Free;
  1482. p := hp1;
  1483. end;
  1484. ReleaseUsedRegs(TmpUsedRegs);
  1485. end
  1486. else if GetNextInstruction_p and
  1487. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1488. GetNextInstruction(hp1, hp2) and
  1489. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1490. MatchOperand(Taicpu(p).oper[0]^,0) and
  1491. (Taicpu(p).oper[1]^.typ = top_reg) and
  1492. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1493. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
{   mov reg1,0
    bts reg1,operand1            -->      mov reg1,operand2
    or  reg1,operand2                     bts reg1,operand1 }
  1497. begin
  1498. Taicpu(hp2).opcode:=A_MOV;
  1499. asml.remove(hp1);
  1500. insertllitem(hp2,hp2.next,hp1);
  1501. asml.remove(p);
  1502. p.free;
  1503. p:=hp1;
  1504. end
  1505. else if GetNextInstruction_p and
  1506. MatchInstruction(hp1,A_LEA,[S_L]) and
  1507. MatchOpType(Taicpu(p),top_ref,top_reg) and
  1508. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1509. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1510. ) or
  1511. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1512. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1513. )
  1514. ) then
  1515. { mov reg1,ref
  1516. lea reg2,[reg1,reg2]
  1517. to
  1518. add reg2,ref}
  1519. begin
  1520. CopyUsedRegs(TmpUsedRegs);
{ reg1 must not be used afterwards }
  1522. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1523. begin
  1524. Taicpu(hp1).opcode:=A_ADD;
  1525. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1526. DebugMsg('Peephole MovLea2Add done',hp1);
  1527. asml.remove(p);
  1528. p.free;
  1529. p:=hp1;
  1530. end;
  1531. ReleaseUsedRegs(TmpUsedRegs);
  1532. end;
  1533. end;
  1534. function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
  1535. var
  1536. hp1 : tai;
  1537. begin
  1538. Result:=false;
  1539. if taicpu(p).ops <> 2 then
  1540. exit;
  1541. if GetNextInstruction(p,hp1) and
  1542. MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
  1543. (taicpu(hp1).ops = 2) then
  1544. begin
  1545. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1546. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1547. { movXX reg1, mem1 or movXX mem1, reg1
  1548. movXX mem2, reg2 movXX reg2, mem2}
  1549. begin
  1550. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1551. { movXX reg1, mem1 or movXX mem1, reg1
  1552. movXX mem2, reg1 movXX reg2, mem1}
  1553. begin
  1554. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1555. begin
  1556. { Removes the second statement from
  1557. movXX reg1, mem1/reg2
  1558. movXX mem1/reg2, reg1
  1559. }
  1560. if taicpu(p).oper[0]^.typ=top_reg then
  1561. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1562. { Removes the second statement from
  1563. movXX mem1/reg1, reg2
  1564. movXX reg2, mem1/reg1
  1565. }
  1566. if (taicpu(p).oper[1]^.typ=top_reg) and
  1567. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
  1568. begin
  1569. asml.remove(p);
  1570. p.free;
  1571. GetNextInstruction(hp1,p);
  1572. DebugMsg('PeepHole Optimization MovXXMovXX2Nop 1 done',p);
  1573. end
  1574. else
  1575. DebugMsg('PeepHole Optimization MovXXMovXX2MoVXX 1 done',p);
  1576. asml.remove(hp1);
  1577. hp1.free;
  1578. Result:=true;
  1579. exit;
  1580. end
  1581. end;
  1582. end;
  1583. end;
  1584. end;
  1585. function TX86AsmOptimizer.OptPass1OP(const p : tai) : boolean;
  1586. var
  1587. TmpUsedRegs : TAllUsedRegs;
  1588. hp1 : tai;
  1589. begin
  1590. result:=false;
  1591. { replace
  1592. <Op>X %mreg1,%mreg2 // Op in [ADD,MUL]
  1593. MovX %mreg2,%mreg1
  1594. dealloc %mreg2
  1595. by
  1596. <Op>X %mreg2,%mreg1
  1597. ?
  1598. }
  1599. if GetNextInstruction(p,hp1) and
{ we mix single and double operations here because we assume that the compiler
  generates vmovapd only after double operations and vmovaps only after single operations }
  1602. MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
  1603. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1604. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
  1605. (taicpu(p).oper[0]^.typ=top_reg) then
  1606. begin
  1607. CopyUsedRegs(TmpUsedRegs);
  1608. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1609. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  1610. begin
  1611. taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
  1612. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1613. DebugMsg('PeepHole Optimization OpMov2Op done',p);
  1614. asml.Remove(hp1);
  1615. hp1.Free;
  1616. result:=true;
  1617. end;
  1618. ReleaseUsedRegs(TmpUsedRegs);
  1619. end;
  1620. end;
  1621. function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
  1622. var
  1623. hp1 : tai;
  1624. l : ASizeInt;
  1625. TmpUsedRegs : TAllUsedRegs;
  1626. begin
  1627. Result:=false;
  1628. { removes seg register prefixes from LEA operations, as they
  1629. don't do anything}
  1630. taicpu(p).oper[0]^.ref^.Segment:=NR_NO;
  1631. { changes "lea (%reg1), %reg2" into "mov %reg1, %reg2" }
  1632. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1633. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
{ do not mess with leas accessing the stack pointer }
  1635. (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
  1636. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  1637. begin
  1638. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  1639. (taicpu(p).oper[0]^.ref^.offset = 0) then
  1640. begin
  1641. hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
  1642. taicpu(p).oper[1]^.reg);
  1643. InsertLLItem(p.previous,p.next, hp1);
  1644. DebugMsg('PeepHole Optimization Lea2Mov done',hp1);
  1645. p.free;
  1646. p:=hp1;
  1647. Result:=true;
  1648. exit;
  1649. end
  1650. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  1651. begin
  1652. hp1:=taicpu(p.Next);
  1653. DebugMsg('PeepHole Optimization Lea2Nop done',p);
  1654. asml.remove(p);
  1655. p.free;
  1656. p:=hp1;
  1657. Result:=true;
  1658. exit;
  1659. end
{ keep using lea to adjust the stack pointer, as this is
  the recommended way, unless we are optimizing for size }
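{ e.g. (illustration of the patterns handled below):
    lea 1(%reg),%reg    ->  inc %reg      (if inc/dec is preferred)
    lea -1(%reg),%reg   ->  dec %reg
    lea 8(%reg),%reg    ->  add $8,%reg
    lea -8(%reg),%reg   ->  sub $8,%reg }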
  1662. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  1663. (cs_opt_size in current_settings.optimizerswitches) then
  1664. with taicpu(p).oper[0]^.ref^ do
  1665. if (base = taicpu(p).oper[1]^.reg) then
  1666. begin
  1667. l:=offset;
  1668. if (l=1) and UseIncDec then
  1669. begin
  1670. taicpu(p).opcode:=A_INC;
  1671. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1672. taicpu(p).ops:=1;
  1673. DebugMsg('PeepHole Optimization Lea2Inc done',p);
  1674. end
  1675. else if (l=-1) and UseIncDec then
  1676. begin
  1677. taicpu(p).opcode:=A_DEC;
  1678. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1679. taicpu(p).ops:=1;
  1680. DebugMsg('PeepHole Optimization Lea2Dec done',p);
  1681. end
  1682. else
  1683. begin
  1684. if (l<0) and (l<>-2147483648) then
  1685. begin
  1686. taicpu(p).opcode:=A_SUB;
  1687. taicpu(p).loadConst(0,-l);
  1688. DebugMsg('PeepHole Optimization Lea2Sub done',p);
  1689. end
  1690. else
  1691. begin
  1692. taicpu(p).opcode:=A_ADD;
  1693. taicpu(p).loadConst(0,l);
  1694. DebugMsg('PeepHole Optimization Lea2Add done',p);
  1695. end;
  1696. end;
  1697. Result:=true;
  1698. exit;
  1699. end;
  1700. end;
  1701. if GetNextInstruction(p,hp1) and
  1702. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
  1703. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1704. MatchOpType(Taicpu(hp1),top_reg,top_reg) and
  1705. (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
  1706. begin
  1707. CopyUsedRegs(TmpUsedRegs);
  1708. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1709. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  1710. begin
  1711. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1712. DebugMsg('PeepHole Optimization LeaMov2Lea done',p);
  1713. asml.Remove(hp1);
  1714. hp1.Free;
  1715. result:=true;
  1716. end;
  1717. ReleaseUsedRegs(TmpUsedRegs);
  1718. end;
  1719. (*
This is unsafe: lea doesn't modify the flags, but "add"
does. This breaks webtbs/tw15694.pp. The above
transformations are also unsafe, but they don't seem to
be triggered by code that FPC generates (or at least
they do not occur in the tests...). This needs to be
fixed by checking the liveness of the flags register.
  1726. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1727. begin
  1728. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1729. taicpu(p).oper[0]^.ref^.base);
  1730. InsertLLItem(asml,p.previous,p.next, hp1);
  1731. DebugMsg('Peephole Lea2AddBase done',hp1);
  1732. p.free;
  1733. p:=hp1;
  1734. continue;
  1735. end
  1736. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1737. begin
  1738. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1739. taicpu(p).oper[0]^.ref^.index);
  1740. InsertLLItem(asml,p.previous,p.next,hp1);
  1741. DebugMsg('Peephole Lea2AddIndex done',hp1);
  1742. p.free;
  1743. p:=hp1;
  1744. continue;
  1745. end
  1746. *)
  1747. end;
  1748. function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
  1749. var
  1750. TmpUsedRegs : TAllUsedRegs;
  1751. hp1,hp2: tai;
  1752. begin
  1753. Result:=false;
  1754. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1755. GetNextInstruction(p, hp1) and
  1756. MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
  1757. MatchOpType(taicpu(hp1),top_ref,top_reg) and
  1758. ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
  1759. or
  1760. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
  1761. ) and
  1762. (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
  1763. { mov reg1, reg2
  1764. mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
  1765. begin
  1766. if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
  1767. taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
  1768. if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
  1769. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
  1770. DebugMsg('PeepHole Optimization MovMovXX2MoVXX 1 done',p);
  1771. asml.remove(p);
  1772. p.free;
  1773. p := hp1;
  1774. Result:=true;
  1775. exit;
  1776. end
  1777. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1778. GetNextInstruction(p,hp1) and
  1779. (hp1.typ = ait_instruction) and
{ while the GetNextInstruction(hp1,hp2) call could be factored out,
  doing it separately in both branches allows the cheap checks, which
  have a low probability of matching, to be done first }
  1783. ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1784. GetNextInstruction(hp1,hp2) and
  1785. MatchInstruction(hp2,A_MOV,[])
  1786. ) or
  1787. ((taicpu(hp1).opcode=A_LEA) and
  1788. GetNextInstruction(hp1,hp2) and
  1789. MatchInstruction(hp2,A_MOV,[]) and
  1790. ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  1791. (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
  1792. ) or
  1793. (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
  1794. taicpu(p).oper[1]^.reg) and
  1795. (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
  1796. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
  1797. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
  1798. ) and
  1799. ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
  1800. )
  1801. ) and
  1802. MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
  1803. (taicpu(hp2).oper[1]^.typ = top_ref) then
  1804. begin
  1805. CopyUsedRegs(TmpUsedRegs);
  1806. UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
  1807. if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
  1808. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2, TmpUsedRegs))) then
  1809. { change mov (ref), reg
  1810. add/sub/or/... reg2/$const, reg
  1811. mov reg, (ref)
  1812. # release reg
  1813. to add/sub/or/... reg2/$const, (ref) }
  1814. begin
  1815. case taicpu(hp1).opcode of
  1816. A_INC,A_DEC,A_NOT,A_NEG :
  1817. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1818. A_LEA :
  1819. begin
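{ pick as source of the add whichever lea operand is not the
  register loaded by the preceding mov: the index register, the
  base register, or, failing both, the constant offset }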
  1820. taicpu(hp1).opcode:=A_ADD;
  1821. if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
  1822. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
  1823. else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
  1824. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
  1825. else
  1826. taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
  1827. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1828. DebugMsg('Peephole FoldLea done',hp1);
  1829. end
  1830. else
  1831. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1832. end;
  1833. asml.remove(p);
  1834. asml.remove(hp2);
  1835. p.free;
  1836. hp2.free;
  1837. p := hp1
  1838. end;
  1839. ReleaseUsedRegs(TmpUsedRegs);
  1840. end;
  1841. end;
  1842. function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
  1843. var
  1844. TmpUsedRegs : TAllUsedRegs;
  1845. hp1 : tai;
  1846. begin
  1847. Result:=false;
  1848. if (taicpu(p).ops >= 2) and
  1849. ((taicpu(p).oper[0]^.typ = top_const) or
  1850. ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
  1851. (taicpu(p).oper[1]^.typ = top_reg) and
  1852. ((taicpu(p).ops = 2) or
  1853. ((taicpu(p).oper[2]^.typ = top_reg) and
  1854. (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
  1855. GetLastInstruction(p,hp1) and
  1856. MatchInstruction(hp1,A_MOV,[]) and
  1857. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  1858. ((taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) or
  1859. ((taicpu(hp1).opsize=S_L) and (taicpu(p).opsize=S_Q) and SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(p).oper[1]^.reg))) then
  1860. begin
  1861. CopyUsedRegs(TmpUsedRegs);
  1862. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
  1863. { change
  1864. mov reg1,reg2
  1865. imul y,reg2 to imul y,reg1,reg2 }
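{ e.g. (illustration): movl %esi,%eax ; imull $5,%eax  ->  imull $5,%esi,%eax }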
  1866. begin
  1867. taicpu(p).ops := 3;
  1868. taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
  1869. taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
  1870. DebugMsg('Peephole MovImul2Imul done',p);
  1871. asml.remove(hp1);
  1872. hp1.free;
  1873. result:=true;
  1874. end;
  1875. ReleaseUsedRegs(TmpUsedRegs);
  1876. end;
  1877. end;
  1878. function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
  1879. var
  1880. hp1 : tai;
  1881. begin
  1882. {
  1883. change
  1884. jmp .L1
  1885. ...
  1886. .L1:
  1887. ret
  1888. into
  1889. ret
  1890. }
  1891. result:=false;
  1892. if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
  1893. (taicpu(p).oper[0]^.ref^.index=NR_NO) then
  1894. begin
  1895. hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
  1896. if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and
  1897. MatchInstruction(hp1,A_RET,[S_NO]) then
  1898. begin
  1899. tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
  1900. taicpu(p).opcode:=A_RET;
  1901. taicpu(p).is_jmp:=false;
  1902. taicpu(p).ops:=taicpu(hp1).ops;
  1903. case taicpu(hp1).ops of
  1904. 0:
  1905. taicpu(p).clearop(0);
  1906. 1:
  1907. taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
  1908. else
  1909. internalerror(2016041301);
  1910. end;
  1911. result:=true;
  1912. end;
  1913. end;
  1914. end;
  1915. function CanBeCMOV(p : tai) : boolean;
  1916. begin
  1917. CanBeCMOV:=assigned(p) and
  1918. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
{ we can't use cmov ref,reg because
  ref could be nil and cmov still raises an exception
  if ref=nil, even though the mov wouldn't be done (FK)
  or ((taicpu(p).oper[0]^.typ = top_ref) and
  (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
}
  1925. MatchOpType(taicpu(p),top_reg,top_reg);
  1926. end;
  1927. function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
  1928. var
  1929. hp1,hp2,hp3: tai;
  1930. carryadd_opcode : TAsmOp;
  1931. l : Longint;
  1932. condition : TAsmCond;
  1933. begin
{   jb  @@1                           cmc
    inc/dec operand          -->      adc/sbb operand,0
  @@1:
  ... and ...
    jnb @@1
    inc/dec operand          -->      adc/sbb operand,0
  @@1: }
  1941. result:=false;
  1942. if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
  1943. GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
  1944. (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
  1945. begin
  1946. carryadd_opcode:=A_NONE;
  1947. if Taicpu(p).condition in [C_NAE,C_B] then
  1948. begin
  1949. if Taicpu(hp1).opcode=A_INC then
  1950. carryadd_opcode:=A_ADC;
  1951. if Taicpu(hp1).opcode=A_DEC then
  1952. carryadd_opcode:=A_SBB;
  1953. if carryadd_opcode<>A_NONE then
  1954. begin
  1955. Taicpu(p).clearop(0);
  1956. Taicpu(p).ops:=0;
  1957. Taicpu(p).is_jmp:=false;
  1958. Taicpu(p).opcode:=A_CMC;
  1959. Taicpu(p).condition:=C_NONE;
  1960. Taicpu(hp1).ops:=2;
  1961. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1962. Taicpu(hp1).loadconst(0,0);
  1963. Taicpu(hp1).opcode:=carryadd_opcode;
  1964. result:=true;
  1965. exit;
  1966. end;
  1967. end;
  1968. if Taicpu(p).condition in [C_AE,C_NB] then
  1969. begin
  1970. if Taicpu(hp1).opcode=A_INC then
  1971. carryadd_opcode:=A_ADC;
  1972. if Taicpu(hp1).opcode=A_DEC then
  1973. carryadd_opcode:=A_SBB;
  1974. if carryadd_opcode<>A_NONE then
  1975. begin
  1976. asml.remove(p);
  1977. p.free;
  1978. Taicpu(hp1).ops:=2;
  1979. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1980. Taicpu(hp1).loadconst(0,0);
  1981. Taicpu(hp1).opcode:=carryadd_opcode;
  1982. p:=hp1;
  1983. result:=true;
  1984. exit;
  1985. end;
  1986. end;
  1987. end;
  1988. {$ifndef i8086}
  1989. if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
  1990. begin
  1991. { check for
  1992. jCC xxx
  1993. <several movs>
  1994. xxx:
  1995. }
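{ e.g. (illustration of the conversion below):
    jle .L1
    movl %edx,%eax      -->     cmovg %edx,%eax
  .L1: }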
  1996. l:=0;
  1997. GetNextInstruction(p, hp1);
  1998. while assigned(hp1) and
  1999. CanBeCMOV(hp1) and
  2000. { stop on labels }
  2001. not(hp1.typ=ait_label) do
  2002. begin
  2003. inc(l);
  2004. GetNextInstruction(hp1,hp1);
  2005. end;
  2006. if assigned(hp1) then
  2007. begin
  2008. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  2009. begin
  2010. if (l<=4) and (l>0) then
  2011. begin
  2012. condition:=inverse_cond(taicpu(p).condition);
  2013. hp2:=p;
  2014. GetNextInstruction(p,hp1);
  2015. p:=hp1;
  2016. repeat
  2017. taicpu(hp1).opcode:=A_CMOVcc;
  2018. taicpu(hp1).condition:=condition;
  2019. GetNextInstruction(hp1,hp1);
  2020. until not(assigned(hp1)) or
  2021. not(CanBeCMOV(hp1));
{ wait with the removal, else GetNextInstruction could
  ignore the label if the jump we transformed away held
  its only reference }
  2025. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  2026. { if the label refs. reach zero, remove any alignment before the label }
  2027. if (hp1.typ=ait_align) and (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).getrefs=0) then
  2028. begin
  2029. asml.Remove(hp1);
  2030. hp1.Free;
  2031. end;
  2032. asml.remove(hp2);
  2033. hp2.free;
  2034. result:=true;
  2035. exit;
  2036. end;
  2037. end
  2038. else
  2039. begin
  2040. { check further for
  2041. jCC xxx
  2042. <several movs 1>
  2043. jmp yyy
  2044. xxx:
  2045. <several movs 2>
  2046. yyy:
  2047. }
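{ e.g. (illustration):
    je  .L1
    movl %edx,%eax                cmovne %edx,%eax
    jmp .L2              -->      cmove  %ecx,%eax
  .L1:
    movl %ecx,%eax
  .L2:
  (the now-unreferenced labels are cleaned up separately) }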
  2048. { hp2 points to jmp yyy }
  2049. hp2:=hp1;
  2050. { skip hp1 to xxx }
  2051. GetNextInstruction(hp1, hp1);
  2052. if assigned(hp2) and
  2053. assigned(hp1) and
  2054. (l<=3) and
  2055. (hp2.typ=ait_instruction) and
  2056. (taicpu(hp2).is_jmp) and
  2057. (taicpu(hp2).condition=C_None) and
  2058. { real label and jump, no further references to the
  2059. label are allowed }
  2060. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
  2061. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  2062. begin
  2063. l:=0;
  2064. { skip hp1 to <several moves 2> }
  2065. GetNextInstruction(hp1, hp1);
  2066. while assigned(hp1) and
  2067. CanBeCMOV(hp1) do
  2068. begin
  2069. inc(l);
  2070. GetNextInstruction(hp1, hp1);
  2071. end;
  2072. { hp1 points to yyy: }
  2073. if assigned(hp1) and
  2074. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  2075. begin
  2076. condition:=inverse_cond(taicpu(p).condition);
  2077. GetNextInstruction(p,hp1);
  2078. hp3:=p;
  2079. p:=hp1;
  2080. repeat
  2081. taicpu(hp1).opcode:=A_CMOVcc;
  2082. taicpu(hp1).condition:=condition;
  2083. GetNextInstruction(hp1,hp1);
  2084. until not(assigned(hp1)) or
  2085. not(CanBeCMOV(hp1));
  2086. { hp2 is still at jmp yyy }
  2087. GetNextInstruction(hp2,hp1);
  2088. { hp2 is now at xxx: }
  2089. condition:=inverse_cond(condition);
  2090. GetNextInstruction(hp1,hp1);
  2091. { hp1 is now at <several movs 2> }
  2092. repeat
  2093. taicpu(hp1).opcode:=A_CMOVcc;
  2094. taicpu(hp1).condition:=condition;
  2095. GetNextInstruction(hp1,hp1);
  2096. until not(assigned(hp1)) or
  2097. not(CanBeCMOV(hp1));
  2098. {
  2099. asml.remove(hp1.next)
  2100. hp1.next.free;
  2101. asml.remove(hp1);
  2102. hp1.free;
  2103. }
  2104. { remove jCC }
  2105. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  2106. asml.remove(hp3);
  2107. hp3.free;
  2108. { remove jmp }
  2109. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  2110. asml.remove(hp2);
  2111. hp2.free;
  2112. result:=true;
  2113. exit;
  2114. end;
  2115. end;
  2116. end;
  2117. end;
  2118. end;
  2119. {$endif i8086}
  2120. end;
  2121. function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
  2122. var
  2123. hp1,hp2: tai;
  2124. begin
  2125. result:=false;
  2126. if (taicpu(p).oper[1]^.typ = top_reg) and
  2127. GetNextInstruction(p,hp1) and
  2128. (hp1.typ = ait_instruction) and
  2129. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  2130. GetNextInstruction(hp1,hp2) and
  2131. MatchInstruction(hp2,A_MOV,[]) and
  2132. (taicpu(hp2).oper[0]^.typ = top_reg) and
  2133. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  2134. {$ifdef i386}
  2135. { not all registers have byte size sub registers on i386 }
  2136. ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
  2137. {$endif i386}
  2138. (((taicpu(hp1).ops=2) and
  2139. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  2140. ((taicpu(hp1).ops=1) and
  2141. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  2142. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  2143. begin
  2144. { change movsX/movzX reg/ref, reg2
  2145. add/sub/or/... reg3/$const, reg2
  2146. mov reg2 reg/ref
  2147. to add/sub/or/... reg3/$const, reg/ref }
  2148. { by example:
  2149. movswl %si,%eax movswl %si,%eax p
  2150. decl %eax addl %edx,%eax hp1
  2151. movw %ax,%si movw %ax,%si hp2
  2152. ->
  2153. movswl %si,%eax movswl %si,%eax p
  2154. decw %eax addw %edx,%eax hp1
  2155. movw %ax,%si movw %ax,%si hp2
  2156. }
  2157. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  2158. {
  2159. ->
  2160. movswl %si,%eax movswl %si,%eax p
  2161. decw %si addw %dx,%si hp1
  2162. movw %ax,%si movw %ax,%si hp2
  2163. }
  2164. case taicpu(hp1).ops of
  2165. 1:
  2166. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  2167. 2:
  2168. begin
  2169. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  2170. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  2171. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2172. end;
  2173. else
  2174. internalerror(2008042701);
  2175. end;
  2176. {
  2177. ->
  2178. decw %si addw %dx,%si p
  2179. }
  2180. DebugMsg('PeepHole Optimization,var3',p);
  2181. asml.remove(p);
  2182. asml.remove(hp2);
  2183. p.free;
  2184. hp2.free;
  2185. p:=hp1;
  2186. end
  2187. { removes superfluous And's after movzx's }
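{ e.g. (illustration): movzbl %al,%edx ; andl $0xff,%edx  ->  movzbl %al,%edx }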
  2188. else if taicpu(p).opcode=A_MOVZX then
  2189. begin
  2190. if (taicpu(p).oper[1]^.typ = top_reg) and
  2191. GetNextInstruction(p, hp1) and
  2192. (tai(hp1).typ = ait_instruction) and
  2193. (taicpu(hp1).opcode = A_AND) and
  2194. (taicpu(hp1).oper[0]^.typ = top_const) and
  2195. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2196. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2197. begin
  2198. case taicpu(p).opsize Of
  2199. S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
  2200. if (taicpu(hp1).oper[0]^.val = $ff) then
  2201. begin
  2202. DebugMsg('PeepHole Optimization,var4',p);
  2203. asml.remove(hp1);
  2204. hp1.free;
  2205. end;
  2206. S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
  2207. if (taicpu(hp1).oper[0]^.val = $ffff) then
  2208. begin
  2209. DebugMsg('PeepHole Optimization,var5',p);
  2210. asml.remove(hp1);
  2211. hp1.free;
  2212. end;
  2213. {$ifdef x86_64}
  2214. S_LQ:
  2215. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  2216. begin
  2217. if (cs_asm_source in current_settings.globalswitches) then
  2218. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var6')),p);
  2219. asml.remove(hp1);
  2220. hp1.Free;
  2221. end;
  2222. {$endif x86_64}
  2223. end;
  2224. end;
{ changes some movzx constructs to faster synonyms (all examples
  are given with eax/ax, but are also valid for other registers) }
  2227. if (taicpu(p).oper[1]^.typ = top_reg) then
  2228. if (taicpu(p).oper[0]^.typ = top_reg) then
  2229. case taicpu(p).opsize of
  2230. S_BW:
  2231. begin
  2232. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2233. not(cs_opt_size in current_settings.optimizerswitches) then
  2234. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  2235. begin
  2236. taicpu(p).opcode := A_AND;
  2237. taicpu(p).changeopsize(S_W);
  2238. taicpu(p).loadConst(0,$ff);
  2239. DebugMsg('PeepHole Optimization,var7',p);
  2240. end
  2241. else if GetNextInstruction(p, hp1) and
  2242. (tai(hp1).typ = ait_instruction) and
  2243. (taicpu(hp1).opcode = A_AND) and
  2244. (taicpu(hp1).oper[0]^.typ = top_const) and
  2245. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2246. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2247. { Change "movzbw %reg1, %reg2; andw $const, %reg2"
  2248. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  2249. begin
  2250. DebugMsg('PeepHole Optimization,var8',p);
  2251. taicpu(p).opcode := A_MOV;
  2252. taicpu(p).changeopsize(S_W);
  2253. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  2254. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2255. end;
  2256. end;
  2257. S_BL:
  2258. begin
  2259. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2260. not(cs_opt_size in current_settings.optimizerswitches) then
  2261. { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
  2262. begin
  2263. taicpu(p).opcode := A_AND;
  2264. taicpu(p).changeopsize(S_L);
  2265. taicpu(p).loadConst(0,$ff)
  2266. end
  2267. else if GetNextInstruction(p, hp1) and
  2268. (tai(hp1).typ = ait_instruction) and
  2269. (taicpu(hp1).opcode = A_AND) and
  2270. (taicpu(hp1).oper[0]^.typ = top_const) and
  2271. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2272. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2273. { Change "movzbl %reg1, %reg2; andl $const, %reg2"
  2274. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  2275. begin
  2276. DebugMsg('PeepHole Optimization,var10',p);
  2277. taicpu(p).opcode := A_MOV;
  2278. taicpu(p).changeopsize(S_L);
  2279. { do not use R_SUBWHOLE
  2280. as movl %rdx,%eax
  2281. is invalid in assembler PM }
  2282. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  2283. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2284. end
  2285. end;
  2286. {$ifndef i8086}
  2287. S_WL:
  2288. begin
  2289. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2290. not(cs_opt_size in current_settings.optimizerswitches) then
  2291. { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
  2292. begin
  2293. DebugMsg('PeepHole Optimization,var11',p);
  2294. taicpu(p).opcode := A_AND;
  2295. taicpu(p).changeopsize(S_L);
  2296. taicpu(p).loadConst(0,$ffff);
  2297. end
  2298. else if GetNextInstruction(p, hp1) and
  2299. (tai(hp1).typ = ait_instruction) and
  2300. (taicpu(hp1).opcode = A_AND) and
  2301. (taicpu(hp1).oper[0]^.typ = top_const) and
  2302. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2303. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2304. { Change "movzwl %reg1, %reg2; andl $const, %reg2"
  2305. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  2306. begin
  2307. DebugMsg('PeepHole Optimization,var12',p);
  2308. taicpu(p).opcode := A_MOV;
  2309. taicpu(p).changeopsize(S_L);
  2310. { do not use R_SUBWHOLE
  2311. as movl %rdx,%eax
  2312. is invalid in assembler PM }
  2313. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  2314. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  2315. end;
  2316. end;
  2317. {$endif i8086}
  2318. end
  2319. else if (taicpu(p).oper[0]^.typ = top_ref) then
  2320. begin
  2321. if GetNextInstruction(p, hp1) and
  2322. (tai(hp1).typ = ait_instruction) and
  2323. (taicpu(hp1).opcode = A_AND) and
  2324. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2325. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2326. begin
  2327. taicpu(p).opcode := A_MOV;
  2328. case taicpu(p).opsize Of
  2329. S_BL:
  2330. begin
  2331. DebugMsg('PeepHole Optimization,var13',p);
  2332. taicpu(p).changeopsize(S_L);
  2333. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2334. end;
  2335. S_WL:
  2336. begin
  2337. DebugMsg('PeepHole Optimization,var14',p);
  2338. taicpu(p).changeopsize(S_L);
  2339. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  2340. end;
  2341. S_BW:
  2342. begin
  2343. DebugMsg('PeepHole Optimization,var15',p);
  2344. taicpu(p).changeopsize(S_W);
  2345. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2346. end;
  2347. {$ifdef x86_64}
  2348. S_BQ:
  2349. begin
  2350. DebugMsg('PeepHole Optimization,var16',p);
  2351. taicpu(p).changeopsize(S_Q);
  2352. taicpu(hp1).loadConst(
  2353. 0, taicpu(hp1).oper[0]^.val and $ff);
  2354. end;
  2355. S_WQ:
  2356. begin
  2357. DebugMsg('PeepHole Optimization,var17',p);
  2358. taicpu(p).changeopsize(S_Q);
  2359. taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
  2360. end;
  2361. S_LQ:
  2362. begin
  2363. DebugMsg('PeepHole Optimization,var18',p);
  2364. taicpu(p).changeopsize(S_Q);
  2365. taicpu(hp1).loadConst(
  2366. 0, taicpu(hp1).oper[0]^.val and $ffffffff);
  2367. end;
  2368. {$endif x86_64}
  2369. else
  2370. Internalerror(2017050704)
  2371. end;
  2372. end;
  2373. end;
  2374. end;
  2375. end;
  2376. function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  2377. var
  2378. hp1 : tai;
  2379. begin
  2380. Result:=false;
  2381. if not(GetNextInstruction(p, hp1)) then
  2382. exit;
  2383. if MatchOpType(taicpu(p),top_const,top_reg) and
  2384. MatchInstruction(hp1,A_AND,[]) and
  2385. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2386. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2387. { the second register must contain the first one, so compare their subreg types }
  2388. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  2389. (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
  2390. { change
  2391. and const1, reg
  2392. and const2, reg
  2393. to
  2394. and (const1 and const2), reg
  2395. }
  2396. begin
  2397. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  2398. DebugMsg('Peephole AndAnd2And done',hp1);
  2399. asml.remove(p);
  2400. p.Free;
  2401. p:=hp1;
  2402. Result:=true;
  2403. exit;
  2404. end
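{ remove a movzx that zero-extends a register onto itself when the
  preceding and-mask already fits into the movzx source size, so the
  extension cannot change the value }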
  2405. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2406. MatchInstruction(hp1,A_MOVZX,[]) and
  2407. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2408. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2409. (getsubreg(taicpu(hp1).oper[0]^.reg)=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  2410. (((taicpu(p).opsize=S_W) and
  2411. (taicpu(hp1).opsize=S_BW)) or
  2412. ((taicpu(p).opsize=S_L) and
  2413. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2414. {$ifdef x86_64}
  2415. or
  2416. ((taicpu(p).opsize=S_Q) and
  2417. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  2418. {$endif x86_64}
  2419. ) then
  2420. begin
  2421. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2422. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
  2423. ) or
  2424. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2425. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
  2426. {$ifdef x86_64}
  2427. or
  2428. (((taicpu(hp1).opsize)=S_LQ) and
  2429. ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
  2430. )
  2431. {$endif x86_64}
  2432. then
  2433. begin
  2434. DebugMsg('Peephole AndMovzToAnd done',p);
  2435. asml.remove(hp1);
  2436. hp1.free;
  2437. end;
  2438. end
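{ likewise, remove a movsx that sign-extends a register onto itself
  when the and-mask guarantees the sign bit of the source size is
  clear, so the sign extension cannot change the value }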
  2439. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2440. MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
  2441. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2442. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2443. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2444. (((taicpu(p).opsize=S_W) and
  2445. (taicpu(hp1).opsize=S_BW)) or
  2446. ((taicpu(p).opsize=S_L) and
  2447. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2448. {$ifdef x86_64}
  2449. or
  2450. ((taicpu(p).opsize=S_Q) and
  2451. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  2452. {$endif x86_64}
  2453. ) then
  2454. begin
  2455. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2456. ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
  2457. ) or
  2458. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2459. ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
  2460. {$ifdef x86_64}
  2461. or
  2462. (((taicpu(hp1).opsize)=S_LQ) and
  2463. ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
  2464. )
  2465. {$endif x86_64}
  2466. then
  2467. begin
  2468. DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
  2469. asml.remove(hp1);
  2470. hp1.free;
  2471. end;
  2472. end
  2473. else if (taicpu(p).oper[1]^.typ = top_reg) and
  2474. (hp1.typ = ait_instruction) and
  2475. (taicpu(hp1).is_jmp) and
  2476. (taicpu(hp1).opcode<>A_JMP) and
  2477. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  2478. { change
  2479. and x, reg
  2480. jxx
  2481. to
  2482. test x, reg
  2483. jxx
  2484. if reg is deallocated before the
  2485. jump, but only if it's a conditional jump (PFV)
  2486. }
  2487. taicpu(p).opcode := A_TEST;
  2488. end;
  2489. procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
  2490. begin
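{ the mov -> xor rewrite below clobbers the flags, so both
  transformations are only attempted while the flags are not live }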
  2491. if (taicpu(p).oper[1]^.typ = Top_Reg) and
  2492. not(RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  2493. begin
  2494. if (taicpu(p).oper[0]^.typ = top_const) then
  2495. begin
  2496. case taicpu(p).oper[0]^.val of
  2497. 0:
  2498. begin
  2499. { change "mov $0,%reg" into "xor %reg,%reg" }
  2500. taicpu(p).opcode := A_XOR;
  2501. taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
  2502. end;
  2503. $1..$FFFFFFFF:
  2504. begin
  2505. { Code size reduction by J. Gareth "Kit" Moreton }
  2506. { change 64-bit register to 32-bit register to reduce code size (upper 32 bits will be set to zero) }
  2507. case taicpu(p).opsize of
  2508. S_Q:
  2509. begin
DebugMsg('Peephole Optimization: movq x,%reg -> movl x,%reg (x is a 32-bit constant)', p);
  2511. TRegisterRec(taicpu(p).oper[1]^.reg).subreg := R_SUBD;
  2512. taicpu(p).opsize := S_L;
  2513. end;
  2514. end;
  2515. end;
  2516. end;
  2517. end;
  2518. end;
  2519. end;
  2520. end.