{ aoptx86.pas }
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
{ x86/x86_64 specific peephole optimizer: extends the generic TAsmOptimizer
  with knowledge about which registers an x86 instruction reads and writes,
  plus per-opcode optimisation handlers for the individual optimizer passes }
  28. TX86AsmOptimizer = class(TAsmOptimizer)
{ returns true if the instruction hp loads a new value into reg,
  overwriting the old contents }
  29. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
{ returns true if the instruction hp reads the value of reg
  (implemented as a wrapper around RegReadByInstruction) }
  30. function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
{ returns true if the instruction hp reads reg, either via an explicit
  operand, a memory reference, or an implicit operand (e.g. mul/div) }
  31. function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
  32. protected
  33. { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
  34. function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  35. { checks whether reading the value in reg1 depends on the value of reg2. This
  36. is very similar to SuperRegisterEquals, except it takes into account that
  37. R_SUBH and R_SUBL are independendent (e.g. reading from AL does not
  38. depend on the value in AH). }
  39. function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
{ inserts s as a comment before p when DEBUG_AOPTCPU is defined; no-op otherwise }
  40. procedure DebugMsg(const s : string; p : tai);inline;
{ allocates reg between (and including) instructions p1 and p2 by fixing up
  the regalloc information in the instruction list }
  41. procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
  42. class function IsExitCode(p : tai) : boolean;
  43. class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
  44. procedure RemoveLastDeallocForFuncRes(p : tai);
{ pre-pass transformation of shr/sar+shl sequences }
  45. function PrePeepholeOptSxx(var p : tai) : boolean;
{ per-opcode handlers; the Pass1/Pass2/PostPeephole prefixes correspond
  to the optimizer pass in which each handler is invoked }
  46. function OptPass1AND(var p : tai) : boolean;
  47. function OptPass1VMOVAP(var p : tai) : boolean;
  48. function OptPass1VOP(const p : tai) : boolean;
  49. function OptPass1MOV(var p : tai) : boolean;
  50. function OptPass1Movx(var p : tai) : boolean;
  51. function OptPass1MOVAP(var p : tai) : boolean;
  52. function OptPass1MOVXX(var p : tai) : boolean;
  53. function OptPass1OP(const p : tai) : boolean;
  54. function OptPass1LEA(var p : tai) : boolean;
  55. function OptPass2MOV(var p : tai) : boolean;
  56. function OptPass2Imul(var p : tai) : boolean;
  57. function OptPass2Jmp(var p : tai) : boolean;
  58. function OptPass2Jcc(var p : tai) : boolean;
  59. function PostPeepholeOptMov(const p : tai) : Boolean;
  60. function PostPeepholeOptCmp(var p : tai) : Boolean;
  61. procedure OptReferences;
  62. end;
{ each MatchInstruction overload returns true if instr is an assembler
  instruction whose opcode is (one of) the given opcode(s) and whose size is
  in opsize; an empty opsize set matches any size }
  63. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  64. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  65. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  66. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
{ operand matchers: register operand equal to reg / constant operand equal
  to a / operands of the same kind with equal contents }
  67. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  68. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  69. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
{ field-by-field comparison of two references }
  70. function RefsEqual(const r1, r2: treference): boolean;
{ returns true if ref uses only the given base/index registers (NR_INVALID
  acts as a wildcard), has no symbol/segment and a zero offset }
  71. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  72. { returns true, if ref is a reference using only the registers passed as base and index
  73. and having an offset }
  74. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  75. implementation
  76. uses
  77. cutils,verbose,
  78. globals,
  79. cpuinfo,
  80. procinfo,
  81. aasmbase,
  82. aoptutils,
  83. symconst,symsym,
  84. cgx86,
  85. itcpugas;
  86. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  87. begin
  88. result :=
  89. (instr.typ = ait_instruction) and
  90. (taicpu(instr).opcode = op) and
  91. ((opsize = []) or (taicpu(instr).opsize in opsize));
  92. end;
  93. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  94. begin
  95. result :=
  96. (instr.typ = ait_instruction) and
  97. ((taicpu(instr).opcode = op1) or
  98. (taicpu(instr).opcode = op2)
  99. ) and
  100. ((opsize = []) or (taicpu(instr).opsize in opsize));
  101. end;
  102. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  103. begin
  104. result :=
  105. (instr.typ = ait_instruction) and
  106. ((taicpu(instr).opcode = op1) or
  107. (taicpu(instr).opcode = op2) or
  108. (taicpu(instr).opcode = op3)
  109. ) and
  110. ((opsize = []) or (taicpu(instr).opsize in opsize));
  111. end;
  112. function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
  113. const opsize : topsizes) : boolean;
  114. var
  115. op : TAsmOp;
  116. begin
  117. result:=false;
  118. for op in ops do
  119. begin
  120. if (instr.typ = ait_instruction) and
  121. (taicpu(instr).opcode = op) and
  122. ((opsize = []) or (taicpu(instr).opsize in opsize)) then
  123. begin
  124. result:=true;
  125. exit;
  126. end;
  127. end;
  128. end;
  129. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  130. begin
  131. result := (oper.typ = top_reg) and (oper.reg = reg);
  132. end;
  133. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  134. begin
  135. result := (oper.typ = top_const) and (oper.val = a);
  136. end;
  137. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  138. begin
  139. result := oper1.typ = oper2.typ;
  140. if result then
  141. case oper1.typ of
  142. top_const:
  143. Result:=oper1.val = oper2.val;
  144. top_reg:
  145. Result:=oper1.reg = oper2.reg;
  146. top_ref:
  147. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  148. else
  149. internalerror(2013102801);
  150. end
  151. end;
  152. function RefsEqual(const r1, r2: treference): boolean;
  153. begin
  154. RefsEqual :=
  155. (r1.offset = r2.offset) and
  156. (r1.segment = r2.segment) and (r1.base = r2.base) and
  157. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  158. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  159. (r1.relsymbol = r2.relsymbol);
  160. end;
  161. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  162. begin
  163. Result:=(ref.offset=0) and
  164. (ref.scalefactor in [0,1]) and
  165. (ref.segment=NR_NO) and
  166. (ref.symbol=nil) and
  167. (ref.relsymbol=nil) and
  168. ((base=NR_INVALID) or
  169. (ref.base=base)) and
  170. ((index=NR_INVALID) or
  171. (ref.index=index));
  172. end;
  173. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  174. begin
  175. Result:=(ref.scalefactor in [0,1]) and
  176. (ref.segment=NR_NO) and
  177. (ref.symbol=nil) and
  178. (ref.relsymbol=nil) and
  179. ((base=NR_INVALID) or
  180. (ref.base=base)) and
  181. ((index=NR_INVALID) or
  182. (ref.index=index));
  183. end;
{ on x86 an instruction "loads from" a register exactly when it reads it,
  so this simply forwards to RegReadByInstruction }
  184. function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  185. begin
  186. Result:=RegReadByInstruction(reg,hp);
  187. end;
{ Returns true if the instruction hp reads reg, taking into account explicit
  operands, registers used inside memory references, implicit operands of
  mul/div/imul, and flag reads derived from the instruction property table. }
  188. function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  189. var
  190. p: taicpu;
  191. opcount: longint;
  192. begin
  193. RegReadByInstruction := false;
  194. if hp.typ <> ait_instruction then
  195. exit;
  196. p := taicpu(hp);
  197. case p.opcode of
{ conservatively assume a call reads every register }
  198. A_CALL:
  199. regreadbyinstruction := true;
{ single-operand imul implicitly reads the accumulator: AL for byte-sized
  operations (so AH is NOT read), AX/EAX/RAX otherwise; the 2/3 operand
  forms only read their explicit operands }
  200. A_IMUL:
  201. case p.ops of
  202. 1:
  203. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  204. (
  205. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  206. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  207. );
  208. 2,3:
  209. regReadByInstruction :=
  210. reginop(reg,p.oper[0]^) or
  211. reginop(reg,p.oper[1]^);
  212. end;
{ mul implicitly reads the accumulator, same byte-size rule as imul }
  213. A_MUL:
  214. begin
  215. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  216. (
  217. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  218. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  219. );
  220. end;
{ division implicitly reads EAX and, except for byte-sized operations
  (where the full dividend is in AX), also EDX }
  221. A_IDIV,A_DIV:
  222. begin
  223. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  224. (
  225. (getregtype(reg)=R_INTREGISTER) and
  226. (
  227. (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
  228. )
  229. );
  230. end;
  231. else
  232. begin
{ lea only computes an address, it never dereferences, so segment
  registers are not actually read by it }
  233. if (p.opcode=A_LEA) and is_segment_reg(reg) then
  234. begin
  235. RegReadByInstruction := false;
  236. exit;
  237. end;
{ any register appearing inside a memory reference operand is read,
  regardless of whether the operand itself is read or written }
  238. for opcount := 0 to p.ops-1 do
  239. if (p.oper[opCount]^.typ = top_ref) and
  240. RegInRef(reg,p.oper[opcount]^.ref^) then
  241. begin
  242. RegReadByInstruction := true;
  243. exit
  244. end;
  245. { special handling for SSE MOVSD }
{ p.ops>0 selects the SSE form; the operand-less form of this mnemonic
  is the string instruction and falls through to the generic handling.
  The reg->reg form merges, so the destination register is read too. }
  246. if (p.opcode=A_MOVSD) and (p.ops>0) then
  247. begin
  248. if p.ops<>2 then
  249. internalerror(2017042702);
  250. regReadByInstruction := reginop(reg,p.oper[0]^) or
  251. (
  252. (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
  253. );
  254. exit;
  255. end;
{ generic path: consult the per-opcode change-information table }
  256. with insprop[p.opcode] do
  257. begin
{ implicit reads of specific integer registers recorded in the table }
  258. if getregtype(reg)=R_INTREGISTER then
  259. begin
  260. case getsupreg(reg) of
  261. RS_EAX:
  262. if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
  263. begin
  264. RegReadByInstruction := true;
  265. exit
  266. end;
  267. RS_ECX:
  268. if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
  269. begin
  270. RegReadByInstruction := true;
  271. exit
  272. end;
  273. RS_EDX:
  274. if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
  275. begin
  276. RegReadByInstruction := true;
  277. exit
  278. end;
  279. RS_EBX:
  280. if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
  281. begin
  282. RegReadByInstruction := true;
  283. exit
  284. end;
  285. RS_ESP:
  286. if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
  287. begin
  288. RegReadByInstruction := true;
  289. exit
  290. end;
  291. RS_EBP:
  292. if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
  293. begin
  294. RegReadByInstruction := true;
  295. exit
  296. end;
  297. RS_ESI:
  298. if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
  299. begin
  300. RegReadByInstruction := true;
  301. exit
  302. end;
  303. RS_EDI:
  304. if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
  305. begin
  306. RegReadByInstruction := true;
  307. exit
  308. end;
  309. end;
  310. end;
{ flags register: determine which individual flags are read }
  311. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  312. begin
{ conditional instructions only read the flags their condition tests;
  this refinement is applied only when asking about a single flag bit,
  not about the flags register as a whole (R_SUBW/R_SUBD/R_SUBQ) }
  313. if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
  314. begin
  315. case p.condition of
  316. C_A,C_NBE, { CF=0 and ZF=0 }
  317. C_BE,C_NA: { CF=1 or ZF=1 }
  318. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
  319. C_AE,C_NB,C_NC, { CF=0 }
  320. C_B,C_NAE,C_C: { CF=1 }
  321. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
  322. C_NE,C_NZ, { ZF=0 }
  323. C_E,C_Z: { ZF=1 }
  324. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
  325. C_G,C_NLE, { ZF=0 and SF=OF }
  326. C_LE,C_NG: { ZF=1 or SF<>OF }
  327. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  328. C_GE,C_NL, { SF=OF }
  329. C_L,C_NGE: { SF<>OF }
  330. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  331. C_NO, { OF=0 }
  332. C_O: { OF=1 }
  333. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
  334. C_NP,C_PO, { PF=0 }
  335. C_P,C_PE: { PF=1 }
  336. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
  337. C_NS, { SF=0 }
  338. C_S: { SF=1 }
  339. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
  340. else
  341. internalerror(2017042701);
  342. end;
  343. if RegReadByInstruction then
  344. exit;
  345. end;
{ non-condition flag reads, looked up per flag subregister; asking about
  the whole flags register matches any flag-read property }
  346. case getsubreg(reg) of
  347. R_SUBW,R_SUBD,R_SUBQ:
  348. RegReadByInstruction :=
  349. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  350. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  351. Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
  352. R_SUBFLAGCARRY:
  353. RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  354. R_SUBFLAGPARITY:
  355. RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  356. R_SUBFLAGAUXILIARY:
  357. RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  358. R_SUBFLAGZERO:
  359. RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  360. R_SUBFLAGSIGN:
  361. RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  362. R_SUBFLAGOVERFLOW:
  363. RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  364. R_SUBFLAGINTERRUPT:
  365. RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
  366. R_SUBFLAGDIRECTION:
  367. RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  368. else
  369. internalerror(2017042601);
  370. end;
  371. exit;
  372. end;
{ some instructions (table flag Ch_NoReadIfEqualRegs) do not really read
  their source when both register operands are identical }
  373. if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
  374. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  375. (p.oper[0]^.reg=p.oper[1]^.reg) then
  376. exit;
{ finally: explicit operands that the table marks as read or modified }
  377. if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
  378. begin
  379. RegReadByInstruction := true;
  380. exit
  381. end;
  382. if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
  383. begin
  384. RegReadByInstruction := true;
  385. exit
  386. end;
  387. if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
  388. begin
  389. RegReadByInstruction := true;
  390. exit
  391. end;
  392. if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
  393. begin
  394. RegReadByInstruction := true;
  395. exit
  396. end;
  397. end;
  398. end;
  399. end;
  400. end;
{ emits s as an assembler comment before p in debug builds; compiles to an
  empty inline no-op when DEBUG_AOPTCPU is not defined }
  401. {$ifdef DEBUG_AOPTCPU}
  402. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  403. begin
  404. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  405. end;
  406. {$else DEBUG_AOPTCPU}
  407. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  408. begin
  409. end;
  410. {$endif DEBUG_AOPTCPU}
  411. function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  412. begin
  413. if not SuperRegistersEqual(reg1,reg2) then
  414. exit(false);
  415. if getregtype(reg1)<>R_INTREGISTER then
  416. exit(true); {because SuperRegisterEqual is true}
  417. case getsubreg(reg1) of
  418. { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
  419. higher, it preserves the high bits, so the new value depends on
  420. reg2's previous value. In other words, it is equivalent to doing:
  421. reg2 := (reg2 and $ffffff00) or byte(reg1); }
  422. R_SUBL:
  423. exit(getsubreg(reg2)=R_SUBL);
  424. { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
  425. higher, it actually does a:
  426. reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
  427. R_SUBH:
  428. exit(getsubreg(reg2)=R_SUBH);
  429. { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
  430. bits of reg2:
  431. reg2 := (reg2 and $ffff0000) or word(reg1); }
  432. R_SUBW:
  433. exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
  434. { a write to R_SUBD always overwrites every other subregister,
  435. because it clears the high 32 bits of R_SUBQ on x86_64 }
  436. R_SUBD,
  437. R_SUBQ:
  438. exit(true);
  439. else
  440. internalerror(2017042801);
  441. end;
  442. end;
  443. function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  444. begin
  445. if not SuperRegistersEqual(reg1,reg2) then
  446. exit(false);
  447. if getregtype(reg1)<>R_INTREGISTER then
  448. exit(true); {because SuperRegisterEqual is true}
  449. case getsubreg(reg1) of
  450. R_SUBL:
  451. exit(getsubreg(reg2)<>R_SUBH);
  452. R_SUBH:
  453. exit(getsubreg(reg2)<>R_SUBL);
  454. R_SUBW,
  455. R_SUBD,
  456. R_SUBQ:
  457. exit(true);
  458. else
  459. internalerror(2017042802);
  460. end;
  461. end;
{ Pre-pass optimisation of a shr/sar immediately followed by a shl with the
  same size and destination.
  NOTE(review): result stays false even when the instruction stream was
  modified - confirm that callers do not rely on the return value to detect
  a change. }
  462. function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
  463. var
  464. hp1 : tai;
  465. l : TCGInt;
  466. begin
  467. result:=false;
  468. { changes the code sequence
  469. shr/sar const1, x
  470. shl const2, x
  471. to
  472. either "sar/and", "shl/and" or just "and" depending on const1 and const2 }
  473. if GetNextInstruction(p, hp1) and
  474. MatchInstruction(hp1,A_SHL,[]) and
  475. (taicpu(p).oper[0]^.typ = top_const) and
  476. (taicpu(hp1).oper[0]^.typ = top_const) and
  477. (taicpu(hp1).opsize = taicpu(p).opsize) and
  478. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
  479. OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
  480. begin
{ the first two transforms keep two instructions, so they are skipped
  when optimizing for size; only the const1=const2 case below shrinks
  the sequence to a single instruction }
  481. if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
  482. not(cs_opt_size in current_settings.optimizerswitches) then
  483. begin
  484. { shr/sar const1, %reg
  485. shl const2, %reg
  486. with const1 > const2 }
{ keep the shift with the reduced count (const1-const2) and turn the
  shl into an and that clears the low const2 bits; the Xor with the
  all-ones pattern of the operand size inverts the low-bit mask }
  487. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
  488. taicpu(hp1).opcode := A_AND;
  489. l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
  490. case taicpu(p).opsize Of
  491. S_B: taicpu(hp1).loadConst(0,l Xor $ff);
  492. S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
  493. S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
  494. S_Q: taicpu(hp1).loadConst(0,l Xor aint($ffffffffffffffff));
  495. else
  496. Internalerror(2017050703)
  497. end;
  498. end
  499. else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
  500. not(cs_opt_size in current_settings.optimizerswitches) then
  501. begin
  502. { shr/sar const1, %reg
  503. shl const2, %reg
  504. with const1 < const2 }
{ turn the shr/sar into an and clearing the low const1 bits, and keep
  the shl with the reduced count (const2-const1) }
  505. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
  506. taicpu(p).opcode := A_AND;
  507. l := (1 shl (taicpu(p).oper[0]^.val))-1;
  508. case taicpu(p).opsize Of
  509. S_B: taicpu(p).loadConst(0,l Xor $ff);
  510. S_W: taicpu(p).loadConst(0,l Xor $ffff);
  511. S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
  512. S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
  513. else
  514. Internalerror(2017050702)
  515. end;
  516. end
  517. else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
  518. begin
  519. { shr/sar const1, %reg
  520. shl const2, %reg
  521. with const1 = const2 }
{ the two shifts cancel except for clearing the low const1 bits;
  replace the pair with a single and and drop the shl }
  522. taicpu(p).opcode := A_AND;
  523. l := (1 shl (taicpu(p).oper[0]^.val))-1;
  524. case taicpu(p).opsize Of
  525. S_B: taicpu(p).loadConst(0,l Xor $ff);
  526. S_W: taicpu(p).loadConst(0,l Xor $ffff);
  527. S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
  528. S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
  529. else
  530. Internalerror(2017050701)
  531. end;
  532. asml.remove(hp1);
  533. hp1.free;
  534. end;
  535. end;
  536. end;
  537. { allocates register reg between (and including) instructions p1 and p2
  538. the type of p1 and p2 must not be in SkipInstr
  539. note that this routine is both called from the peephole optimizer
  540. where optinfo is not yet initialised) and from the cse (where it is) }
  541. procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
  542. var
  543. hp, start: tai;
  544. removedsomething,
  545. firstRemovedWasAlloc,
  546. lastRemovedWasDealloc: boolean;
  547. begin
  548. {$ifdef EXTDEBUG}
  549. { if assigned(p1.optinfo) and
  550. (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
  551. internalerror(2004101010); }
  552. {$endif EXTDEBUG}
  553. start := p1;
  554. if (reg = NR_ESP) or
  555. (reg = current_procinfo.framepointer) or
  556. not(assigned(p1)) then
  557. { this happens with registers which are loaded implicitely, outside the }
  558. { current block (e.g. esi with self) }
  559. exit;
  560. { make sure we allocate it for this instruction }
  561. getnextinstruction(p2,p2);
  562. lastRemovedWasDealloc := false;
  563. removedSomething := false;
  564. firstRemovedWasAlloc := false;
  565. {$ifdef allocregdebug}
  566. hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  567. ' from here...'));
  568. insertllitem(asml,p1.previous,p1,hp);
  569. hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  570. ' till here...'));
  571. insertllitem(asml,p2,p2.next,hp);
  572. {$endif allocregdebug}
  573. { do it the safe way: always allocate the full super register,
  574. as we do no register re-allocation in the peephole optimizer,
  575. this does not hurt
  576. }
  577. case getregtype(reg) of
  578. R_MMREGISTER:
  579. reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
  580. R_INTREGISTER:
  581. reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
  582. end;
{ if the register was not live before p1, insert an allocation marker
  in front of it and record the register as used }
  583. if not(RegInUsedRegs(reg,initialusedregs)) then
  584. begin
  585. hp := tai_regalloc.alloc(reg,nil);
  586. insertllItem(p1.previous,p1,hp);
  587. IncludeRegInUsedRegs(reg,initialusedregs);
  588. end;
{ walk from p1 to p2 and remove every (de)allocation marker for reg in
  between, remembering whether the first removed marker was an alloc and
  whether the last removed one was a dealloc, so the boundary markers can
  be re-created afterwards }
  589. while assigned(p1) and
  590. (p1 <> p2) do
  591. begin
{ NOTE(review): this contradicts the header comment saying the routine is
  also called from the cse where optinfo IS initialised - that path would
  trigger this internalerror; confirm which comment is stale }
  592. if assigned(p1.optinfo) then
  593. internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
  594. p1 := tai(p1.next);
  595. repeat
  596. while assigned(p1) and
  597. (p1.typ in (SkipInstr-[ait_regalloc])) Do
  598. p1 := tai(p1.next);
  599. { remove all allocation/deallocation info about the register in between }
  600. if assigned(p1) and
  601. (p1.typ = ait_regalloc) then
  602. begin
  603. { same super register, different sub register? }
  604. if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
  605. begin
  606. if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
  607. internalerror(2016101501);
  608. tai_regalloc(p1).reg:=reg;
  609. end;
  610. if tai_regalloc(p1).reg=reg then
  611. begin
  612. if not removedSomething then
  613. begin
  614. firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
  615. removedSomething := true;
  616. end;
  617. lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
  618. hp := tai(p1.Next);
  619. asml.Remove(p1);
  620. p1.free;
  621. p1 := hp;
  622. end
  623. else
  624. p1 := tai(p1.next);
  625. end;
  626. until not(assigned(p1)) or
  627. not(p1.typ in SkipInstr);
  628. end;
{ restore the boundary markers that the removal loop consumed: an alloc
  before the start when the first removed marker was an alloc, and a
  dealloc after the range when the last removed marker was a dealloc }
  629. if assigned(p1) then
  630. begin
  631. if firstRemovedWasAlloc then
  632. begin
  633. hp := tai_regalloc.Alloc(reg,nil);
  634. insertLLItem(start.previous,start,hp);
  635. end;
  636. if lastRemovedWasDealloc then
  637. begin
  638. hp := tai_regalloc.DeAlloc(reg,nil);
  639. insertLLItem(p1.previous,p1,hp);
  640. end;
  641. end;
  642. end;
{ Returns True if instruction "hp" loads "reg" with a completely new
  value, i.e. it fully overwrites the register without depending on its
  previous contents.  Non-instruction tai objects (and nil) return False. }
function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  var
    p: taicpu;
  begin
    if not assigned(hp) or
      (hp.typ <> ait_instruction) then
      begin
        Result := false;
        exit;
      end;
    p := taicpu(hp);
    { The flags register is handled via the instruction property table
      (insprop) rather than by inspecting operands. }
    if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
      with insprop[p.opcode] do
        begin
          case getsubreg(reg) of
            { the whole flags word only counts as newly written when every
              individual status flag is newly written }
            R_SUBW,R_SUBD,R_SUBQ:
              Result:=
                RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
                RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
                RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
                RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
                RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
                RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
            { an individual flag is newly written if the change set contains
              any of its unconditional-write markers }
            R_SUBFLAGCARRY:
              Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGPARITY:
              Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGAUXILIARY:
              Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGZERO:
              Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGSIGN:
              Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGOVERFLOW:
              Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGINTERRUPT:
              Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGDIRECTION:
              Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
            else
              internalerror(2017050501);
          end;
          exit;
        end;
    { For ordinary registers: enumerate opcodes that overwrite their
      destination entirely, also requiring that the source operand does
      not read "reg" (otherwise the new value would depend on the old). }
    Result :=
      (((p.opcode = A_MOV) or
        (p.opcode = A_MOVZX) or
        (p.opcode = A_MOVSX) or
        (p.opcode = A_LEA) or
        (p.opcode = A_VMOVSS) or
        (p.opcode = A_VMOVSD) or
        (p.opcode = A_VMOVAPD) or
        (p.opcode = A_VMOVAPS) or
        (p.opcode = A_VMOVQ) or
        (p.opcode = A_MOVSS) or
        (p.opcode = A_MOVSD) or
        (p.opcode = A_MOVQ) or
        (p.opcode = A_MOVAPD) or
        (p.opcode = A_MOVAPS) or
{$ifndef x86_64}
        (p.opcode = A_LDS) or
        (p.opcode = A_LES) or
{$endif not x86_64}
        (p.opcode = A_LFS) or
        (p.opcode = A_LGS) or
        (p.opcode = A_LSS)) and
       (p.ops=2) and  { A_MOVSD can have zero operands, so this check is needed }
       (p.oper[1]^.typ = top_reg) and
       (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
       ((p.oper[0]^.typ = top_const) or
        ((p.oper[0]^.typ = top_reg) and
         not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
        ((p.oper[0]^.typ = top_ref) and
         not RegInRef(reg,p.oper[0]^.ref^)))) or
      ((p.opcode = A_POP) and
       (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
      { three-operand IMUL writes its last operand }
      ((p.opcode = A_IMUL) and
       (p.ops=3) and
       (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
       (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
        ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
      { one-operand MUL/IMUL implicitly writes AX / DX:AX / EDX:EAX / RDX:RAX }
      ((((p.opcode = A_IMUL) or
         (p.opcode = A_MUL)) and
        (p.ops=1)) and
       (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
        ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
       (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
        ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
        ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
{$ifdef x86_64}
        or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
{$endif x86_64}
       )) or
      { sign-extension instructions overwrite the upper half register }
      ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
      ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
{$ifdef x86_64}
      ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
{$endif x86_64}
      ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
{$ifndef x86_64}
      ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
      ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$endif not x86_64}
      ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
      ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
      ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$ifndef x86_64}
      ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
{$endif not x86_64}
      ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
      ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
      ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
      ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
{$ifdef x86_64}
      ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
{$endif x86_64}
      ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
      (((p.opcode = A_FSTSW) or
        (p.opcode = A_FNSTSW)) and
       (p.oper[0]^.typ=top_reg) and
       Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
      { "xor/sub/sbb reg,reg" idioms produce a value independent of the
        register's previous contents (0 or a pure carry expansion) }
      (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
       (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
       (p.oper[0]^.reg=p.oper[1]^.reg) and
       Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
  end;
{ Returns True if "p" starts a typical routine exit sequence: a bare RET,
  LEAVE followed by RET, or a frame-pointer restore
  ("mov framepointer,stackpointer" / "lea stackpointer,[framepointer]")
  followed by "pop framepointer" and RET.
  A leading NOP is skipped (only the local copy of p is advanced). }
class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
  var
    hp2,hp3 : tai;
  begin
    { some x86-64 issue a NOP before the real exit code }
    if MatchInstruction(p,A_NOP,[]) then
      GetNextInstruction(p,p);
    result:=assigned(p) and (p.typ=ait_instruction) and
      { plain return }
      ((taicpu(p).opcode = A_RET) or
       { leave; ret }
       ((taicpu(p).opcode=A_LEAVE) and
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_RET,[S_NO])
       ) or
       { restore stack pointer from frame pointer ... }
       ((((taicpu(p).opcode=A_MOV) and
          MatchOpType(taicpu(p),top_reg,top_reg) and
          (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
          (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
         ((taicpu(p).opcode=A_LEA) and
          MatchOpType(taicpu(p),top_ref,top_reg) and
          (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
          (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
         )
        ) and
        { ... then pop the saved frame pointer and return }
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
        MatchOpType(taicpu(hp2),top_reg) and
        (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
        GetNextInstruction(hp2,hp3) and
        MatchInstruction(hp3,A_RET,[S_NO])
       )
      );
  end;
  801. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  802. begin
  803. isFoldableArithOp := False;
  804. case hp1.opcode of
  805. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  806. isFoldableArithOp :=
  807. ((taicpu(hp1).oper[0]^.typ = top_const) or
  808. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  809. (taicpu(hp1).oper[0]^.reg <> reg))) and
  810. (taicpu(hp1).oper[1]^.typ = top_reg) and
  811. (taicpu(hp1).oper[1]^.reg = reg);
  812. A_INC,A_DEC,A_NEG,A_NOT:
  813. isFoldableArithOp :=
  814. (taicpu(hp1).oper[0]^.typ = top_reg) and
  815. (taicpu(hp1).oper[0]^.reg = reg);
  816. end;
  817. end;
  818. procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);
  819. procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
  820. var
  821. hp2: tai;
  822. begin
  823. hp2 := p;
  824. repeat
  825. hp2 := tai(hp2.previous);
  826. if assigned(hp2) and
  827. (hp2.typ = ait_regalloc) and
  828. (tai_regalloc(hp2).ratype=ra_dealloc) and
  829. (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
  830. (getsupreg(tai_regalloc(hp2).reg) = supreg) then
  831. begin
  832. asml.remove(hp2);
  833. hp2.free;
  834. break;
  835. end;
  836. until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
  837. end;
  838. begin
  839. case current_procinfo.procdef.returndef.typ of
  840. arraydef,recorddef,pointerdef,
  841. stringdef,enumdef,procdef,objectdef,errordef,
  842. filedef,setdef,procvardef,
  843. classrefdef,forwarddef:
  844. DoRemoveLastDeallocForFuncRes(RS_EAX);
  845. orddef:
  846. if current_procinfo.procdef.returndef.size <> 0 then
  847. begin
  848. DoRemoveLastDeallocForFuncRes(RS_EAX);
  849. { for int64/qword }
  850. if current_procinfo.procdef.returndef.size = 8 then
  851. DoRemoveLastDeallocForFuncRes(RS_EDX);
  852. end;
  853. end;
  854. end;
{ Pass-1 optimization for MOVAPS/MOVAPD:
    movapX           reg,reg2
    addsX/subsX/...  reg3,reg2
    movapX           reg2,reg
  =>
    addsX/subsX/...  reg3,reg
  Only applied when reg2 is no longer used after the pattern.
  Returns True (and repoints p at the arithmetic instruction) when the
  transformation is done. }
function TX86AsmOptimizer.OptPass1MOVAP(var p : tai) : boolean;
  var
    TmpUsedRegs : TAllUsedRegs;
    hp1,hp2 : tai;
    { NOTE(review): alloc/dealloc appear unused in this routine }
    alloc ,dealloc: tai_regalloc;
  begin
    result:=false;
    if MatchOpType(taicpu(p),top_reg,top_reg) and
      GetNextInstruction(p, hp1) and
      (hp1.typ = ait_instruction) and
      GetNextInstruction(hp1, hp2) and
      { the third instruction is the same movapX variant moving back }
      MatchInstruction(hp2,taicpu(p).opcode,[]) and
      OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
      MatchOpType(taicpu(hp2),top_reg,top_reg) and
      MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
      { the arithmetic opcode must match the precision of the move }
      (((taicpu(p).opcode=A_MOVAPS) and
        ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
         (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
       ((taicpu(p).opcode=A_MOVAPD) and
        ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
         (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
      ) then
      { change
          movapX reg,reg2
          addsX/subsX/... reg3, reg2
          movapX reg2,reg
        to
          addsX/subsX/... reg3,reg
      }
      begin
        CopyUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
        { reg2 must die after the pattern, otherwise its value is needed }
        If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
          begin
            DebugMsg('Peephole Optimization MovapXOpMovapX2Op ('+
                  std_op2str[taicpu(p).opcode]+' '+
                  std_op2str[taicpu(hp1).opcode]+' '+
                  std_op2str[taicpu(hp2).opcode]+') done',p);
            { we cannot eliminate the first move if
              the operations uses the same register for source and dest }
            if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
              begin
                asml.remove(p);
                p.Free;
              end;
            { redirect the arithmetic destination to the original register
              and drop the move back }
            taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
            asml.remove(hp2);
            hp2.Free;
            p:=hp1;
            result:=true;
          end;
        ReleaseUsedRegs(TmpUsedRegs);
      end
  end;
  910. function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
  911. var
  912. TmpUsedRegs : TAllUsedRegs;
  913. hp1,hp2 : tai;
  914. begin
  915. result:=false;
  916. if MatchOpType(taicpu(p),top_reg,top_reg) then
  917. begin
  918. { vmova* reg1,reg1
  919. =>
  920. <nop> }
  921. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  922. begin
  923. GetNextInstruction(p,hp1);
  924. asml.Remove(p);
  925. p.Free;
  926. p:=hp1;
  927. result:=true;
  928. end
  929. else if GetNextInstruction(p,hp1) then
  930. begin
  931. if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
  932. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  933. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  934. begin
  935. { vmova* reg1,reg2
  936. vmova* reg2,reg3
  937. dealloc reg2
  938. =>
  939. vmova* reg1,reg3 }
  940. CopyUsedRegs(TmpUsedRegs);
  941. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  942. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  943. begin
  944. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  945. asml.Remove(hp1);
  946. hp1.Free;
  947. result:=true;
  948. end
  949. { special case:
  950. vmova* reg1,reg2
  951. vmova* reg2,reg1
  952. =>
  953. vmova* reg1,reg2 }
  954. else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
  955. begin
  956. asml.Remove(hp1);
  957. hp1.Free;
  958. result:=true;
  959. end
  960. end
  961. else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
  962. { we mix single and double opperations here because we assume that the compiler
  963. generates vmovapd only after double operations and vmovaps only after single operations }
  964. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
  965. GetNextInstruction(hp1,hp2) and
  966. MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  967. MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
  968. begin
  969. CopyUsedRegs(TmpUsedRegs);
  970. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  971. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  972. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
  973. then
  974. begin
  975. taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
  976. asml.Remove(p);
  977. p.Free;
  978. asml.Remove(hp2);
  979. hp2.Free;
  980. p:=hp1;
  981. end;
  982. end;
  983. end;
  984. end;
  985. end;
  986. function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
  987. var
  988. TmpUsedRegs : TAllUsedRegs;
  989. hp1 : tai;
  990. begin
  991. result:=false;
  992. { replace
  993. V<Op>X %mreg1,%mreg2,%mreg3
  994. VMovX %mreg3,%mreg4
  995. dealloc %mreg3
  996. by
  997. V<Op>X %mreg1,%mreg2,%mreg4
  998. ?
  999. }
  1000. if GetNextInstruction(p,hp1) and
  1001. { we mix single and double operations here because we assume that the compiler
  1002. generates vmovapd only after double operations and vmovaps only after single operations }
  1003. MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  1004. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  1005. (taicpu(hp1).oper[1]^.typ=top_reg) then
  1006. begin
  1007. CopyUsedRegs(TmpUsedRegs);
  1008. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1009. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
  1010. ) then
  1011. begin
  1012. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  1013. DebugMsg('PeepHole Optimization VOpVmov2VOp done',p);
  1014. asml.Remove(hp1);
  1015. hp1.Free;
  1016. result:=true;
  1017. end;
  1018. end;
  1019. end;
  1020. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  1021. var
  1022. hp1, hp2: tai;
  1023. TmpUsedRegs : TAllUsedRegs;
  1024. GetNextInstruction_p : Boolean;
  1025. begin
  1026. Result:=false;
  1027. { remove mov reg1,reg1? }
  1028. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  1029. begin
  1030. GetNextInstruction(p, hp1);
  1031. DebugMsg('PeepHole Optimization Mov2Nop done',p);
  1032. asml.remove(p);
  1033. p.free;
  1034. p:=hp1;
  1035. Result:=true;
  1036. exit;
  1037. end;
  1038. GetNextInstruction_p:=GetNextInstruction(p, hp1);
  1039. if GetNextInstruction_p and
  1040. MatchInstruction(hp1,A_AND,[]) and
  1041. (taicpu(p).oper[1]^.typ = top_reg) and
  1042. MatchOpType(taicpu(hp1),top_const,top_reg) and
  1043. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  1044. case taicpu(p).opsize Of
  1045. S_L:
  1046. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  1047. begin
  1048. { Optimize out:
  1049. mov x, %reg
  1050. and ffffffffh, %reg
  1051. }
  1052. DebugMsg('PeepHole Optimization MovAnd2Mov 1 done',p);
  1053. asml.remove(hp1);
  1054. hp1.free;
  1055. Result:=true;
  1056. exit;
  1057. end;
  1058. S_Q: { TODO: Confirm if this is even possible }
  1059. if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then
  1060. begin
  1061. { Optimize out:
  1062. mov x, %reg
  1063. and ffffffffffffffffh, %reg
  1064. }
  1065. DebugMsg('PeepHole Optimization MovAnd2Mov 2 done',p);
  1066. asml.remove(hp1);
  1067. hp1.free;
  1068. Result:=true;
  1069. exit;
  1070. end;
  1071. end
  1072. else if GetNextInstruction_p and
  1073. MatchInstruction(hp1,A_MOV,[]) and
  1074. (taicpu(p).oper[1]^.typ = top_reg) and
  1075. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  1076. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1077. begin
  1078. CopyUsedRegs(TmpUsedRegs);
  1079. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  1080. { we have
  1081. mov x, %treg
  1082. mov %treg, y
  1083. }
  1084. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  1085. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1086. { we've got
  1087. mov x, %treg
  1088. mov %treg, y
  1089. with %treg is not used after }
  1090. case taicpu(p).oper[0]^.typ Of
  1091. top_reg:
  1092. begin
  1093. { change
  1094. mov %reg, %treg
  1095. mov %treg, y
  1096. to
  1097. mov %reg, y
  1098. }
  1099. if taicpu(hp1).oper[1]^.typ=top_reg then
  1100. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1101. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1102. DebugMsg('PeepHole Optimization MovMov2Mov 2 done',p);
  1103. asml.remove(hp1);
  1104. hp1.free;
  1105. ReleaseUsedRegs(TmpUsedRegs);
  1106. Result:=true;
  1107. Exit;
  1108. end;
  1109. top_ref:
  1110. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  1111. begin
  1112. { change
  1113. mov mem, %treg
  1114. mov %treg, %reg
  1115. to
  1116. mov mem, %reg"
  1117. }
  1118. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1119. DebugMsg('PeepHole Optimization MovMov2Mov 3 done',p);
  1120. asml.remove(hp1);
  1121. hp1.free;
  1122. ReleaseUsedRegs(TmpUsedRegs);
  1123. Result:=true;
  1124. Exit;
  1125. end;
  1126. end;
  1127. ReleaseUsedRegs(TmpUsedRegs);
  1128. end
  1129. else
  1130. { Change
  1131. mov %reg1, %reg2
  1132. xxx %reg2, ???
  1133. to
  1134. mov %reg1, %reg2
  1135. xxx %reg1, ???
  1136. to avoid a write/read penalty
  1137. }
  1138. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1139. GetNextInstruction(p,hp1) and
  1140. (tai(hp1).typ = ait_instruction) and
  1141. (taicpu(hp1).ops >= 1) and
  1142. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1143. { we have
  1144. mov %reg1, %reg2
  1145. XXX %reg2, ???
  1146. }
  1147. begin
  1148. if ((taicpu(hp1).opcode = A_OR) or
  1149. (taicpu(hp1).opcode = A_AND) or
  1150. (taicpu(hp1).opcode = A_TEST)) and
  1151. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1152. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  1153. { we have
  1154. mov %reg1, %reg2
  1155. test/or/and %reg2, %reg2
  1156. }
  1157. begin
  1158. CopyUsedRegs(TmpUsedRegs);
  1159. { reg1 will be used after the first instruction,
  1160. so update the allocation info }
  1161. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1162. if GetNextInstruction(hp1, hp2) and
  1163. (hp2.typ = ait_instruction) and
  1164. taicpu(hp2).is_jmp and
  1165. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  1166. { change
  1167. mov %reg1, %reg2
  1168. test/or/and %reg2, %reg2
  1169. jxx
  1170. to
  1171. test %reg1, %reg1
  1172. jxx
  1173. }
  1174. begin
  1175. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1176. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1177. DebugMsg('PeepHole Optimization MovTestJxx2TestMov done',p);
  1178. asml.remove(p);
  1179. p.free;
  1180. p := hp1;
  1181. ReleaseUsedRegs(TmpUsedRegs);
  1182. Exit;
  1183. end
  1184. else
  1185. { change
  1186. mov %reg1, %reg2
  1187. test/or/and %reg2, %reg2
  1188. to
  1189. mov %reg1, %reg2
  1190. test/or/and %reg1, %reg1
  1191. }
  1192. begin
  1193. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1194. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1195. DebugMsg('PeepHole Optimization MovTestJxx2ovTestJxx done',p);
  1196. end;
  1197. ReleaseUsedRegs(TmpUsedRegs);
  1198. end
  1199. end
  1200. else
  1201. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1202. x >= RetOffset) as it doesn't do anything (it writes either to a
  1203. parameter or to the temporary storage room for the function
  1204. result)
  1205. }
  1206. if GetNextInstruction_p and
  1207. (tai(hp1).typ = ait_instruction) then
  1208. begin
  1209. if IsExitCode(hp1) and
  1210. MatchOpType(taicpu(p),top_reg,top_ref) and
  1211. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1212. not(assigned(current_procinfo.procdef.funcretsym) and
  1213. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1214. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  1215. begin
  1216. asml.remove(p);
  1217. p.free;
  1218. p:=hp1;
  1219. DebugMsg('Peephole removed deadstore before leave/ret',p);
  1220. RemoveLastDeallocForFuncRes(p);
  1221. exit;
  1222. end
  1223. { change
  1224. mov reg1, mem1
  1225. test/cmp x, mem1
  1226. to
  1227. mov reg1, mem1
  1228. test/cmp x, reg1
  1229. }
  1230. else if MatchOpType(taicpu(p),top_reg,top_ref) and
  1231. MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
  1232. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1233. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1234. begin
  1235. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1236. DebugMsg('Peephole MovTestCmp2MovTestCmp 1',hp1);
  1237. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1238. end;
  1239. end;
  1240. { Next instruction is also a MOV ? }
  1241. if GetNextInstruction_p and
  1242. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  1243. begin
  1244. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1245. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1246. { mov reg1, mem1 or mov mem1, reg1
  1247. mov mem2, reg2 mov reg2, mem2}
  1248. begin
  1249. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1250. { mov reg1, mem1 or mov mem1, reg1
  1251. mov mem2, reg1 mov reg2, mem1}
  1252. begin
  1253. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1254. { Removes the second statement from
  1255. mov reg1, mem1/reg2
  1256. mov mem1/reg2, reg1 }
  1257. begin
  1258. if taicpu(p).oper[0]^.typ=top_reg then
  1259. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1260. DebugMsg('PeepHole Optimization MovMov2Mov 1',p);
  1261. asml.remove(hp1);
  1262. hp1.free;
  1263. Result:=true;
  1264. exit;
  1265. end
  1266. else
  1267. begin
  1268. CopyUsedRegs(TmpUsedRegs);
  1269. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1270. if (taicpu(p).oper[1]^.typ = top_ref) and
  1271. { mov reg1, mem1
  1272. mov mem2, reg1 }
  1273. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1274. GetNextInstruction(hp1, hp2) and
  1275. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  1276. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1277. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  1278. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1279. { change to
  1280. mov reg1, mem1 mov reg1, mem1
  1281. mov mem2, reg1 cmp reg1, mem2
  1282. cmp mem1, reg1
  1283. }
  1284. begin
  1285. asml.remove(hp2);
  1286. hp2.free;
  1287. taicpu(hp1).opcode := A_CMP;
  1288. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1289. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1290. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1291. DebugMsg('Peephole Optimization MovMovCmp2MovCmp done',hp1);
  1292. end;
  1293. ReleaseUsedRegs(TmpUsedRegs);
  1294. end;
  1295. end
  1296. else if (taicpu(p).oper[1]^.typ=top_ref) and
  1297. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1298. begin
  1299. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1300. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1301. DebugMsg('PeepHole Optimization MovMov2MovMov1 done',p);
  1302. end
  1303. else
  1304. begin
  1305. CopyUsedRegs(TmpUsedRegs);
  1306. if GetNextInstruction(hp1, hp2) and
  1307. MatchOpType(taicpu(p),top_ref,top_reg) and
  1308. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1309. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1310. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  1311. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  1312. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1313. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  1314. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1315. { mov mem1, %reg1
  1316. mov %reg1, mem2
  1317. mov mem2, reg2
  1318. to:
  1319. mov mem1, reg2
  1320. mov reg2, mem2}
  1321. begin
  1322. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1323. DebugMsg('PeepHole Optimization MovMovMov2MovMov 1 done',p);
  1324. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1325. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1326. asml.remove(hp2);
  1327. hp2.free;
  1328. end
  1329. {$ifdef i386}
  1330. { this is enabled for i386 only, as the rules to create the reg sets below
  1331. are too complicated for x86-64, so this makes this code too error prone
  1332. on x86-64
  1333. }
  1334. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1335. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  1336. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  1337. { mov mem1, reg1 mov mem1, reg1
  1338. mov reg1, mem2 mov reg1, mem2
  1339. mov mem2, reg2 mov mem2, reg1
  1340. to: to:
  1341. mov mem1, reg1 mov mem1, reg1
  1342. mov mem1, reg2 mov reg1, mem2
  1343. mov reg1, mem2
  1344. or (if mem1 depends on reg1
  1345. and/or if mem2 depends on reg2)
  1346. to:
  1347. mov mem1, reg1
  1348. mov reg1, mem2
  1349. mov reg1, reg2
  1350. }
  1351. begin
  1352. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1353. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1354. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1355. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1356. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1357. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1358. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1359. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1360. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1361. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1362. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1363. end
  1364. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1365. begin
  1366. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1367. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1368. end
  1369. else
  1370. begin
  1371. asml.remove(hp2);
  1372. hp2.free;
  1373. end
  1374. {$endif i386}
  1375. ;
  1376. ReleaseUsedRegs(TmpUsedRegs);
  1377. end;
  1378. end
  1379. (* { movl [mem1],reg1
  1380. movl [mem1],reg2
  1381. to
  1382. movl [mem1],reg1
  1383. movl reg1,reg2
  1384. }
  1385. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1386. (taicpu(p).oper[1]^.typ = top_reg) and
  1387. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1388. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1389. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1390. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1391. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1392. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1393. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1394. else*)
  1395. { movl const1,[mem1]
  1396. movl [mem1],reg1
  1397. to
  1398. movl const1,reg1
  1399. movl reg1,[mem1]
  1400. }
  1401. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  1402. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  1403. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1404. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1405. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  1406. begin
  1407. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1408. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1409. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1410. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1411. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  1412. DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
  1413. end
  1414. end
  1415. else if (taicpu(p).oper[1]^.typ = top_reg) and
  1416. GetNextInstruction_p and
  1417. (hp1.typ = ait_instruction) and
  1418. GetNextInstruction(hp1, hp2) and
  1419. MatchInstruction(hp2,A_MOV,[]) and
  1420. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  1421. (taicpu(hp2).oper[0]^.typ=top_reg) and
  1422. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  1423. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  1424. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  1425. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  1426. ) then
  1427. { change movsX/movzX reg/ref, reg2
  1428. add/sub/or/... reg3/$const, reg2
  1429. mov reg2 reg/ref
  1430. to add/sub/or/... reg3/$const, reg/ref }
  1431. begin
  1432. CopyUsedRegs(TmpUsedRegs);
  1433. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1434. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1435. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  1436. begin
  1437. { by example:
  1438. movswl %si,%eax movswl %si,%eax p
  1439. decl %eax addl %edx,%eax hp1
  1440. movw %ax,%si movw %ax,%si hp2
  1441. ->
  1442. movswl %si,%eax movswl %si,%eax p
  1443. decw %eax addw %edx,%eax hp1
  1444. movw %ax,%si movw %ax,%si hp2
  1445. }
  1446. DebugMsg('Peephole Optimization MovOpMov2Op ('+
  1447. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  1448. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  1449. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  1450. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1451. {
  1452. ->
  1453. movswl %si,%eax movswl %si,%eax p
  1454. decw %si addw %dx,%si hp1
  1455. movw %ax,%si movw %ax,%si hp2
  1456. }
  1457. case taicpu(hp1).ops of
  1458. 1:
  1459. begin
  1460. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  1461. if taicpu(hp1).oper[0]^.typ=top_reg then
  1462. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1463. end;
  1464. 2:
  1465. begin
  1466. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  1467. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  1468. (taicpu(hp1).opcode<>A_SHL) and
  1469. (taicpu(hp1).opcode<>A_SHR) and
  1470. (taicpu(hp1).opcode<>A_SAR) then
  1471. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1472. end;
  1473. else
  1474. internalerror(2008042701);
  1475. end;
  1476. {
  1477. ->
  1478. decw %si addw %dx,%si p
  1479. }
  1480. asml.remove(p);
  1481. asml.remove(hp2);
  1482. p.Free;
  1483. hp2.Free;
  1484. p := hp1;
  1485. end;
  1486. ReleaseUsedRegs(TmpUsedRegs);
  1487. end
  1488. else if GetNextInstruction_p and
  1489. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1490. GetNextInstruction(hp1, hp2) and
  1491. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1492. MatchOperand(Taicpu(p).oper[0]^,0) and
  1493. (Taicpu(p).oper[1]^.typ = top_reg) and
  1494. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1495. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1496. { mov reg1,0
  1497. bts reg1,operand1 --> mov reg1,operand2
  1498. or reg1,operand2 bts reg1,operand1}
  1499. begin
  1500. Taicpu(hp2).opcode:=A_MOV;
  1501. asml.remove(hp1);
  1502. insertllitem(hp2,hp2.next,hp1);
  1503. asml.remove(p);
  1504. p.free;
  1505. p:=hp1;
  1506. end
  1507. else if GetNextInstruction_p and
  1508. MatchInstruction(hp1,A_LEA,[S_L]) and
  1509. MatchOpType(Taicpu(p),top_ref,top_reg) and
  1510. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1511. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1512. ) or
  1513. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1514. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1515. )
  1516. ) then
  1517. { mov reg1,ref
  1518. lea reg2,[reg1,reg2]
  1519. to
  1520. add reg2,ref}
  1521. begin
  1522. CopyUsedRegs(TmpUsedRegs);
  1523. { reg1 may not be used afterwards }
  1524. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1525. begin
  1526. Taicpu(hp1).opcode:=A_ADD;
  1527. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1528. DebugMsg('Peephole MovLea2Add done',hp1);
  1529. asml.remove(p);
  1530. p.free;
  1531. p:=hp1;
  1532. end;
  1533. ReleaseUsedRegs(TmpUsedRegs);
  1534. end;
  1535. end;
    { Folds pairs of identical movXX instructions that copy a value back and
      forth between the same two locations:

          movXX op1,op2
          movXX op2,op1

      The second instruction is always redundant and removed; when op2 is a
      register that is not used after the pair, the first instruction is
      removed as well (the whole pair becomes a no-op).
      Returns true when the instruction stream was changed. }
    function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
      var
        hp1 : tai;   { instruction following p }
      begin
        Result:=false;
        if taicpu(p).ops <> 2 then
          exit;
        { the follower must be the same opcode with the same operand size }
        if GetNextInstruction(p,hp1) and
          MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
          (taicpu(hp1).ops = 2) then
          begin
            { operand kinds must be swapped between the two instructions }
            if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
               (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
                { movXX reg1, mem1     or     movXX mem1, reg1
                  movXX mem2, reg2            movXX reg2, mem2}
              begin
                if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
                  { movXX reg1, mem1     or     movXX mem1, reg1
                    movXX mem2, reg1            movXX reg2, mem1}
                  begin
                    { both operands match crosswise, so hp1 copies the value
                      straight back to where p read it from }
                    if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                      begin
                        { Removes the second statement from
                          movXX reg1, mem1/reg2
                          movXX mem1/reg2, reg1
                        }
                        { keep the source register marked as allocated across
                          the pair so later passes see it live }
                        if taicpu(p).oper[0]^.typ=top_reg then
                          AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                        { Removes the second statement from
                          movXX mem1/reg1, reg2
                          movXX reg2, mem1/reg1
                        }
                        if (taicpu(p).oper[1]^.typ=top_reg) and
                          not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
                          { destination register dies here: drop p too and
                            advance p past the removed pair }
                          begin
                            asml.remove(p);
                            p.free;
                            GetNextInstruction(hp1,p);
                            DebugMsg('PeepHole Optimization MovXXMovXX2Nop 1 done',p);
                          end
                        else
                          DebugMsg('PeepHole Optimization MovXXMovXX2MoVXX 1 done',p);
                        { in either case the second mov is redundant }
                        asml.remove(hp1);
                        hp1.free;
                        Result:=true;
                        exit;
                      end
                  end;
              end;
          end;
      end;
  1587. function TX86AsmOptimizer.OptPass1OP(const p : tai) : boolean;
  1588. var
  1589. TmpUsedRegs : TAllUsedRegs;
  1590. hp1 : tai;
  1591. begin
  1592. result:=false;
  1593. { replace
  1594. <Op>X %mreg1,%mreg2 // Op in [ADD,MUL]
  1595. MovX %mreg2,%mreg1
  1596. dealloc %mreg2
  1597. by
  1598. <Op>X %mreg2,%mreg1
  1599. ?
  1600. }
  1601. if GetNextInstruction(p,hp1) and
  1602. { we mix single and double opperations here because we assume that the compiler
  1603. generates vmovapd only after double operations and vmovaps only after single operations }
  1604. MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
  1605. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1606. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
  1607. (taicpu(p).oper[0]^.typ=top_reg) then
  1608. begin
  1609. CopyUsedRegs(TmpUsedRegs);
  1610. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1611. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  1612. begin
  1613. taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
  1614. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1615. DebugMsg('PeepHole Optimization OpMov2Op done',p);
  1616. asml.Remove(hp1);
  1617. hp1.Free;
  1618. result:=true;
  1619. end;
  1620. ReleaseUsedRegs(TmpUsedRegs);
  1621. end;
  1622. end;
    { Simplifies LEA instructions:
        - strips useless segment prefixes;
        - lea (%reg1),%reg2           -> mov %reg1,%reg2
        - lea (%reg),%reg             -> removed (no-op)
        - lea off(%reg),%reg          -> inc/dec/add/sub $off,%reg
          (kept as lea for the stack pointer unless optimizing for size);
        - lea followed by a dead mov of the result into another register is
          folded by retargeting the lea (LeaMov2Lea).
      Returns true when the instruction stream was changed. }
    function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
      var
        hp1 : tai;        { next instruction / replacement instruction }
        l : ASizeInt;     { displacement of the lea reference }
        TmpUsedRegs : TAllUsedRegs;
      begin
        Result:=false;
        { removes seg register prefixes from LEA operations, as they
          don't do anything}
        taicpu(p).oper[0]^.ref^.Segment:=NR_NO;
        { changes "lea (%reg1), %reg2" into "mov %reg1, %reg2" }
        if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
           (taicpu(p).oper[0]^.ref^.index = NR_NO) and
           { do not mess with leas acessing the stack pointer }
           (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
           (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
          begin
            if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
               (taicpu(p).oper[0]^.ref^.offset = 0) then
              { plain register copy: lea (%reg1),%reg2 -> mov %reg1,%reg2 }
              begin
                hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
                  taicpu(p).oper[1]^.reg);
                InsertLLItem(p.previous,p.next, hp1);
                DebugMsg('PeepHole Optimization Lea2Mov done',hp1);
                p.free;
                p:=hp1;
                Result:=true;
                exit;
              end
            else if (taicpu(p).oper[0]^.ref^.offset = 0) then
              { base register equals the destination: lea (%reg),%reg is a no-op }
              begin
                hp1:=taicpu(p.Next);
                DebugMsg('PeepHole Optimization Lea2Nop done',p);
                asml.remove(p);
                p.free;
                p:=hp1;
                Result:=true;
                exit;
              end
            { continue to use lea to adjust the stack pointer,
              it is the recommended way, but only if not optimizing for size }
            else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
              (cs_opt_size in current_settings.optimizerswitches) then
              with taicpu(p).oper[0]^.ref^ do
                if (base = taicpu(p).oper[1]^.reg) then
                  { lea off(%reg),%reg: turn into an arithmetic instruction.
                    NOTE(review): this changes the flags, unlike lea; see the
                    comment at the end of this function about flag liveness. }
                  begin
                    l:=offset;
                    if (l=1) and UseIncDec then
                      begin
                        taicpu(p).opcode:=A_INC;
                        taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                        taicpu(p).ops:=1;
                        DebugMsg('PeepHole Optimization Lea2Inc done',p);
                      end
                    else if (l=-1) and UseIncDec then
                      begin
                        taicpu(p).opcode:=A_DEC;
                        taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                        taicpu(p).ops:=1;
                        DebugMsg('PeepHole Optimization Lea2Dec done',p);
                      end
                    else
                      begin
                        { -(-2147483648) does not fit, so keep add in that case }
                        if (l<0) and (l<>-2147483648) then
                          begin
                            taicpu(p).opcode:=A_SUB;
                            taicpu(p).loadConst(0,-l);
                            DebugMsg('PeepHole Optimization Lea2Sub done',p);
                          end
                        else
                          begin
                            taicpu(p).opcode:=A_ADD;
                            taicpu(p).loadConst(0,l);
                            DebugMsg('PeepHole Optimization Lea2Add done',p);
                          end;
                      end;
                    Result:=true;
                    exit;
                  end;
          end;
        { lea x(),%reg1; mov %reg1,%reg2 with %reg1 dead
          -> lea x(),%reg2 }
        if GetNextInstruction(p,hp1) and
          MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
          MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
          MatchOpType(Taicpu(hp1),top_reg,top_reg) and
          (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
          begin
            CopyUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
              begin
                taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                DebugMsg('PeepHole Optimization LeaMov2Lea done',p);
                asml.Remove(hp1);
                hp1.Free;
                result:=true;
              end;
            ReleaseUsedRegs(TmpUsedRegs);
          end;
        (*
          This is unsafe, lea doesn't modify the flags but "add"
          does. This breaks webtbs/tw15694.pp. The above
          transformations are also unsafe, but they don't seem to
          be triggered by code that FPC generators (or that at
          least does not occur in the tests...). This needs to be
          fixed by checking for the liveness of the flags register.
        else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
          begin
            hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
              taicpu(p).oper[0]^.ref^.base);
            InsertLLItem(asml,p.previous,p.next, hp1);
            DebugMsg('Peephole Lea2AddBase done',hp1);
            p.free;
            p:=hp1;
            continue;
          end
        else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
          begin
            hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
              taicpu(p).oper[0]^.ref^.index);
            InsertLLItem(asml,p.previous,p.next,hp1);
            DebugMsg('Peephole Lea2AddIndex done',hp1);
            p.free;
            p:=hp1;
            continue;
          end
        *)
      end;
    { Second-pass MOV optimizations:
        1. mov %reg1,%reg2; mov/zx/sx (…%reg2…),%reg2
           -> mov/zx/sx (…%reg1…),%reg2  (the copy is folded into the address)
        2. mov (ref),%reg; <arith> …,%reg; mov %reg,(ref) with %reg dead
           -> <arith> …,(ref)  (also handles lea used as add)
      Returns true only for case 1 (case 2 keeps Result=false but still
      rewrites the list and advances p). }
    function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
      var
        TmpUsedRegs : TAllUsedRegs;
        hp1,hp2: tai;   { the two instructions following p }
      begin
        Result:=false;
        if MatchOpType(taicpu(p),top_reg,top_reg) and
          GetNextInstruction(p, hp1) and
          MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
          MatchOpType(taicpu(hp1),top_ref,top_reg) and
          ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
           or
           (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
          ) and
          (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
          { mov reg1, reg2
            mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2}
          begin
            { replace reg2 by reg1 wherever it appears in hp1's reference }
            if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
              taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
            if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
              taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
            DebugMsg('PeepHole Optimization MovMovXX2MoVXX 1 done',p);
            asml.remove(p);
            p.free;
            p := hp1;
            Result:=true;
            exit;
          end
        else if (taicpu(p).oper[0]^.typ = top_ref) and
          GetNextInstruction(p,hp1) and
          (hp1.typ = ait_instruction) and
          { while the GetNextInstruction(hp1,hp2) call could be factored out,
            doing it separately in both branches allows to do the cheap checks
            with low probability earlier }
          ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
            GetNextInstruction(hp1,hp2) and
            MatchInstruction(hp2,A_MOV,[])
           ) or
           { lea qualifies too when it only adds a register or constant to
             the moved register, i.e. it acts as an add }
           ((taicpu(hp1).opcode=A_LEA) and
            GetNextInstruction(hp1,hp2) and
            MatchInstruction(hp2,A_MOV,[]) and
            ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
              (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
             ) or
             (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
              taicpu(p).oper[1]^.reg) and
              (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
             (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
             (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
            ) and
            ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
           )
          ) and
          { the arith result must be what hp2 stores, and it must go to memory }
          MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
          (taicpu(hp2).oper[1]^.typ = top_ref) then
          begin
            CopyUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
            { store must target the same memory the value was loaded from,
              and the scratch register must die at the store }
            if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
              not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2, TmpUsedRegs))) then
              { change   mov            (ref), reg
                         add/sub/or/... reg2/$const, reg
                         mov            reg, (ref)
                         # release reg
                to       add/sub/or/... reg2/$const, (ref)    }
              begin
                case taicpu(hp1).opcode of
                  A_INC,A_DEC,A_NOT,A_NEG :
                    { single-operand ops: operate directly on the memory location }
                    taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                  A_LEA :
                    { rewrite the lea as "add <other part>,(ref)" }
                    begin
                      taicpu(hp1).opcode:=A_ADD;
                      if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
                        taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                      else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
                        taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
                      else
                        taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
                      taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                      DebugMsg('Peephole FoldLea done',hp1);
                    end
                  else
                    { two-operand ops: redirect the destination to memory }
                    taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                end;
                asml.remove(p);
                asml.remove(hp2);
                p.free;
                hp2.free;
                p := hp1
              end;
            ReleaseUsedRegs(TmpUsedRegs);
          end;
      end;
  1844. function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
  1845. var
  1846. TmpUsedRegs : TAllUsedRegs;
  1847. hp1 : tai;
  1848. begin
  1849. Result:=false;
  1850. if (taicpu(p).ops >= 2) and
  1851. ((taicpu(p).oper[0]^.typ = top_const) or
  1852. ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
  1853. (taicpu(p).oper[1]^.typ = top_reg) and
  1854. ((taicpu(p).ops = 2) or
  1855. ((taicpu(p).oper[2]^.typ = top_reg) and
  1856. (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
  1857. GetLastInstruction(p,hp1) and
  1858. MatchInstruction(hp1,A_MOV,[]) and
  1859. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  1860. ((taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) or
  1861. ((taicpu(hp1).opsize=S_L) and (taicpu(p).opsize=S_Q) and SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(p).oper[1]^.reg))) then
  1862. begin
  1863. CopyUsedRegs(TmpUsedRegs);
  1864. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
  1865. { change
  1866. mov reg1,reg2
  1867. imul y,reg2 to imul y,reg1,reg2 }
  1868. begin
  1869. taicpu(p).ops := 3;
  1870. taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
  1871. taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
  1872. DebugMsg('Peephole MovImul2Imul done',p);
  1873. asml.remove(hp1);
  1874. hp1.free;
  1875. result:=true;
  1876. end;
  1877. ReleaseUsedRegs(TmpUsedRegs);
  1878. end;
  1879. end;
  1880. function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
  1881. var
  1882. hp1 : tai;
  1883. begin
  1884. {
  1885. change
  1886. jmp .L1
  1887. ...
  1888. .L1:
  1889. ret
  1890. into
  1891. ret
  1892. }
  1893. result:=false;
  1894. if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
  1895. (taicpu(p).oper[0]^.ref^.index=NR_NO) then
  1896. begin
  1897. hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
  1898. if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and
  1899. MatchInstruction(hp1,A_RET,[S_NO]) then
  1900. begin
  1901. tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
  1902. taicpu(p).opcode:=A_RET;
  1903. taicpu(p).is_jmp:=false;
  1904. taicpu(p).ops:=taicpu(hp1).ops;
  1905. case taicpu(hp1).ops of
  1906. 0:
  1907. taicpu(p).clearop(0);
  1908. 1:
  1909. taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
  1910. else
  1911. internalerror(2016041301);
  1912. end;
  1913. result:=true;
  1914. end;
  1915. end;
  1916. end;
  1917. function CanBeCMOV(p : tai) : boolean;
  1918. begin
  1919. CanBeCMOV:=assigned(p) and
  1920. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
  1921. { we can't use cmov ref,reg because
  1922. ref could be nil and cmov still throws an exception
  1923. if ref=nil but the mov isn't done (FK)
  1924. or ((taicpu(p).oper[0]^.typ = top_ref) and
  1925. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  1926. }
  1927. MatchOpType(taicpu(p),top_reg,top_reg);
  1928. end;
    { Second-pass conditional-jump optimizations:
        1. jb/jnb over a single inc/dec -> cmc (if needed) + adc/sbb op,0;
        2. on CMOV-capable CPUs, a conditional jump over a short run of
           reg,reg movs (optionally with an else branch) is replaced by
           CMOVcc instructions, removing the branches and labels.
      Returns true when the instruction stream was changed. }
    function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
      var
        hp1,hp2,hp3: tai;
        carryadd_opcode : TAsmOp;   { A_ADC or A_SBB, A_NONE if no match }
        l : Longint;                { number of convertible movs found }
        condition : TAsmCond;       { condition applied to the CMOVs }
      begin
        { jb @@1                            cmc
          inc/dec operand         -->       adc/sbb operand,0
          @@1:

          ... and ...

          jnb @@1
          inc/dec operand         -->       adc/sbb operand,0
          @@1: }
        result:=false;
        { the jump must skip exactly one instruction and land on the label }
        if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
          GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
          (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
          begin
            carryadd_opcode:=A_NONE;
            { "jump if carry": invert the carry with cmc, then use adc/sbb }
            if Taicpu(p).condition in [C_NAE,C_B] then
              begin
                if Taicpu(hp1).opcode=A_INC then
                  carryadd_opcode:=A_ADC;
                if Taicpu(hp1).opcode=A_DEC then
                  carryadd_opcode:=A_SBB;
                if carryadd_opcode<>A_NONE then
                  begin
                    { turn the jump into a cmc ... }
                    Taicpu(p).clearop(0);
                    Taicpu(p).ops:=0;
                    Taicpu(p).is_jmp:=false;
                    Taicpu(p).opcode:=A_CMC;
                    Taicpu(p).condition:=C_NONE;
                    { ... and the inc/dec into adc/sbb op,0 }
                    Taicpu(hp1).ops:=2;
                    Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                    Taicpu(hp1).loadconst(0,0);
                    Taicpu(hp1).opcode:=carryadd_opcode;
                    result:=true;
                    exit;
                  end;
              end;
            { "jump if no carry": no cmc needed, drop the jump entirely }
            if Taicpu(p).condition in [C_AE,C_NB] then
              begin
                if Taicpu(hp1).opcode=A_INC then
                  carryadd_opcode:=A_ADC;
                if Taicpu(hp1).opcode=A_DEC then
                  carryadd_opcode:=A_SBB;
                if carryadd_opcode<>A_NONE then
                  begin
                    asml.remove(p);
                    p.free;
                    Taicpu(hp1).ops:=2;
                    Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                    Taicpu(hp1).loadconst(0,0);
                    Taicpu(hp1).opcode:=carryadd_opcode;
                    p:=hp1;
                    result:=true;
                    exit;
                  end;
              end;
          end;
{$ifndef i8086}
        if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
          begin
            { check for
                jCC   xxx
                <several movs>
             xxx:
            }
            l:=0;
            GetNextInstruction(p, hp1);
            while assigned(hp1) and
              CanBeCMOV(hp1) and
              { stop on labels }
              not(hp1.typ=ait_label) do
              begin
                 inc(l);
                 GetNextInstruction(hp1,hp1);
              end;
            if assigned(hp1) then
              begin
                if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                  begin
                    { limit to at most 4 movs so the speculative execution
                      cost stays small }
                    if (l<=4) and (l>0) then
                      begin
                        { the movs execute when the jump is NOT taken, so
                          they become cmovs on the inverted condition }
                        condition:=inverse_cond(taicpu(p).condition);
                        hp2:=p;
                        GetNextInstruction(p,hp1);
                        p:=hp1;
                        repeat
                          taicpu(hp1).opcode:=A_CMOVcc;
                          taicpu(hp1).condition:=condition;
                          GetNextInstruction(hp1,hp1);
                        until not(assigned(hp1)) or
                          not(CanBeCMOV(hp1));
                        { wait with removing else GetNextInstruction could
                          ignore the label if it was the only usage in the
                          jump moved away }
                        tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                        { if the label refs. reach zero, remove any alignment before the label }
                        if (hp1.typ=ait_align) and (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).getrefs=0) then
                          begin
                            asml.Remove(hp1);
                            hp1.Free;
                          end;
                        asml.remove(hp2);
                        hp2.free;
                        result:=true;
                        exit;
                      end;
                  end
                else
                  begin
                    { check further for
                            jCC   xxx
                            <several movs 1>
                            jmp   yyy
                    xxx:
                            <several movs 2>
                    yyy:
                    }
                    { hp2 points to jmp yyy }
                    hp2:=hp1;
                    { skip hp1 to xxx }
                    GetNextInstruction(hp1, hp1);
                    if assigned(hp2) and
                      assigned(hp1) and
                      (l<=3) and
                      (hp2.typ=ait_instruction) and
                      (taicpu(hp2).is_jmp) and
                      (taicpu(hp2).condition=C_None) and
                      { real label and jump, no further references to the
                        label are allowed }
                      (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
                      FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                      begin
                        l:=0;
                        { skip hp1 to <several moves 2> }
                        GetNextInstruction(hp1, hp1);
                        while assigned(hp1) and
                          CanBeCMOV(hp1) do
                          begin
                            inc(l);
                            GetNextInstruction(hp1, hp1);
                          end;
                        { hp1 points to yyy: }
                        if assigned(hp1) and
                          FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                          begin
                            { first branch: cmovs on the inverted condition }
                            condition:=inverse_cond(taicpu(p).condition);
                            GetNextInstruction(p,hp1);
                            hp3:=p;
                            p:=hp1;
                            repeat
                              taicpu(hp1).opcode:=A_CMOVcc;
                              taicpu(hp1).condition:=condition;
                              GetNextInstruction(hp1,hp1);
                            until not(assigned(hp1)) or
                              not(CanBeCMOV(hp1));
                            { hp2 is still at jmp yyy }
                            GetNextInstruction(hp2,hp1);
                            { hp2 is now at xxx: }
                            { second branch: cmovs on the original condition }
                            condition:=inverse_cond(condition);
                            GetNextInstruction(hp1,hp1);
                            { hp1 is now at <several movs 2> }
                            repeat
                              taicpu(hp1).opcode:=A_CMOVcc;
                              taicpu(hp1).condition:=condition;
                              GetNextInstruction(hp1,hp1);
                            until not(assigned(hp1)) or
                              not(CanBeCMOV(hp1));
                            {
                            asml.remove(hp1.next)
                            hp1.next.free;
                            asml.remove(hp1);
                            hp1.free;
                            }
                            { remove jCC }
                            tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                            asml.remove(hp3);
                            hp3.free;
                            { remove jmp }
                            tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                            asml.remove(hp2);
                            hp2.free;
                            result:=true;
                            exit;
                          end;
                      end;
                  end;
              end;
          end;
{$endif i8086}
      end;
    { First-pass optimizations for movsx/movzx:
        1. movsX/movzX src,reg2; <arith> y,reg2; mov reg2,src (reg2 dead)
           -> <arith> y,src with the smaller operand size;
        2. removes "and" masks after movzx that are implied by the zero
           extension;
        3. rewrites register-to-register movzx into faster/equivalent
           and/mov+and forms, and movzx-from-memory followed by an and into
           a plain mov with the mask folded into the and.
      Returns the unchanged initial false in all paths (the list is still
      rewritten in place). }
    function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
      var
        hp1,hp2: tai;   { the two instructions following p }
      begin
        result:=false;
        if (taicpu(p).oper[1]^.typ = top_reg) and
          GetNextInstruction(p,hp1) and
          (hp1.typ = ait_instruction) and
          IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
          GetNextInstruction(hp1,hp2) and
          MatchInstruction(hp2,A_MOV,[]) and
          (taicpu(hp2).oper[0]^.typ = top_reg) and
          OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
{$ifdef i386}
          { not all registers have byte size sub registers on i386 }
          ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
{$endif i386}
          (((taicpu(hp1).ops=2) and
            (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
           ((taicpu(hp1).ops=1) and
            (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
          not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
          begin
            { change movsX/movzX reg/ref, reg2
                     add/sub/or/... reg3/$const, reg2
                     mov reg2 reg/ref
              to     add/sub/or/... reg3/$const, reg/ref }
            { by example:
                movswl  %si,%eax        movswl  %si,%eax      p
                decl    %eax            addl    %edx,%eax     hp1
                movw    %ax,%si         movw    %ax,%si       hp2
              ->
                movswl  %si,%eax        movswl  %si,%eax      p
                decw    %eax            addw    %edx,%eax     hp1
                movw    %ax,%si         movw    %ax,%si       hp2
            }
            { shrink the arith op to the final (smaller) size }
            taicpu(hp1).changeopsize(taicpu(hp2).opsize);
            {
              ->
                movswl  %si,%eax        movswl  %si,%eax      p
                decw    %si             addw    %dx,%si       hp1
                movw    %ax,%si         movw    %ax,%si       hp2
            }
            { retarget the arith op's destination to the original location }
            case taicpu(hp1).ops of
              1:
               taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
              2:
                begin
                  taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
                  if (taicpu(hp1).oper[0]^.typ = top_reg) then
                    setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                end;
              else
                internalerror(2008042701);
            end;
            {
              ->
                decw    %si             addw    %dx,%si       p
            }
            DebugMsg('PeepHole Optimization,var3',p);
            asml.remove(p);
            asml.remove(hp2);
            p.free;
            hp2.free;
            p:=hp1;
          end
        { removes superfluous And's after movzx's }
        else if taicpu(p).opcode=A_MOVZX then
          begin
            if (taicpu(p).oper[1]^.typ = top_reg) and
              GetNextInstruction(p, hp1) and
              (tai(hp1).typ = ait_instruction) and
              (taicpu(hp1).opcode = A_AND) and
              (taicpu(hp1).oper[0]^.typ = top_const) and
              (taicpu(hp1).oper[1]^.typ = top_reg) and
              (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
              begin
                { the and is redundant when its mask equals the zero-extended
                  width of the movzx source }
                case taicpu(p).opsize Of
                  S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
                    if (taicpu(hp1).oper[0]^.val = $ff) then
                      begin
                        DebugMsg('PeepHole Optimization,var4',p);
                        asml.remove(hp1);
                        hp1.free;
                      end;
                  S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
                    if (taicpu(hp1).oper[0]^.val = $ffff) then
                      begin
                        DebugMsg('PeepHole Optimization,var5',p);
                        asml.remove(hp1);
                        hp1.free;
                      end;
{$ifdef x86_64}
                  S_LQ:
                    if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                      begin
                        if (cs_asm_source in current_settings.globalswitches) then
                          asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var6')),p);
                        asml.remove(hp1);
                        hp1.Free;
                      end;
{$endif x86_64}
                end;
              end;
            { changes some movzx constructs to faster synonims (all examples
              are given with eax/ax, but are also valid for other registers)}
            if (taicpu(p).oper[1]^.typ = top_reg) then
              if (taicpu(p).oper[0]^.typ = top_reg) then
                case taicpu(p).opsize of
                  S_BW:
                    begin
                      if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                        not(cs_opt_size in current_settings.optimizerswitches) then
                        {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
                        begin
                          taicpu(p).opcode := A_AND;
                          taicpu(p).changeopsize(S_W);
                          taicpu(p).loadConst(0,$ff);
                          DebugMsg('PeepHole Optimization,var7',p);
                        end
                      else if GetNextInstruction(p, hp1) and
                        (tai(hp1).typ = ait_instruction) and
                        (taicpu(hp1).opcode = A_AND) and
                        (taicpu(hp1).oper[0]^.typ = top_const) and
                        (taicpu(hp1).oper[1]^.typ = top_reg) and
                        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { Change "movzbw %reg1, %reg2; andw $const, %reg2"
                          to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
                        begin
                          DebugMsg('PeepHole Optimization,var8',p);
                          taicpu(p).opcode := A_MOV;
                          taicpu(p).changeopsize(S_W);
                          setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                        end;
                    end;
                  S_BL:
                    begin
                      if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                        not(cs_opt_size in current_settings.optimizerswitches) then
                        { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
                        begin
                          taicpu(p).opcode := A_AND;
                          taicpu(p).changeopsize(S_L);
                          taicpu(p).loadConst(0,$ff)
                        end
                      else if GetNextInstruction(p, hp1) and
                        (tai(hp1).typ = ait_instruction) and
                        (taicpu(hp1).opcode = A_AND) and
                        (taicpu(hp1).oper[0]^.typ = top_const) and
                        (taicpu(hp1).oper[1]^.typ = top_reg) and
                        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { Change "movzbl %reg1, %reg2; andl $const, %reg2"
                          to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
                        begin
                          DebugMsg('PeepHole Optimization,var10',p);
                          taicpu(p).opcode := A_MOV;
                          taicpu(p).changeopsize(S_L);
                          { do not use R_SUBWHOLE
                            as movl %rdx,%eax
                            is invalid in assembler PM }
                          setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                        end
                    end;
{$ifndef i8086}
                  S_WL:
                    begin
                      if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                        not(cs_opt_size in current_settings.optimizerswitches) then
                        { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
                        begin
                          DebugMsg('PeepHole Optimization,var11',p);
                          taicpu(p).opcode := A_AND;
                          taicpu(p).changeopsize(S_L);
                          taicpu(p).loadConst(0,$ffff);
                        end
                      else if GetNextInstruction(p, hp1) and
                        (tai(hp1).typ = ait_instruction) and
                        (taicpu(hp1).opcode = A_AND) and
                        (taicpu(hp1).oper[0]^.typ = top_const) and
                        (taicpu(hp1).oper[1]^.typ = top_reg) and
                        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { Change "movzwl %reg1, %reg2; andl $const, %reg2"
                          to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
                        begin
                          DebugMsg('PeepHole Optimization,var12',p);
                          taicpu(p).opcode := A_MOV;
                          taicpu(p).changeopsize(S_L);
                          { do not use R_SUBWHOLE
                            as movl %rdx,%eax
                            is invalid in assembler PM }
                          setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                        end;
                    end;
{$endif i8086}
                end
              else if (taicpu(p).oper[0]^.typ = top_ref) then
                begin
                  { movzx (mem),reg followed by and $const,reg:
                    load with a plain mov and fold the extension mask into
                    the and's constant }
                  if GetNextInstruction(p, hp1) and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    MatchOpType(taicpu(hp1),top_const,top_reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    begin
                      taicpu(p).opcode := A_MOV;
                      case taicpu(p).opsize Of
                        S_BL:
                          begin
                            DebugMsg('PeepHole Optimization,var13',p);
                            taicpu(p).changeopsize(S_L);
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                          end;
                        S_WL:
                          begin
                            DebugMsg('PeepHole Optimization,var14',p);
                            taicpu(p).changeopsize(S_L);
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                          end;
                        S_BW:
                          begin
                            DebugMsg('PeepHole Optimization,var15',p);
                            taicpu(p).changeopsize(S_W);
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                          end;
{$ifdef x86_64}
                        S_BQ:
                          begin
                            DebugMsg('PeepHole Optimization,var16',p);
                            taicpu(p).changeopsize(S_Q);
                            taicpu(hp1).loadConst(
                              0, taicpu(hp1).oper[0]^.val and $ff);
                          end;
                        S_WQ:
                          begin
                            DebugMsg('PeepHole Optimization,var17',p);
                            taicpu(p).changeopsize(S_Q);
                            taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
                          end;
                        S_LQ:
                          begin
                            DebugMsg('PeepHole Optimization,var18',p);
                            taicpu(p).changeopsize(S_Q);
                            taicpu(hp1).loadConst(
                              0, taicpu(hp1).oper[0]^.val and $ffffffff);
                          end;
{$endif x86_64}
                        else
                          Internalerror(2017050704)
                      end;
                    end;
                end;
          end;
      end;
function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  { Pass-1 peephole optimizations for an AND instruction at p.
    Handled patterns (each looks only one instruction ahead):
      1. and const1,reg / and const2,reg -> and (const1 and const2),reg
      2. and const,reg / movzx reg,reg   -> and const,reg  (extension removed)
      3. and const,reg / movsx reg,reg   -> and const,reg  (extension removed)
      4. and x,reg / jcc                 -> test x,reg / jcc (reg dead after)
    Returns true only for pattern 1, where p itself is replaced. }
  var
    hp1 : tai;
  begin
    Result:=false;
    { all patterns need the following instruction; nothing to do at block end }
    if not(GetNextInstruction(p, hp1)) then
      exit;
    if MatchOpType(taicpu(p),top_const,top_reg) and
      MatchInstruction(hp1,A_AND,[]) and
      MatchOpType(taicpu(hp1),top_const,top_reg) and
      (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
      { the second register must contain the first one, so compare their subreg types }
      (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
      { keep the combined mask inside the signed 32-bit immediate range }
      (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
      { change
          and const1, reg
          and const2, reg
        to
          and (const1 and const2), reg
      }
      begin
        { fold both masks into the second AND, then drop the first one }
        taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
        DebugMsg('Peephole AndAnd2And done',hp1);
        asml.remove(p);
        p.Free;
        p:=hp1;
        Result:=true;
        exit;
      end
    else if MatchOpType(taicpu(p),top_const,top_reg) and
      MatchInstruction(hp1,A_MOVZX,[]) and
      (taicpu(hp1).oper[0]^.typ = top_reg) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
      { movzx must extend a subregister of the same register into itself }
      (getsubreg(taicpu(hp1).oper[0]^.reg)=getsubreg(taicpu(hp1).oper[1]^.reg)) and
      { operand sizes must pair up: and.w+movzx.bw, and.l+movzx.wl/bl, ... }
      (((taicpu(p).opsize=S_W) and
        (taicpu(hp1).opsize=S_BW)) or
       ((taicpu(p).opsize=S_L) and
        (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
       or
       ((taicpu(p).opsize=S_Q) and
        (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
      ) then
      begin
        { if the AND mask already fits in the movzx source width, the upper
          bits are known to be zero and the zero-extension is a no-op }
        if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
            ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
           ) or
           (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
            ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
           or
           (((taicpu(hp1).opsize)=S_LQ) and
            ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
           )
{$endif x86_64}
           then
          begin
            DebugMsg('Peephole AndMovzToAnd done',p);
            asml.remove(hp1);
            hp1.free;
          end;
      end
    else if MatchOpType(taicpu(p),top_const,top_reg) and
      MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
      (taicpu(hp1).oper[0]^.typ = top_reg) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
      (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
      (((taicpu(p).opsize=S_W) and
        (taicpu(hp1).opsize=S_BW)) or
       ((taicpu(p).opsize=S_L) and
        (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
       or
       ((taicpu(p).opsize=S_Q) and
        (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
      ) then
      begin
        { same idea as the movzx case, but the mask must also clear the sign
          bit of the source width ($7f/$7fff/$7fffffff) so that sign- and
          zero-extension coincide and the movsx is a no-op }
        if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
            ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
           ) or
           (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
            ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
           or
           (((taicpu(hp1).opsize)=S_LQ) and
            ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
           )
{$endif x86_64}
           then
          begin
            DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
            asml.remove(hp1);
            hp1.free;
          end;
      end
    else if (taicpu(p).oper[1]^.typ = top_reg) and
      (hp1.typ = ait_instruction) and
      (taicpu(hp1).is_jmp) and
      (taicpu(hp1).opcode<>A_JMP) and
      not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
      { change
          and x, reg
          jxx
        to
          test x, reg
          jxx
        if reg is deallocated before the
        jump, but only if it's a conditional jump (PFV)
      }
      taicpu(p).opcode := A_TEST;
  end;
  2491. function TX86AsmOptimizer.PostPeepholeOptMov(const p : tai) : Boolean;
  2492. begin
  2493. Result:=false;
  2494. if (taicpu(p).oper[1]^.typ = Top_Reg) and
  2495. not(RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  2496. begin
  2497. if (taicpu(p).oper[0]^.typ = top_const) then
  2498. begin
  2499. case taicpu(p).oper[0]^.val of
  2500. 0:
  2501. begin
  2502. { change "mov $0,%reg" into "xor %reg,%reg" }
  2503. taicpu(p).opcode := A_XOR;
  2504. taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
  2505. end;
  2506. $1..$FFFFFFFF:
  2507. begin
  2508. { Code size reduction by J. Gareth "Kit" Moreton }
  2509. { change 64-bit register to 32-bit register to reduce code size (upper 32 bits will be set to zero) }
  2510. case taicpu(p).opsize of
  2511. S_Q:
  2512. begin
  2513. DebugMsg('Peephole Optimization: movq x,%reg -> movd x,%reg (x is a 32-bit constant)', p);
  2514. TRegisterRec(taicpu(p).oper[1]^.reg).subreg := R_SUBD;
  2515. taicpu(p).opsize := S_L;
  2516. end;
  2517. end;
  2518. end;
  2519. end;
  2520. end;
  2521. end;
  2522. end;
  2523. function TX86AsmOptimizer.PostPeepholeOptCmp(var p : tai) : Boolean;
  2524. begin
  2525. Result:=false;
  2526. { change "cmp $0, %reg" to "test %reg, %reg" }
  2527. if MatchOpType(taicpu(p),top_const,top_reg) and
  2528. (taicpu(p).oper[0]^.val = 0) then
  2529. begin
  2530. taicpu(p).opcode := A_TEST;
  2531. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  2532. Result:=true;
  2533. end;
  2534. end;
  2535. procedure TX86AsmOptimizer.OptReferences;
  2536. var
  2537. p: tai;
  2538. i: Integer;
  2539. begin
  2540. p := BlockStart;
  2541. while (p <> BlockEnd) Do
  2542. begin
  2543. if p.typ=ait_instruction then
  2544. begin
  2545. for i:=0 to taicpu(p).ops-1 do
  2546. if taicpu(p).oper[i]^.typ=top_ref then
  2547. optimize_ref(taicpu(p).oper[i]^.ref^,false);
  2548. end;
  2549. p:=tai(p.next);
  2550. end;
  2551. end;
  2552. end.